>>>?import?string
>>>?import?re
>>>?import?urllib2
>>>?class?DouBanSpider(object):
def?__init__(self):
self.page=1
self.cur_url='http://movie.douban.com/top250?strart={page}&filter=&type='
self.datas=[]
self._top_num=1
print?'豆瓣電影爬蟲準備完畢,準備爬?。?
>>>?def?get_page(self,cur_page):
url=self.cur_url
try:
my_page=urllib2.urlopen(url.format(page=(cur_page-1)*25)).read().decode('utf-8')
except?urllib2.URLError,e:
if?hasattr(e,'code'):
print?"The?Server?couldn't?fulfill?the?request."
print?"Error?code:%s"?%?e.code
elif?hasattr(e,'reason'):
print?'We?faied?to?reach?a?server.?Please?check?your?url?and?read?the?Reason'
print?'Reason:%s'?%?e.reason
return?my_page
>>>?def?find_title(self,my_page):
temp_data=[]
movie_items=re.findall(r'<span.*?class="title">)(.*?)</span>',my_page,re.S)
for?index,item?in?enumerate(movie_item):
if?item.find(' ')==-1:
temp_data.append('Top'+str(self._top_num)+'?'+item)
self._top_num?+=1
self.datas.extend(temp_data)
>>>?def?satrt_spider(self):
while?self.page<=4:
my_page=self.get_page(self.page)
self.find_title(my_page)
self.page?+=?1
>>>?def?main():
my_spider=DouBanSpider()
my_spider.start_spider()
for?item?in?my_spider.datas:
print?item
print?"豆瓣爬蟲結束"
>>>?if?__name__?=='__main__':
main()
豆瓣電影爬蟲準備完畢,準備爬?。?
Traceback?(most?recent?call?last):
??File?"<pyshell#27>",?line?2,?in?<module>
????main()
??File?"<pyshell#24>",?line?3,?in?main
????my_spider.start_spider()
AttributeError:?'DouBanSpider'?object?has?no?attribute?'start_spider'
以上,最后出現(xiàn)錯誤,哪里不對?
python 爬取豆瓣電影 前100哪里不對?
qq_飛月_04079078
2016-12-24 16:28:48