報(bào)錯(cuò)求助,
from baike import url_manager, html_downloader, html_parser, html_outputer
class SpiderMain(object):
? ?def __init__(self):
? ? ? ?self.urls = url_manager.UrlManager()
? ? ? ?self.downloader = html_downloader.HtmlDownloader()
? ? ? ?self.parser = html_parser.HtmlParser()
? ? ? ?self.outputer = html_outputer.HtmlOutputer()
? ?def craw(self,root_url):
? ? ? ?count = 1
? ? ? ?self.urls.add_new_url(root_url)
? ? ? ?while self.urls.has_new_url():
? ? ? ? ? ?#try:
? ? ? ? ? ? ? ?new_url = self.urls.get_new_url()
? ? ? ? ? ? ? ?print 'craw %d : %s' % (count,new_url)
? ? ? ? ? ? ? ?html_cont = self.downloader.download(new_url)
? ? ? ? ? ? ? ?new_urls, new_data = self.parser.parse(new_url, html_cont)
? ? ? ? ? ? ? ?self.urls.add_new_urls(new_urls)
? ? ? ? ? ? ? ?self.outputer.collect_data(new_data)
? ? ? ? ? ? ? ?if count == 1000:
? ? ? ? ? ? ? ? ? ?break
? ? ? ? ? ? ? ?count = count + 1
? ? ? ? ? ?#except:
? ? ? ? ? ? ? ?#print "craw failed"
? ? ? ?self.outputer.output_html()
if __name__ == "__main__":
? ?root_url = "https://baike.baidu.com/item/Python/407313?fr=aladdin"
? ?obj_spider = SpiderMain()
? ?obj_spider.craw(root_url)
報(bào)錯(cuò)如下
C:\Python27\python.exe C:/Users/Administrator/PycharmProjects/baike/baike/spider_main.py
craw 1 : https://baike.baidu.com/item/Python/407313?fr=aladdin
Traceback (most recent call last):
? File "C:/Users/Administrator/PycharmProjects/baike/baike/spider_main.py", line 34, in <module>
? ? obj_spider.craw(root_url)
? File "C:/Users/Administrator/PycharmProjects/baike/baike/spider_main.py", line 19, in craw
? ? new_urls, new_data = self.parser.parse(new_url, html_cont)
TypeError: 'NoneType' object is not iterable
Process finished with exit code 1
2017-08-07
不要注釋掉 try 與? except ,? parser 出現(xiàn)了 None 結(jié)果情況