runfile('C:/Users/Administrator/Desktop/新建文件夾/SpiderMain.py',?wdir='C:/Users/Administrator/Desktop/新建文件夾')
Reloaded?modules:?html_downloader,?html_outputer,?html_parser,?url_manager
Traceback?(most?recent?call?last):
??File?"<ipython-input-21-acc2b5e5b102>",?line?1,?in?<module>
????runfile('C:/Users/Administrator/Desktop/新建文件夾/SpiderMain.py',?wdir='C:/Users/Administrator/Desktop/新建文件夾')
??File?"C:\ProgramData\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py",?line?880,?in?runfile
????execfile(filename,?namespace)
??File?"C:\ProgramData\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py",?line?102,?in?execfile
????exec(compile(f.read(),?filename,?'exec'),?namespace)
??File?"C:/Users/Administrator/Desktop/新建文件夾/SpiderMain.py",?line?56,?in?<module>
????obj_spider.craw(root_url)
??File?"C:/Users/Administrator/Desktop/新建文件夾/SpiderMain.py",?line?27,?in?craw
????self.urls.add_new_url(root_url)
AttributeError:?'SpiderMain'?object?has?no?attribute?'urls'
上面這是報(bào)錯(cuò)的,然后下面的代碼。
import?html_downloader
import?html_outputer
import?html_parser
import?url_manager
class?SpiderMain(object):
????def?_init_(self):
????????#初始化url管理器
????????self.urls=url_manager.UrlManager()
????????#初始化url下載器
????????self.downloader=html_downloader.HtmlDownloader()
????????#初始化url解析器
????????self.parser=html_parser.HtmlParser()
????????#初始化url輸出
????????self.outputer=html_outputer.HtmlOutputer()
????????
????def?craw(self,root_url):
????????count=1
????????#url管理器中添加一個(gè)new?url
????????self.urls.add_new_url(root_url)
????????#判斷是否有新的url?開(kāi)始爬去
????????while?self.urls.has_new_url():
????????????try:
????????????????#得到新的url
????????????????new_url=self.urls.get_new_url()
????????????????print?('craw?%d:%s'?%?(count,new_url))
????????????????#下載新的url數(shù)據(jù)
????????????????html_cont=self.downloader.download(new_url)
????????????????#解析出來(lái)url的內(nèi)容和地址
????????????????new_urls,new_data=self.parser.parse(new_url,html_cont)
????????????????#新的url補(bǔ)充到url管理器
????????????????self.urls.add_new_urls(new_urls)
????????????????#輸出數(shù)據(jù)
????????????????self.outputer.collect_data(new_data)
????????????????
????????????????if?count==1000:
????????????????????print?("finished")
????????????????????break
????????????????count=count+1
????????????????print?(count)?
????????????except:
????????????????print?("ceaw?failed!")
????????self.outputer.output_html()
????????
if?__name__=="__main__":
????root_url="http://baike.baidu.com/view/21087.htm"
????obj_spider=SpiderMain()
????obj_spider.craw(root_url)
報(bào)錯(cuò)'SpiderMain' object has no attribute 'urls'。
Phenomenal_0
2017-08-02 15:24:02