課程
                    
                        /後端開發
                        
                            /Python
                        
                        /Python開發簡單爬蟲

module 'url_manager' has no attribute 'UrlManager'

#這是我的主代碼，

#coding:utf-8
import url_manager,html_downloader,html_parser,html_outputer

class SpiderMain(object):
    """Crawler scheduler that wires together the four collaborating parts.

    The instance holds a URL manager, a downloader, a parser and an
    outputer, and ``craw`` drives them in a loop.
    """

    def __init__(self):
        """Create the collaborating objects used by ``craw``."""
        self.urls = url_manager.UrlManager()  # URL manager
        self.downloader = html_downloader.HtmlDownloader()  # downloader
        self.parser = html_parser.HtmlParser()  # parser
        self.outputer = html_outputer.HtmlOutputer()  # outputer

    def craw(self, root_url, max_count=1000):
        """Crawl starting from ``root_url`` and then write the output.

        Args:
            root_url: First URL handed to the URL manager.
            max_count: Stop after this many pages have been processed
                successfully. Defaults to 1000, the previously hard-coded
                limit. At least one page is always attempted.

        A page that raises an ordinary exception is reported and skipped
        without counting toward ``max_count``. ``KeyboardInterrupt`` and
        ``SystemExit`` are deliberately not caught, so the crawl can be
        interrupted.
        """
        count = 1
        self.urls.add_new_url(root_url)
        while self.urls.has_new_url():
            try:
                new_url = self.urls.get_new_url()
                print('craw %d:%s' % (count, new_url))
                html_cont = self.downloader.download(new_url)
                new_urls, new_data = self.parser.parse(new_url, html_cont)
                self.urls.add_new_urls(new_urls)
                self.outputer.collect_data(new_data)
            except Exception as exc:
                # Best-effort crawl: report why this page failed and move
                # on to the next pending URL.
                print('craw failed: %r' % (exc,))
                continue

            if count >= max_count:
                break
            count += 1

        self.outputer.output_html()

if __name__ == "__main__":
    # Script entry point: build the spider, then start crawling from the
    # Baidu Baike page below.
    obj_spider = SpiderMain()
    root_url = "https://baike.baidu.com/item/python/407313"
    obj_spider.craw(root_url)

class UrlManager(object):
    """Track which URLs are still pending and which were already handed out.

    ``new_urls`` holds URLs waiting to be fetched; ``old_urls`` holds URLs
    that ``get_new_url`` has already returned. Both are sets, so a URL is
    stored at most once and the order in which pending URLs are handed out
    is arbitrary.
    """

    def __init__(self):
        self.new_urls = set()
        self.old_urls = set()

    def add_new_url(self, url):
        """Queue ``url`` unless it is ``None``, already pending, or already handed out."""
        if url is None:
            return
        if url not in self.new_urls and url not in self.old_urls:
            self.new_urls.add(url)

    def add_new_urls(self, urls):
        """Queue every URL in ``urls``.

        ``urls`` may be ``None`` or any iterable, including one without a
        length such as a generator. ``None`` and empty iterables are
        ignored.
        """
        if urls is None:
            return
        for url in urls:
            self.add_new_url(url)

    def has_new_url(self):
        """Return ``True`` when at least one URL is still pending."""
        return bool(self.new_urls)

    def get_new_url(self):
        """Remove one pending URL, record it as handed out, and return it.

        Raises:
            KeyError: If no URL is pending; check ``has_new_url`` first.
        """
        new_url = self.new_urls.pop()
        self.old_urls.add(new_url)
        return new_url