class UrlManager(object):
    """Track which URLs are still pending and which were already handed out.

    ``new_urls`` holds URLs waiting to be fetched; ``old_urls`` holds URLs
    that have already been returned by :meth:`get_new_url`. A URL is never
    queued again once it is present in either set.
    """

    def __init__(self):
        # NOTE: this was previously misspelled as ``__init____``, so Python
        # never invoked it and neither set existed on the instance.
        self.new_urls = set()
        self.old_urls = set()

    def add_new_url(self, url):
        """Queue *url* unless it is ``None``, already queued, or already used."""
        if url is None:
            return
        if url not in self.new_urls and url not in self.old_urls:
            self.new_urls.add(url)

    def add_new_urls(self, urls):
        """Queue every URL in *urls*; ``None`` or an empty collection is a no-op."""
        if not urls:
            return
        for url in urls:
            self.add_new_url(url)

    def has_new_url(self):
        """Return ``True`` while at least one URL is still pending."""
        return bool(self.new_urls)

    def get_new_url(self):
        """Remove one pending URL, record it as used, and return it.

        Raises:
            KeyError: if no URL is pending (``set.pop`` on an empty set).
        """
        new_url = self.new_urls.pop()
        self.old_urls.add(new_url)
        return new_url


# Traceback reported with the original code, where the constructor was
# misspelled as ``__init____`` and therefore never ran:
#
# Traceback (most recent call last):
#   File "E:\java\imooc\baike_spider\spider_main.py", line 38, in <module>
#     obj_spider.craw(root_url)
#   File "E:\java\imooc\baike_spider\spider_main.py", line 13, in craw
#     self.urls.add_new_url(root_url)
#   File "E:\java\imooc\baike_spider\url_manager.py", line 9, in add_new_url
#     if url not in self.new_urls and url not in self.old_urls:
# AttributeError: 'UrlManager' object has no attribute 'new_urls'
添加回答
舉報
0/150
提交
取消