一直提示SyntaxError: invalid syntax
運行spider_main時一直提示:
? File "spider_main.py", line 22
??? html_cont = self.downloader.download(new_url)
? ? ? ? ? ? ? ? ? ^
SyntaxError: invalid syntax
求大神指導(dǎo)!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
spider_main代碼如下(我用的Grany):
#coding=gbk
import url_manager
import html_downloader
import html_parser
import html_outputer
class SpiderMain(object):
?def __init__(self):
??self.urls = url_manager.UrlManager()
??self.downloader = html_downloader.HtmlDownloader()
??self.parser = html_parser.HtmlParser()
??self.outputer = html_outputer.HtmlOutputer()
?
?def craw(self,root_url):
??count = 1
??self.urls.add_new_url(root_url)
??while self.urls.has_new_url():
???try:
????new_url = self.urls.get_new_url()
????print("craw %d: %s" %(count,new_url)
????html_cont = self.downloader.download(new_url)
????new_urls,new_data = self.parser.parse(new_url,html_cont)
????self.urls.add_new_urls(new_urls)
????self.outputer.collect_data(new_data)
????
????if count == 1000:
?????break
????
????count++
???
???except:
????print("craw failed!")
??
??self.outputer.output_html()
if __name__ == '__main__':
?root_url = "https://baike.baidu.com/item/Python/407313"
?obj_spider = SpiderMain()
?obj_spider.craw(root_url)
html_downloader代碼如下:
#coding=gbk
import urllib.request
class HtmlDownloader(object):
?
?def download(self,url):
??if url is None:
???return None
??response = urllib.request.urlopen(url)
??if response.get_code() != 200:
???return None
??return response.read()
2019-02-13