global name 'BeautifulSoup' is not defined
#coding:utf8
"""Parse a downloaded HTML page into follow-up URLs and entry data."""

import re

try:
    import urlparse  # Python 2
except ImportError:
    # Python 3 moved urljoin into urllib.parse; bind it under the old module
    # name so the call sites below stay the same on both versions.
    from urllib import parse as urlparse

from bs4 import BeautifulSoup


class HtmlParser(object):
    """Extract ``/item/`` links, the title and the summary from a page."""

    def parse(self, page_url, html_cont):
        """Parse one page.

        Args:
            page_url: URL the page was fetched from. Used as the base for
                resolving links and stored in the returned data.
            html_cont: Markup of the page, handed to BeautifulSoup.

        Returns:
            A ``(new_urls, new_data)`` tuple: ``new_urls`` is a set of
            joined URLs, ``new_data`` is a dict with the keys ``url``,
            ``title`` and ``summary``. Returns ``None`` (not a tuple) when
            either argument is ``None``.
        """
        if page_url is None or html_cont is None:
            return None

        soup = BeautifulSoup(html_cont, 'html.parser', from_encoding='utf-8')
        new_urls = self._get_new_urls(page_url, soup)
        new_data = self._get_new_data(page_url, soup)
        return new_urls, new_data

    def _get_new_urls(self, page_url, soup):
        """Return the set of link targets whose href matches ``/item/``.

        Each href is joined against ``page_url`` so relative links come
        back as full URLs.
        """
        links = soup.find_all('a', href=re.compile(r"/item/(.*)"))
        return {urlparse.urljoin(page_url, link['href']) for link in links}

    def _get_new_data(self, page_url, soup):
        """Collect the page URL, title text and summary text into a dict.

        Raises:
            AttributeError: if the title or summary node is not present in
                the page (``soup.find`` yields ``None`` in that case).
        """
        res_data = {}
        res_data['url'] = page_url

        # Title lives in <dd class="lemmaWgt-lemmaTitle-title"><h1>...</h1>.
        title_node = soup.find('dd', class_="lemmaWgt-lemmaTitle-title").find("h1")
        res_data['title'] = title_node.get_text()

        # Summary lives in <div class="lemma-summary">.
        summary_node = soup.find('div', class_="lemma-summary")
        res_data['summary'] = summary_node.get_text()

        return res_data
在 html_parser 裡面加了:from bs4 import BeautifulSoup,但是還是報錯。神奇的是,前面測試 BeautifulSoup 的程序跑起來沒有問題。有大蝦幫忙看下怎麼回事麼?
錯誤如下:
NameError                                 Traceback (most recent call last)
/Users/yang/PythonSource/pachong/spider_main.py in <module>()
     34     root_url = "https://baike.baidu.com/item/Python/407313?fr=aladdin"
     35     obj_spider = SpiderMain()
---> 36     obj_spider.craw(root_url)
     37

/Users/yang/PythonSource/pachong/spider_main.py in craw(self, root_url)
     21             html_cont = self.downloader.download(new_url)
     22             print new_url
---> 23             new_urls, new_data = self.parser.parse(new_url, html_cont)
     24             self.urls.add_new_urls(new_urls)
     25             self.outputer.collect_data(new_data)

/Users/yang/PythonSource/pachong/html_parser.py in parse(self, page_url, html_cont)
     31         title_node = soup.find('dd',class_= "lemmaWgt-lemmaTitle-title").find("h1")
     32         res_data['title'] = title_node.get_text()
---> 33
     34         summary_node = soup.find('div', class_ = "lemma-summary")
     35         res_data['summary'] = summary_node.get_text()

NameError: global name 'BeautifulSoup' is not defined
2018-02-19
同學有沒有安裝 BeautifulSoup?