import requests
from bs4 import BeautifulSoup  # the 'lxml' parser only requires the lxml package to be installed, not imported
# url = 'http://www.qiushibaike.com/'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'}
urls = []
for i in range(1, 36):
    url = 'http://www.qiushibaike.com/8hr/page/' + str(i) + '/?s=4940923'
    urls.append(url)
def get_substance(url):
    web_data = requests.get(url, headers=headers)
    # requests has already decoded the body; re-encoding it to bytes is unnecessary
    soup = BeautifulSoup(web_data.text, 'lxml')
    names = soup.select('.author > a > h2')
    numberds = soup.select('.stats-vote > .number')
    contents = soup.select('.content > span')

    for name, numberd, content in zip(names, numberds, contents):
        data = {
            'name': list(name.stripped_strings),
            'numberd': list(numberd.stripped_strings),
            'content': list(content.stripped_strings),
        }
        # print inside the loop; with print outside it and a data=None default
        # parameter, an empty selector result prints None exactly once
        print(data)
for single_url in urls:
    get_substance(single_url)
Python 3.5: scraping Qiushibaike returns None. How do I fix this?
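The printed None most likely comes from the combination of the data=None default parameter and print(data) sitting outside the for loop in the original code: if the CSS selectors match nothing (the request was rejected, or the page layout changed), the loop body never runs, data keeps its default None, and that is what gets printed. A minimal diagnostic sketch, reusing the URL and selectors from the question, to check which step fails:

import requests
from bs4 import BeautifulSoup

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                         '(KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'}

# Fetch a single list page and report whether each step produced anything.
resp = requests.get('http://www.qiushibaike.com/8hr/page/1/?s=4940923', headers=headers)
print(resp.status_code)  # anything other than 200 means the request itself was rejected

soup = BeautifulSoup(resp.text, 'lxml')
print(len(soup.select('.author > a > h2')))       # 0 means this selector no longer matches the page
print(len(soup.select('.stats-vote > .number')))
print(len(soup.select('.content > span')))

If the status code is 200 but the selector counts are 0, the page markup has changed and the selectors need to be updated against the current HTML.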