from lxml import etree
import requests
# Scrape posts from Qiushibaike (qiushibaike.com) and save them to a text file.

# Listing URL template; "{}" is replaced with the 1-based page number.
PAGE_URL = "https://www.qiushibaike.com/8hr/page/{}/"
OUTPUT_FILE = 'xiushibaike_spider.txt'
# requests waits indefinitely unless a timeout is given, so always pass one.
REQUEST_TIMEOUT = 10  # seconds
# Built once instead of on every page iteration.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36"
    )
}
# Placeholder written when a field is absent from a post.
UNKNOWN = "不明"

XPATH_POSTS = '//div[@id="content-left"]/div'
XPATH_AUTHOR = './/h2/text()'
XPATH_AGE = './/div[contains(@class, "articleGender")]/text()'
XPATH_CONTENT = './/span/text()'
XPATH_GENDER_CLASS = './/div[contains(@class, "articleGender")]/@class'
XPATH_FUNNY = './/span[@class="stats-vote"]/i/text()'
XPATH_COMMENTS = './/a[@class="qiushi_comments"]/i/text()'


def first_text(nodes, default=""):
    """Return the first non-blank XPath text result, stripped.

    An XPath query always yields a list, and that list is empty when the
    node does not exist. Indexing it with ``[0]`` unconditionally is what
    raised ``IndexError`` for ``age``: some posts have no ``articleGender``
    element (presumably posts by anonymous authors -- verify against the
    live page).

    Args:
        nodes: list of strings returned by an XPath ``text()``/attribute query.
        default: value returned when the list is empty or only whitespace.
    """
    for node in nodes:
        text = node.strip()
        if text:
            return text
    return default


def gender_label(class_attrs):
    """Translate the ``articleGender`` div's ``@class`` result into a label.

    ``class_attrs`` is the *list* returned by the XPath query, so it must be
    unpacked before it is compared with a string; comparing the list itself
    with a str is always False.
    """
    tokens = class_attrs[0].split() if class_attrs else []
    if 'manIcon' in tokens:
        return '男'
    if 'womenIcon' in tokens:
        return '女'
    return "性別不明"


def parse_post(div):
    """Extract the fields of one post element.

    Args:
        div: an element exposing ``xpath(expr)`` (an lxml element in practice).

    Returns:
        A dict with the keys ``author``, ``gender``, ``age``, ``content``,
        ``funny`` and ``comments`` (all str), or ``None`` when the element
        contains no span text and therefore is not treated as a post.
    """
    content = first_text(div.xpath(XPATH_CONTENT))
    if not content:
        return None
    return {
        'author': first_text(div.xpath(XPATH_AUTHOR), UNKNOWN),
        'gender': gender_label(div.xpath(XPATH_GENDER_CLASS)),
        'age': first_text(div.xpath(XPATH_AGE), UNKNOWN),
        'content': content,
        'funny': first_text(div.xpath(XPATH_FUNNY), '0'),
        'comments': first_text(div.xpath(XPATH_COMMENTS), '0'),
    }


def format_post(post):
    """Render a dict produced by ``parse_post`` as the three-line file entry."""
    up = ('作者:' + post['author'] + '\t性別:' + post['gender']
          + '\t年齡:' + post['age'])
    bottom = '好笑數:' + post['funny'] + '\t評論數:' + post['comments']
    return up + '\n' + post['content'] + '\n' + bottom + '\n'


def main():
    """Prompt for a page count, scrape that many pages and write the result."""
    # int() instead of eval(): eval would execute whatever the user types.
    page = int(input("請輸入需要爬取的總頁數:"))
    chunks = []  # joined once at the end instead of repeated string +=
    for p in range(1, page + 1):
        url = PAGE_URL.format(p)
        print(url)
        res = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        tree = etree.HTML(res.text)
        chunks.append(url + '\n')
        if tree is None:
            # etree.HTML returns None for an empty document.
            continue
        for div in tree.xpath(XPATH_POSTS):
            post = parse_post(div)
            if post is None:
                continue
            chunks.append(format_post(post))
            # Progress/debug output, one line per extracted field.
            print('author:', post['author'])
            print('age:', post['age'])
            print('content:', post['content'])
            print('funny:', post['funny'])
            print('conment:', post['comments'])

    with open(OUTPUT_FILE, 'w', encoding="utf-8") as f:
        f.write("".join(chunks))


if __name__ == "__main__":
    main()

# --- Original poster's questions (translated) ---
# 1. Five fields are scraped and every one of them is a list; why does only
#    age[0] raise "IndexError: list index out of range"?
#    -> The age XPath matches nothing for some posts, so the list is empty;
#       see first_text().
# 2. In the xinbie (gender) check a list cannot be compared with a string;
#    what is the correct way to write it?
#    -> Take the first element of the list before comparing; see
#       gender_label().
關于list的索引?
錯過了年華
2018-03-24 22:52:16