# -*- coding: utf-8 -*-
import scrapy

from douban.items import DoubanItem
class DoubanSpiderSpider(scrapy.Spider):
    """Spider that walks the Douban Top 250 movie list page by page.

    For every ``<li>`` entry of the ranking list, ``parse`` yields one
    ``DoubanItem`` with the fields ``serial_number``, ``movic_name``,
    ``introduce``, ``star``, ``evaluate`` and ``des``, and then follows the
    "next page" link until there is none left.
    """

    # Spider name.
    name = 'douban_spider'
    # Must match the host of start_urls; otherwise the follow-up page
    # requests are treated as off-site and filtered out before download.
    allowed_domains = ['movie.douban.com']
    start_urls = ['https://movie.douban.com/top250']

    def parse(self, response):
        """Extract all movie entries from one list page and follow pagination.

        Args:
            response: the downloaded list page.

        Yields:
            One ``DoubanItem`` per list entry, followed by a
            ``scrapy.Request`` for the next page when the page has one.
        """
        movie_list = response.xpath(
            "//div[@class='article']//ol[@class='grid_view']/li"
        )
        for i_item in movie_list:
            douban_item = DoubanItem()
            douban_item['serial_number'] = i_item.xpath(
                ".//div[@class='item']//em/text()"
            ).extract_first()
            # NOTE: 'movic_name' is the key as spelled in this file; it has to
            # match the field declared on DoubanItem (defined elsewhere).
            douban_item['movic_name'] = i_item.xpath(
                ".//div[@class='info']/div[@class='hd']/a/span[1]/text()"
            ).extract_first()

            # The first <p> may hold several text nodes, so take all of them.
            # Whitespace inside each fragment is removed and the non-empty
            # fragments are joined, so no fragment is lost.
            fragments = i_item.xpath(
                ".//div[@class='info']//div[@class='bd']/p[1]/text()"
            ).extract()
            cleaned = ("".join(fragment.split()) for fragment in fragments)
            douban_item['introduce'] = " ".join(part for part in cleaned if part)

            douban_item['star'] = i_item.xpath(
                ".//span[@class='rating_num']/text()"
            ).extract_first()
            # Fourth <span> inside the star block -- presumably the number of
            # ratings; verify against the live page markup.
            douban_item['evaluate'] = i_item.xpath(
                ".//div[@class='star']//span[4]/text()"
            ).extract_first()
            douban_item["des"] = i_item.xpath(
                ".//p[@class='quote']/span/text()"
            ).extract_first()
            yield douban_item

        # The href is relative to the current page (the original code appended
        # it to the list URL), so resolve it against the response URL.
        next_link = response.xpath(
            "//span[@class='next']/link/@href"
        ).extract_first()
        if next_link:
            yield scrapy.Request(response.urljoin(next_link), callback=self.parse)
2018-09-09
這個是 scrapy 中回調時請求被過濾了。解決方法：
在 allowed_domains 中加入該 url 的域名；或者在 scrapy.Request() 函數中將參數 dont_filter 設置為 True。
修改這一行代碼如下，我個人採取第二種方式 dont_filter=True：