# 拿到下一頁的鏈接就是沒有翻頁
# -*- coding: utf-8 -*-
import scrapy
from Lz.items import LzItem
class LzSpiderSpider(scrapy.Spider):
    """Spider for the listing pages under ``xxgk.linzhang.gov.cn/zxxxgk/``.

    Crawling starts at ``index_1.html``. Every table row found inside the
    ``lzgk_wenjianlist`` container produces one ``LzItem``; afterwards the
    pager link is followed so the next listing page is parsed the same way.
    """

    name = 'Lz_spider'
    allowed_domains = ['xxgk.linzhang.gov.cn']
    start_urls = ['http://xxgk.linzhang.gov.cn/zxxxgk/index_1.html']

    def parse(self, response):
        """Yield one item per listing row, then a request for the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            ``LzItem`` objects with ``title`` (text of the link in the first
            cell) and ``times`` (text of the fourth cell); a field is ``None``
            when the row has no matching node. If a next-page href is found,
            a ``scrapy.Request`` for it is yielded with this method as the
            callback.
        """
        # XPath must start with "//" (descendant search from the document
        # root); an "http://" prefix is not valid XPath.
        rows = response.xpath("//div[@class='lzgk_wenjianlist']/table//tr")
        for content in rows:
            lz_item = LzItem()
            lz_item['title'] = content.xpath("./td[1]/a/text()").extract_first()
            lz_item['times'] = content.xpath("./td[4]/text()").extract_first()
            yield lz_item

        # NOTE(review): a[7] assumes the "next page" anchor is always the
        # seventh link in the pager -- confirm against the live markup.
        next_link = response.xpath(
            "//div[@class='page']/a[7]/@href"
        ).extract_first()
        if next_link:
            # urljoin resolves relative, root-relative and absolute hrefs
            # against the current page URL instead of a hard-coded base.
            yield scrapy.Request(
                response.urljoin(next_link),
                callback=self.parse,
            )