-
我也想要一份源代碼,還有安裝包查看全部
-
很呢查看全部
-
1554查看全部
-
源碼呢?想要一份源碼查看全部
-
有人知道數據溢出怎麼辦嘛,我按教程寫的爬蟲頁面提示棧溢出查看全部
-
做筆記了1
查看全部 -
基礎不牢,地動山搖
查看全部 -
排好隊做筆記查看全部
-
創建數據庫連接
查看全部 -
開啟多進程
查看全部 -
# -*- coding: utf-8 -*-
"""Crawl Python job postings from lagou.com city by city with a process pool."""
import json
import multiprocessing
import time

import requests
from lxml import etree


class HandelLaGou(object):
    """Session-based crawler for lagou.com job listings.

    Attributes are kept simple on purpose: ``handle_city_job`` is dispatched
    to worker processes through ``multiprocessing.Pool``.
    """

    # Substrings that mark the "operating too frequently" throttle page.
    # NOTE(review): the pasted source only had the Traditional form; the
    # Simplified form is added because the site presumably answers in
    # Simplified Chinese -- confirm against a real throttled response.
    _THROTTLE_MARKERS = ('频繁', '頻繁')

    def __init__(self):
        # One shared session so cookies set by the listing page are sent
        # with the follow-up Ajax requests.
        self.lagou_session = requests.session()
        self.header = {
            'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.4.154.18 Safari/525.19'
        }
        # Filled by handle_city(); empty until then.
        self.city_list = []

    def handle_city(self):
        """Fetch the all-cities page and store the city names in ``city_list``."""
        city_url = "https://www.lagou.com/jobs/allCity.html"
        city_result = self.handle_request(method="GET", url=city_url)
        city_names = city_result.xpath('//ul[contains(@class, "city_list")]/li/a/text()')
        # Convert lxml "smart strings" to plain str so the values handed to
        # the worker processes do not carry references into the parsed tree.
        self.city_list = [str(name) for name in city_names]
        # Drop the cookies from this request; each city starts with a fresh set.
        self.lagou_session.cookies.clear()

    def handle_city_job(self, city):
        """Print every Python job posting listed for ``city``.

        Loads the listing page first (which also sets the session cookies),
        reads the total page count from it, then POSTs to the Ajax endpoint
        once per page and prints each job dict.

        Returns None; returns early when the page count cannot be read.
        """
        first_request_url = "https://www.lagou.com/jobs/list_python?&px=default&city=%s" % city
        first_response = self.handle_request(method="GET", url=first_request_url)
        if first_response is None:
            return
        total_page = first_response.xpath('//span[contains(@class, "span totalNum")]/text()')
        print(total_page)
        # An empty or non-numeric result means there is nothing to page through.
        if not total_page:
            return
        try:
            page_count = int(total_page[0])
        except ValueError:
            return

        page_url = "https://www.lagou.com/jobs/positionAjax.json?px=default&city=%s&needAddtionalResult=false" % city
        referer_url = "https://www.lagou.com/jobs/list_python?&px=default&city=%s" % city
        # The city name may contain non-ASCII characters, so the header value
        # is sent as UTF-8 bytes.
        self.header['Referer'] = referer_url.encode('utf-8')

        for page_number in range(1, page_count + 1):
            data = {
                "pn": page_number,
                "kd": "python"
            }
            print(page_number)
            response = self.handle_request(method="POST", url=page_url, data=data, info=city)
            lagou_data = json.loads(response)
            job_list = lagou_data['content']['positionResult']['result']
            for job in job_list:
                print(job)

    def handle_request(self, method, url, data=None, info=None):
        """Send a request through the shared session, retrying while throttled.

        Args:
            method: "GET" or "POST".
            url: Target URL.
            data: Form data for POST requests.
            info: City name used to rebuild the listing URL when cookies
                have to be refreshed after a throttle response.

        Returns:
            For GET, the page parsed by ``etree.HTML``; for POST, the raw
            response text.

        Raises:
            ValueError: If ``method`` is neither "GET" nor "POST".
        """
        while True:
            if method == "GET":
                response = self.lagou_session.get(url=url, headers=self.header)
                item = etree.HTML(response.text)
            elif method == "POST":
                response = self.lagou_session.post(url=url, headers=self.header, data=data)
                item = response.text
            else:
                raise ValueError("unsupported HTTP method: %r" % (method,))

            if any(marker in response.text for marker in self._THROTTLE_MARKERS):
                # Throttled: start over with fresh cookies, wait, then retry.
                self.lagou_session.cookies.clear()
                first_request_url = "https://www.lagou.com/jobs/list_python?&px=default&city=%s" % info
                # Plain session call instead of re-entering handle_request():
                # the recursive call could itself be throttled and recurse
                # without bound until the stack overflowed.
                self.lagou_session.get(url=first_request_url, headers=self.header)
                time.sleep(15)
                continue
            return item


if __name__ == '__main__':
    lagou = HandelLaGou()
    lagou.handle_city()
    pool = multiprocessing.Pool(2)
    for city in lagou.city_list:
        pool.apply_async(lagou.handle_city_job, args=(city,))
    pool.close()
    pool.join()
查看全部 -
import requests
from lxml import etree


class HandelLaGou(object):
    """Early-stage lagou.com crawler: only the city list is fetched so far."""

    def __init__(self):
        # Shared session so cookies persist between requests.
        self.lagou_session = requests.session()
        # Was misspelled "hander"; renamed to match its use as request headers.
        self.header = {
            'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.4.154.18 Safari/525.19'
        }
        # Filled by handle_city(); empty until then.
        self.city_list = []

    def handle_city(self):
        """Fetch the all-cities page and store the city names in ``city_list``."""
        city_url = "https://www.lagou.com/jobs/allCity.html"
        city_result = self.handle_request(method="GET", url=city_url)
        city_names = city_result.xpath('//ul[contains(@class, "city_list")]/li/a/text()')
        self.city_list = list(city_names)

    def handle_city_job(self, city):
        """Placeholder for the per-city job crawl; not implemented in this step.

        The original snippet left the body empty, which is a syntax error.
        """

    def handle_request(self, method, url, data=None, info=None):
        """Send a request through the shared session.

        Only "GET" is handled at this stage: the response body is parsed with
        ``etree.HTML`` and returned. Any other ``method`` returns None; ``data``
        and ``info`` are accepted but unused.
        """
        if method == "GET":
            response = self.lagou_session.get(url=url, headers=self.header)
            return etree.HTML(response.text)
        return None


if __name__ == '__main__':
    lagou = HandelLaGou()
    lagou.handle_city()
    print(lagou.city_list)
查看全部 -
import re

import requests
# Suppress the InsecureRequestWarning emitted because verify=False is used below.
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Another approach suggested by someone else; it was too complicated for the
# author to follow, so the simple version above is used instead.
# requests.packages.urllib3.disable_warnings()
# requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += 'HIGH:!DH:!aNULL'
# try:
#     requests.packages.urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST += 'HIGH:!DH:!aNULL'
# except AttributeError:
#     # no pyopenssl support used / needed / available
#     pass


class Handle_Lagou(object):
    """Regex-based lagou.com crawler step that collects the city list."""

    def __init__(self):
        # Use a session so cookies persist between requests.
        self.lagou_session = requests.session()
        self.header = {
            'Connection': 'close',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }
        # Filled by handle_city(); empty until then.
        self.city_list = []

    def handle_city(self):
        """Fetch the all-cities page and store every city name in ``city_list``."""
        city_search = re.compile(r'zhaopin/">(.*?)</a>')
        city_url = "https://www.lagou.com/jobs/allCity.html"
        city_result = self.handle_request(method="GET", url=city_url)
        self.city_list = city_search.findall(city_result)

    def handle_request(self, method, url, data=None, info=None):
        """Send a request through the shared session and return the body text.

        Only "GET" is supported; ``data`` and ``info`` are accepted but unused.

        Raises:
            ValueError: If ``method`` is not "GET". The original fell through
                to ``return response.text`` with ``response`` unbound.
        """
        if method != "GET":
            raise ValueError("unsupported HTTP method: %r" % (method,))
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept from the original, but it should not be used outside a
        # throwaway tutorial script.
        response = self.lagou_session.get(url=url, headers=self.header, verify=False)
        return response.text


if __name__ == '__main__':
    lagou = Handle_Lagou()
    lagou.handle_city()
    print(lagou.city_list)
查看全部
舉報
0/150
提交
取消