apply_async里面的函数不执行
#?coding=utf-8 import?json import?re import?time import?multiprocessing import?requests class?HandleLaGou(object): ????def?__init__(self): ????????self.lagou_session?=?requests.session() ????????self.header?=?{ ????????????'User-Agent':?'Mozilla/5.0?(Macintosh;?Intel?Mac?OS?X?10_15_0)?AppleWebKit/537.36?(KHTML,?like?Gecko)?Chrome/78.0.3904.108?Safari/537.36' ????????} ????????self.city_list?=?"" ????#?獲取全國(guó)所有城市列表 ????def?handle_city(self): ????????city_search?=?re.compile(r'zhaopin/">(.*?)</a>') ????????city_url?=?'https://www.lagou.com/jobs/allCity.html' ????????city_result?=?self.handle_request(method='GET',?url=city_url) ????????#?使用正則表達(dá)式獲取城市列表 ????????self.city_list?=?city_search.findall(city_result) ????????self.lagou_session.cookies.clear() ????def?handle_city_job(self,?city): ????????first_request_url?=?'https://www.lagou.com/jobs/list_python?&px=default&city=%s'?%?city ????????first_response?=?self.handle_request(method='GET',?url=first_request_url) ????????total_page_search?=?re.compile(r'class="span\stotalNum">(\d+)</span>') ????????try: ????????????total_page?=?total_page_search.search(first_response).group(1) ????????except: ????????????return ????????else: ????????????for?i?in?range(1,?int(total_page)?+?1): ????????????????data?=?{ ????????????????????'pn':?i, ????????????????????'kd':?'python' ????????????????} ????????????????page_url?=?'https://www.lagou.com/jobs/positionAjax.json?px=default&city=%s&needAddtionalResult=false'?%?city ????????????????referer_url?=?'https://www.lagou.com/jobs/list_python?&px=default&city=%s'?%?city ????????????????self.header['Referer']?=?referer_url.encode() ????????????????response?=?self.handle_request(method='POST',?url=page_url,?data=data,?info=city) ????????????????print?response ????????????????lagou_data?=?json.loads(response) ????????????????job_list?=?lagou_data['content']['positionResult']['result'] ????????????????for?job?in?job_list: ????????????????????print?job 
????def?handle_request(self,?method,?url,?data=None,?info=None): ????????global?response ????????while?True: ????????????#?阿布云代理 ????????????#?代理服務(wù)器 ????????????proxyHost?=?"http-dyn.abuyun.com" ????????????proxyPort?=?"9020" ????????????#?代理隧道驗(yàn)證信息 ????????????proxyUser?=?"H6451437A9W24E7D" ????????????proxyPass?=?"A86CD1F6AF3AD760" ????????????proxyMeta?=?"http://%(user)s:%(pass)s@%(host)s:%(port)s"?%?{ ????????????????"host":?proxyHost, ????????????????"port":?proxyPort, ????????????????"user":?proxyUser, ????????????????"pass":?proxyPass, ????????????} ????????????proxies?=?{ ????????????????"http":?proxyMeta, ????????????????"https":?proxyMeta, ????????????} ????????????try: ????????????????if?method?==?'GET': ????????????????????response?=?self.lagou_session.get( ????????????????????????url=url, ????????????????????????headers=self.header, ????????????????????????proxies=proxies, ????????????????????????timeout=6 ????????????????????) ????????????????elif?method?==?'POST': ????????????????????response?=?self.lagou_session.post( ????????????????????????url=url, ????????????????????????headers=self.header, ????????????????????????data=data, ????????????????????????proxies=proxies, ????????????????????????timeout=6 ????????????????????) 
????????????except: ????????????????self.lagou_session.cookies.clear() ????????????????first_request_url?=?'https://www.lagou.com/jobs/list_python?&px=default&city=%s'?%?info ????????????????self.handle_request(method='GET',?url=first_request_url) ????????????????time.sleep(10) ????????????????continue ????????????response.encoding?=?'utf-8' ????????????if?'頻繁'?in?response.text: ????????????????print?response.text ????????????????self.lagou_session.cookies.clear() ????????????????first_request_url?=?'https://www.lagou.com/jobs/list_python?&px=default&city=%s'?%?info ????????????????self.handle_request(method='GET',?url=first_request_url) ????????????????time.sleep(10) ????????????????continue ????????????return?response.text if?__name__?==?'__main__': ????lagou?=?HandleLaGou() ????lagou.handle_city() ????#?引入多進(jìn)程 ????pool?=?multiprocessing.Pool(1) ????for?city?in?lagou.city_list: ????????pool.apply_async(lagou.handle_city_job,?args=(city,1)) ????pool.close() ????pool.join()
执行结果
/usr/local/bin/python2.7 /Users/imooc_lagou/handle_crawl_lagou.py
Process finished with exit code 0
2020-03-14
pool.apply_async(lagou.handle_city_job, args=(city,))