代碼出現問題
import?requests,time,random from?lxml?import?etree from?multiprocessing?import?Pool from?threading?import?Thread from?urllib?import?request user_agent?=?[ ???????"Mozilla/5.0?(compatible;?Baiduspider/2.0;?+http://www.baidu.com/search/spider.html)", ???????"Mozilla/4.0?(compatible;?MSIE?6.0;?Windows?NT?5.1;?SV1;?AcooBrowser;?.NET?CLR?1.1.4322;?.NET?CLR?2.0.50727)", ???????"Mozilla/4.0?(compatible;?MSIE?7.0;?Windows?NT?6.0;?Acoo?Browser;?SLCC1;?.NET?CLR?2.0.50727;?Media?Center?PC?5.0;?.NET?CLR?3.0.04506)", ???????"Mozilla/4.0?(compatible;?MSIE?7.0;?AOL?9.5;?AOLBuild?4337.35;?Windows?NT?5.1;?.NET?CLR?1.1.4322;?.NET?CLR?2.0.50727)", ???????"Mozilla/5.0?(Windows;?U;?MSIE?9.0;?Windows?NT?9.0;?en-US)", ???????"Mozilla/5.0?(compatible;?MSIE?9.0;?Windows?NT?6.1;?Win64;?x64;?Trident/5.0;?.NET?CLR?3.5.30729;?.NET?CLR?3.0.30729;?.NET?CLR?2.0.50727;?Media?Center?PC?6.0)", ] headers={"User-Agent":?random.choice(user_agent)} filk=r'C:\Users\25063\Desktop\音樂' class?jiuku(Thread): ????url='http://www.9ku.com/x1/music/by_new.php?act=t_m_hits&page=' ????def?__init__(self,star_page,end_page): ????????super(jiuku,self).__init__() ????????self.star_page=star_page ????????self.end_page=end_page ????????self.music_list=[] ????def?get_one_html(self,page): ????????'''獲取一個頁面的html''' ????????now_url=self.url+str(page) ????????print(now_url) ????????r=requests.get(now_url,headers=headers) ????????return?r.text ????def?get_many_url(self,html): ????????'''根據html解析得到音樂的url''' ????????xml=etree.HTML(html) ????????music_url=xml.xpath('//ol/li/a/@href') ????????for?urls?in?music_url: ????????????url='http://www.9ku.com'+urls ????????????self.music_list.append(url) ????????return?self.music_list ????def?down_one_music(self,url): ????????r=requests.get(url,headers=headers) ????????xml=etree.HTML(r.text) ????????title=xml.xpath('//div[@class="playingTit"]/h1/text()')[0] ????????singer=xml.xpath('//div[@class="playingTit"]/h2/a/text()')[0] ????????music=title+'-'+singer ????????music_id=url.split('/')[-1].split('.')[0]#獲取音樂的id ????????print('正在下載:{}'.format(music)) ????????music_url='http://mp3.9ku.com/m4a/{}.m4a'.format(music_id) ????????request.urlretrieve(music_url,filk+music+'.mp3') ????def?run(self): ????????for?page?in?range(self.star_page,self.end_page): ????????????time.sleep(1) ????????????html=self.get_one_html(page) ????????????music_list=self.get_many_url(html) ????????????pool=Pool() ????????????'''使用多進程進行下載,但是出錯''' ????????????pool.map(self.down_one_music,[url?for?url?in?music_list]) ????????????#?for?i?in?music_list: ????????????#?????self.down_one_music(i) def?main(): ????number?=?int(input('請輸入要爬取的頁數:')) ????if?number?<=?4: ????????a?=?[i?for?i?in?range(0,?number+1)] ????????a1_min?=?min(a) ????????a1_max?=?max(a) ????????down?=?jiuku(a1_min,?a1_max) ????????down.start() ????else: ????????a?=?[i?for?i?in?range(0,?number)] ????????d?=?number?/?4??#?得到的數是一個浮點數 ????????e?=?int(d)??#?這一步是對浮點數變?yōu)檎麛?,程序會將整數后面的小數全部清理這是的e就會小于d ????????if?d?>?e:??#?判斷如果e小于b則就需要將每個小列表中的數量為e+1 ????????????step?=?e?+?1 ????????????b?=?[a[i:i?+?step]?for?i?in?range(0,?len(a),?step)] ????????else: ????????????step?=?e ????????????b?=?[a[i:i?+?step]?for?i?in?range(0,?len(a),?step)] ????????a1_max?=?max(b[0]) ????????a1_min?=?min(b[0]) ????????a2_max?=?max(b[1]) ????????a2_min?=?min(b[1]) ????????a3_max?=?max(b[2]) ????????a3_min?=?min(b[2]) ????????a4_max?=?max(b[3]) ????????a4_min?=?min(b[3]) ????????down?=?jiuku(a1_min,?a1_max) ????????down.start() ????????down1?=?jiuku(a2_min,?a2_max) ????????down1.start() ????????down2?=?jiuku(a3_min,?a3_max) ????????down2.start() ????????down3?=?jiuku(a4_min,?a4_max) ????????down3.start() if?__name__?==?'__main__': ????main() #?def?main(): #?????a=int(input('da:')) #?????d=int(input('agd:')) #?????down=jiuku(a,d) #?????down.run() #?if?__name__?==?'__main__': #?????main()
上面的代碼 我是打算使用多進程家多線程進行下載但是出錯 為什么?
2019-11-02
1、線程如果使用Thread,在子線程中再用進程池Pool?就會報錯,原理不太清楚,可能需要老師來回答了,換用Process?就沒問題了;
2、這個音樂的頁面應該是從page=1開始的,所以我稍微改了一下入參。你可以看一下。
2019-11-02
2019-11-02
回復居然不能編輯,上面的格式不知道怎么回事,重新上傳一下代碼
2019-11-02
我將你的程序根據我的理解優(yōu)化了一下,目前運行正常??梢詤⒖家幌隆?/p>