悄悄說一句,我學過c,c++,java,js,html再看這個根本烏鴉梨,應該沒有人會發現
2018-02-05
百度百科目前url 沒有htm結尾,需要修改正則匹配規則為re.compile(r"/item/+")
類似 url https://baike.baidu.com/item/%E7%99%BE%E5%BA%A6%E7%99%BE%E7%A7%91%EF%BC%9A%E9%94%81%E5%AE%9A%E8%AF%8D%E6%9D%A1
類似 url https://baike.baidu.com/item/%E7%99%BE%E5%BA%A6%E7%99%BE%E7%A7%91%EF%BC%9A%E9%94%81%E5%AE%9A%E8%AF%8D%E6%9D%A1
2018-02-05
python -m pip install bs4
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup
2018-02-05
# Third method: fetch `url` through an opener that records cookies.
# This fragment imports nothing and does not define `url`; it relies on
# `urllib.request`, `http.cookiejar` and `url` already being in scope (the
# "first method" fragment elsewhere in this file provides them).
print("第三種方法")
# The jar is handed to HTTPCookieProcessor so cookies from responses land in it.
cj = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
# install_opener() makes this opener the default used by later urlopen() calls.
urllib.request.install_opener(opener)
# `with` closes the HTTP response explicitly once the prints are done.
with urllib.request.urlopen(url) as response3:
    print(response3.getcode())  # HTTP status code
    print(cj)  # cookies collected while handling the request
    print(response3.read())  # response body as bytes
# NOTE(review): the sequence below repeats the one above (minus the banner
# print) — it looks like a paste duplicate; confirm before removing it.
cj = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
urllib.request.install_opener(opener)
with urllib.request.urlopen(url) as response3:
    print(response3.getcode())
    print(cj)
    print(response3.read())
2018-02-05
# Second method: fetch `url` with an explicit Request carrying a User-Agent.
# This fragment imports nothing and does not define `url`; it relies on
# `urllib.request` and `url` already being in scope (the "first method"
# fragment elsewhere in this file provides them).
print("第二種方法")
request = urllib.request.Request(url)
# Send a browser-like User-Agent header with the request.
request.add_header("user-agent", 'Mozilla/5.0')
# `with` closes the HTTP response explicitly once the prints are done.
with urllib.request.urlopen(request) as response2:
    print(response2.getcode())  # HTTP status code
    print(len(response2.read()))  # size of the response body in bytes
# NOTE(review): the sequence below repeats the one above (minus the banner
# print) — it looks like a paste duplicate; confirm before removing it.
request = urllib.request.Request(url)
request.add_header("user-agent", 'Mozilla/5.0')
with urllib.request.urlopen(request) as response2:
    print(response2.getcode())
    print(len(response2.read()))
2018-02-05
# coding:utf-8
# NOTE: Python only honours an encoding declaration on line 1 or 2 of a file,
# so the line above has no effect at this position.
# First method: fetch a page with a plain urlopen() call.
import urllib.request
import http.cookiejar

url = "http://www.baidu.com"
print("第一種方法")
# `with` closes the HTTP response explicitly once the prints are done.
with urllib.request.urlopen(url) as response1:
    print(response1.getcode())  # HTTP status code
    print(len(response1.read()))  # size of the response body in bytes
# NOTE(review): the sequence below repeats the one above — it looks like a
# paste duplicate; confirm before removing it.
import urllib.request
import http.cookiejar

url = "http://www.baidu.com"
print("第一種方法")
with urllib.request.urlopen(url) as response1:
    print(response1.getcode())
    print(len(response1.read()))
2018-02-05
#coding=utf-8
# Install a cookie-aware opener as urllib's default, then fetch a page.
# `urllib.request` is imported explicitly: a bare `import urllib` does not load
# the `request` submodule, and the original only worked because
# `http.cookiejar` happens to import it internally.
import urllib.request
import http.cookiejar

# Create the cookie container.
cj = http.cookiejar.CookieJar()
# Create an opener that routes requests through the cookie processor.
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
# Install the opener so later urlopen() calls use it.
urllib.request.install_opener(opener)
# NOTE: the response is left open here, as in the original; call
# response.close() (or use `with`) once it has been consumed.
response = urllib.request.urlopen("http://www.baidu.com")
# NOTE(review): the sequence below repeats the one above — it looks like a
# paste duplicate; confirm before removing it.
import urllib.request
import http.cookiejar

# Create the cookie container.
cj = http.cookiejar.CookieJar()
# Create an opener that routes requests through the cookie processor.
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
# Install the opener so later urlopen() calls use it.
urllib.request.install_opener(opener)
response = urllib.request.urlopen("http://www.baidu.com")
2018-02-05