輸出亂碼可嘗試做以下修改:
fout = open('output.html', 'w', encoding='utf-8') # 此處添加encoding='utf-8'
# 在fout.write("<html>")後添加這一行:
fout.write("<head><meta http-equiv='content-type' content='text/html;charset=utf-8'></head>")
fout = open('output.html', 'w', encoding='utf-8') # 此處添加encoding='utf-8'
# 在fout.write("<html>")後添加這一行:
fout.write("<head><meta http-equiv='content-type' content='text/html;charset=utf-8'></head>")
2018-04-08
對於 https 需要做以下處理(注意:以下做法會停用 SSL 憑證驗證,存在安全風險,僅建議在測試環境使用)
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
response = urllib.request.urlopen(url)
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
response = urllib.request.urlopen(url)
2018-04-04
print("正則匹配")
link_node = soup.find('a', href = re.compile(r"ill"))
print(link_node.name, link_node['href'], link_node.get_text())
link_node = soup.find('a', href = re.compile(r"ill"))
print(link_node.name, link_node['href'], link_node.get_text())
2018-04-04
print("獲取lacie的鏈接")
link_node = soup.find('a', href = 'http://example.com/lacie')
print(link_node.name, link_node['href'], link_node.get_text())
link_node = soup.find('a', href = 'http://example.com/lacie')
print(link_node.name, link_node['href'], link_node.get_text())
2018-04-04
soup = BeautifulSoup(html_doc,'html.parser',from_encoding='utf-8')
print("獲取所有的鏈接")
links = soup.find_all('a')
for link in links:
print(link.name , link['href'],link.get_text())
print("獲取所有的鏈接")
links = soup.find_all('a')
for link in links:
print(link.name , link['href'],link.get_text())
2018-04-04