# Beautiful Soup demo: look up <a> and <p> nodes in a small HTML document
# by tag name, by exact attribute value, by regular expression and by CSS
# class, printing what each lookup finds.
import re

from bs4 import BeautifulSoup

# Sample document. Every anchor carries an href because the lookups below
# read link['href'] and filter on the href attribute.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie">Elsie</a>,
<a href="http://example.com/lacie">Lacie</a> and
<a href="http://example.com/tillie">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""

# html_doc is already a str, so from_encoding is not passed: Beautiful Soup
# ignores it (with a warning) for markup that is already decoded.
soup = BeautifulSoup(html_doc, "html.parser")

# Every <a> tag in the document: tag name, target URL and link text.
print("獲取所有連接")
links = soup.find_all('a')
for link in links:
    print(link.name, link['href'], link.get_text())

# The single <a> tag whose href is exactly Lacie's URL.
print("獲取lacie的連接")
link_node = soup.find('a', href='http://example.com/lacie')
print(link_node.name, link_node['href'], link_node.get_text())

# The first <a> tag whose href matches the regular expression "ill"
# (Tillie's URL in this document).
print("正則匹配")
link_node = soup.find('a', href=re.compile(r"ill"))
print(link_node.name, link_node['href'], link_node.get_text())

# The first <p> tag with CSS class "title"; class_ is used because "class"
# is a reserved word in Python.
print("獲取P段落文字")
P_node = soup.find('p', class_="title")
print(P_node.name, P_node.get_text())
# 2018-12-24
# Python version: 3.7.2
# Beautiful Soup demo: find the first <a> tag carrying the CSS class
# "sister" and print its tag name and text.
import re

from bs4 import BeautifulSoup

# Sample document parsed below.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" id="link1">Elsie</a>,
<a class="sister" id="link2">Lacie</a> and
<a class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""

# html_doc is already a str, so from_encoding is not passed: Beautiful Soup
# ignores it (with a warning) for markup that is already decoded.
soup = BeautifulSoup(html_doc, 'html.parser')

# find() returns only the first match; class_ is used because "class" is a
# reserved word in Python.
link = soup.find('a', class_='sister')
print(link.name, link.get_text())
# 2018-12-24