# -*- coding: utf-8 -*-
"""Small Beautiful Soup walkthrough on an inline HTML snippet.

The script parses ``html_doc`` with the built-in ``html.parser`` backend and
prints, in order:

1. every ``<a>`` tag (tag name, text, href),
2. the anchor whose href equals ``http://example.com/lacie``,
3. the first anchor whose href matches the regular expression ``il``,
4. the first ``<p>`` whose class matches the regular expression ``t``.

``find()`` returns only the first matching node, while ``find_all()`` returns
every match.
"""
from __future__ import print_function  # same print() behaviour on Python 2 and 3

import re

from bs4 import BeautifulSoup

# NOTE(review): the anchors had lost their attributes (they read ``<a >``), so
# every href lookup below failed.  The Lacie URL is taken from the exact-match
# lookup further down; the Elsie and Tillie URLs are assumed to follow the same
# pattern -- confirm against the original document.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="itle"><b>The Dormouse's story</b></p>
<p class="tory">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie">Elsie</a>,
<a href="http://example.com/lacie">Lacie</a> and
<a href="http://example.com/tillie">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="tory">...</p>
"""

# ``from_encoding`` only applies to byte-string markup; on Python 3 the literal
# above is already text, so passing it would just trigger a warning.
soup = BeautifulSoup(html_doc, 'html.parser')

# List every anchor in the document.
print('獲取所有的鏈接')
links = soup.find_all('a')
for link in links:
    # find_all('a') does not filter on href, so use .get() to print None for
    # an anchor without one instead of raising KeyError.
    print(link.name, link.get_text(), link.get('href'))

# Look up a single anchor by its exact href.
print('獲取lacie的鏈接')
link_node = soup.find('a', href='http://example.com/lacie')
print(link_node.name, link_node.get_text(), link_node['href'])

# Look up the first anchor whose href matches a regular expression.
print(u'正則匹配')
link_node = soup.find('a', href=re.compile(r'il'))
print(link_node.name, link_node.get_text(), link_node['href'])

# Look up the first paragraph whose class matches a regular expression;
# find() stops at the first hit even though several <p> tags match.
print('獲取p段落文字')
p_node = soup.find('p', class_=re.compile(r't'))
print(p_node.name, p_node.get_text())
# 2017-08-26
# 因為find只會顯示查找到的第一個內容
# (Note: find() only returns the first match it finds.)