Traceback (most recent call last):
  File "D:\pythonxxhj\pycode\imooc\wiki2mysql.py", line 6, in <module>
    import pymysql.cursors
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\site-packages\pymysql\__init__.py", line 92, in <module>
    from . import connections as _orig_conn
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\site-packages\pymysql\connections.py", line 22, in <module>
    from .cursors import Cursor
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\site-packages\pymysql\cursors.py", line 138
    connection = self._get_db()
                              ^
IndentationError: unindent does not match any outer indentation level
#代碼如下
#! /usr/local/bin/python3
# -*- coding:utf-8 -*-
"""Scrape article links from the Wikipedia main page into MySQL.

Every ``<a>`` element whose ``href`` starts with ``/wiki/`` and does not end
in ``.jpg``/``.JPG`` is printed and inserted as one row into the ``urls``
table of the local ``wikiurl`` database.
"""
import re
from urllib.request import urlopen

from bs4 import BeautifulSoup
# NOTE(review): the IndentationError in the traceback quoted with this script
# is raised while importing pymysql itself (site-packages/pymysql/cursors.py,
# line 138), not by the code below -- presumably the installed copy of pymysql
# is damaged; confirm by reinstalling the package.
import pymysql.cursors

BASE_URL = "https://en.wikipedia.org"
# Compiled once instead of on every loop iteration; raw string so that ``\.``
# is a regex escape rather than an invalid Python string escape.
IMAGE_LINK = re.compile(r"\.(jpg|JPG)$")
INSERT_SQL = "insert into `urls`(`urlhref`,`urlname`)values(%s,%s)"

# Close the HTTP response as soon as the page body has been read.
with urlopen(BASE_URL + "/wiki/Main_Page") as response:
    resp = response.read().decode("utf-8")
soup = BeautifulSoup(resp, "html.parser")
listUrls = soup.find_all("a", href=re.compile(r"^/wiki/"))

# One connection for the whole run instead of a connect/close per link.
connection = pymysql.connect(
    host='localhost',
    user='root',
    password='root',
    db='wikiurl',
    charset='utf8mb4',
)
try:
    with connection.cursor() as cursor:
        for url in listUrls:
            # Skip links that point at JPEG images.
            if IMAGE_LINK.search(url["href"]):
                continue
            name = url.get_text()
            href = BASE_URL + url["href"]
            print(name, "<---->", href)
            # Parameter order follows the column list (urlhref, urlname);
            # the previous order stored the link text in `urlhref` and the
            # URL in `urlname`.
            cursor.execute(INSERT_SQL, (href, name))
            # Commit per row so rows already stored survive a later failure.
            connection.commit()
finally:
    connection.close()
2017-08-16
Connect,C大寫,不曉得是這個原因不