1 回答

TA貢獻1799條經驗 獲得超8個贊
由于某種原因,無論是 R API 還是 Python API,每個人似乎都有相同的問題。我找到了一種解決方法來獲得相同的結果。它很慢,但它可以完成工作。如果你的結果小于 10k,你可以使用 Selenium 來獲取 pubmedid。否則,我們可以使用下面的代碼來抓取數據。我希望這對將來的人有幫助。
import requests
# # Custom Date Range
# req = requests.get("https://pubmed.ncbi.nlm.nih.gov/?term=covid&filter=dates.2009/01/01-2020/03/01&format=pmid&sort=pubdate&size=200&page={}".format(i))
# # Custom Year Range
# req = requests.get("https://pubmed.ncbi.nlm.nih.gov/?term=covid&filter=years.2010-2019&format=pmid&sort=pubdate&size=200&page={}".format(i))
# #Relative Date
# req = requests.get("https://pubmed.ncbi.nlm.nih.gov/?term=covid&filter=datesearch.y_1&format=pmid&sort=pubdate&size=200&page={}".format(i))
# # filter language
# # &filter=lang.english
# # filter human
# #&filter=hum_ani.humans
# Systematic Review
#&filter=pubt.systematicreview
# Case Reports
# &filter=pubt.casereports
# Age
# &filter=age.newborn
# Example free-text query; it is handed to id_retriever() further down.
search = "covid lungs"
# search_list = "+".join(search.split(' '))
def id_retriever(search_string, session=None):
    """Collect the PubMed IDs matching a free-text search.

    Requests the PubMed web search in ``format=pmid`` mode, 200 results per
    page sorted by publication date, and scans every response line that
    contains the ``log_displayeduids`` marker. Paging stops at the first
    page that contributes no IDs.

    Args:
        search_string: Query text. It is sent as the ``term`` query
            parameter and URL-encoded by the HTTP client, so characters
            such as ``&`` or ``#`` are searched for literally instead of
            corrupting the URL.
        session: Optional object exposing a requests-compatible
            ``get(url, params=..., timeout=...)`` method, e.g. a
            ``requests.Session`` for connection reuse. When omitted, the
            module-level ``requests`` API is used.

    Returns:
        list[str]: The IDs in the order they were encountered.
    """
    http = requests if session is None else session
    result = []
    for page in range(1, 10000000):
        response = http.get(
            "https://pubmed.ncbi.nlm.nih.gov/",
            params={
                "term": search_string,
                "format": "pmid",
                "sort": "pubdate",
                "size": 200,
                "page": page,
            },
            # requests never times out on its own; without this a stalled
            # connection would block the whole crawl forever.
            timeout=30,
        )
        count_before = len(result)
        for raw_line in response.iter_lines():
            line = raw_line.decode("utf-8", errors="replace").strip(" ")
            if "log_displayeduids" not in line:
                continue
            quoted = line.split('"')
            if len(quoted) < 2:
                continue
            # The ID list is the last double-quoted value on the marker
            # line. Keeping only all-digit tokens skips an empty value and
            # any unrelated line that merely mentions the marker.
            result.extend(
                token.strip()
                for token in quoted[-2].split(",")
                if token.strip().isdigit()
            )
        if len(result) == count_before:
            # This page added nothing new: the result set is exhausted.
            break
    return result
# Run the retrieval for the example query defined above.
ids=id_retriever(search)
# Bare expression: shows the number of IDs in a notebook/REPL, no effect when run as a script.
len(ids)
添加回答
舉報(bào)