as-cle-bert committed on
Commit
6a0f953
·
verified ·
1 Parent(s): 8d0e4db

Create pubmedScraper.py

Browse files
Files changed (1) hide show
  1. pubmedScraper.py +55 -0
pubmedScraper.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from Bio import Entrez
2
+
3
def remove_blankets(ls):
    """Remove blank entries from ``ls`` in place.

    An entry counts as blank when it equals the empty string ``""`` or a
    single space ``" "``. All other entries keep their relative order.

    Args:
        ls: Mutable list to clean up. It is modified in place.

    Returns:
        None. The caller's list object is updated directly.
    """
    # Slice assignment keeps the same list object (callers rely on the
    # in-place effect) while avoiding the skipped-element problem that comes
    # from removing items during an index-based walk over the same list.
    ls[:] = [item for item in ls if item != "" and item != " "]
12
+
13
def search_pubmed(query, max_results, address):
    """Search PubMed and return the IDs of the matching records.

    Args:
        query: Search term, passed to ``Entrez.esearch`` as ``term``.
        max_results: Maximum number of IDs to request (``retmax``).
        address: Contact e-mail address; assigned to ``Entrez.email``
            before the request is made.

    Returns:
        The ``"IdList"`` entry of the parsed esearch response.
    """
    Entrez.email = address
    handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        record = Entrez.read(handle)
    finally:
        # Close the handle even when parsing the response fails.
        handle.close()
    return record["IdList"]
19
+
20
def fetch_pubmed_details(pubmed_ids, address):
    """Fetch the MEDLINE-formatted text for the given PubMed IDs.

    Args:
        pubmed_ids: PubMed IDs, passed to ``Entrez.efetch`` as ``id``.
        address: Contact e-mail address; assigned to ``Entrez.email``
            before the request is made.

    Returns:
        Whatever ``handle.read()`` yields for the efetch response
        (requested with ``rettype="medline"``, ``retmode="text"``), or an
        empty string when ``pubmed_ids`` is empty. In the empty case no
        request is made and ``Entrez.email`` is left untouched.
    """
    if not pubmed_ids:
        # Nothing to fetch: skip the network round trip entirely.
        return ""

    Entrez.email = address
    handle = Entrez.efetch(
        db="pubmed", id=pubmed_ids, rettype="medline", retmode="text"
    )
    try:
        records = handle.read()
    finally:
        # Close the handle even when reading the response fails.
        handle.close()
    return records
26
+
27
def respond_to_query(query, address, max_results=10):
    """Search PubMed and return a plain-text summary of the hits.

    The function calls ``search_pubmed`` and ``fetch_pubmed_details`` and
    then scans the returned text line by line, keeping only these fields:

    * ``TI``   -> ``Title (sometimes not complete): ...`` (only the first
      line of the field is read, so wrapped titles are truncated)
    * ``AU``   -> ``Author: ...``
    * ``PHST`` lines ending in ``[pubmed]`` -> ``Published on PubMed on: ...``
    * ``AID``  lines ending in ``[doi]`` -> ``doi: https://doi.org/...``
      followed by an empty line

    Args:
        query: Search term forwarded to ``search_pubmed``.
        address: Contact e-mail address forwarded to both helpers.
        max_results: Maximum number of records to request.

    Returns:
        One string with a ``key: value`` line per extracted field, in the
        order the fields appear in the fetched text. Empty when the search
        returns no IDs or no recognised field is present.
    """
    pubmed_ids = search_pubmed(query, max_results, address)
    if not pubmed_ids:
        # No hits: there is nothing to fetch or format.
        return ""

    pubmed_details = fetch_pubmed_details(pubmed_ids, address)

    entries = []
    for line in pubmed_details.splitlines():
        # A field line looks like "<TAG><padding>- <value>". Splitting on
        # the first "- " accepts both the space-padded layout ("TI  - ...")
        # and an unpadded one ("PHST- ...").
        head, sep, value = line.partition("- ")
        if not sep:
            continue
        # rstrip only: an indented line keeps its leading spaces and
        # therefore never matches a tag below.
        tag = head.rstrip()

        if tag == "TI":
            entries.append(f"Title (sometimes not complete): {value}\n")
        elif tag == "AU":
            entries.append(f"Author: {value}\n")
        elif tag == "PHST" and value.endswith("[pubmed]"):
            published = value[: -len("[pubmed]")].strip()
            entries.append(f"Published on PubMed on: {published}\n")
        elif tag == "AID" and value.endswith("[doi]"):
            # Strip the blank before "[doi]" so it does not end up in the URL.
            doi = value[: -len("[doi]")].strip()
            # The extra newline leaves an empty line after the DOI entry.
            entries.append(f"doi: https://doi.org/{doi}\n\n")

    return "".join(entries)