Spaces:
Sleeping
Sleeping
File size: 2,351 Bytes
57273d8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import requests
from bs4 import BeautifulSoup
from sheets import ExcelAutomator
def get_headers(data: str) -> dict:
    """Convert a raw block of HTTP headers into a dictionary.

    Args:
        data (str): Headers as text, one ``Name: value`` pair per line (for
            example the text obtained in Firefox by copying the request or
            response headers).

    Returns:
        dict: Header names mapped to their values. The values ``none``,
        ``true`` and ``false`` (case-insensitive) are converted to ``None``,
        ``True`` and ``False``; every other value is kept as a stripped
        string. Lines that contain no colon (including blank lines) are
        ignored.
    """
    literals = {"none": None, "true": True, "false": False}
    out = {}
    for line in data.strip().split("\n"):
        # Only the first colon separates name from value, so values that
        # themselves contain colons (URLs, timestamps) stay intact.
        key, sep, value = line.partition(":")
        if not sep:
            # No colon means no header on this line; skip it rather than
            # failing with an IndexError.
            continue
        value = value.strip()
        out[key.strip()] = literals.get(value.lower(), value)
    return out
def getlinks(url: str) -> list:
    """Collect the article links listed on an issue page.

    Args:
        url (str): Address of the page to scrape. Links are built from the
            part of this URL that precedes ``home.html``, e.g.
            ``https://www.ams.org/journals/jams/{year}-{volume}-{issue}/home.html?active=allissues``.

    Returns:
        list: For every ``<dl>`` inside ``<article class="contentList">``,
        the URL prefix followed by the ``href`` of the link in its ``<dt>``.
        Empty when the page has no such container; entries without a link
        are skipped.
    """
    # The context manager releases the session's pooled connections.
    with requests.Session() as browser:
        data = browser.get(url)
    full_page = BeautifulSoup(data.text, "lxml")
    article = full_page.find("article", {"class": "contentList"})
    if article is None:
        return []

    lnk = url.split("home.html", 1)[0]
    output = []
    for entry in article.find_all("dl"):
        title = entry.find("dt")
        anchor = title.find("a") if title is not None else None
        href = anchor.get("href") if anchor is not None else None
        if href is None:
            # Without an href there is nothing to link to; appending would
            # produce a bogus URL ending in "None".
            continue
        output.append(f"{lnk}{href}")
    return output
def get_authors(url: str) -> list:
    """Scrape author contact details from an article page.

    Walks the ``<li>`` items of ``<section id="additionalinformation">`` and
    accumulates three fields: the text of an item containing ``<strong>``
    is taken as the author name, an item containing ``Email:`` supplies the
    e-mail, and an item containing ``Affiliation:`` supplies the address.

    Args:
        url (str): Address of the article page to scrape.

    Returns:
        list: One dict with the keys ``"Name"``, ``"Email"`` and
        ``"Address"`` per author for whom all three fields were found.
        Empty when the page has no additional-information section.
    """
    # The context manager releases the session's pooled connections.
    with requests.Session() as browser:
        data = browser.get(url)
    full_page = BeautifulSoup(data.text, "lxml")
    details = full_page.find("section", {"id": "additionalinformation"})
    if details is None:
        return []

    output = []
    author_name = email = address = None
    for item in details.find_all("li"):
        text = item.text
        if item.find("strong"):
            author_name = text
        elif "Email:" in text:
            email = text.split(":", 1)[1].strip()
        elif "Affiliation:" in text:
            address = text.split(":", 1)[1].strip()

        # Emit the record as soon as it is complete. Checking after the item
        # is processed (instead of before the next one) ensures the final
        # author is not lost when their last field is the last <li>.
        if author_name is not None and email is not None and address is not None:
            output.append(
                {
                    "Name": author_name,
                    "Email": email,
                    "Address": address,
                }
            )
            author_name = email = address = None
    return output
|