Spaces:
Running
Running
import requests | |
from bs4 import BeautifulSoup | |
from sheets import ExcelAutomator | |
def get_headers(data: str) -> dict:
    """Parse a raw block of HTTP headers into a dictionary.

    Args:
        data (str): Headers as text, one ``Name: value`` pair per line (for
            example the request or response headers copied from Firefox).

    Returns:
        dict: Header names mapped to their values. The values ``none``,
        ``true`` and ``false`` (case-insensitive) are converted to ``None``,
        ``True`` and ``False``; every other value stays a stripped string.
        Lines that contain no colon are ignored.
    """
    literals = {"none": None, "true": True, "false": False}
    out = {}
    for line in data.strip().split("\n"):
        # Only the first colon separates name from value, so values that
        # themselves contain colons (URLs, timestamps) stay intact.
        name, sep, raw_value = line.partition(":")
        if not sep:
            # Blank lines and request/status lines such as "GET / HTTP/1.1"
            # are not headers; skip them instead of failing on them.
            continue
        value = raw_value.strip()
        out[name.strip()] = literals.get(value.lower(), value)
    return out
def getlinks(url: str) -> list:
    """Collect the article links listed on a journal issue page.

    Args:
        url (str): Address of the issue page, e.g.
            ``https://www.ams.org/journals/jams/{year}-{volume}-{issue}/home.html?active=allissues``.
            Everything before the first ``home.html`` is used as the prefix
            for each article link.

    Returns:
        list: One URL string per ``<dl>`` entry inside the page's
        ``<article class="contentList">`` element, built from the prefix and
        the ``href`` of the entry's ``<dt><a>`` link. Empty when the page has
        no such ``<article>`` element; entries without a link are skipped.
    """
    # The context manager releases the session's connections when done.
    with requests.session() as browser:
        page_html = browser.get(url).text

    page = BeautifulSoup(page_html, "lxml")
    article = page.find("article", {"class": "contentList"})
    if article is None:
        # find() returns None when nothing matches; there is nothing to list.
        return []

    prefix = url.split("home.html", 1)[0]
    output = []
    for entry in article.find_all("dl"):
        title = entry.find("dt")
        anchor = title.find("a") if title is not None else None
        href = anchor.get("href") if anchor is not None else None
        if href is None:
            # Without an href there is no link to build for this entry.
            continue
        output.append(f"{prefix}{href}")
    return output
def get_authors(url: str) -> list:
    """Extract author records from an article page.

    The function walks every ``<li>`` inside the page's
    ``<section id="additionalinformation">`` element and assembles records
    from three kinds of items: an item containing a ``<strong>`` tag supplies
    the name, an item containing ``Email:`` supplies the e-mail and an item
    containing ``Affiliation:`` supplies the address.

    Args:
        url (str): Address of the article page to fetch.

    Returns:
        list: Dictionaries with the keys ``"Name"``, ``"Email"`` and
        ``"Address"``. A record is emitted only once all three values have
        been seen. Empty when the page has no such section.
    """
    # The context manager releases the session's connections when done.
    with requests.session() as browser:
        page_html = browser.get(url).text

    page = BeautifulSoup(page_html, "lxml")
    details = page.find("section", {"id": "additionalinformation"})
    if details is None:
        # find() returns None when nothing matches; no authors to report.
        return []

    output = []
    author_name = None
    email = None
    address = None
    for item in details.find_all("li"):
        text = item.text
        if item.find("strong"):
            author_name = text
        elif "Email:" in text:
            email = text.split(":", 1)[1].strip()
        elif "Affiliation:" in text:
            address = text.split(":", 1)[1].strip()

        # Emit the record as soon as it is complete. Doing this at the end of
        # the iteration (rather than at the start of the next one) ensures a
        # record completed by the very last <li> is not lost.
        if author_name is not None and email is not None and address is not None:
            output.append(
                {
                    "Name": author_name,
                    "Email": email,
                    "Address": address,
                }
            )
            author_name = None
            email = None
            address = None
    return output