import requests
from bs4 import BeautifulSoup

from sheets import ExcelAutomator

# Header values that are converted from text to Python literals.
_HEADER_LITERALS = {"none": None, "true": True, "false": False}


def get_headers(data: str) -> dict:
    """Parse a block of raw header text into a dictionary.

    Args:
        data (str): Headers as text, one ``Key: Value`` pair per line (for
            example the request or response headers copied from the Firefox
            network inspector).

    Returns:
        dict: Header names mapped to their values. The values ``none``,
        ``true`` and ``false`` (case-insensitive) become ``None``, ``True``
        and ``False``; every other value is kept as a stripped string.
        Lines that contain no ``:`` are ignored.
    """
    out = {}
    for line in data.strip().split("\n"):
        key, sep, value = line.partition(":")
        if not sep:
            # Blank lines or a request line such as "GET / HTTP/1.1" carry no
            # "Key: Value" pair; skip them instead of raising IndexError.
            continue
        value = value.strip()
        out[key.strip()] = _HEADER_LITERALS.get(value.lower(), value)
    return out


def getlinks(url: str) -> list:
    """Collect the article links listed on an issue's ``home.html`` page.

    Args:
        url (str): Address of the issue page, e.g.
            ``https://www.ams.org/journals/jams/{year}-{volume}-{issue}/home.html?active=allissues``.

    Returns:
        list: One URL per ``<dl>`` entry inside ``<article class="contentList">``,
        formed by appending the entry's ``<dt><a href>`` value to the part of
        ``url`` that precedes ``home.html``. Empty when the page has no such
        container; entries without a link are skipped.
    """
    # The response body is fully read by get(), so it stays usable after the
    # session (and its pooled connections) is closed.
    with requests.Session() as browser:
        data = browser.get(url)

    full_page = BeautifulSoup(data.text, "lxml")
    article = full_page.find("article", {"class": "contentList"})
    if article is None:
        return []

    # Article hrefs are appended to the issue directory, i.e. everything
    # before "home.html".
    base = url.split("home.html", 1)[0]
    output = []
    for entry in article.find_all("dl"):
        title = entry.find("dt")
        anchor = title.find("a") if title is not None else None
        href = anchor.get("href") if anchor is not None else None
        if href is None:
            continue
        output.append(f"{base}{href}")
    return output


def get_authors(url: str) -> list:
    """Extract author records from an article page.

    The ``<li>`` items of ``<section id="additionalinformation">`` are read in
    document order: an item containing ``<strong>`` supplies the name (the
    item's full text), an item containing ``Email:`` supplies the e-mail and
    an item containing ``Affiliation:`` supplies the address (the text after
    the first colon in both cases).

    Args:
        url (str): Address of the article page.

    Returns:
        list: Dicts with the keys ``"Name"``, ``"Email"`` and ``"Address"``,
        one per author for whom all three fields were found. Empty when the
        page has no ``additionalinformation`` section.
    """
    with requests.Session() as browser:
        data = browser.get(url)

    full_page = BeautifulSoup(data.text, "lxml")
    details = full_page.find("section", {"id": "additionalinformation"})
    if details is None:
        return []

    output = []
    author_name = email = address = None
    for item in details.find_all("li"):
        text = item.text
        if item.find("strong"):
            author_name = text
        elif "Email:" in text:
            email = text.split(":", 1)[1].strip()
        elif "Affiliation:" in text:
            address = text.split(":", 1)[1].strip()

        # Emit the record as soon as it is complete. Waiting for the next
        # <li> to trigger the append loses an author whose last field is in
        # the final <li> of the section.
        if author_name is not None and email is not None and address is not None:
            output.append(
                {
                    "Name": author_name,
                    "Email": email,
                    "Address": address,
                }
            )
            author_name = email = address = None

    # NOTE(review): fields of an author that never becomes complete (e.g. no
    # e-mail listed) are kept and can be combined with the next author's
    # fields. Left as-is because the fix depends on the page's field order —
    # confirm against real pages.
    return output