import requests
from bs4 import BeautifulSoup


def get_headers(data: str) -> dict:
    """Convert a copied request/response header string into a dict.

    Args:
        data (str): Raw header text, one ``Key: Value`` pair per line
            (e.g. copied from the Firefox network tab via
            "Copy Request Headers" / "Copy Response Headers").

    Returns:
        dict: Header names mapped to their values. The literal strings
        ``"none"``, ``"true"`` and ``"false"`` (case-insensitive) are
        converted to ``None``, ``True`` and ``False`` respectively.
    """
    out = {}
    for line in data.strip().split("\n"):
        # Skip blank or malformed lines that have no "Key: Value"
        # separator (the original code raised IndexError on these).
        if ":" not in line:
            continue
        key, _, value = line.partition(":")
        key = key.strip()
        value = value.strip()
        lowered = value.lower()
        if lowered == "none":
            value = None
        elif lowered == "true":
            value = True
        elif lowered == "false":
            value = False
        out[key] = value
    return out


def getLinks(url: str) -> list:
    """Collect absolute article URLs from a degruyter.com issue page.

    Args:
        url (str): URL of the issue page to scrape.

    Returns:
        list: Absolute article URLs (``https://www.degruyter.com/...``).

    Raises:
        AttributeError: If no article links are found on the page.
    """
    # Close the session when done instead of leaking the connection pool.
    with requests.Session() as browser:
        data = browser.get(url)
    fullPage = BeautifulSoup(data.text, "lxml")

    article_anchor_class = "issueContentsArticleLink linkHoverDark d-inline-block"
    output = []
    try:
        links = fullPage.find("div", {"id": "issue-subject-group-researchpaper"})
        # If the research-paper group is missing, `links` is None and the
        # attribute access below raises AttributeError -> fallback branch.
        for link in links.findAll("div", {"class": "text-container"}):
            href = link.find("a", {"class": article_anchor_class}).get("href")
            output.append(f"https://www.degruyter.com{href}")
    except AttributeError:
        # Fallback page layout: scan the whole page for article anchors.
        # Reset `output` so links collected before a mid-loop failure are
        # not duplicated by the rescan.
        output = []
        links = fullPage.findAll("a", {"class": article_anchor_class})
        if len(links) < 1:
            raise AttributeError("Not found")
        for link in links:
            output.append(f"https://www.degruyter.com{link.get('href')}")
    return output


def get_author_details(url: str) -> list:
    """Scrape author details from a degruyter.com article page.

    Authors without a listed email address are skipped.

    Args:
        url (str): URL of the article page.

    Returns:
        list: One dict per author with ``"Name"``, ``"Email"`` and
        ``"Address"`` keys.
    """
    with requests.Session() as browser:
        data = browser.get(url)
    page = BeautifulSoup(data.text, "lxml")
    contributors = page.find("ul", {"class": "contributors list-unstyled mb-2"})

    output = []
    for author in contributors.findAll("span", {"class": "contributor"}):
        popdown = author.find("contributor-popdown")
        email = popdown.get("email").strip()
        # Check the email first so authors that would be skipped anyway
        # cannot crash us on a missing "affiliations" attribute.
        if len(email) < 1:
            continue
        output.append(
            {
                "Name": author.text.strip(),
                "Email": email,
                "Address": popdown.get("affiliations").strip(),
            }
        )
    return output