Spaces:
Sleeping
Sleeping
File size: 2,137 Bytes
57273d8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import requests
from bs4 import BeautifulSoup
def get_headers(data: str) -> dict:
    """Convert a raw HTTP header string into a dict.

    Args:
        data (str): Headers as a multi-line string, e.g. copied from the
            Firefox devtools via "Copy Request Headers" /
            "Copy Response Headers".

    Returns:
        dict: Header names mapped to their values. Values equal to
            "none", "true" or "false" (case-insensitive) are converted to
            None, True and False respectively. Blank or malformed lines
            (no ":") are skipped.
    """
    out = {}
    for line in data.strip().splitlines():
        line = line.strip()
        # Skip blank lines and lines without a separator — the original
        # raised IndexError here.
        if not line or ":" not in line:
            continue
        # Split on the first ":" only; header values may contain colons.
        key, _, value = line.partition(":")
        key = key.strip()
        value = value.strip()
        lowered = value.lower()
        if lowered == "none":
            value = None
        elif lowered == "true":
            value = True
        elif lowered == "false":
            value = False
        out[key] = value
    return out
def getLinks(url: str) -> list:
    """Collect research-paper article links from a degruyter.com issue page.

    Args:
        url (str): URL of the journal issue's HTML contents page.

    Returns:
        list: Absolute article URLs (str). Empty when the research-paper
            section is missing from the page.
    """
    # requests.Session() is the documented API; lowercase session() is a
    # deprecated alias.
    browser = requests.Session()
    response = browser.get(url)
    page = BeautifulSoup(response.text, "lxml")
    section = page.find("div", {"id": "issue-subject-group-researchpaper"})
    if section is None:
        # No research-paper group on this page (layout change or empty
        # issue) — the original raised AttributeError here.
        return []
    output = []
    for container in section.findAll("div", {"class": "text-container"}):
        anchor = container.find(
            "a", {"class": "issueContentsArticleLink linkHoverDark d-inline-block"}
        )
        if anchor is None:
            # Container without the expected link markup — skip it.
            continue
        output.append(f"https://www.degruyter.com{anchor.get('href')}")
    return output
def get_author_details(url: str) -> list:
    """Scrape author details from a degruyter.com article page.

    Args:
        url (str): URL of the article page.

    Returns:
        list: One dict per author with keys "Name", "Email" and
            "Address". Authors without an email address are skipped;
            returns [] when the contributor list is not found.
    """
    browser = requests.Session()
    response = browser.get(url)
    page = BeautifulSoup(response.text, "lxml")
    contributors = page.find("ul", {"class": "contributors list-unstyled mb-2"})
    if contributors is None:
        # No contributor list on the page — the original raised
        # AttributeError here.
        return []
    output = []
    for author in contributors.findAll("span", {"class": "contributor"}):
        popdown = author.find("contributor-popdown")
        if popdown is None:
            continue
        # Tag.get() returns None for a missing attribute; "or ''" keeps
        # .strip() from raising AttributeError.
        email = (popdown.get("email") or "").strip()
        if not email:
            # Original behavior: authors without an email are skipped.
            continue
        output.append(
            {
                "Name": author.text.strip(),
                "Email": email,
                "Address": (popdown.get("affiliations") or "").strip(),
            }
        )
    return output
|