Spaces:
Sleeping
Sleeping
File size: 2,476 Bytes
57273d8 a69569c a73bd7d a69569c 57273d8 f8800b5 57273d8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import requests
from bs4 import BeautifulSoup
def get_headers(data: str) -> dict:
    """Convert a raw block of HTTP headers into a dict.

    Every line of the form ``Name: value`` becomes one entry. Only the first
    colon on a line separates the name from the value, so a value may itself
    contain colons (for example ``Host: example.com:8080``). If a name occurs
    more than once, the last occurrence wins.

    Values equal to ``none``, ``true`` or ``false`` (compared
    case-insensitively) are converted to ``None``, ``True`` and ``False``;
    every other value is kept as a stripped string.

    Blank lines and lines that contain no colon (such as a leading
    ``GET / HTTP/1.1`` request line) are skipped instead of raising
    ``IndexError``.

    Args:
        data (str): The headers as text, one header per line, e.g. pasted
            from the browser's "copy request headers" / "copy response
            headers" action.

    Returns:
        dict: Mapping of header name to its parsed value.
    """
    # Textual spellings that are turned into real Python objects.
    literals = {"none": None, "true": True, "false": False}

    out = {}
    for line in data.strip().splitlines():
        key, separator, value = line.partition(":")
        if not separator:
            # No colon on this line, so there is no name/value pair to read.
            continue
        value = value.strip()
        out[key.strip()] = literals.get(value.lower(), value)
    return out
def getLinks(url: str) -> list:
    """Collect the article links found on an issue page.

    The page at ``url`` is downloaded and parsed. Links are first looked up
    inside the ``div`` whose id is ``issue-subject-group-researchpaper``: one
    article anchor is read from every ``text-container`` div in it. If that
    lookup fails (the group div is absent, or a container has no article
    anchor), every article anchor on the whole page is used instead.

    Args:
        url (str): Address of the page to read.

    Returns:
        list: Absolute URLs, each built by prefixing the anchor's ``href``
            with ``https://www.degruyter.com``.

    Raises:
        AttributeError: If the fallback finds no article anchor on the page.
    """
    link_class = "issueContentsArticleLink linkHoverDark d-inline-block"
    base_url = "https://www.degruyter.com"

    # The context manager closes the session's connections once the page has
    # been downloaded; the response body is already fully read by then.
    with requests.Session() as browser:
        data = browser.get(url)
    full_page = BeautifulSoup(data.text, "lxml")

    try:
        group = full_page.find("div", {"id": "issue-subject-group-researchpaper"})
        output = []
        # ``group`` is None when the div is missing, and ``anchor`` is None
        # when a container has no article link; both surface as
        # AttributeError and trigger the page-wide fallback below.
        for container in group.find_all("div", {"class": "text-container"}):
            anchor = container.find("a", {"class": link_class})
            output.append(f"{base_url}{anchor.get('href')}")
    except AttributeError:
        anchors = full_page.find_all("a", {"class": link_class})
        if not anchors:
            raise AttributeError("Not found")
        # Rebuild the list from scratch so links gathered before the failure
        # are not reported twice.
        output = [f"{base_url}{anchor.get('href')}" for anchor in anchors]
    return output
def get_author_details(url: str) -> list:
    """Read the contributors listed on an article page.

    The page at ``url`` is downloaded and parsed. Contributors are taken from
    the ``ul`` with class ``contributors list-unstyled mb-2``; each
    ``span.contributor`` inside it supplies the name (its text) and a
    ``contributor-popdown`` element whose ``email`` and ``affiliations``
    attributes supply the rest.

    Contributors without a usable e-mail address (no popdown element, no
    ``email`` attribute, or a blank one) are left out of the result. A missing
    ``affiliations`` attribute yields an empty address.

    Args:
        url (str): Address of the article page.

    Returns:
        list: One dict per contributor with the keys ``"Name"``, ``"Email"``
            and ``"Address"``.

    Raises:
        AttributeError: If the page has no contributors list.
    """
    # The context manager closes the session's connections once the page has
    # been downloaded; the response body is already fully read by then.
    with requests.Session() as browser:
        data = browser.get(url)
    page = BeautifulSoup(data.text, "lxml")

    contributors = page.find("ul", {"class": "contributors list-unstyled mb-2"})
    if contributors is None:
        # Same exception type the failed attribute access used to produce,
        # but with a message that says what is actually missing.
        raise AttributeError("Contributors list not found")

    output = []
    for author in contributors.find_all("span", {"class": "contributor"}):
        popdown = author.find("contributor-popdown")
        if popdown is None:
            continue
        email = (popdown.get("email") or "").strip()
        if not email:
            continue
        # NOTE: the affiliations string is stored in full; an earlier,
        # commented-out variant kept only the part before the first ";".
        address = (popdown.get("affiliations") or "").strip()
        output.append(
            {
                "Name": author.text.strip(),
                "Email": email,
                "Address": address,
            }
        )
    return output
|