File size: 2,476 Bytes
57273d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a69569c
 
 
 
 
 
 
 
a73bd7d
 
a69569c
 
57273d8
 
 
 
 
 
 
 
 
 
 
f8800b5
 
57273d8
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import requests
from bs4 import BeautifulSoup

def get_headers(data: str) -> dict:
    """Convert a raw header string into a dictionary.

    Args:
        data (str): Headers as a raw string, one ``Key: Value`` pair per
            line (e.g. copied from the browser dev tools via
            "copy request headers" / "copy response headers").

    Returns:
        dict: Mapping of header names to values. Values reading
            "none"/"true"/"false" (case-insensitive) are converted to
            ``None``/``True``/``False`` respectively; everything else
            stays a stripped string.
    """
    headers = {}
    for line in data.strip().split("\n"):
        if not line.strip():
            # Skip blank lines so a stray newline in the paste doesn't crash.
            continue
        # partition splits on the FIRST colon only, so values containing
        # colons (e.g. URLs) are preserved intact.
        key, _, value = line.partition(":")
        key = key.strip()
        value = value.strip()

        lowered = value.lower()
        if lowered == "none":
            value = None
        elif lowered == "true":
            value = True
        elif lowered == "false":
            value = False

        headers[key] = value
    return headers

def getLinks(url: str) -> list:
    """Collect absolute article URLs from a degruyter.com issue page.

    Tries the research-paper group layout first; if that container (or its
    anchors) is missing, falls back to scanning the whole page for article
    link anchors.

    Args:
        url (str): URL of the issue page to scrape.

    Returns:
        list: Absolute article URLs (prefixed with https://www.degruyter.com).

    Raises:
        AttributeError: If neither strategy finds any article links.
    """
    browser = requests.session()
    data = browser.get(url)
    fullPage = BeautifulSoup(data.text, "lxml")
    # Must be defined BEFORE the try: the original created it inside the try,
    # so when find() returned None and findAll() raised immediately, the
    # except branch crashed with a NameError on output.append.
    output = []
    try:
        links = fullPage.find("div", {"id" : "issue-subject-group-researchpaper"})
        for link in links.findAll("div", {"class" : "text-container"}):
            link = link.find("a", {"class" : "issueContentsArticleLink linkHoverDark d-inline-block"}).get("href")
            output.append(f"https://www.degruyter.com{link}")
    except AttributeError:
        # Narrowed from a bare except: only the "container/anchor missing"
        # case (NoneType has no findAll/get) should trigger the fallback.
        links = fullPage.findAll("a", {"class" : "issueContentsArticleLink linkHoverDark d-inline-block"})
        if len(links) < 1:
            raise AttributeError("Not found")
        for link in links:
            output.append(f"https://www.degruyter.com{link.get('href')}")
    return output

def get_author_details(url: str) -> list:
    """Scrape author details from a degruyter.com article page.

    Authors without a listed email address are skipped.

    Args:
        url (str): URL of the article page.

    Returns:
        list: One dict per author with "Name", "Email" and "Address" keys.
    """
    browser = requests.session()
    data = browser.get(url)
    page = BeautifulSoup(data.text, "lxml")
    authors = page.find("ul", {"class" : "contributors list-unstyled mb-2"})
    output = []
    for author in authors.findAll("span", {"class" : "contributor"}):
        # Hoisted: the original re-ran this find() for each attribute.
        popdown = author.find("contributor-popdown")
        # .get() returns None when the attribute is absent; default to ""
        # so .strip() can't raise AttributeError.
        email = (popdown.get("email") or "").strip()
        if not email:
            continue
        author_address = (popdown.get("affiliations") or "").strip()
        output.append(
            {
                "Name" : author.text.strip(),
                "Email" : email,
                "Address" : author_address
            }
        )
    return output