Spaces:

pryanshusharma
/

PrmScrp

Sleeping

File size: 2,351 Bytes

57273d8

import requests
from bs4 import BeautifulSoup
from sheets import ExcelAutomator

def get_headers(data: str) -> dict:
    """Parse a block of raw HTTP header lines into a dict.

    Each line is split on its first colon into a name and a value, and both
    are stripped of surrounding whitespace. Values spelling ``none``,
    ``true`` or ``false`` (case-insensitive) are converted to ``None``,
    ``True`` and ``False``; every other value is kept as a string.

    Blank lines and lines that contain no colon (for example a
    ``GET / HTTP/1.1`` request line) are skipped rather than raising
    ``IndexError``.

    Args:
        data (str): Headers as text, one ``Name: value`` pair per line
            (e.g. pasted from the browser's "copy request headers" action).

    Returns:
        dict: Header names mapped to their parsed values. When a name
            appears more than once, the last occurrence wins.
    """
    # Textual spellings that are turned into real Python singletons.
    literals = {"none": None, "true": True, "false": False}

    out = {}
    for line in data.strip().split("\n"):
        # partition() splits on the first colon only, so values that contain
        # colons themselves (URLs, timestamps) stay intact.
        name, sep, raw_value = line.partition(":")
        if not sep:
            # No colon at all: blank line or a non-header line.
            continue
        value = raw_value.strip()
        out[name.strip()] = literals.get(value.lower(), value)
    return out

def getlinks(url: str) -> list:
    """Collect the article links listed on a journal issue page.

    Downloads ``url``, locates the ``<article class="contentList">`` element
    and, for every ``<dl>`` inside it, takes the ``href`` of the first
    ``<a>`` found in its first ``<dt>``. Each href is appended to the part of
    ``url`` that precedes ``home.html``.

    Args:
        url (str): Address of the issue page, e.g.
            ``https://www.ams.org/journals/jams/{year}-{volume}-{issue}/home.html?active=allissues``.

    Returns:
        list: Article URLs as strings, in page order. Empty when the page
            has no ``contentList`` article. Entries lacking a ``<dt>``,
            an ``<a>`` or an ``href`` are skipped.
    """
    # The context manager closes the session's pooled connections; the
    # response body is already fully read by the time get() returns.
    with requests.session() as browser:
        data = browser.get(url)

    fullPage = BeautifulSoup(data.text, "lxml")
    article = fullPage.find("article", {"class": "contentList"})
    if article is None:
        return []

    # Hrefs are joined onto everything before "home.html" in the page URL.
    base = url.split("home.html", 1)[0]

    output = []
    for entry in article.find_all("dl"):
        title = entry.find("dt")
        anchor = title.find("a") if title is not None else None
        href = anchor.get("href") if anchor is not None else None
        if href is None:
            # Nothing to link to; avoid emitting a bogus "...None" URL.
            continue
        output.append(f"{base}{href}")
    return output

def get_authors(url: str) -> list:
    """Extract author name, email and affiliation records from an article page.

    Downloads ``url`` and walks every ``<li>`` inside
    ``<section id="additionalinformation">`` in document order:

    * an item containing a ``<strong>`` tag supplies the author name
      (the item's full text, unstripped);
    * an item whose text contains ``"Email:"`` supplies the email
      (text after the first colon, stripped);
    * an item whose text contains ``"Affiliation:"`` supplies the address
      (text after the first colon, stripped).

    A record is emitted once all three fields have been seen, after which
    the fields are reset for the next author.

    Args:
        url (str): Address of the article page.

    Returns:
        list: Dicts with the keys ``"Name"``, ``"Email"`` and ``"Address"``,
            in page order. Empty when the section is missing or no author
            has all three fields.
    """
    # The context manager closes the session's pooled connections; the
    # response body is already fully read by the time get() returns.
    with requests.session() as browser:
        data = browser.get(url)

    fullPage = BeautifulSoup(data.text, "lxml")
    details = fullPage.find("section", {"id": "additionalinformation"})
    output = []
    if details is None:
        return output

    author_name = None
    email = None
    address = None

    for item in details.find_all("li"):
        # Emit the record gathered so far before looking at the next item.
        if author_name is not None and email is not None and address is not None:
            output.append(
                {
                    "Name": author_name,
                    "Email": email,
                    "Address": address,
                }
            )
            author_name = None
            email = None
            address = None

        text = item.text
        if item.find("strong") is not None:
            author_name = text
        elif "Email:" in text:
            email = text.split(":", 1)[1].strip()
        elif "Affiliation:" in text:
            address = text.split(":", 1)[1].strip()

    # The in-loop check only fires on the *following* item, so a record that
    # was completed by the very last <li> must be emitted here or it is lost.
    if author_name is not None and email is not None and address is not None:
        output.append(
            {
                "Name": author_name,
                "Email": email,
                "Address": address,
            }
        )
    # NOTE(review): fields are not reset when a new name appears, so an
    # author missing one field can leak values into the next record —
    # confirm against the real page structure before changing.
    return output