"""Scrape article links and author contact details from aimspress.com pages."""

import time
from typing import Dict, List, Optional

import requests
from bs4 import BeautifulSoup

from seleliumdriver import WebScraper

BASE_URL = "https://www.aimspress.com"
PAGE_LOAD_WAIT_SECONDS = 5
REQUEST_TIMEOUT_SECONDS = 30


def get_links(url: str) -> List[str]:
    """Return the absolute article URLs listed on the page at *url*.

    The page is loaded through ``WebScraper`` and its HTML is searched for
    the ``div.j-archive-article`` container; every title anchor inside it
    contributes one link.

    Args:
        url: Address of the listing page to open.

    Returns:
        A non-empty list of article URLs prefixed with ``BASE_URL``.

    Raises:
        ValueError: If the article container is missing or holds no links.
    """
    # NOTE(review): the meaning of the first argument is defined by
    # WebScraper; "huggingface" is kept exactly as in the original call.
    browser = WebScraper("huggingface", hidden=True)
    try:
        browser.get(url)
        # The pause is required before the page can be read; presumably the
        # article list is rendered client-side -- TODO confirm.
        time.sleep(PAGE_LOAD_WAIT_SECONDS)
        page_html = browser.get_html()
    finally:
        # Always release the browser, even when loading the page fails.
        browser.close_browser()

    page = BeautifulSoup(page_html, "lxml")
    container = page.find("div", {"class": "j-archive-article"})
    if container is None:
        raise ValueError("Invalid url found")

    links = []
    for anchor in container.find_all("a", {"class": "tit ng-binding ng-scope"}):
        href = anchor.get("href")
        # An anchor without an href cannot be turned into a link; skip it
        # instead of failing on the string concatenation.
        if href is not None:
            links.append(BASE_URL + href)

    if not links:
        raise ValueError("Invalid url found")
    return links


def save(dt):
    """Write ``str(dt)`` to ``data.html`` in the working directory.

    The file is overwritten on every call and encoded as UTF-8 so that
    non-ASCII page content does not depend on the platform default encoding.
    """
    with open("data.html", "w", encoding="utf-8") as op:
        op.write(str(dt))
    print("Done saved")


def _text_after_colon(value: Optional[str]) -> Optional[str]:
    """Return the stripped text after the first ":" in *value*, else None."""
    if not value or ":" not in value:
        return None
    return value.split(":", 1)[1].strip()


def _affiliation_address(author, affiliations: List[str]) -> Optional[str]:
    """Resolve an author's address through its ``com-num`` affiliation marker.

    The marker's ``data-tagval`` attribute holds one or more comma-separated
    1-based positions into *affiliations*.  Returns ``None`` when the marker
    is absent or any position is malformed or out of range.
    """
    marker = author.find("a", {"class": "com-num"})
    if marker is None:
        return None
    tag_value = marker.get("data-tagval")
    if not tag_value:
        return None
    try:
        indices = [int(part) - 1 for part in tag_value.split(",")]
    except ValueError:
        return None
    # Reject negative indices explicitly: a tag value of 0 would otherwise
    # wrap around and silently select the last affiliation.
    if any(not 0 <= index < len(affiliations) for index in indices):
        return None
    # When several affiliations are listed, the last one is used.
    return affiliations[indices[-1]]


def get_author_details(url: str) -> List[Dict[str, str]]:
    """Return name, e-mail and address for each author with an e-mail link.

    The article page at *url* is fetched over HTTP.  Authors are read from
    ``ul.article-author``; entries without a name anchor or a usable
    ``com-mail`` link are skipped.  The address comes from the
    ``ul.about-author`` affiliation list when the author's ``com-num`` marker
    resolves; otherwise the ``title`` attributes of the ``com-mail`` and
    ``com-user`` anchors are used as a fallback.

    Args:
        url: Address of the article page.

    Returns:
        A list of dicts with the keys ``"Name"``, ``"Email"`` and
        ``"Address"``; empty when the page has no author list.

    Raises:
        requests.RequestException: If the request fails, times out or the
            server answers with an HTTP error status.
    """
    with requests.Session() as session:
        response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        page = BeautifulSoup(response.text, "lxml")

    author_list = page.find("ul", {"class": "article-author clear"})
    if author_list is None:
        return []

    # The affiliation list may be absent; the title-attribute fallback below
    # then supplies the address.
    about_list = page.find("ul", {"class": "about-author"})
    affiliations = (
        [div.text.strip() for div in about_list.find_all("div", {"class": "lostOf"})]
        if about_list is not None
        else []
    )

    output = []
    for author in author_list.find_all("li"):
        name_tag = author.find("a", {"type": "authors.authorNameEn"})
        mail_tag = author.find("a", {"class": "com-mail"})
        if name_tag is None or mail_tag is None:
            continue
        mail = _text_after_colon(mail_tag.get("href"))
        if mail is None:
            continue

        address = _affiliation_address(author, affiliations)
        if address is None:
            # Fallback: contact data is carried in the anchors' title
            # attributes.  Keep the href-derived e-mail when the title does
            # not provide one.
            title_mail = _text_after_colon(mail_tag.get("title"))
            if title_mail is not None:
                mail = title_mail
            user_tag = author.find("a", {"class": "com-user"})
            if user_tag is not None:
                address = _text_after_colon(user_tag.get("title"))
            # NOTE(review): the "@" filter is applied to the title-derived
            # address only -- confirm this is the intended scope.
            if address is None or "@" in address:
                address = ""

        output.append(
            {
                "Name": name_tag.text.strip(),
                "Email": mail,
                "Address": address,
            }
        )
    return output