pryanshusharma committed on
Commit
a69569c
·
verified ·
1 Parent(s): c5e1cb7

Update degruyterscrapper.py

Browse files
Files changed (1) hide show
  1. degruyterscrapper.py +10 -6
degruyterscrapper.py CHANGED
@@ -29,14 +29,18 @@ def get_headers(data: str) -> dict:
29
 
30
  def getLinks(url: str) -> list:
31
  browser = requests.session()
32
- # url = f"https://www.degruyter.com/journal/key/fca/{volume}/{issue}/html"
33
  data = browser.get(url)
34
  fullPage = BeautifulSoup(data.text, "lxml")
35
- links = fullPage.find("div", {"id" : "issue-subject-group-researchpaper"})
36
- output = []
37
- for link in links.findAll("div", {"class" : "text-container"}):
38
- link = link.find("a", {"class" : "issueContentsArticleLink linkHoverDark d-inline-block"}).get("href")
39
- output.append(f"https://www.degruyter.com{link}")
 
 
 
 
 
40
  return output
41
 
42
  def get_author_details(url: str) -> list:
 
29
 
30
  def getLinks(url: str) -> list:
31
  browser = requests.session()
 
32
  data = browser.get(url)
33
  fullPage = BeautifulSoup(data.text, "lxml")
34
+ try:
35
+ links = fullPage.find("div", {"id" : "issue-subject-group-researchpaper"})
36
+ output = []
37
+ for link in links.findAll("div", {"class" : "text-container"}):
38
+ link = link.find("a", {"class" : "issueContentsArticleLink linkHoverDark d-inline-block"}).get("href")
39
+ output.append(f"https://www.degruyter.com{link}")
40
+ except:
41
+ links = fullPage.findAll("a", {"class" : "issueContentsArticleLink linkHoverDark d-inline-block"})
42
+ for link in links:
43
+ output.append(f"https://www.degruyter.com{link.get('href')}")
44
  return output
45
 
46
  def get_author_details(url: str) -> list: