Spaces:
Sleeping
Sleeping
Update degruyterscrapper.py
Browse files- degruyterscrapper.py +10 -6
degruyterscrapper.py
CHANGED
@@ -29,14 +29,18 @@ def get_headers(data: str) -> dict:
|
|
29 |
|
30 |
def getLinks(url: str) -> list:
|
31 |
browser = requests.session()
|
32 |
-
# url = f"https://www.degruyter.com/journal/key/fca/{volume}/{issue}/html"
|
33 |
data = browser.get(url)
|
34 |
fullPage = BeautifulSoup(data.text, "lxml")
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
link
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
40 |
return output
|
41 |
|
42 |
def get_author_details(url: str) -> list:
|
|
|
29 |
|
30 |
def getLinks(url: str) -> list:
    """Collect absolute article URLs from a De Gruyter listing page.

    The page at ``url`` is fetched and parsed, then article anchors are
    located in two stages:

    1. Preferred: anchors inside the ``text-container`` divs of the
       ``issue-subject-group-researchpaper`` section.
    2. Fallback: every matching article anchor anywhere on the page. This is
       used when the section is absent or when any of its containers lacks
       the expected anchor.

    Args:
        url: Address of the page to scrape (presumably a journal issue
            table of contents -- confirm against callers).

    Returns:
        A list of ``https://www.degruyter.com``-prefixed article URLs, in
        document order. Empty when no matching anchors are found.
    """
    base_url = "https://www.degruyter.com"
    # BeautifulSoup compares a multi-word "class" string against the exact
    # attribute value, so this must be kept verbatim.
    anchor_attrs = {"class": "issueContentsArticleLink linkHoverDark d-inline-block"}

    # Use the session as a context manager so its connections are released.
    with requests.session() as browser:
        data = browser.get(url)
    fullPage = BeautifulSoup(data.text, "lxml")

    anchors = None
    section = fullPage.find("div", {"id": "issue-subject-group-researchpaper"})
    if section is not None:
        candidates = [
            container.find("a", anchor_attrs)
            for container in section.findAll("div", {"class": "text-container"})
        ]
        # Only trust the section when every container produced an anchor;
        # otherwise fall back and start from scratch so that no link is
        # reported twice.
        if all(anchor is not None for anchor in candidates):
            anchors = candidates

    if anchors is None:
        anchors = fullPage.findAll("a", anchor_attrs)

    # Skip anchors without an href instead of emitting "...degruyter.comNone".
    hrefs = (anchor.get("href") for anchor in anchors)
    return [f"{base_url}{href}" for href in hrefs if href]
|
45 |
|
46 |
def get_author_details(url: str) -> list:
|