Spaces:

pryanshusharma
/

PrmScrp

Sleeping

pryanshusharma committed on Aug 3, 2024

Commit

5cde1d2

verified ·

1 Parent(s): 2a1d161

Update springerscrapper.py

Files changed (1) hide show

springerscrapper.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import requests
 from bs4 import BeautifulSoup
 import json
-from sheets import ExcelAutomator
 def get_headers(data: str) -> dict:
     """This funciton helps to get the headers form the string to the dict
@@ -86,18 +85,21 @@ TE: trailers
     data = browser.get(url, headers=head)
     main_page = BeautifulSoup(data.text, "lxml")
     json_data = main_page.find("script", {"type" : "application/ld+json"}).text
     json_data = json.loads(json_data)
     authors = json_data['mainEntity']['author']
     output = []
     for author in authors:
         if 'email' in author:
             output.append(
                 {
                     "Name" : author['name'],
                     'Email' : author['email'],
-                    'Address' : ", ".join(item['address']['name'] for item in author['affiliation'] if 'address' in item and 'name' in item['address'])
                 }
             )
     return output

 import requests
 from bs4 import BeautifulSoup
 import json
 def get_headers(data: str) -> dict:
     """This funciton helps to get the headers form the string to the dict
     data = browser.get(url, headers=head)
     main_page = BeautifulSoup(data.text, "lxml")
     json_data = main_page.find("script", {"type" : "application/ld+json"}).text
     json_data = json.loads(json_data)
     authors = json_data['mainEntity']['author']
     output = []
     for author in authors:
         if 'email' in author:
+            try:
+                address = author['affiliation'][0]['address']['name']
+            except:
+                address = ""
             output.append(
                 {
                     "Name" : author['name'],
                     'Email' : author['email'],
+                    'Address' : address
                 }
             )
     return output