pryanshusharma committed on
Commit
5cde1d2
·
verified ·
1 Parent(s): 2a1d161

Update springerscrapper.py

Browse files
Files changed (1) hide show
  1. springerscrapper.py +5 -3
springerscrapper.py CHANGED
@@ -1,7 +1,6 @@
1
  import requests
2
  from bs4 import BeautifulSoup
3
  import json
4
- from sheets import ExcelAutomator
5
 
6
  def get_headers(data: str) -> dict:
7
  """This function helps to get the headers from the string to the dict
@@ -86,18 +85,21 @@ TE: trailers
86
  data = browser.get(url, headers=head)
87
 
88
  main_page = BeautifulSoup(data.text, "lxml")
89
-
90
  json_data = main_page.find("script", {"type" : "application/ld+json"}).text
91
  json_data = json.loads(json_data)
92
  authors = json_data['mainEntity']['author']
93
  output = []
94
  for author in authors:
95
  if 'email' in author:
 
 
 
 
96
  output.append(
97
  {
98
  "Name" : author['name'],
99
  'Email' : author['email'],
100
- 'Address' : ", ".join(item['address']['name'] for item in author['affiliation'] if 'address' in item and 'name' in item['address'])
101
  }
102
  )
103
  return output
 
1
  import requests
2
  from bs4 import BeautifulSoup
3
  import json
 
4
 
5
  def get_headers(data: str) -> dict:
6
  """This function helps to get the headers from the string to the dict
 
85
  data = browser.get(url, headers=head)
86
 
87
  main_page = BeautifulSoup(data.text, "lxml")
 
88
  json_data = main_page.find("script", {"type" : "application/ld+json"}).text
89
  json_data = json.loads(json_data)
90
  authors = json_data['mainEntity']['author']
91
  output = []
92
  for author in authors:
93
  if 'email' in author:
94
+ try:
95
+ address = author['affiliation'][0]['address']['name']
96
+ except:
97
+ address = ""
98
  output.append(
99
  {
100
  "Name" : author['name'],
101
  'Email' : author['email'],
102
+ 'Address' : address
103
  }
104
  )
105
  return output