Spaces:
Runtime error
Runtime error
gamingflexer
committed on
Commit
·
424c175
1
Parent(s):
6f88ba1
Add database functions for inserting and fetching papers data
Browse files- src/db/db_functions.py +55 -0
src/db/db_functions.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import logging
|
3 |
+
import os
|
4 |
+
from supabase import create_client, Client
|
5 |
+
from fuzzywuzzy import fuzz
|
6 |
+
from config import SUPABASE_URL, SUPABASE_KEY
|
7 |
+
import json
|
8 |
+
import logging
|
9 |
+
|
10 |
+
# Connection settings for Supabase, taken from the project's config module.
url: str = SUPABASE_URL
key: str = SUPABASE_KEY

# Single module-level client; the database functions in this module go
# through it for every query.
supabase: Client = create_client(url, key)
|
13 |
+
|
14 |
+
def _join_list_field(entry, field):
    """Return the list stored under *field* as a comma-separated string.

    A missing key or an explicit null yields an empty string instead of
    raising ``TypeError`` from ``str.join``.
    """
    return ", ".join(entry.get(field) or [])


def _format_paper_entry(entry, author_name):
    """Convert one paper entry into the row dict sent to the database.

    *entry* may be a JSON-encoded string (it is decoded first) or an
    already-decoded mapping.
    """
    if isinstance(entry, (str, bytes, bytearray)):
        entry = json.loads(entry)
    return {
        'doi_no': entry.get('doi'),
        'title': entry.get('title'),
        'summary': entry.get('summary'),
        'authors': _join_list_field(entry, 'authors'),
        'year': entry.get('year'),
        'pdf_link': entry.get('pdf_link'),
        'references': _join_list_field(entry, 'references'),
        'categories': _join_list_field(entry, 'categories'),
        'comment': entry.get('comment'),
        'journal_ref': entry.get('journal_ref'),
        'source': entry.get('source'),
        'primary_category': entry.get('primary_category'),
        'published': entry.get('published'),
        'author_name': author_name,
    }


def insert_papers_data(data, author_name, table_name: str = 'papers'):
    """Insert paper entries for one author into a Supabase table.

    Args:
        data: Iterable of paper entries, each either a JSON string or an
            already-decoded dict. The keys read from each entry are
            ``doi``, ``title``, ``summary``, ``authors``, ``year``,
            ``pdf_link``, ``references``, ``categories``, ``comment``,
            ``journal_ref``, ``source``, ``primary_category`` and
            ``published``; absent keys are stored as ``None`` (or as an
            empty string for the list-valued fields).
        author_name: Value written to the ``author_name`` column of every
            inserted row.
        table_name: Name of the target table.

    Returns:
        None. When *data* is empty or ``None`` nothing is sent to the
        database and a notice is printed instead.
    """
    if not data:
        print("No data to insert")
        return
    formatted_data = [_format_paper_entry(entry, author_name) for entry in data]
    # All rows go out in a single request; the response is not needed here.
    supabase.table(table_name).insert(formatted_data).execute()
|
39 |
+
|
40 |
+
|
41 |
+
def get_correct_author_name(user_input_author, threshold: int = 60, table_name: str = 'papers'):
    """Resolve a user-typed author name to the closest stored author name.

    Every ``author_name`` value returned by the query is compared with
    *user_input_author* using ``fuzz.ratio``. Among the names scoring
    strictly above *threshold*, the best-scoring one is returned.

    Args:
        user_input_author: Author name as typed by the user.
        threshold: Similarity score a stored name must exceed to count as
            a match.
        table_name: Table whose ``author_name`` column is searched.

    Returns:
        The best-matching stored author name, or ``None`` (after printing
        a notice) when no stored name scores above *threshold*.
    """
    response = supabase.table(table_name).select('author_name').execute()
    # The same author can appear on many rows; compare each distinct name
    # once and skip rows that carry no author name.
    known_authors = {
        row['author_name'] for row in response.data if row.get('author_name')
    }
    scored = [
        (fuzz.ratio(user_input_author, author), author)
        for author in known_authors
    ]
    matches = [pair for pair in scored if pair[0] > threshold]
    if not matches:
        print(f"No similar author found for '{user_input_author}'")
        return None
    # Highest score wins. Comparing (score, name) tuples breaks ties on the
    # name, so the result does not depend on set iteration order.
    return max(matches)[1]
|
51 |
+
|
52 |
+
|
53 |
+
def fetch_papers_data(author_name, fields_to_query=("doi_no",), table_name: str = 'papers'):
    """Fetch selected columns of every row stored for one author.

    Args:
        author_name: Exact value to match against the ``author_name``
            column.
        fields_to_query: Column names to select. Either an iterable of
            names or a single, already comma-separated string.
        table_name: Name of the table to query.

    Returns:
        The ``data`` attribute of the query response, i.e. the rows that
        matched.
    """
    # A bare string would otherwise be joined character by character.
    if isinstance(fields_to_query, str):
        columns = fields_to_query
    else:
        columns = ",".join(fields_to_query)
    response = (
        supabase.table(table_name)
        .select(columns)
        .eq('author_name', author_name)
        .execute()
    )
    # Read the payload by name instead of tuple-unpacking the response
    # object, which depended on the number and order of its fields.
    return response.data
|