Sarthak005 committed on
Commit 2981d40 · verified · 1 Parent(s): 9d4be14

Upload 5 files

Files changed (5)
  1. Dockerfile +14 -0
  2. Final_Research_Dataset_2.csv +0 -0
  3. README.md +12 -12
  4. app.py +554 -0
  5. requirements.txt +12 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
+ # Use the official Python 3.12.6 image
+ FROM python:3.12.6
+
+ # Copy the current directory contents into the container
+ COPY . .
+
+ # Set the working directory to /
+ WORKDIR /
+
+ # Install the dependencies listed in requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /requirements.txt
+
+ # Start the FastAPI app on port 7860, the default port expected by Spaces
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
Final_Research_Dataset_2.csv ADDED
The diff for this file is too large to render. See raw diff
 
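The dataset diff is not rendered, but judging from the field regexes in app.py's `faiss` branch, each row appears to expose at least the columns Name, JIF, Category, Keywords, Publisher, and Decsion Time (the last spelled as in the dataset). A quick local check of the header, as a sketch assuming the CSV sits next to the script as app.py expects:

```python
import csv

# Print the dataset's column names; same path app.py passes to CSVLoader
with open("Final_Research_Dataset_2.csv", encoding="utf-8") as f:
    print(next(csv.reader(f)))
```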
README.md CHANGED
@@ -1,12 +1,12 @@
- ---
- title: Deploy.FastAPi.Application
- emoji: ⚡
- colorFrom: red
- colorTo: gray
- sdk: docker
- pinned: false
- license: apache-2.0
- short_description: Journal-Finder
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+ ---
+ title: Deploy.FastAPi.Application
+ emoji: ⚡
+ colorFrom: red
+ colorTo: gray
+ sdk: docker
+ pinned: false
+ license: apache-2.0
+ short_description: Journal-Finder
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,554 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import os
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.document_loaders import CSVLoader
+ from langchain_openai import ChatOpenAI
+ from langchain_groq import ChatGroq
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain.chains.combine_documents import create_stuff_documents_chain
+ from langchain.chains import create_retrieval_chain
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from dotenv import load_dotenv
+ from fastapi.responses import PlainTextResponse
+ from fastapi.middleware.cors import CORSMiddleware
+ import json
+ import re
+
+ # Load environment variables (default to "" so a missing key doesn't raise a TypeError)
+ load_dotenv()
+ os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY", "")
+ os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY", "")
+ os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "")
+ key = os.getenv("GOOGLE_API_KEY")
+
+ # Define paths
+ DB_FAISS_PATH = "bgi/db_faiss"
+
+ # Initialize FastAPI app
+ app = FastAPI()
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],  # Allow any origin (e.g., the React app's URL)
+     allow_credentials=True,
+     allow_methods=["*"],  # Allow all HTTP methods
+     allow_headers=["*"],  # Allow all headers
+ )
+
+ # Initialize globals for the embeddings and vector store
+ embeddings = None
+ db = None
+
+ # Load or create the FAISS vector store on startup
+ @app.on_event("startup")
+ def load_vector_store():
+     global embeddings, db
+     if os.path.exists(DB_FAISS_PATH):
+         print("Loading existing FAISS vector store.")
+         embeddings = HuggingFaceEmbeddings(model_name='BAAI/bge-small-en', model_kwargs={'device': 'cpu'})
+         db = FAISS.load_local(DB_FAISS_PATH, embeddings, allow_dangerous_deserialization=True)
+         print("Vector store loaded.")
+     else:
+         print("Creating new FAISS vector store.")
+         loader = CSVLoader(file_path="Final_Research_Dataset_2.csv", encoding="utf-8", csv_args={'delimiter': ','})
+         data = loader.load()
+         embeddings = HuggingFaceEmbeddings(model_name='BAAI/bge-small-en', model_kwargs={'device': 'cpu'})
+         db = FAISS.from_documents(data, embeddings)
+         db.save_local(DB_FAISS_PATH)
+
+
+ # Define request and response models
+ from typing import List, Optional
+
+ class FilterCriteria(BaseModel):
+     impactFactor: float
+     firstDecisionTime: int
+     publisher: Optional[str]
+     llmModel: str
+
+ class QueryRequest(BaseModel):
+     abstract: str
+     criteria: FilterCriteria
+
+ class Journal(BaseModel):
+     id: int
+     Name: str
+     JIF: float
+     Category: str
+     Keywords: str
+     Publisher: str
+     Decision_Time: int
+
+ # Define the QueryResponse model with a list of journals
+ class QueryResponse(BaseModel):
+     result: List[Journal]
+
+
+ @app.get("/", response_class=PlainTextResponse)
+ def read_root():
+     return "Welcome to the Journal Recommender API!"
+
+ # List the available models
+ @app.get("/models")
+ def get_models():
+     return {"available_models": ["openai", "groq", "mixtral", "gemini-pro", "faiss"]}
+
+ def fix_incomplete_json(raw_response):
+     """
+     Fixes incomplete JSON by adding missing braces or brackets.
+     Returns the parsed JSON object, or None if the response cannot be fixed.
+     """
+     # Drop a trailing comma and balance any unclosed braces/brackets
+     if raw_response.endswith("},"):
+         raw_response = raw_response[:-1]  # Remove the trailing comma
+     if raw_response.count("{") > raw_response.count("}"):
+         raw_response += "}"
+     if raw_response.count("[") > raw_response.count("]"):
+         raw_response += "]"
+
+     # Try to parse the repaired response
+     try:
+         json_response = json.loads(raw_response)
+         return json_response
+     except json.JSONDecodeError as e:
+         print(f"Error fixing JSON: {e}")
+         return None
+
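As a quick illustration (not part of the commit), `fix_incomplete_json` repairs a truncated model response like this:

```python
# Hypothetical truncated LLM output: the closing "]" was cut off
raw = '[{"Journal Name": "Nature", "JIF": 49.9},'
print(fix_incomplete_json(raw))
# -> [{'Journal Name': 'Nature', 'JIF': 49.9}]
```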
+ # Query endpoint
+ @app.post("/query", response_model=QueryResponse)
+ async def query(request: QueryRequest):
+     global db
+     if not db:
+         raise HTTPException(status_code=500, detail="Vector store not loaded.")
+
+     query_text = request.abstract
+     model_choice = request.criteria.llmModel
+     impact_factor = request.criteria.impactFactor
+     preferred_publisher = request.criteria.publisher
+
+     # Perform the similarity search
+     docs = db.similarity_search(query_text, k=5)
+     context = "\n".join([doc.page_content for doc in docs])
+
+     # Extract exactly 15 keywords from the abstract with a small Groq-hosted model
+     messages = [
+         {
+             "role": "system",
+             "content": (
+                 "Give a strict comma-separated list of exactly 15 keywords from the following text. "
+                 "Do not include any bullet points, introductory text, or ending text. "
+                 "No introductory or ending text strictly. "  # Added for emphasis; can be removed if results deteriorate
+                 "Do not say anything like 'Here are the keywords.' "
+                 "Only return the keywords, strictly comma-separated, without any additional words."
+             ),
+         },
+         {"role": "user", "content": query_text},
+     ]
+     llm = ChatGroq(model="llama3-8b-8192", temperature=0)
+     ai_msg = llm.invoke(messages)
+     keywords = ai_msg.content.split("keywords extracted from the text:\n")[-1].strip()
+     print("Keywords:", keywords)
+     if model_choice == "openai":
+         retriever = db.as_retriever()
+
+         # Set up the system prompt
+         system_prompt = (
+             f"You are a specialized journal recommender that compares all journals in the database "
+             f"against the given research paper keywords and returns results based on JIF and publisher. "
+             f"From the provided context, recommend all journals that are suitable for a research paper with the keywords: {keywords}. "
+             f"Ensure that you include **every** journal with a Journal Impact Factor (JIF) strictly greater than {impact_factor}, "
+             f"and only journals from the publishers in this list: {preferred_publisher}. Report the JIF exactly as it appears in the context database. "
+             f"Make sure to include both exact matches and related journals, and prioritize including **all relevant high-JIF journals without repetition**. "
+             f"Present the results in JSON format with the following fields: Journal Name, Publisher, JIF, Decision Time. "
+             f"Ensure no introductory or ending text is included. Give at most 30 results. "
+             "Context: {context}"
+         )
+
+         prompt = ChatPromptTemplate.from_messages(
+             [("system", system_prompt), ("user", "{input}")]
+         )
+
+         async def create_chain():
+             client = ChatOpenAI(model="gpt-4o")
+             return create_stuff_documents_chain(client, prompt)
+
+         # Create the question-answer chain using the async helper
+         question_answer_chain = await create_chain()
+         rag_chain = create_retrieval_chain(retriever, question_answer_chain)
+
+         # Invoke the RAG chain
+         answer = rag_chain.invoke(
+             {"input": f"Keywords: {keywords}, Minimum JIF: {impact_factor}, Publisher list: {preferred_publisher}"}
+         )
+
+         # Strip any Markdown code fences from the raw answer
+         result = []
+         raw_response = answer['answer']
+         cleaned_response = raw_response.strip('```json\n').strip('```').strip()
+
+         # Parse the cleaned JSON response
+         try:
+             json_response = json.loads(cleaned_response)
+
+             # Process the JSON data and create Journal objects
+             for i, journal in enumerate(json_response):
+                 try:
+                     journal_name = journal.get('Journal Name')
+                     publisher = journal.get('Publisher')
+                     jif = float(journal.get('JIF', 0))  # Ensure a valid float
+                     decision_time = journal.get('Decision Time', 0)  # Default to 0 if not available
+
+                     # Only include journals above the minimum JIF threshold
+                     if jif > impact_factor:
+                         result.append(
+                             Journal(
+                                 id=i + 1,
+                                 Name=journal_name,
+                                 Publisher=publisher,
+                                 JIF=jif,
+                                 Category="",  # Empty if not available
+                                 Keywords=keywords,  # Use the extracted keywords
+                                 Decision_Time=decision_time,
+                             )
+                         )
+                 except Exception as e:
+                     print(f"Error processing journal data: {e}")
+
+         except json.JSONDecodeError as e:
+             print(f"Error parsing JSON response: {e}")
+             result = []
+
+         # Return the result wrapped in a QueryResponse
+         return QueryResponse(result=result)
+     elif model_choice == "groq":
+         retriever = db.as_retriever()
+
+         # Set up the system prompt
+         system_prompt = (
+             f"You are a specialized journal recommender that compares all journals in the database "
+             f"against the given research paper keywords and returns results based on JIF and publisher. "
+             f"From the provided context, recommend all journals that are suitable for a research paper with the keywords: {keywords}. "
+             f"Ensure that you include **every** journal with a Journal Impact Factor (JIF) strictly greater than {impact_factor}, "
+             f"and only journals from the publishers in this list: {preferred_publisher}. Report the JIF exactly as it appears in the context database. "
+             f"Make sure to include both exact matches and related journals, and prioritize including **all relevant high-JIF journals without repetition**. "
+             f"Present the results in JSON format with the following fields: Journal Name, Publisher, JIF, Decision Time. "
+             f"Ensure no introductory or ending text is included. Give at most 10 results. "
+             "Context: {context}"
+         )
+
+         prompt = ChatPromptTemplate.from_messages(
+             [("system", system_prompt), ("user", "{input}")]
+         )
+
+         # Create the question-answer chain
+         async def create_chain():
+             client = ChatGroq(model="llama-3.2-3b-preview", temperature=0)
+             return create_stuff_documents_chain(client, prompt)
+
+         question_answer_chain = await create_chain()
+         rag_chain = create_retrieval_chain(retriever, question_answer_chain)
+
+         # Invoke the RAG chain
+         answer = rag_chain.invoke(
+             {"input": f"Keywords: {keywords}, Minimum JIF: {impact_factor}, Publisher list: {preferred_publisher}"}
+         )
+
+         # Strip any Markdown code fences from the raw answer
+         result = []
+         raw_response = answer['answer']
+         cleaned_response = raw_response.strip('```json\n').strip('```').strip()
+
+         # Parse the cleaned JSON response
+         try:
+             print("Cleaned Response:", cleaned_response)  # For debugging
+             json_response = json.loads(cleaned_response)
+
+             # Process the JSON data and create Journal objects
+             for i, journal in enumerate(json_response["journals"]):  # This model nests results under a 'journals' key
+                 print("Journal entry:", journal)  # For debugging
+
+                 try:
+                     if isinstance(journal, dict):  # Ensure the journal entry is a dictionary
+                         journal_name = journal.get('Journal Name')
+                         publisher = journal.get('Publisher')
+                         jif = float(journal.get('JIF', 0))  # Ensure a valid float
+                         decision_time = journal.get('Decision Time', 0)  # Default to 0 if not available
+
+                         # Only include journals above the minimum JIF threshold
+                         if jif > impact_factor:
+                             result.append(
+                                 Journal(
+                                     id=i + 1,
+                                     Name=journal_name,
+                                     Publisher=publisher,
+                                     JIF=jif,
+                                     Category="",  # Empty if not available
+                                     Keywords=keywords,  # Use the extracted keywords
+                                     Decision_Time=decision_time,
+                                 )
+                             )
+                     else:
+                         print(f"Skipping invalid journal entry: {journal}")
+                 except Exception as e:
+                     print(f"Error processing journal data: {e}")
+
+         except json.JSONDecodeError as e:
+             print(f"Error parsing JSON response: {e}")
+             result = []
+
+         # Return the result wrapped in a QueryResponse
+         return QueryResponse(result=result)
+
+
+     elif model_choice == "mixtral":
+         retriever = db.as_retriever()
+
+         # Set up the system prompt
+         system_prompt = (
+             f"You are a specialized journal recommender that compares all journals in the database "
+             f"against the given research paper keywords and returns results based on JIF and publisher. "
+             f"From the provided context, recommend all journals that are suitable for a research paper with the keywords: {keywords}. "
+             f"Ensure that you include **every** journal with a Journal Impact Factor (JIF) strictly greater than {impact_factor}, "
+             f"and only journals from the publishers in this list: {preferred_publisher}. Report the JIF exactly as it appears in the context database. "
+             f"Make sure to include both exact matches and related journals, and prioritize including **all relevant high-JIF journals without repetition**. "
+             f"Present the results in JSON format with the following fields: Journal Name, Publisher, JIF, Decision Time. "
+             f"Ensure no introductory or ending text is included. Give at most 10 results. "
+             "Context: {context}"
+         )
+
+         prompt = ChatPromptTemplate.from_messages(
+             [("system", system_prompt), ("user", "{input}")]
+         )
+
+         # Create the question-answer chain
+         async def create_chain():
+             client = ChatGroq(model="mixtral-8x7b-32768", temperature=0)
+             return create_stuff_documents_chain(client, prompt)
+
+         question_answer_chain = await create_chain()
+         rag_chain = create_retrieval_chain(retriever, question_answer_chain)
+
+         # Invoke the RAG chain
+         answer = rag_chain.invoke(
+             {"input": f"Keywords: {keywords}, Minimum JIF: {impact_factor}, Publisher list: {preferred_publisher}"}
+         )
+
+         # Strip any Markdown code fences from the raw answer
+         result = []
+         raw_response = answer['answer']
+         cleaned_response = raw_response.strip('```json\n').strip('```').strip()
+
+         # Parse the cleaned JSON response
+         try:
+             print("Cleaned Response:", cleaned_response)  # For debugging
+             json_response = json.loads(cleaned_response)
+
+             # Process the JSON data and create Journal objects
+             for i, journal in enumerate(json_response):  # Iterate directly over the list
+                 print("Journal entry:", journal)  # For debugging
+
+                 try:
+                     if isinstance(journal, dict):  # Ensure the journal entry is a dictionary
+                         journal_name = journal.get('Journal Name')
+                         publisher = journal.get('Publisher')
+                         jif = float(journal.get('JIF', 0))  # Ensure a valid float
+                         decision_time = journal.get('Decision Time', 0)  # Default to 0 if not available
+
+                         # Only include journals above the minimum JIF threshold
+                         if jif > impact_factor:
+                             result.append(
+                                 Journal(
+                                     id=i + 1,
+                                     Name=journal_name,
+                                     Publisher=publisher,
+                                     JIF=jif,
+                                     Category="",  # Empty if not available
+                                     Keywords=keywords,  # Use the extracted keywords
+                                     Decision_Time=decision_time,
+                                 )
+                             )
+                     else:
+                         print(f"Skipping invalid journal entry: {journal}")
+                 except Exception as e:
+                     print(f"Error processing journal data: {e}")
+
+         except json.JSONDecodeError as e:
+             print(f"Error parsing JSON response: {e}")
+             result = []
+
+         # Return the result wrapped in a QueryResponse
+         return QueryResponse(result=result)
+
+     elif model_choice == "gemini-pro":
+         print("Using Gemini-Pro model")
+         retriever = db.as_retriever()
+
+         # Set up the system prompt (no cap on the number of results for this model)
+         system_prompt = (
+             f"You are a specialized journal recommender that compares all journals in the database "
+             f"against the given research paper keywords and returns results based on JIF and publisher. "
+             f"From the provided context, recommend all journals that are suitable for a research paper with the keywords: {keywords}. "
+             f"Ensure that you include **every** journal with a Journal Impact Factor (JIF) strictly greater than {impact_factor}, "
+             f"and only journals from the publishers in this list: {preferred_publisher}. Report the JIF exactly as it appears in the context database. "
+             f"Make sure to include both exact matches and related journals, and prioritize including **all relevant high-JIF journals without repetition**. "
+             f"Present the results in JSON format with the following fields: Journal Name, Publisher, JIF, Decision Time. "
+             f"Ensure no introductory or ending text is included. "
+             "Context: {context}"
+         )
+
+         prompt = ChatPromptTemplate.from_messages(
+             [("system", system_prompt), ("user", "{input}")]
+         )
+
+         async def create_chain():
+             client = ChatGoogleGenerativeAI(
+                 model="gemini-pro",
+                 google_api_key=key,
+                 convert_system_message_to_human=True,
+             )
+             return create_stuff_documents_chain(client, prompt)
+
+         # Create the question-answer chain using the async helper
+         question_answer_chain = await create_chain()
+         rag_chain = create_retrieval_chain(retriever, question_answer_chain)
+
+         # Invoke the RAG chain
+         answer = rag_chain.invoke(
+             {"input": f"Keywords: {keywords}, Minimum JIF: {impact_factor}, Publisher list: {preferred_publisher}"}
+         )
+
+         # Strip any Markdown code fences from the raw answer
+         result = []
+         raw_response = answer['answer']
+         cleaned_response = raw_response.strip('```json\n').strip('```').strip()
+
+         # Parse the cleaned JSON response
+         try:
+             json_response = json.loads(cleaned_response)
+
+             # Process the JSON data and create Journal objects
+             for i, journal in enumerate(json_response):
+                 try:
+                     journal_name = journal.get('Journal Name')
+                     publisher = journal.get('Publisher')
+                     jif = float(journal.get('JIF', 0))  # Ensure a valid float
+                     decision_time = journal.get('Decision Time', 0)  # Default to 0 if not available
+
+                     # Only include journals above the minimum JIF threshold
+                     if jif > impact_factor:
+                         result.append(
+                             Journal(
+                                 id=i + 1,
+                                 Name=journal_name,
+                                 Publisher=publisher,
+                                 JIF=jif,
+                                 Category="",  # Empty if not available
+                                 Keywords=keywords,  # Use the extracted keywords
+                                 Decision_Time=decision_time,
+                             )
+                         )
+                 except Exception as e:
+                     print(f"Error processing journal data: {e}")
+
+         except json.JSONDecodeError as e:
+             print(f"Error parsing JSON response: {e}")
+             result = []
+
+         # Return the result wrapped in a QueryResponse
+         return QueryResponse(result=result)
+     elif model_choice == "faiss":
+         embeddings = HuggingFaceEmbeddings(
+             model_name="BAAI/bge-small-en", model_kwargs={"device": "cpu"}
+         )
+         jif = impact_factor  # Minimum JIF value for filtering
+         publisher = preferred_publisher  # Preferred publisher, or "no preference"
+
+         # Load the FAISS index from local storage
+         db1 = FAISS.load_local(DB_FAISS_PATH, embeddings, allow_dangerous_deserialization=True)
+
+         # Embed the query
+         query_embedding = embeddings.embed_query(keywords)
+
+         # Perform similarity search with FAISS (retrieve top 20 results)
+         results = db1.similarity_search_by_vector(query_embedding, k=20)
+
+         # Prepare the context for processing results
+         context = "\n\n".join(doc.page_content for doc in results)
+
+         # Apply filters for JIF and publisher
+         min_jif = jif
+         valid_publishers = None if not publisher or publisher == "no preference" else publisher
+
+         # Split the output into entries, each starting with 'Name: '
+         entries = re.split(r"\n(?=Name:)", context.strip())
+
+         # Collect the matching Journal models
+         journal_list = []
+
+         # Process each entry
+         for entry in entries:
+             # Use regex to capture the individual fields
+             name = re.search(r"Name: (.+)", entry)
+             jif_match = re.search(r"JIF: (.+)", entry)
+             category = re.search(r"Category: (.+)", entry)
+             keywords_match = re.search(r"Keywords: (.+)", entry)
+             publisher_match = re.search(r"Publisher: (.+)", entry)
+             first_decision_match = re.search(r"Decsion Time: (.+)", entry)  # Matches the dataset's column spelling
+
+             if jif_match:
+                 # Extract values from the regex matches
+                 name_value = name.group(1).strip()
+                 jif_value = float(jif_match.group(1).strip())
+                 category_value = category.group(1).strip()
+                 keywords_value = keywords_match.group(1).strip()
+                 publisher_value = publisher_match.group(1).strip()
+                 decision_time = first_decision_match.group(1).strip()
+
+                 # Filter on the minimum JIF, and on publisher when a preference was given
+                 if jif_value >= min_jif and (valid_publishers is None or valid_publishers in publisher_value):
+                     # Create the Journal model instance
+                     journal = Journal(
+                         id=len(journal_list) + 1,  # Incrementing ID for each journal
+                         Name=name_value,
+                         JIF=jif_value,
+                         Category=category_value,
+                         Keywords=keywords_value,
+                         Publisher=publisher_value,
+                         Decision_Time=decision_time,
+                     )
+
+                     # Add the journal to the list
+                     journal_list.append(journal)
+
+         # Return the list of journals as the response
+         return {"result": [journal.dict() for journal in journal_list]}
+     else:
+         raise HTTPException(status_code=400, detail="Invalid model choice.")
+
+ # Run the app with Uvicorn
+ # Command: uvicorn app:app --reload
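For reference, a minimal client sketch (not part of the commit) that exercises the `/query` endpoint; it assumes the server is running locally on port 7860 and that the `requests` package is installed:

```python
import requests

# Hypothetical payload; field names follow the QueryRequest/FilterCriteria models above
payload = {
    "abstract": "We study transformer-based models for biomedical text mining.",
    "criteria": {
        "impactFactor": 3.0,
        "firstDecisionTime": 30,
        "publisher": "Elsevier",
        "llmModel": "faiss",  # one of: openai, groq, mixtral, gemini-pro, faiss
    },
}

resp = requests.post("http://localhost:7860/query", json=payload)
resp.raise_for_status()
for journal in resp.json()["result"]:
    print(journal["Name"], journal["JIF"], journal["Publisher"])
```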
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ fastapi
+ uvicorn
+ pydantic
+ python-dotenv
+ langchain  # needed for the langchain.chains imports in app.py
+ langchain-community
+ langchain-openai
+ langchain-google-genai
+ langchain-core
+ langchain-groq
+ faiss-cpu
+ numpy
+ sentence-transformers