Upload 5 files
- Dockerfile +14 -0
- Final_Research_Dataset_2.csv +0 -0
- README.md +12 -12
- app.py +554 -0
- requirements.txt +12 -0
Dockerfile
ADDED
@@ -0,0 +1,14 @@
+# Use the official Python 3.12.6 image
+FROM python:3.12.6
+
+# Set the working directory to /
+WORKDIR /
+
+# Copy the current directory contents into the container at /
+COPY . .
+
+# Install the dependencies from requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /requirements.txt
+
+# Start the FastAPI app on port 7860, the default port expected by Spaces
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
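To try the image locally before pushing to Spaces, something like the following should work (a minimal sketch; the journal-finder tag is an arbitrary example name, and 7860 matches the port in the CMD line above):

docker build -t journal-finder .
docker run -p 7860:7860 journal-finder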
Final_Research_Dataset_2.csv
ADDED
The diff for this file is too large to render.
README.md
CHANGED
@@ -1,12 +1,12 @@
----
-title: Deploy.FastAPi.Application
-emoji: ⚡
-colorFrom: red
-colorTo: gray
-sdk: docker
-pinned: false
-license: apache-2.0
-short_description: Journal-Finder
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+---
+title: Deploy.FastAPi.Application
+emoji: ⚡
+colorFrom: red
+colorTo: gray
+sdk: docker
+pinned: false
+license: apache-2.0
+short_description: Journal-Finder
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,554 @@
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+import os
+from typing import List, Optional
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_community.document_loaders import CSVLoader
+from langchain_openai import ChatOpenAI
+from langchain_groq import ChatGroq
+from langchain_core.prompts import ChatPromptTemplate
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain.chains import create_retrieval_chain
+from langchain_google_genai import ChatGoogleGenerativeAI
+from dotenv import load_dotenv
+from fastapi.responses import PlainTextResponse
+from fastapi.middleware.cors import CORSMiddleware
+import json
+import re
+
+# Load environment variables from a local .env file, if present
+load_dotenv()
+# Re-export the API keys, skipping any that are unset (assigning None would raise a TypeError)
+for _name in ("GOOGLE_API_KEY", "GROQ_API_KEY", "OPENAI_API_KEY"):
+    _value = os.getenv(_name)
+    if _value:
+        os.environ[_name] = _value
+key = os.getenv("GOOGLE_API_KEY")
+
+# Define paths
+DB_FAISS_PATH = "bgi/db_faiss"
+
+# Initialize FastAPI app
+app = FastAPI()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allow all origins; restrict to the React app's URL in production
+    allow_credentials=True,
+    allow_methods=["*"],  # Allow all HTTP methods
+    allow_headers=["*"],  # Allow all headers
+)
+# Initialize globals populated at startup
+embeddings = None
+db = None
+
+# Load or create the FAISS vector store
+@app.on_event("startup")
+def load_vector_store():
+    global embeddings, db
+    if os.path.exists(DB_FAISS_PATH):
+        print("Loading existing FAISS vector store.")
+        embeddings = HuggingFaceEmbeddings(model_name='BAAI/bge-small-en', model_kwargs={'device': 'cpu'})
+        db = FAISS.load_local(DB_FAISS_PATH, embeddings, allow_dangerous_deserialization=True)
+        print("Vector store loaded.")
+    else:
+        print("Creating new FAISS vector store.")
+        loader = CSVLoader(file_path="Final_Research_Dataset_2.csv", encoding="utf-8", csv_args={'delimiter': ','})
+        data = loader.load()
+        embeddings = HuggingFaceEmbeddings(model_name='BAAI/bge-small-en', model_kwargs={'device': 'cpu'})
+        db = FAISS.from_documents(data, embeddings)
+        db.save_local(DB_FAISS_PATH)
+
+
+# Define request and response models
+class FilterCriteria(BaseModel):
+    impactFactor: float
+    firstDecisionTime: int
+    publisher: Optional[List[str]] = None  # Preferred publishers, or ["no preference"]
+    llmModel: str
+
+class QueryRequest(BaseModel):
+    abstract: str
+    criteria: FilterCriteria
+
+class Journal(BaseModel):
+    id: int
+    Name: str
+    JIF: float
+    Category: str
+    Keywords: str
+    Publisher: str
+    Decision_Time: int
+
+# Define the QueryResponse model with a list of journals
+class QueryResponse(BaseModel):
+    result: List[Journal]
+
+
+@app.get("/", response_class=PlainTextResponse)
+def read_root():
+    return "Welcome to the Journal Recommender API!"
+
+# List the available model back-ends
+@app.get("/models")
+def get_models():
+    return {"available_models": ["openai", "groq", "mixtral", "gemini-pro", "faiss"]}
+
+def fix_incomplete_json(raw_response):
+    """
+    Fixes incomplete JSON by adding missing braces or brackets.
+    Returns the parsed JSON, or None if it is not fixable.
+    """
+    # Drop a trailing comma left after the last object in a list
+    if raw_response.endswith("},"):
+        raw_response = raw_response[:-1]
+    if raw_response.count("{") > raw_response.count("}"):
+        raw_response += "}"
+    if raw_response.count("[") > raw_response.count("]"):
+        raw_response += "]"
+
+    # Try to load the fixed response
+    try:
+        json_response = json.loads(raw_response)
+        return json_response
+    except json.JSONDecodeError as e:
+        print(f"Error fixing JSON: {e}")
+        return None
+
+
+# Query endpoint
+@app.post("/query", response_model=QueryResponse)
+async def query(request: QueryRequest):
+    global db
+    if not db:
+        raise HTTPException(status_code=500, detail="Vector store not loaded.")
+
+    query_text = request.abstract
+    model_choice = request.criteria.llmModel
+    impact_factor = request.criteria.impactFactor
+    preferred_publisher = request.criteria.publisher
+    # Perform the similarity search
+    docs = db.similarity_search(query_text, k=5)
+    context = "\n".join([doc.page_content for doc in docs])
+
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                "Give a strict comma-separated list of exactly 15 keywords from the following text. "
+                "Do not include any bullet points, introductory text, or ending text. "
+                "No introductory or ending text, strictly. "  # Reinforcement; can be removed if results deteriorate
+                "Do not say anything like 'Here are the keywords.' "
+                "Only return the keywords, strictly comma-separated, without any additional words."
+            ),
+        },
+        {"role": "user", "content": query_text},
+    ]
+    llm = ChatGroq(model="llama3-8b-8192", temperature=0)
+    ai_msg = llm.invoke(messages)
+    # Strip any preamble the model may still prepend despite the instructions
+    keywords = ai_msg.content.split("keywords extracted from the text:\n")[-1].strip()
+    print("Keywords:", keywords)
+    if model_choice == "openai":
+        retriever = db.as_retriever()
+
+        # Set up system prompt
+        system_prompt = (
+            f"You are a specialized journal recommender that compares all journals in the database "
+            f"to the given research paper keywords and recommends journals based on JIF and publisher. "
+            f"From the provided context, recommend all journals that are suitable for a research paper with these keywords: {keywords}. "
+            f"Include **every** journal with a Journal Impact Factor (JIF) strictly greater than {impact_factor}, "
+            f"and only journals from publishers in this list: {preferred_publisher}. Report the JIF exactly as it appears in the context. "
+            f"Include both exact matches and related journals, and prioritize including **all relevant high-JIF journals without repetition**. "
+            f"Present the results in JSON format with the following fields: Journal Name, Publisher, JIF, Decision Time. "
+            f"Do not include any introductory or closing text. Return at most 30 results. "
+            "Context: {context}"
+        )
+
+        prompt = ChatPromptTemplate.from_messages(
+            [("system", system_prompt), ("user", "{input}")]
+        )
+
+        async def create_chain():
+            client = ChatOpenAI(model="gpt-4o")
+            return create_stuff_documents_chain(client, prompt)
+
+        # Create the question-answer chain using the async helper
+        question_answer_chain = await create_chain()
+        rag_chain = create_retrieval_chain(retriever, question_answer_chain)
+
+        # Invoke the RAG chain
+        answer = rag_chain.invoke(
+            {"input": f"Keywords: {keywords}, Minimum JIF: {impact_factor}, Publisher list: {preferred_publisher}"}
+        )
+
+        # Clean the raw model output (often wrapped in a ```json fence)
+        result = []
+        raw_response = answer['answer']
+        cleaned_response = raw_response.strip().removeprefix("```json").removesuffix("```").strip()
+
+        # Parse the cleaned JSON response
+        try:
+            json_response = json.loads(cleaned_response)
+
+            # Process the JSON data and create Journal objects
+            for i, journal in enumerate(json_response):
+                try:
+                    journal_name = journal.get('Journal Name')
+                    publisher = journal.get('Publisher')
+                    jif = float(journal.get('JIF', 0))  # Ensure valid float
+                    decision_time = journal.get('Decision Time', 0)  # Default to 0 if not available
+
+                    # Only include if JIF is greater than the minimum threshold
+                    if jif > impact_factor:
+                        result.append(
+                            Journal(
+                                id=i + 1,
+                                Name=journal_name,
+                                Publisher=publisher,
+                                JIF=jif,
+                                Category="",  # Set to empty if not available
+                                Keywords=keywords,  # Use the extracted keywords
+                                Decision_Time=decision_time,
+                            )
+                        )
+                except Exception as e:
+                    print(f"Error processing journal data: {e}")
+
+        except json.JSONDecodeError as e:
+            print(f"Error parsing JSON response: {e}")
+            result = []
+
+        # Return the result wrapped in a QueryResponse
+        return QueryResponse(result=result)
+    elif model_choice == "groq":
+        retriever = db.as_retriever()
+
+        # Set up system prompt
+        system_prompt = (
+            f"You are a specialized journal recommender that compares all journals in the database "
+            f"to the given research paper keywords and recommends journals based on JIF and publisher. "
+            f"From the provided context, recommend all journals that are suitable for a research paper with these keywords: {keywords}. "
+            f"Include **every** journal with a Journal Impact Factor (JIF) strictly greater than {impact_factor}, "
+            f"and only journals from publishers in this list: {preferred_publisher}. Report the JIF exactly as it appears in the context. "
+            f"Include both exact matches and related journals, and prioritize including **all relevant high-JIF journals without repetition**. "
+            f"Present the results in JSON format with the following fields: Journal Name, Publisher, JIF, Decision Time. "
+            f"Do not include any introductory or closing text. Do not return more than 10 results. "
+            "Context: {context}"
+        )
+
+        prompt = ChatPromptTemplate.from_messages(
+            [("system", system_prompt), ("user", "{input}")]
+        )
+
+        # Create the question-answer chain
+        async def create_chain():
+            client = ChatGroq(model="llama-3.2-3b-preview", temperature=0)
+            return create_stuff_documents_chain(client, prompt)
+
+        # Create the question-answer chain using the async helper
+        question_answer_chain = await create_chain()
+        rag_chain = create_retrieval_chain(retriever, question_answer_chain)
+
+        # Invoke the RAG chain
+        answer = rag_chain.invoke(
+            {"input": f"Keywords: {keywords}, Minimum JIF: {impact_factor}, Publisher list: {preferred_publisher}"}
+        )
+
+        # Clean the raw model output
+        result = []
+        raw_response = answer['answer']
+        cleaned_response = raw_response.strip().removeprefix("```json").removesuffix("```").strip()
+
+        # Parse the cleaned JSON response
+        try:
+            print("Cleaned Response:", cleaned_response)  # For debugging
+            json_response = json.loads(cleaned_response)
+
+            # Process the JSON data and create Journal objects
+            for i, journal in enumerate(json_response["journals"]):  # This branch expects a top-level 'journals' key
+                print("Journal entry:", journal)  # For debugging
+
+                try:
+                    if isinstance(journal, dict):  # Ensure journal is a dictionary
+                        journal_name = journal.get('Journal Name')
+                        publisher = journal.get('Publisher')
+                        jif = float(journal.get('JIF', 0))  # Ensure valid float
+                        decision_time = journal.get('Decision Time', 0)  # Default to 0 if not available
+
+                        # Only include if JIF is greater than the minimum threshold
+                        if jif > impact_factor:
+                            result.append(
+                                Journal(
+                                    id=i + 1,
+                                    Name=journal_name,
+                                    Publisher=publisher,
+                                    JIF=jif,
+                                    Category="",  # Set to empty if not available
+                                    Keywords=keywords,  # Use the extracted keywords
+                                    Decision_Time=decision_time,
+                                )
+                            )
+                    else:
+                        print(f"Skipping invalid journal entry: {journal}")
+                except Exception as e:
+                    print(f"Error processing journal data: {e}")
+
+        except json.JSONDecodeError as e:
+            print(f"Error parsing JSON response: {e}")
+            result = []
+
+        # Return the result wrapped in a QueryResponse
+        return QueryResponse(result=result)
+
+
+    elif model_choice == "mixtral":
+        retriever = db.as_retriever()
+
+        # Set up system prompt
+        system_prompt = (
+            f"You are a specialized journal recommender that compares all journals in the database "
+            f"to the given research paper keywords and recommends journals based on JIF and publisher. "
+            f"From the provided context, recommend all journals that are suitable for a research paper with these keywords: {keywords}. "
+            f"Include **every** journal with a Journal Impact Factor (JIF) strictly greater than {impact_factor}, "
+            f"and only journals from publishers in this list: {preferred_publisher}. Report the JIF exactly as it appears in the context. "
+            f"Include both exact matches and related journals, and prioritize including **all relevant high-JIF journals without repetition**. "
+            f"Present the results in JSON format with the following fields: Journal Name, Publisher, JIF, Decision Time. "
+            f"Do not include any introductory or closing text. Do not return more than 10 results. "
+            "Context: {context}"
+        )
+
+        prompt = ChatPromptTemplate.from_messages(
+            [("system", system_prompt), ("user", "{input}")]
+        )
+
+        # Create the question-answer chain
+        async def create_chain():
+            client = ChatGroq(model="mixtral-8x7b-32768", temperature=0)
+            return create_stuff_documents_chain(client, prompt)
+
+        # Create the question-answer chain using the async helper
+        question_answer_chain = await create_chain()
+        rag_chain = create_retrieval_chain(retriever, question_answer_chain)
+
+        # Invoke the RAG chain
+        answer = rag_chain.invoke(
+            {"input": f"Keywords: {keywords}, Minimum JIF: {impact_factor}, Publisher list: {preferred_publisher}"}
+        )
+
+        # Clean the raw model output
+        result = []
+        raw_response = answer['answer']
+        cleaned_response = raw_response.strip().removeprefix("```json").removesuffix("```").strip()
+
+        # Parse the cleaned JSON response
+        try:
+            print("Cleaned Response:", cleaned_response)  # For debugging
+            json_response = json.loads(cleaned_response)
+
+            # Process the JSON data and create Journal objects
+            for i, journal in enumerate(json_response):  # Iterate directly over the list
+                print("Journal entry:", journal)  # For debugging
+
+                try:
+                    if isinstance(journal, dict):  # Ensure journal is a dictionary
+                        journal_name = journal.get('Journal Name')
+                        publisher = journal.get('Publisher')
+                        jif = float(journal.get('JIF', 0))  # Ensure valid float
+                        decision_time = journal.get('Decision Time', 0)  # Default to 0 if not available
+
+                        # Only include if JIF is greater than the minimum threshold
+                        if jif > impact_factor:
+                            result.append(
+                                Journal(
+                                    id=i + 1,
+                                    Name=journal_name,
+                                    Publisher=publisher,
+                                    JIF=jif,
+                                    Category="",  # Set to empty if not available
+                                    Keywords=keywords,  # Use the extracted keywords
+                                    Decision_Time=decision_time,
+                                )
+                            )
+                    else:
+                        print(f"Skipping invalid journal entry: {journal}")
+                except Exception as e:
+                    print(f"Error processing journal data: {e}")
+
+        except json.JSONDecodeError as e:
+            print(f"Error parsing JSON response: {e}")
+            result = []
+
+        # Return the result wrapped in a QueryResponse
+        return QueryResponse(result=result)
+
+    elif model_choice == "gemini-pro":
+        print("Using Gemini-Pro model")
+        retriever = db.as_retriever()
+
+        # Set up system prompt
+        system_prompt = (
+            f"You are a specialized journal recommender that compares all journals in the database "
+            f"to the given research paper keywords and recommends journals based on JIF and publisher. "
+            f"From the provided context, recommend all journals that are suitable for a research paper with these keywords: {keywords}. "
+            f"Include **every** journal with a Journal Impact Factor (JIF) strictly greater than {impact_factor}, "
+            f"and only journals from publishers in this list: {preferred_publisher}. Report the JIF exactly as it appears in the context. "
+            f"Include both exact matches and related journals, and prioritize including **all relevant high-JIF journals without repetition**. "
+            f"Present the results in JSON format with the following fields: Journal Name, Publisher, JIF, Decision Time. "
+            f"Do not include any introductory or closing text. "
+            "Context: {context}"
+        )
+
+        prompt = ChatPromptTemplate.from_messages(
+            [("system", system_prompt), ("user", "{input}")]
+        )
+
+        async def create_chain():
+            client = ChatGoogleGenerativeAI(
+                model="gemini-pro",
+                google_api_key=key,
+                convert_system_message_to_human=True,
+            )
+            return create_stuff_documents_chain(client, prompt)
+
+        # Create the question-answer chain using the async helper
+        question_answer_chain = await create_chain()
+        rag_chain = create_retrieval_chain(retriever, question_answer_chain)
+
+        # Invoke the RAG chain
+        answer = rag_chain.invoke(
+            {"input": f"Keywords: {keywords}, Minimum JIF: {impact_factor}, Publisher list: {preferred_publisher}"}
+        )
+
+        # Clean the raw model output
+        result = []
+        raw_response = answer['answer']
+        cleaned_response = raw_response.strip().removeprefix("```json").removesuffix("```").strip()
+
+        # Parse the cleaned JSON response
+        try:
+            json_response = json.loads(cleaned_response)
+
+            # Process the JSON data and create Journal objects
+            for i, journal in enumerate(json_response):
+                try:
+                    journal_name = journal.get('Journal Name')
+                    publisher = journal.get('Publisher')
+                    jif = float(journal.get('JIF', 0))  # Ensure valid float
+                    decision_time = journal.get('Decision Time', 0)  # Default to 0 if not available
+
+                    # Only include if JIF is greater than the minimum threshold
+                    if jif > impact_factor:
+                        result.append(
+                            Journal(
+                                id=i + 1,
+                                Name=journal_name,
+                                Publisher=publisher,
+                                JIF=jif,
+                                Category="",  # Set to empty if not available
+                                Keywords=keywords,  # Use the extracted keywords
+                                Decision_Time=decision_time,
+                            )
+                        )
+                except Exception as e:
+                    print(f"Error processing journal data: {e}")
+
+        except json.JSONDecodeError as e:
+            print(f"Error parsing JSON response: {e}")
+            result = []
+
+        # Return the result wrapped in a QueryResponse
+        return QueryResponse(result=result)
+    elif model_choice == "faiss":
+        embeddings = HuggingFaceEmbeddings(
+            model_name="BAAI/bge-small-en", model_kwargs={"device": "cpu"}
+        )
+        jif = impact_factor  # Minimum JIF value for filtering
+        publisher = preferred_publisher  # Preferred publisher list, or ["no preference"]
+
+        # Load the FAISS index from local storage
+        db1 = FAISS.load_local(DB_FAISS_PATH, embeddings, allow_dangerous_deserialization=True)
+
+        # Embed the query
+        query_embedding = embeddings.embed_query(keywords)
+
+        # Perform similarity search with FAISS (retrieve top 20 results)
+        results = db1.similarity_search_by_vector(query_embedding, k=20)
+
+        # Prepare the context for processing results
+        context = "\n\n".join(doc.page_content for doc in results)
+
+        # Publisher preference; currently computed but not applied in the filter below
+        min_jif = jif
+        valid_publishers = publisher if publisher != ["no preference"] else None
+
+        # Split the output based on each entry starting with 'Name: '
+        entries = re.split(r"\n(?=Name:)", context.strip())
+
+        # Initialize an empty list to hold the Journal models
+        journal_list = []
+
+        # Process each entry
+        for entry in entries:
+            # Use regex to capture the individual fields
+            name = re.search(r"Name: (.+)", entry)
+            jif_match = re.search(r"JIF: (.+)", entry)
+            category = re.search(r"Category: (.+)", entry)
+            keywords_match = re.search(r"Keywords: (.+)", entry)
+            publisher_match = re.search(r"Publisher: (.+)", entry)
+            # 'Decsion Time' matches the field spelling used in the CSV rows
+            first_decision_match = re.search(r"Decsion Time: (.+)", entry)
+
+            if jif_match:
+                # Extract values from regex matches
+                name_value = name.group(1).strip()
+                jif_value = float(jif_match.group(1).strip())
+                category_value = category.group(1).strip()
+                keywords_value = keywords_match.group(1).strip()
+                publisher_value = publisher_match.group(1).strip()
+                decision_time = first_decision_match.group(1).strip()
+                # Filter on the minimum JIF (publisher preference is not applied here)
+                if jif_value >= min_jif:
+                    # Create the Journal model instance
+                    journal = Journal(
+                        id=len(journal_list) + 1,  # Incrementing ID for each journal
+                        Name=name_value,
+                        JIF=jif_value,
+                        Category=category_value,
+                        Keywords=keywords_value,
+                        Publisher=publisher_value,
+                        Decision_Time=decision_time,
+                    )
+
+                    # Add the journal to the list
+                    journal_list.append(journal)
+
+        # Return the list of journals wrapped in a QueryResponse
+        return QueryResponse(result=journal_list)
+    else:
+        raise HTTPException(status_code=400, detail="Invalid model choice.")
+
+# Run the app with Uvicorn
+# Command: uvicorn app:app --reload
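Once the app is running, a request to the /query endpoint looks roughly like this (a hedged sketch: the abstract and criteria values are illustrative, the host assumes a local run on the port set in the Dockerfile, and the body shape follows the QueryRequest model above):

curl -X POST http://localhost:7860/query \
  -H "Content-Type: application/json" \
  -d '{"abstract": "A study of transformer models for biomedical text mining...",
       "criteria": {"impactFactor": 3.0, "firstDecisionTime": 30,
                    "publisher": ["no preference"], "llmModel": "faiss"}}'

The response is a QueryResponse object, i.e. {"result": [...]} with one entry per matching journal.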
requirements.txt
ADDED
@@ -0,0 +1,12 @@
+fastapi
+uvicorn
+pydantic
+python-dotenv
+langchain-community
+langchain-openai
+langchain-google-genai
+langchain-core
+langchain-groq
+faiss-cpu
+numpy
+sentence-transformers
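Inside the container these are installed by the RUN step in the Dockerfile; for a local run the equivalent is pip install -r requirements.txt. One caveat: app.py also imports from the langchain package itself (langchain.chains), which is not listed here and is not guaranteed to be pulled in transitively, so it may need to be installed explicitly:

pip install -r requirements.txt
pip install langchain  # not in requirements.txt; needed for the langchain.chains imports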