from flask import Flask, render_template, request, jsonify, redirect, url_for, flash, session
from flask_login import LoginManager, UserMixin, login_user, login_required, logout_user, current_user
from flask_wtf.csrf import CSRFProtect
from flask_wtf import FlaskForm
from wtforms import StringField, PasswordField, SubmitField
from wtforms.validators import DataRequired
from werkzeug.security import generate_password_hash, check_password_hash
import arxiv
import requests
import PyPDF2
from io import BytesIO
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory
from langchain_community.embeddings import HuggingFaceEmbeddings
import numpy as np
from concurrent.futures import ThreadPoolExecutor, TimeoutError
from functools import lru_cache
import time
import os
from dotenv import load_dotenv
import json
from datetime import datetime
import firebase_admin
from firebase_admin import credentials, auth

# Load environment variables
load_dotenv()

app = Flask(__name__)
app.secret_key = os.getenv('FLASK_SECRET_KEY')

# Initialize CSRF protection
csrf = CSRFProtect()
csrf.init_app(app)
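# NOTE: CSRFProtect validates every POST request, including the JSON API
# endpoints below, so client-side fetch/AJAX calls are expected to send the
# token (rendered with {{ csrf_token() }}) in the X-CSRFToken header.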

# Initialize Flask-Login
login_manager = LoginManager()
login_manager.init_app(app)
login_manager.login_view = 'login'
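
# firebase_admin is imported above but never initialized, so any auth call
# would fail at runtime. A minimal guarded init sketch, assuming application
# default credentials (e.g. GOOGLE_APPLICATION_CREDENTIALS in the environment);
# _apps is the SDK's internal registry of initialized apps:
if not firebase_admin._apps:
    firebase_admin.initialize_app()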

# Initialize Groq
groq_api_key = os.getenv('GROQ_API_KEY')
llm = ChatGroq(
    temperature=0.3,
    groq_api_key=groq_api_key,
    model_name="qwen-2.5-32b"
)

# Initialize embeddings with proper cache directory
embeddings_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    cache_folder="/code/.cache/huggingface"
)
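# NOTE: the embedding model is downloaded on first use, so the cache folder
# above must be writable by the runtime user; the /code path and the port 7860
# default below suggest a Hugging Face Spaces Docker container.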

# Constants
MAX_CHUNKS = 50
MAX_RESPONSE_LENGTH = 6000
CACHE_DURATION = 3600  # 1 hour in seconds

# Form Classes
class LoginForm(FlaskForm):
    username = StringField('Username', validators=[DataRequired()])
    password = PasswordField('Password', validators=[DataRequired()])
    submit = SubmitField('Login')


class RegisterForm(FlaskForm):
    username = StringField('Username', validators=[DataRequired()])
    password = PasswordField('Password', validators=[DataRequired()])
    submit = SubmitField('Register')


# User class for Flask-Login
class User(UserMixin):
    def __init__(self, user_id, email):
        self.id = user_id
        self.email = email

def generate_analysis(chunks):
    analysis_prompts = {
        'executive_summary': """
## 🧠 Role
You are an AI assistant that explains research papers in a way that makes reading the original paper unnecessary. Your explanations should be **clear, engaging, and easy to understand**, even for someone who is not deeply familiar with the subject.

## 🎯 Goal
Given any research paper, provide a **simple breakdown** covering:

### 1️⃣ What problem does this paper solve?
- Explain the **issue the paper addresses**.
- Why is this problem **important**?
- What **challenges** existed before this research?

### 2️⃣ How does it solve the problem?
- Summarize the **key idea, method, or approach** used in the paper.
- If applicable, break it down into **steps or components**.
- Compare it to **previous solutions** and highlight what makes it better.

### 3️⃣ Why does this matter? (Real-world impact & applications)
- How can this research be **used in practice**?
- What **industries or fields** benefit from it?
- Does it improve **efficiency, accuracy, cost, or scalability**?

### 4️⃣ Explain with a simple analogy (if applicable)
- Use a **real-life example** to explain complex ideas.
- Keep it **relatable** (e.g., compare it to something like cooking, traveling, or streaming music).

### 5️⃣ Key findings & results
- Summarize the **main results** in simple terms.
- If possible, include **numbers, graphs, or comparisons** for clarity.

### 6️⃣ Limitations & Future Work
- Mention any **weaknesses** or areas for improvement.
- What are the **next steps** for research in this area?

### 7️⃣ Final Takeaway (One-liner summary)
- Provide a **quick summary** of the research in a **single sentence**.

---
## 🎨 Tone & Style
✔ **Simple & clear language** – Avoid jargon unless necessary.
✔ **Step-by-step explanations** – Organize information logically.
✔ **Engaging & structured** – Use bullet points, lists, or tables when needed.
✔ **Make it feel like a story** – Guide the reader smoothly from problem to solution.

---
## ⚡ How to Use This Prompt
1️⃣ Enter the **title, abstract, or full text** of any research paper.
2️⃣ AI will generate a **detailed explanation** that makes the paper easy to understand.
3️⃣ Use it for **blog posts, study guides, or an AI-powered research assistant**.

Remember: The output should be properly formatted in markdown while providing comprehensive coverage of the paper's content."""
    }
    analysis_results = {}
    for aspect, prompt in analysis_prompts.items():
        try:
            # Clean and join the chunks (drop non-ASCII extraction artifacts)
            context = "\n\n".join(
                chunk.encode('ascii', 'ignore').decode('ascii')
                for chunk in chunks[:3]
            )
            # Temperature is already fixed at 0.3 on the ChatGroq instance above
            response = llm.invoke(
                f"""Based on the following context from a research paper, {prompt}

Context:
{context}

Additional Instructions:
- Provide specific examples and evidence from the text
- Use clear, academic language
- Maintain objectivity
- Include relevant quotes or data points
- Structure your response logically
- Use markdown formatting for clarity

Please provide a clear and specific response."""
            )
            analysis_results[aspect] = response.content[:MAX_RESPONSE_LENGTH]
        except Exception as e:
            analysis_results[aspect] = f"Analysis failed: {str(e)}"
    return analysis_results

def process_pdf(pdf_url):
    try:
        print(f"Starting PDF processing for: {pdf_url}")
        response = requests.get(pdf_url, timeout=30)
        response.raise_for_status()
        pdf_file = BytesIO(response.content)
        pdf_reader = PyPDF2.PdfReader(pdf_file)

        # Clean and normalize the text; extract_text() can return None for
        # image-only pages, so fall back to an empty string
        text = " ".join(
            (page.extract_text() or "").encode('ascii', 'ignore').decode('ascii')
            for page in pdf_reader.pages
        )
        if not text.strip():
            return {'error': 'No text could be extracted from the PDF'}

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=2000,
            chunk_overlap=200,
            length_function=len,
            separators=["\n\n", "\n", " ", ""]
        )
        chunks = text_splitter.split_text(text)[:MAX_CHUNKS]
        analysis = generate_analysis(chunks)
        return {
            'success': True,
            'analysis': analysis
        }
    except Exception as e:
        return {'error': f"PDF processing failed: {str(e)}"}

@login_manager.user_loader
def load_user(user_id):
    if 'user_data' in session:
        user_data = session['user_data']
        return User(user_data['uid'], user_data['email'])
    return None

# User management functions (simple flat-file store)
def load_users():
    try:
        with open('users.json', 'r') as f:
            return json.load(f)
    except FileNotFoundError:
        return {}


def save_users(users):
    with open('users.json', 'w') as f:
        json.dump(users, f)

# Routes (paths assumed from the endpoint names used in url_for() and
# login_manager.login_view)
@app.route('/')
def index():
    return render_template('index.html')


@app.route('/login')
def login():
    if current_user.is_authenticated:
        return redirect(url_for('index'))
    return render_template('login.html')


@app.route('/register')
def register():
    if current_user.is_authenticated:
        print("User is already authenticated")
        return redirect(url_for('index'))
    return render_template('register.html')

@app.route('/verify-token', methods=['POST'])
def verify_token():
    try:
        data = request.json
        if not data or not data.get('uid') or not data.get('email'):
            return jsonify({'error': 'Missing required data'}), 400
        # NOTE: this trusts the uid/email sent by the client; in production the
        # Firebase ID token should be checked server-side with
        # auth.verify_id_token() before creating the session.
        # Store user data in session
        session['user_data'] = {
            'uid': data['uid'],
            'email': data['email']
        }
        # Create and log in the user
        user = User(data['uid'], data['email'])
        login_user(user)
        return jsonify({'success': True, 'redirect': url_for('index')})
    except Exception as e:
        print(f"Verification error: {str(e)}")
        return jsonify({'error': str(e)}), 500

@app.route('/logout')
def logout():
    logout_user()
    session.clear()
    return redirect(url_for('login'))

@app.route('/search', methods=['POST'])
def search():
    try:
        data = request.get_json()
        paper_name = data.get('paper_name')
        sort_by = data.get('sort_by', 'relevance')
        max_results = data.get('max_results', 10)
        if not paper_name:
            return jsonify({'error': 'No search query provided'}), 400

        # Map sort_by to arxiv.SortCriterion
        sort_mapping = {
            'relevance': arxiv.SortCriterion.Relevance,
            'lastUpdated': arxiv.SortCriterion.LastUpdatedDate,
            'submitted': arxiv.SortCriterion.SubmittedDate
        }
        sort_criterion = sort_mapping.get(sort_by, arxiv.SortCriterion.Relevance)

        # Perform the search (renamed to avoid shadowing this view function)
        arxiv_search = arxiv.Search(
            query=paper_name,
            max_results=max_results,
            sort_by=sort_criterion
        )
        results = []
        # Search.results() is deprecated in arxiv 2.x in favor of
        # arxiv.Client().results(...), but still works
        for paper in arxiv_search.results():
            results.append({
                'title': paper.title,
                'authors': ', '.join(author.name for author in paper.authors),
                'abstract': paper.summary,
                'pdf_link': paper.pdf_url,
                'arxiv_link': paper.entry_id,
                'published': paper.published.strftime('%Y-%m-%d'),
                'category': paper.primary_category,
                'comment': paper.comment if hasattr(paper, 'comment') else None,
                'doi': paper.doi if hasattr(paper, 'doi') else None
            })
        return jsonify(results)
    except Exception as e:
        print(f"Search error: {str(e)}")
        return jsonify({'error': f'Failed to search papers: {str(e)}'}), 500

@app.route('/perform-rag', methods=['POST'])
def perform_rag():
    try:
        pdf_url = request.json.get('pdf_url')
        if not pdf_url:
            return jsonify({'error': 'PDF URL is required'}), 400
        result = process_pdf(pdf_url)
        if 'error' in result:
            return jsonify({'error': result['error']}), 500
        return jsonify(result)
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/chat-with-paper', methods=['POST'])
def chat_with_paper():
    try:
        pdf_url = request.json.get('pdf_url')
        question = request.json.get('question')
        if not pdf_url or not question:
            return jsonify({'error': 'PDF URL and question are required'}), 400

        # Get the PDF text; extract_text() can return None for image-only pages
        response = requests.get(pdf_url, timeout=30)
        response.raise_for_status()
        pdf_file = BytesIO(response.content)
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        text = " ".join((page.extract_text() or "") for page in pdf_reader.pages)
        if not text.strip():
            return jsonify({'error': 'No text could be extracted from the PDF'}), 400

        # Create text chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=2000,
            chunk_overlap=200,
            length_function=len
        )
        chunks = text_splitter.split_text(text)[:MAX_CHUNKS]

        # Generate embeddings for the chunks and the question
        chunk_embeddings = embeddings_model.embed_documents(chunks)
        question_embedding = embeddings_model.embed_query(question)

        # Find the most relevant chunks using cosine similarity
        similarities = []
        for chunk_embedding in chunk_embeddings:
            similarity = np.dot(question_embedding, chunk_embedding) / (
                np.linalg.norm(question_embedding) * np.linalg.norm(chunk_embedding)
            )
            similarities.append(similarity)

        # Get the top 3 most relevant chunks, highest similarity first
        top_chunk_indices = np.argsort(similarities)[-3:][::-1]
        relevant_chunks = [chunks[i] for i in top_chunk_indices]

        # Construct the prompt with the relevant context
        context = "\n\n".join(relevant_chunks)
        prompt = f"""Based on the following relevant excerpts from the research paper, please answer this question: {question}

Context from paper:
{context}

Please provide a clear, specific, and accurate response based solely on the information provided in these excerpts. If the answer cannot be fully determined from the given context, please indicate this in your response."""

        # Generate the response using Groq
        response = llm.invoke(prompt)
        formatted_response = response.content.strip()

        # Add source citations
        source_info = "\n\nThis response is based on specific sections from the paper."
        return jsonify({
            'response': formatted_response + source_info,
            'relevance_scores': [float(similarities[i]) for i in top_chunk_indices]
        })
    except Exception as e:
        print(f"Chat error: {str(e)}")
        return jsonify({'error': f'Failed to process request: {str(e)}'}), 500

@app.route('/get-data')
def get_data():
    try:
        # Example: return the documents from the flat-file user store
        docs = load_users()
        data = [{doc_id: doc_data} for doc_id, doc_data in docs.items()]
        return jsonify(data), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)