from flask import Flask, render_template, request, jsonify, redirect, url_for, flash, session
from flask_login import LoginManager, UserMixin, login_user, login_required, logout_user, current_user
from flask_wtf.csrf import CSRFProtect
from flask_wtf import FlaskForm
from wtforms import StringField, PasswordField, SubmitField
from wtforms.validators import DataRequired
from werkzeug.security import generate_password_hash, check_password_hash
import arxiv
import requests
import PyPDF2
from io import BytesIO
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory
from langchain_community.embeddings import HuggingFaceEmbeddings
import numpy as np
from concurrent.futures import ThreadPoolExecutor, TimeoutError
from functools import lru_cache
import time
import os
from dotenv import load_dotenv
import json
from datetime import datetime
import firebase_admin
from firebase_admin import credentials, auth

# Load environment variables
load_dotenv()

app = Flask(__name__)
app.secret_key = os.getenv('FLASK_SECRET_KEY')

# Initialize CSRF protection
csrf = CSRFProtect()
csrf.init_app(app)

# Initialize Flask-Login
login_manager = LoginManager()
login_manager.init_app(app)
login_manager.login_view = 'login'

# Initialize Groq
groq_api_key = os.getenv('GROQ_API_KEY')
llm = ChatGroq(
    temperature=0.3,
    groq_api_key=groq_api_key,
    model_name="qwen-2.5-32b"
)

# Initialize embeddings with proper cache directory
embeddings_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    cache_folder="/code/.cache/huggingface"
)

# Constants
MAX_CHUNKS = 50
MAX_RESPONSE_LENGTH = 6000
CACHE_DURATION = 3600  # 1 hour in seconds


# Form Classes
class LoginForm(FlaskForm):
    username = StringField('Username', validators=[DataRequired()])
    password = PasswordField('Password', validators=[DataRequired()])
    submit = SubmitField('Login')


class RegisterForm(FlaskForm):
    username = StringField('Username', validators=[DataRequired()])
    password = PasswordField('Password', validators=[DataRequired()])
    submit = SubmitField('Register')


# User class for Flask-Login
class User(UserMixin):
    def __init__(self, user_id, email):
        self.id = user_id
        self.email = email


def generate_analysis(chunks):
    analysis_prompts = {
        'executive_summary': """
## 🧠 Role
You are an AI assistant that explains research papers in a way that makes reading the original paper unnecessary. Your explanations should be **clear, engaging, and easy to understand**, even for someone who is not deeply familiar with the subject.

## 🎯 Goal
Given any research paper, provide a **simple breakdown** covering:

### 1️⃣ What problem does this paper solve?
- Explain the **issue the paper addresses**.
- Why is this problem **important**?
- What **challenges** existed before this research?

### 2️⃣ How does it solve the problem?
- Summarize the **key idea, method, or approach** used in the paper.
- If applicable, break it down into **steps or components**.
- Compare it to **previous solutions** and highlight what makes it better.

### 3️⃣ Why does this matter? (Real-world impact & applications)
- How can this research be **used in practice**?
- What **industries or fields** benefit from it?
- Does it improve **efficiency, accuracy, cost, or scalability**?

### 4️⃣ Explain with a simple analogy (if applicable)
- Use a **real-life example** to explain complex ideas.
- Keep it **relatable** (e.g., compare it to something like cooking, traveling, or streaming music).
### 5️⃣ Key findings & results
- Summarize the **main results** in simple terms.
- If possible, include **numbers, graphs, or comparisons** for clarity.

### 6️⃣ Limitations & Future Work
- Mention any **weaknesses** or areas for improvement.
- What are the **next steps** for research in this area?

### 7️⃣ Final Takeaway (One-liner summary)
- Provide a **quick summary** of the research in a **single sentence**.

---

## 🎨 Tone & Style
✔ **Simple & clear language** – Avoid jargon unless necessary.
✔ **Step-by-step explanations** – Organize information logically.
✔ **Engaging & structured** – Use bullet points, lists, or tables when needed.
✔ **Make it feel like a story** – Guide the reader smoothly from problem to solution.

---

## ⚡ How to Use This Prompt
1️⃣ Enter the **title, abstract, or full text** of any research paper.
2️⃣ AI will generate a **detailed explanation** that makes the paper easy to understand.
3️⃣ Use it for **blog posts, study guides, or an AI-powered research assistant**.

Remember: The output should be properly formatted in markdown while providing comprehensive coverage of the paper's content."""
    }

    analysis_results = {}

    for aspect, prompt in analysis_prompts.items():
        try:
            # Clean and join the chunks
            context = "\n\n".join(
                chunk.encode('ascii', 'ignore').decode('ascii')
                for chunk in chunks[:3]
            )

            response = llm.invoke(
                f"""Based on the following context from a research paper, {prompt}

Context: {context}

Additional Instructions:
- Provide specific examples and evidence from the text
- Use clear, academic language
- Maintain objectivity
- Include relevant quotes or data points
- Structure your response logically
- Use markdown formatting for clarity

Please provide a clear and specific response.""",
                temperature=0.3
            )

            analysis_results[aspect] = response.content[:MAX_RESPONSE_LENGTH]
        except Exception as e:
            analysis_results[aspect] = f"Analysis failed: {str(e)}"

    return analysis_results


def process_pdf(pdf_url):
    try:
        print(f"Starting PDF processing for: {pdf_url}")
        response = requests.get(pdf_url, timeout=30)
        response.raise_for_status()

        pdf_file = BytesIO(response.content)
        pdf_reader = PyPDF2.PdfReader(pdf_file)

        # Clean and normalize the text
        text = " ".join(
            page.extract_text().encode('ascii', 'ignore').decode('ascii')
            for page in pdf_reader.pages
        )

        if not text.strip():
            return {'error': 'No text could be extracted from the PDF'}

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=2000,
            chunk_overlap=200,
            length_function=len,
            separators=["\n\n", "\n", " ", ""]
        )
        chunks = text_splitter.split_text(text)[:MAX_CHUNKS]

        analysis = generate_analysis(chunks)

        return {
            'success': True,
            'analysis': analysis
        }
    except Exception as e:
        return {'error': f"PDF processing failed: {str(e)}"}


@login_manager.user_loader
def load_user(user_id):
    if 'user_data' in session:
        user_data = session['user_data']
        return User(user_data['uid'], user_data['email'])
    return None


# User management functions
def load_users():
    try:
        with open('users.json', 'r') as f:
            return json.load(f)
    except FileNotFoundError:
        return {}


def save_users(users):
    with open('users.json', 'w') as f:
        json.dump(users, f)


# Routes
@app.route('/')
@login_required
def index():
    return render_template('index.html')


@app.route('/login', methods=['GET'])
def login():
    if current_user.is_authenticated:
        return redirect(url_for('index'))
    return render_template('login.html')


@app.route('/register', methods=['GET'])
def register():
    if current_user.is_authenticated:
        print("User is already authenticated")
        return redirect(url_for('index'))
    return render_template('register.html')
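
# The frontend is expected to POST the signed-in user's uid and email to the
# route below; it stores them in the session and logs the user in via Flask-Login.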
@app.route('/verify-token', methods=['POST'])
def verify_token():
    try:
        data = request.json
        if not data or not data.get('uid') or not data.get('email'):
            return jsonify({'error': 'Missing required data'}), 400

        # Store user data in session
        session['user_data'] = {
            'uid': data['uid'],
            'email': data['email']
        }

        # Create and login user
        user = User(data['uid'], data['email'])
        login_user(user)

        return jsonify({'success': True, 'redirect': url_for('index')})
    except Exception as e:
        print(f"Verification error: {str(e)}")  # Add logging
        return jsonify({'error': str(e)}), 500


@app.route('/logout')
@login_required
def logout():
    logout_user()
    session.clear()
    return redirect(url_for('login'))


@app.route('/search', methods=['POST'])
@login_required
def search():
    try:
        data = request.get_json()
        paper_name = data.get('paper_name')
        sort_by = data.get('sort_by', 'relevance')
        max_results = data.get('max_results', 10)

        if not paper_name:
            return jsonify({'error': 'No search query provided'}), 400

        # Map sort_by to arxiv.SortCriterion
        sort_mapping = {
            'relevance': arxiv.SortCriterion.Relevance,
            'lastUpdated': arxiv.SortCriterion.LastUpdatedDate,
            'submitted': arxiv.SortCriterion.SubmittedDate
        }
        sort_criterion = sort_mapping.get(sort_by, arxiv.SortCriterion.Relevance)

        # Perform the search
        search = arxiv.Search(
            query=paper_name,
            max_results=max_results,
            sort_by=sort_criterion
        )

        results = []
        for paper in search.results():
            results.append({
                'title': paper.title,
                'authors': ', '.join(author.name for author in paper.authors),
                'abstract': paper.summary,
                'pdf_link': paper.pdf_url,
                'arxiv_link': paper.entry_id,
                'published': paper.published.strftime('%Y-%m-%d'),
                'category': paper.primary_category,
                'comment': paper.comment if hasattr(paper, 'comment') else None,
                'doi': paper.doi if hasattr(paper, 'doi') else None
            })

        return jsonify(results)
    except Exception as e:
        print(f"Search error: {str(e)}")
        return jsonify({'error': f'Failed to search papers: {str(e)}'}), 500


@app.route('/perform-rag', methods=['POST'])
@login_required
def perform_rag():
    try:
        pdf_url = request.json.get('pdf_url')
        if not pdf_url:
            return jsonify({'error': 'PDF URL is required'}), 400

        result = process_pdf(pdf_url)
        if 'error' in result:
            return jsonify({'error': result['error']}), 500

        return jsonify(result)
    except Exception as e:
        return jsonify({'error': str(e)}), 500


@app.route('/chat-with-paper', methods=['POST'])
@login_required
def chat_with_paper():
    try:
        pdf_url = request.json.get('pdf_url')
        question = request.json.get('question')

        if not pdf_url or not question:
            return jsonify({'error': 'PDF URL and question are required'}), 400

        # Get PDF text and create chunks
        response = requests.get(pdf_url, timeout=30)
        response.raise_for_status()

        pdf_file = BytesIO(response.content)
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        text = " ".join(page.extract_text() for page in pdf_reader.pages)

        if not text.strip():
            return jsonify({'error': 'No text could be extracted from the PDF'})

        # Create text chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=2000,
            chunk_overlap=200,
            length_function=len
        )
        chunks = text_splitter.split_text(text)[:MAX_CHUNKS]

        # Generate embeddings for chunks
        chunk_embeddings = embeddings_model.embed_documents(chunks)

        # Generate embedding for the question
        question_embedding = embeddings_model.embed_query(question)

        # Find most relevant chunks using cosine similarity
        similarities = []
        for chunk_embedding in chunk_embeddings:
            similarity = np.dot(question_embedding, chunk_embedding) / (
                np.linalg.norm(question_embedding) * np.linalg.norm(chunk_embedding)
            )
            similarities.append(similarity)
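        # similarities now holds one cosine score per chunk (dot product divided
        # by the product of the vector norms); values closer to 1 indicate chunks
        # that are semantically closer to the question.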
        # Get top 3 most relevant chunks
        top_chunk_indices = np.argsort(similarities)[-3:][::-1]
        relevant_chunks = [chunks[i] for i in top_chunk_indices]

        # Construct prompt with relevant context
        context = "\n\n".join(relevant_chunks)
        prompt = f"""Based on the following relevant excerpts from the research paper, please answer this question: {question}

Context from paper:
{context}

Please provide a clear, specific, and accurate response based solely on the information provided in these excerpts. If the answer cannot be fully determined from the given context, please indicate this in your response."""

        # Generate response using Groq
        response = llm.invoke(prompt)

        # Format and return response
        formatted_response = response.content.strip()

        # Add source citations
        source_info = "\n\nThis response is based on specific sections from the paper."

        return jsonify({
            'response': formatted_response + source_info,
            'relevance_scores': [float(similarities[i]) for i in top_chunk_indices]
        })
    except Exception as e:
        print(f"Chat error: {str(e)}")
        return jsonify({'error': f'Failed to process request: {str(e)}'}), 500


@app.route('/api/data', methods=['GET'])
def get_data():
    try:
        # Example: Get documents from a collection
        docs = load_users()
        data = [{doc_id: doc_data} for doc_id, doc_data in docs.items()]
        return jsonify(data), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
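
# --- Illustrative usage sketch (not part of the application) ---
# A minimal example of exercising the unauthenticated /api/data endpoint,
# assuming the server is running locally on the port configured above; the
# host and the use of the `requests` library here are illustrative only.
#
#     import requests
#     resp = requests.get('http://localhost:7860/api/data', timeout=10)
#     print(resp.status_code, resp.json())
#
# The POST endpoints (/verify-token, /search, /perform-rag, /chat-with-paper)
# additionally need a logged-in session cookie and a CSRF token, since
# CSRFProtect is enabled on the app.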