"""Scrape AIMS-related websites (HTML + linked PDFs), index the text in a
Chroma vector store, and serve a streaming RAG chatbot via Gradio.

Pipeline (runs only when executed as a script):
    scrape -> clean -> chunk -> embed/index -> retrieve -> prompt -> stream LLM
"""

import os
import time  # retained from original file (currently unused)
from io import BytesIO
from typing import Iterator
from urllib.parse import urljoin, urlparse

import gradio as gr
import requests
from bs4 import BeautifulSoup
from PyPDF2 import PdfReader
from openai import OpenAI

# NOTE(review): several of these langchain imports are unused in this file
# (ChatGroq, ResponseSchema, StructuredOutputParser, StrOutputParser,
# RunnablePassthrough, ChatPromptTemplate); retained in case other tooling
# relies on them, but they are candidates for removal.
from langchain_groq import ChatGroq
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings

# OpenRouter API key, supplied through the V1 environment variable.
getmod = os.environ.get('V1')

# Embedding model used for both indexing and query-time retrieval.
embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")


def scrape_websites(base_urls):
    """Crawl each base URL plus its same-domain links; return {url: text}.

    HTML pages are cleaned to plain text; links ending in ``.pdf`` have
    their text extracted instead. Returns {} on any unexpected error.
    """
    try:
        visited_links = set()   # To avoid revisiting the same link
        content_by_url = {}     # Store content from each URL

        for base_url in base_urls:
            if not base_url.strip():
                continue  # Skip empty or invalid URLs

            print(f"Scraping base URL: {base_url}")
            html_content = fetch_page_content(base_url)
            if html_content:
                cleaned_content = clean_body_content(html_content)
                content_by_url[base_url] = cleaned_content
                visited_links.add(base_url)

                # Extract and process all internal links
                soup = BeautifulSoup(html_content, "html.parser")
                links = extract_internal_links(base_url, soup)
                for link in links:
                    if link not in visited_links:
                        print(f"Scraping link: {link}")
                        page_content = fetch_page_content(link)
                        if page_content:
                            cleaned_content = clean_body_content(page_content)
                            content_by_url[link] = cleaned_content
                            visited_links.add(link)

                        # If the link is a PDF file, extract its content
                        # (overwrites the HTML-cleaned text stored above).
                        if link.lower().endswith('.pdf'):
                            print(f"Extracting PDF content from: {link}")
                            pdf_content = extract_pdf_text(link)
                            if pdf_content:
                                content_by_url[link] = pdf_content

        return content_by_url
    except Exception as e:
        print(f"Error during scraping: {e}")
        return {}


def fetch_page_content(url):
    """Return the raw HTML body of *url*, or None on any request failure."""
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None


def extract_internal_links(base_url, soup):
    """Collect absolute same-domain links found in *soup*'s anchor tags."""
    links = set()
    for anchor in soup.find_all("a", href=True):
        href = anchor["href"]
        full_url = urljoin(base_url, href)
        if is_internal_link(base_url, full_url):
            links.add(full_url)
    return links


def is_internal_link(base_url, link_url):
    """True when *link_url* shares *base_url*'s network location (domain)."""
    base_netloc = urlparse(base_url).netloc
    link_netloc = urlparse(link_url).netloc
    return base_netloc == link_netloc


def extract_pdf_text(pdf_url):
    """Download a PDF and return its concatenated page text, or None."""
    try:
        response = requests.get(pdf_url)
        response.raise_for_status()
        with BytesIO(response.content) as file:
            reader = PdfReader(file)
            pdf_text = ""
            for page in reader.pages:
                # extract_text() may return None for image-only pages;
                # guard against TypeError on concatenation.
                pdf_text += page.extract_text() or ""
        return pdf_text if pdf_text else None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching PDF {pdf_url}: {e}")
        return None
    except Exception as e:
        print(f"Error reading PDF {pdf_url}: {e}")
        return None


def clean_body_content(html_content):
    """Strip <script>/<style> tags and collapse the page to trimmed text lines."""
    soup = BeautifulSoup(html_content, "html.parser")
    for script_or_style in soup(["script", "style"]):
        script_or_style.extract()
    cleaned_content = soup.get_text(separator="\n")
    cleaned_content = "\n".join(
        line.strip() for line in cleaned_content.splitlines() if line.strip()
    )
    return cleaned_content


def chunk_string(s, chunk_size=1000):
    """Split *s* into fixed-size character chunks (last one may be shorter)."""
    return [s[i:i + chunk_size] for i in range(0, len(s), chunk_size)]


# Prompt template: {context} comes from retrieval, {question} from the user.
template = ("""
You are a friendly and intelligent chatbot designed to assist users in a
conversational and human-like manner. Your goal is to provide accurate,
helpful, and engaging responses based on the provided context: {context}.
Follow these guidelines:

1. **Contextual Interaction**
   - Extract precise details from provided context: {context}
   - Respond directly to user's question: {question}

2. **Communication Guidelines**
   - Maintain warm, conversational tone
   - Use occasional emojis for engagement
   - Provide clear, concise information

3. **Response Strategies**
   - Greet users naturally (e.g., "Hello! \U0001F60A How can I help?")
   - Deliver only relevant information
   - Avoid generating content beyond context
   - Handle missing information transparently

4. **No Extra Content**
   - If no information matches user's request:
     * Respond politely: "I don't have that information at the moment. \U0001F60A"
     * Offer alternative assistance options
   - Strictly avoid generating unsupported content
   - Prevent information padding or speculation

5. **Specialized Handling**
   - Prioritize direct data extraction
   - Provide most relevant URL if link requested
   - Personalize responses using available interaction history

6. **Real-Time Awareness**
   - Acknowledge current context when appropriate
   - Stay focused on user's immediate needs

**Context:** {context}
**User's Question:** {question}
**Response Approach:** Precise, helpful, context-driven
""")


class OpenRouterLLM:
    """Thin streaming wrapper around the OpenRouter chat-completions API."""

    def __init__(self, api_key: str):
        try:
            self.client = OpenAI(
                base_url="https://openrouter.ai/api/v1",
                api_key=api_key,
            )
            self.headers = {
                "HTTP-Referer": "http://localhost:3000",
                "X-Title": "Local Development",
            }
        except Exception as e:
            print(f"Initialization error: {e}")
            raise

    def stream(self, prompt: str) -> Iterator[str]:
        """Yield response text deltas; on failure, yield one error string."""
        try:
            completion = self.client.chat.completions.create(
                extra_headers=self.headers,
                model="google/gemini-2.0-flash-thinking-exp:free",
                messages=[{"role": "user", "content": prompt}],
                stream=True,
            )
            for chunk in completion:
                if chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content
        except Exception as e:
            yield f"Streaming error: {str(e)}"


def create_rag_chain(retriever, template):
    """Build a callable: {"question": str} -> iterator of response chunks.

    Retrieval errors are surfaced as a single-item error stream rather
    than raising, so the UI always gets something to display.
    """
    rag_prompt = PromptTemplate.from_template(template)

    def stream_generator(input_dict):
        try:
            context = retriever.invoke(input_dict["question"])
            context_str = "\n".join([doc.page_content for doc in context])
            prompt = rag_prompt.format(
                context=context_str,
                question=input_dict["question"],
            )
            llm = OpenRouterLLM(getmod)
            return llm.stream(prompt)
        except Exception as e:
            def error_stream():
                yield f"RAG chain error: {str(e)}"
            return error_stream()

    return stream_generator


def rag_memory_stream(message, history):
    """Gradio streaming callback: yield the growing partial answer."""
    partial_text = ""
    # BUG FIX: rag_chain is a plain function returned by create_rag_chain,
    # not an object with a .stream() method — call it with the expected dict.
    for new_text in rag_chain({"question": message}):
        partial_text += new_text
        yield partial_text


# Title with emojis
title = "\U0001F9EE African Institute for Mathematical Sciences (AIMS Chatbot) \U0001F52C"

# Short description for the examples section
examples = [
    "What are the admission requirements for AIMS?",
    "Tell me about ongoing research at AIMS.",
    "Where can I find study materials for mathematics?",
]

# Custom CSS for styling the interface
custom_css = """
body {
    font-family: "Arial", serif;
}
.gradio-container {
    font-family: "Times New Roman", serif;
}
.gr-button {
    background-color: #007bff; /* Blue button */
    color: white;
    border: none;
    border-radius: 5px;
    font-size: 16px;
    padding: 10px 20px;
    cursor: pointer;
}
.gr-textbox:focus, .gr-button:focus {
    outline: none; /* Remove outline focus for a cleaner look */
}
/* Custom CSS for the examples section */
.gr-examples {
    font-size: 30px; /* Increase font size of examples */
    background-color: #f9f9f9; /* Light background color */
    border-radius: 30px; /* Rounded corners */
}
.gr-examples .example {
    background-color: white; /* White background for each example */
    cursor: pointer; /* Change cursor to pointer on hover */
    transition: background-color 0.3s ease; /* Smooth hover effect */
}
.gr-examples .example:hover {
    background-color: #f1f1f1; /* Light gray background on hover */
}
"""


if __name__ == "__main__":
    # --- Scrape and flatten website content ---------------------------------
    website = ["https://aims.ac.rw/", "https://nexteinstein.org/"]
    all_content = scrape_websites(website)

    # all_content maps url -> text; render each pair as one indexable string.
    processed_texts = [
        f"url: {url}, content: {content}" for url, content in all_content.items()
    ]

    # --- Chunk and index in Chroma ------------------------------------------
    chunked_texts = []
    for text in processed_texts:
        chunked_texts.extend(chunk_string(text))

    vectorstore = Chroma(
        collection_name="AIMS_Chat_DATABASE",
        embedding_function=embed_model,
        persist_directory="./",
    )
    # NOTE(review): add_texts appends on every run; re-running the script
    # duplicates documents in the persisted collection.
    vectorstore.add_texts(chunked_texts)

    retriever = vectorstore.as_retriever()

    # Create RAG chain with error handling
    rag_chain = create_rag_chain(retriever, template)

    # --- Launch the Gradio chat interface -----------------------------------
    demo = gr.ChatInterface(
        fn=rag_memory_stream,
        title=title,
        examples=examples,  # Display the example questions
        fill_height=True,
        theme="soft",
        css=custom_css,  # Apply the custom CSS
    )
    demo.launch(share=True)