import torch
import gradio as gr
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core import PromptTemplate
from llama_index.core.embeddings import resolve_embed_model
from llama_index.llms.huggingface import HuggingFaceLLM

# bge embedding model, resolved and run locally
Settings.embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")

# Wrap each query in the instruction format the camel-5b model was trained on.
query_wrapper_prompt = PromptTemplate(
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\n{query_str}\n\n### Response:"
)

llm = HuggingFaceLLM(
    context_window=2048,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.25, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="Writer/camel-5b-hf",
    model_name="Writer/camel-5b-hf",
    device_map="auto",
    tokenizer_kwargs={"max_length": 2048},
    # uncomment this if using CUDA to reduce memory usage
    # model_kwargs={"torch_dtype": torch.float16},
)

Settings.chunk_size = 512
Settings.llm = llm

# Shared state: holds the vector index built from the uploaded documents.
info = {}


def echo(message, history, system_prompt, tokens):
    # system_prompt and tokens arrive via ChatInterface's additional_inputs
    # and are not used in this minimal example.
    # Guard against querying before any documents have been uploaded.
    index = info.get("index")
    if index is None:
        return "Please upload a folder of documents first."
    query_engine = index.as_query_engine()
    response = query_engine.query(message)
    return response.response


with gr.Blocks() as demo:
    with gr.Row():
        folder = gr.File(file_count="directory")
        docs = gr.Textbox(label="Documents")
    system_prompt = gr.Textbox("You are a helpful AI.", label="System Prompt")
    slider = gr.Slider(10, 100, render=False)
    gr.ChatInterface(
        echo, additional_inputs=[system_prompt, slider]
    )

    def update_docs(filepath):
        # filepath is the list of file paths from the directory upload.
        documents = SimpleDirectoryReader(input_files=filepath).load_data()
        index = VectorStoreIndex.from_documents(documents)
        info["index"] = index
        return documents

    folder.upload(update_docs, folder, docs)

demo.launch()
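
# To try it out: run the script, open the local URL Gradio prints, upload a
# folder of documents, wait for the Documents box to populate with the loaded
# files, then ask questions about them in the chat panel.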