import gradio as gr
from pathlib import Path

import torch
from transformers import pipeline
from langchain.llms.base import LLM
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import (
    SimpleDirectoryReader,
    GPTVectorStoreIndex,
    PromptHelper,
    LLMPredictor,
    ServiceContext,
)
from llama_index.node_parser import SentenceSplitter
from llama_index.llms import HuggingFaceLLM
from llama_index.embeddings import LangchainEmbedding

# Folder containing the documents to be indexed
INPUT_FOLDER = "./data"
index_files = list(Path(INPUT_FOLDER).glob("*"))

# Prompt / chunking configuration
max_input_size = 2048
num_output = 256
max_chunk_overlap = 20
max_prompt_chunk_overlap = 0.5

# Create a HuggingFace LLM within the LlamaIndex framework
llm = HuggingFaceLLM(
    tokenizer_name="tiiuae/falcon-7b-instruct",
    model_name="tiiuae/falcon-7b-instruct",
    device_map="auto",
    model_kwargs={"max_length": 64, "offload_folder": "cached", "torch_dtype": torch.float16},
)

# Alternative setup using a LangChain custom LLM backed by a transformers pipeline
# (kept for reference, not used):
# prompt_helper = PromptHelper(max_input_size, num_output, max_prompt_chunk_overlap)
# pipe = pipeline(
#     "text-generation",
#     model="databricks/dolly-v2-3b",
#     trust_remote_code=True,
#     torch_dtype=torch.bfloat16,
#     device_map="auto",
# )

# Embedding model used to vectorize the documents
embed_model = LangchainEmbedding(HuggingFaceEmbeddings())

# class CustomLLM(LLM):
#     model_name = "databricks/dolly-v2-3b"
#
#     def _call(self, prompt, stop=None):
#         response = pipe(prompt, max_new_tokens=num_output)[0]["generated_text"]
#         return response
#
#     @property
#     def _identifying_params(self):
#         return {"name_of_model": self.model_name}
#
#     @property
#     def _llm_type(self):
#         return "custom"

# Define our LLM
llm_predictor = LLMPredictor(llm=llm)

# Split documents into sentence-based chunks before indexing
node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=max_chunk_overlap)

prompt_helper = PromptHelper(max_input_size, num_output, max_prompt_chunk_overlap)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embed_model,
    prompt_helper=prompt_helper,
    node_parser=node_parser,
    chunk_size_limit=512,
)

# Load your data and build the vector index
documents = SimpleDirectoryReader(input_files=index_files).load_data()
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()


def greet(query):
    # Convert the Response object to a plain string for Gradio's text output
    return str(query_engine.query(query))


iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch(share=True)