import gradio as gr
from pathlib import Path

import torch
from transformers import pipeline
from langchain.llms.base import LLM
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import (
    SimpleDirectoryReader,
    GPTVectorStoreIndex,
    PromptHelper,
    LLMPredictor,
    ServiceContext,
)
from llama_index.node_parser import SentenceSplitter
from llama_index.llms import HuggingFaceLLM
from llama_index.embeddings import LangchainEmbedding

# Folder containing the documents to be indexed
INPUT_FOLDER = "./data"
index_files = list(Path(INPUT_FOLDER).glob("*"))

# Prompt / chunking configuration
max_input_size = 2048
num_output = 256
max_chunk_overlap = 20
max_prompt_chunk_overlap = 0.5

# Create a HuggingFace LLM within the LlamaIndex framework
llm = HuggingFaceLLM(
    tokenizer_name="tiiuae/falcon-7b-instruct",
    model_name="tiiuae/falcon-7b-instruct",
    device_map="auto",
    model_kwargs={"max_length": 64, "offload_folder": "cached", "torch_dtype": torch.float16},
)

# Alternative setup using a LangChain custom LLM backed by a transformers pipeline
# (kept for reference, not used):
# prompt_helper = PromptHelper(max_input_size, num_output, max_prompt_chunk_overlap)
# pipe = pipeline(
#     "text-generation",
#     model="databricks/dolly-v2-3b",
#     trust_remote_code=True,
#     torch_dtype=torch.bfloat16,
#     device_map="auto",
# )

# Embedding model used to vectorize the documents
embed_model = LangchainEmbedding(HuggingFaceEmbeddings())

# class CustomLLM(LLM):
#     model_name = "databricks/dolly-v2-3b"
#
#     def _call(self, prompt, stop=None):
#         response = pipe(prompt, max_new_tokens=num_output)[0]["generated_text"]
#         return response
#
#     @property
#     def _identifying_params(self):
#         return {"name_of_model": self.model_name}
#
#     @property
#     def _llm_type(self):
#         return "custom"

# Define our LLM
llm_predictor = LLMPredictor(llm=llm)

# Split documents into sentence-based chunks before indexing
node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=max_chunk_overlap)

prompt_helper = PromptHelper(max_input_size, num_output, max_prompt_chunk_overlap)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embed_model,
    prompt_helper=prompt_helper,
    node_parser=node_parser,
    chunk_size_limit=512,
)

# Load your data and build the vector index
documents = SimpleDirectoryReader(input_files=index_files).load_data()
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()


def greet(query):
    # Convert the Response object to a plain string for Gradio's text output
    return str(query_engine.query(query))


iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch(share=True)