import torch
import gradio as gr
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core import PromptTemplate
from llama_index.core.embeddings import resolve_embed_model
from llama_index.llms.huggingface import HuggingFaceLLM

# bge embedding model, resolved and run locally
Settings.embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")

# Wrap each query in the instruction format the camel-5b model was trained on.
query_wrapper_prompt = PromptTemplate(
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\n{query_str}\n\n### Response:"
)

llm = HuggingFaceLLM(
    context_window=2048,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.25, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="Writer/camel-5b-hf",
    model_name="Writer/camel-5b-hf",
    device_map="auto",
    tokenizer_kwargs={"max_length": 2048},
    # uncomment this if using CUDA to reduce memory usage
    # model_kwargs={"torch_dtype": torch.float16},
)

Settings.chunk_size = 512
Settings.llm = llm

# Shared state: holds the vector index built from the uploaded documents.
info = {}


def echo(message, history, system_prompt, tokens):
    # system_prompt and tokens arrive via ChatInterface's additional_inputs
    # and are not used in this minimal example.
    # Guard against querying before any documents have been uploaded.
    index = info.get("index")
    if index is None:
        return "Please upload a folder of documents first."
    query_engine = index.as_query_engine()
    response = query_engine.query(message)
    return response.response


with gr.Blocks() as demo:
    with gr.Row():
        folder = gr.File(file_count="directory")
        docs = gr.Textbox(label="Documents")
    system_prompt = gr.Textbox("You are a helpful AI.", label="System Prompt")
    slider = gr.Slider(10, 100, render=False)
    gr.ChatInterface(
        echo, additional_inputs=[system_prompt, slider]
    )

    def update_docs(filepath):
        # filepath is the list of file paths from the directory upload.
        documents = SimpleDirectoryReader(input_files=filepath).load_data()
        index = VectorStoreIndex.from_documents(documents)
        info["index"] = index
        return documents

    folder.upload(update_docs, folder, docs)

demo.launch()
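
# To try it out: run the script, open the local URL Gradio prints, upload a
# folder of documents, wait for the Documents box to populate with the loaded
# files, then ask questions about them in the chat panel.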