samsonleegh committed (verified)
Commit c078977 · 1 Parent(s): 1cd1f3e

Create app.py

Files changed (1):
  app.py (+68, -57)
app.py CHANGED
@@ -1,63 +1,74 @@
+import os
 import gradio as gr
-from huggingface_hub import InferenceClient
-
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
+from dotenv import load_dotenv
+from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler, CBEventType
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.core.postprocessor import SimilarityPostprocessor
+from llama_index.llms.openai import OpenAI
+from llama_index.llms.groq import Groq
+from llama_index.core.base.embeddings.base import similarity
+from llama_index.llms.ollama import Ollama
+from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
+from llama_index.core import StorageContext
+from llama_index.vector_stores.chroma import ChromaVectorStore
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.core import load_index_from_storage
+
+
+load_dotenv()
+
+# set up LLM
+GROQ_API_KEY = os.getenv('GROQ_API_KEY')
+llm = Groq(model="llama3-70b-8192")
+Settings.llm = llm
+
+# set up callback manager
+llama_debug = LlamaDebugHandler(print_trace_on_end=True)
+callback_manager = CallbackManager([llama_debug])
+Settings.callback_manager = callback_manager
+
+# converting documents into embeddings and indexing
+embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
+Settings.embed_model = embed_model
+
+# create splitter
+splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=20)
+Settings.transformations = [splitter]
+
+if os.path.exists("./vectordb"):
+    storage_context = StorageContext.from_defaults(persist_dir="./vectordb")
+    index = load_index_from_storage(storage_context)
+else:
+    filename_fn = lambda filename: {"file_name": filename}
+    required_exts = [".pdf", ".docx"]
+    reader = SimpleDirectoryReader(
+        input_dir="./data",
+        required_exts=required_exts,
+        recursive=True,
+        file_metadata=filename_fn
+    )
+    documents = reader.load_data()
+    for doc in documents:
+        doc.text = str(doc.metadata) + ' ' + doc.text
+    print("index creating with `%d` documents", len(documents))
+    index = VectorStoreIndex.from_documents(documents, embed_model=embed_model, text_splitter=splitter)
+
+    index.storage_context.persist(persist_dir="./vectordb")
+
+# set up query engine
+query_engine = index.as_query_engine(
+    similarity_top_k=5,
+    #node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
+    verbose=True,
 )
 
+def retreive(question):
+    qns_w_source = "Answer the following question: " + question + " Followed by providing the page and file name of the source document as well, thank you!"
+    streaming_response = query_engine.query(qns_w_source)
+    #sources = streaming_response.get_formatted_sources(length=5000)
+    return str(streaming_response) # + "\n" + str(sources)
+
+demo = gr.Interface(fn=retreive, inputs="textbox", outputs="textbox")
 
 if __name__ == "__main__":
     demo.launch()
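
A minimal way to exercise the new app.py outside the Gradio UI is sketched below; it is not part of the commit. It assumes GROQ_API_KEY is available in the environment (or in a .env file picked up by load_dotenv), that ./data contains at least one .pdf or .docx, and that the module is importable as app; the query string is illustrative only.

# hypothetical smoke test for app.py (not part of this commit)
# importing app builds or loads the ./vectordb index as a side effect,
# but does not start the Gradio server (demo.launch() is guarded by __main__)
from app import retreive

print(retreive("What topics are covered in the indexed documents?"))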