marcelo-castro-cardoso committed on
Commit
b1df1e0
1 Parent(s): 3a204c1
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+
4
+ from llama_index import (
5
+ VectorStoreIndex,
6
+ SimpleDirectoryReader,
7
+ StorageContext,
8
+ ServiceContext,
9
+ load_index_from_storage,
10
+ )
11
+ from llama_index.llms import OpenAI
12
+ from llama_index.memory import ChatMemoryBuffer
13
+ from llama_index.prompts import ChatPromptTemplate, ChatMessage, MessageRole
14
+ from llama_index.embeddings import LangchainEmbedding
15
+ import tiktoken
16
+ from llama_index.text_splitter import SentenceSplitter
17
+
18
+ from langchain.embeddings import HuggingFaceEmbeddings
19
+
20
# Embedding model: a HuggingFace sentence-transformer (created through
# LangChain) wrapped so LlamaIndex can consume it directly.
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)

# Chat LLM; max_tokens caps the size of each generated answer.
llm = OpenAI(temperature=0.3, model='gpt-3.5-turbo', max_tokens=1024)

# Tokenizer matching the LLM, used by the splitter to measure chunk sizes.
gpt35_tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo").encode

# Sentence-aware splitting: combines separators, a tokenizer and overlapping
# chunks for smarter segmentation of the source documents.
text_splitter = SentenceSplitter(
    separator=" ",
    chunk_size=1000,
    chunk_overlap=200,
    paragraph_separator=" \n \n",
    secondary_chunking_regex="[^,.;。]+[,.;。]?",
    tokenizer=gpt35_tokenizer,
)

# Service context bundles the LLM, embeddings and splitter so they are all
# applied consistently when the index is created or reloaded.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    text_splitter=text_splitter,
)
39
+
40
# Directory where the vector index is persisted between runs.
PERSIST_DIR = "./storage"

if os.path.exists(PERSIST_DIR):
    # A persisted index already exists: reload it from disk.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context, service_context=service_context)
else:
    # First run: read the documents from ./data, build the index with the
    # configured service context, then persist it for future runs.
    documents = SimpleDirectoryReader("./data").load_data()
    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
55
+
56
# Chat prompt for the engine: two system messages (one injecting the
# retrieved context via {context_str}, one instructing the model to use the
# prior Q&A history) plus a user message asking it to assist.
message_templates = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        content='''Você é um sistema especialista que sabe responder perguntas relacionadas ao contexto abaixo.
------------------
{context_str}
------------------''',
    ),
    ChatMessage(
        role=MessageRole.SYSTEM,
        # BUG FIX: the original passed ``context=`` here, which is not a
        # ChatMessage field — this instruction was being lost (and raises a
        # validation error on strict pydantic versions). It must be ``content``.
        content='''Para responder leve em consideração as perguntas e respostas dadas anteriormente.''',
    ),
    ChatMessage(
        role=MessageRole.USER,
        content='''Utilizando o histórico de conversação e o contexto armazenado iteraja e ajude o usuário.''',
    ),
]
chat_template = ChatPromptTemplate(message_templates=message_templates)
72
+
73
# Rolling memory of previous messages, capped at roughly 3900 tokens.
chat_memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

# Chat engine: "condense_plus_context" rewrites the incoming question using
# the conversation history, then answers it against the retrieved context.
chat_engine = index.as_chat_engine(
    chat_mode="condense_plus_context",
    memory=chat_memory,
    context_prompt=chat_template,
)
81
+
82
def slow_echo(message, history):
    """Stream the chat engine's answer token by token for the Gradio UI.

    Yields the accumulated partial response after each new token so the
    interface updates incrementally. ``history`` is supplied by
    gr.ChatInterface but is unused here — the engine keeps its own memory.
    """
    streamed = chat_engine.stream_chat(message)
    pieces = []
    for token in streamed.response_gen:
        pieces.append(token)
        yield "".join(pieces)
89
+
90
# Build the Gradio chat UI; queue() enables streaming generator responses.
chat_ui = gr.ChatInterface(slow_echo)
demo = chat_ui.queue()

if __name__ == "__main__":
    # share=True additionally exposes a temporary public Gradio link.
    demo.launch(share=True)
data/276376-convite-menor_preco_por_lote-false-obras.pdf ADDED
Binary file (779 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ llama-index==0.9.26
3
+ langchain==0.0.348
4
+ pypdf
5
+ sentence_transformers
storage/default__vector_store.json ADDED
The diff for this file is too large to render. See raw diff
 
storage/docstore.json ADDED
The diff for this file is too large to render. See raw diff
 
storage/graph_store.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"graph_dict": {}}
storage/image__vector_store.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}}
storage/index_store.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"index_store/data": {"dddeb7a6-b8bf-4920-90fe-17bd593c5a79": {"__type__": "vector_store", "__data__": "{\"index_id\": \"dddeb7a6-b8bf-4920-90fe-17bd593c5a79\", \"summary\": null, \"nodes_dict\": {\"1bbbab2e-f03f-415f-b0bf-49197cf33e0a\": \"1bbbab2e-f03f-415f-b0bf-49197cf33e0a\", \"8300bdcd-b17a-4107-9ab7-97951f6f4691\": \"8300bdcd-b17a-4107-9ab7-97951f6f4691\", \"a660654a-2689-4b1b-b9d8-d20a21482e9d\": \"a660654a-2689-4b1b-b9d8-d20a21482e9d\", \"abf23cfa-4d35-437b-a114-07d6dc136936\": \"abf23cfa-4d35-437b-a114-07d6dc136936\", \"70b8b7c6-d191-418b-b5cb-4e74cd7947ef\": \"70b8b7c6-d191-418b-b5cb-4e74cd7947ef\", \"788dfde5-3523-4c7c-ad30-47006d62fda2\": \"788dfde5-3523-4c7c-ad30-47006d62fda2\", \"9bb341c3-eb15-4ec7-8199-47b91296134d\": \"9bb341c3-eb15-4ec7-8199-47b91296134d\", \"9413f492-4c70-427b-aefe-131ef6a78d56\": \"9413f492-4c70-427b-aefe-131ef6a78d56\", \"75ecada0-2785-4070-8a91-5320e771941a\": \"75ecada0-2785-4070-8a91-5320e771941a\", \"fcbb0eb0-ab34-4aef-ba84-699d945fd971\": \"fcbb0eb0-ab34-4aef-ba84-699d945fd971\", \"a547fdac-976d-42e5-a3ed-f65b7aa6af7f\": \"a547fdac-976d-42e5-a3ed-f65b7aa6af7f\", \"d4ae8872-461a-4570-854f-23157f72c5b1\": \"d4ae8872-461a-4570-854f-23157f72c5b1\", \"507987b7-221c-4725-9bec-7bec40350255\": \"507987b7-221c-4725-9bec-7bec40350255\", \"392f0cf2-8e7a-4ce0-b9b8-dccf88ebde81\": \"392f0cf2-8e7a-4ce0-b9b8-dccf88ebde81\", \"9fc76b5a-a513-4ca4-8707-6d06a3bc4011\": \"9fc76b5a-a513-4ca4-8707-6d06a3bc4011\", \"634cdb2e-5f0c-47ee-b839-9bb25e08f496\": \"634cdb2e-5f0c-47ee-b839-9bb25e08f496\", \"d574085f-97ad-4580-9be1-f22d8c5c0cea\": \"d574085f-97ad-4580-9be1-f22d8c5c0cea\", \"a0d46172-8c07-41ca-b2b2-03be9e18ec5e\": \"a0d46172-8c07-41ca-b2b2-03be9e18ec5e\", \"d32d34b6-96c7-42a4-98b4-37d83cb08507\": \"d32d34b6-96c7-42a4-98b4-37d83cb08507\", \"c08ea130-3c6a-4746-975b-60bd8e7f0766\": \"c08ea130-3c6a-4746-975b-60bd8e7f0766\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}