marcelo-castro-cardoso
committed
Commit b1df1e0 • Parent: 3a204c1

deploy
Files changed:
- app.py (+94, -0)
- data/276376-convite-menor_preco_por_lote-false-obras.pdf (+0, -0)
- requirements.txt (+5, -0)
- storage/default__vector_store.json (+0, -0)
- storage/docstore.json (+0, -0)
- storage/graph_store.json (+1, -0)
- storage/image__vector_store.json (+1, -0)
- storage/index_store.json (+1, -0)
app.py
ADDED
@@ -0,0 +1,94 @@
import gradio as gr
import os

from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    ServiceContext,
    load_index_from_storage,
)
from llama_index.llms import OpenAI
from llama_index.memory import ChatMemoryBuffer
from llama_index.prompts import ChatPromptTemplate, ChatMessage, MessageRole
from llama_index.embeddings import LangchainEmbedding
import tiktoken
from llama_index.text_splitter import SentenceSplitter

from langchain.embeddings import HuggingFaceEmbeddings

# create the LangChain embedding model
lc_embed_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)
# wrap the LangChain embedding so LlamaIndex can use it
embed_model = LangchainEmbedding(lc_embed_model)

# max_tokens: the maximum length of the generated answer
llm = OpenAI(temperature=0.3, model='gpt-3.5-turbo', max_tokens=1024)

# sentence-aware splitting, combining separators, a tokenizer, and chunk sizes
text_splitter = SentenceSplitter(
    separator=" ", chunk_size=1000, chunk_overlap=200,
    paragraph_separator=" \n \n", secondary_chunking_regex="[^,.;。]+[,.;。]?",
    tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode
)

# service context that configures how the index is built
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model, text_splitter=text_splitter)

# check whether the storage folder already exists locally
PERSIST_DIR = "./storage"
if not os.path.exists(PERSIST_DIR):
    # if it does not, read the documents from the data folder and build an index
    documents = SimpleDirectoryReader("./data").load_data()

    # build the index using the service context
    index = VectorStoreIndex.from_documents(documents, service_context=service_context)

    # then persist the index to the storage folder
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # if the folder exists, load the existing index
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context, service_context=service_context)

# define the chat prompt
message_templates = [
    ChatMessage(role=MessageRole.SYSTEM,
                content='''Você é um sistema especialista que sabe responder perguntas relacionadas ao contexto abaixo.
------------------
{context_str}
------------------'''
                ),
    ChatMessage(role=MessageRole.SYSTEM,
                content='''Para responder leve em consideração as perguntas e respostas dadas anteriormente.'''
                ),
    ChatMessage(role=MessageRole.USER,
                content='''Utilizando o histórico de conversação e o contexto armazenado, interaja e ajude o usuário.'''
                ),
]
chat_template = ChatPromptTemplate(message_templates=message_templates)

# memory buffer that keeps previous messages within a token budget
memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

chat_engine = index.as_chat_engine(
    chat_mode="condense_plus_context",
    memory=memory,
    context_prompt=chat_template,
)

# query the local index, streaming the answer token by token
def slow_echo(message, history):
    response_gen = chat_engine.stream_chat(message)
    response = ""
    for token in response_gen.response_gen:
        response = response + token
        yield response

# build the Gradio chat interface
demo = gr.ChatInterface(slow_echo).queue()

if __name__ == "__main__":
    demo.launch(share=True)
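A quick way to smoke-test the engine outside the Gradio UI is to drive it directly. A minimal sketch, assuming OPENAI_API_KEY is set in the environment (the OpenAI LLM resolves it from there), that importing app runs the setup above (launch() stays behind the __main__ guard), and using a hypothetical sample question:

    import os

    # fail early if the key the OpenAI LLM needs is missing (assumption: env-based auth)
    assert "OPENAI_API_KEY" in os.environ

    # importing the module executes the index/engine construction above
    from app import chat_engine

    # non-streaming counterpart of the stream_chat call used in slow_echo
    answer = chat_engine.chat("Qual é o objeto da licitação?")  # hypothetical question
    print(answer.response)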
data/276376-convite-menor_preco_por_lote-false-obras.pdf
ADDED
Binary file (779 kB).
requirements.txt
ADDED
@@ -0,0 +1,5 @@
gradio
llama-index==0.9.26
langchain==0.0.348
pypdf
sentence_transformers
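To reproduce the environment locally (the Space installs these automatically on build), the usual pip invocation applies:

    pip install -r requirements.txt

The pins matter here: llama-index 0.9.26 still exposes ServiceContext and LangchainEmbedding at the import paths app.py uses, which later releases deprecate or relocate.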
storage/default__vector_store.json
ADDED
The diff for this file is too large to render.
storage/docstore.json
ADDED
The diff for this file is too large to render.
storage/graph_store.json
ADDED
@@ -0,0 +1 @@
{"graph_dict": {}}
storage/image__vector_store.json
ADDED
@@ -0,0 +1 @@
{"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}}
storage/index_store.json
ADDED
@@ -0,0 +1 @@
{"index_store/data": {"dddeb7a6-b8bf-4920-90fe-17bd593c5a79": {"__type__": "vector_store", "__data__": "{\"index_id\": \"dddeb7a6-b8bf-4920-90fe-17bd593c5a79\", \"summary\": null, \"nodes_dict\": {\"1bbbab2e-f03f-415f-b0bf-49197cf33e0a\": \"1bbbab2e-f03f-415f-b0bf-49197cf33e0a\", \"8300bdcd-b17a-4107-9ab7-97951f6f4691\": \"8300bdcd-b17a-4107-9ab7-97951f6f4691\", \"a660654a-2689-4b1b-b9d8-d20a21482e9d\": \"a660654a-2689-4b1b-b9d8-d20a21482e9d\", \"abf23cfa-4d35-437b-a114-07d6dc136936\": \"abf23cfa-4d35-437b-a114-07d6dc136936\", \"70b8b7c6-d191-418b-b5cb-4e74cd7947ef\": \"70b8b7c6-d191-418b-b5cb-4e74cd7947ef\", \"788dfde5-3523-4c7c-ad30-47006d62fda2\": \"788dfde5-3523-4c7c-ad30-47006d62fda2\", \"9bb341c3-eb15-4ec7-8199-47b91296134d\": \"9bb341c3-eb15-4ec7-8199-47b91296134d\", \"9413f492-4c70-427b-aefe-131ef6a78d56\": \"9413f492-4c70-427b-aefe-131ef6a78d56\", \"75ecada0-2785-4070-8a91-5320e771941a\": \"75ecada0-2785-4070-8a91-5320e771941a\", \"fcbb0eb0-ab34-4aef-ba84-699d945fd971\": \"fcbb0eb0-ab34-4aef-ba84-699d945fd971\", \"a547fdac-976d-42e5-a3ed-f65b7aa6af7f\": \"a547fdac-976d-42e5-a3ed-f65b7aa6af7f\", \"d4ae8872-461a-4570-854f-23157f72c5b1\": \"d4ae8872-461a-4570-854f-23157f72c5b1\", \"507987b7-221c-4725-9bec-7bec40350255\": \"507987b7-221c-4725-9bec-7bec40350255\", \"392f0cf2-8e7a-4ce0-b9b8-dccf88ebde81\": \"392f0cf2-8e7a-4ce0-b9b8-dccf88ebde81\", \"9fc76b5a-a513-4ca4-8707-6d06a3bc4011\": \"9fc76b5a-a513-4ca4-8707-6d06a3bc4011\", \"634cdb2e-5f0c-47ee-b839-9bb25e08f496\": \"634cdb2e-5f0c-47ee-b839-9bb25e08f496\", \"d574085f-97ad-4580-9be1-f22d8c5c0cea\": \"d574085f-97ad-4580-9be1-f22d8c5c0cea\", \"a0d46172-8c07-41ca-b2b2-03be9e18ec5e\": \"a0d46172-8c07-41ca-b2b2-03be9e18ec5e\", \"d32d34b6-96c7-42a4-98b4-37d83cb08507\": \"d32d34b6-96c7-42a4-98b4-37d83cb08507\", \"c08ea130-3c6a-4746-975b-60bd8e7f0766\": \"c08ea130-3c6a-4746-975b-60bd8e7f0766\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
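These storage files are what index.storage_context.persist() wrote, and the index_id recorded in index_store.json is what app.py recovers on restart. A minimal sketch for verifying that the persisted index loads, assuming the same pinned dependencies and an OPENAI_API_KEY in the environment (the default service context resolves an OpenAI LLM):

    from llama_index import StorageContext, load_index_from_storage

    # point a storage context at the folder committed here
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)

    # should echo the index_id stored in storage/index_store.json
    print(index.index_id)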