luanpoppe committed on
Commit
ca8a144
·
1 Parent(s): 1fd7b67

feat: adicionando resumo cursor completo

Browse files
_utils/resumo_completo_cursor.py CHANGED
@@ -1,17 +1,43 @@
 
1
  import os
2
- from typing import List, Dict, Tuple
3
  from langchain.text_splitter import RecursiveCharacterTextSplitter
4
- from langchain.document_loaders import PyPDFLoader
5
- from langchain.embeddings import HuggingFaceEmbeddings
6
- from langchain.vectorstores import Chroma
7
- from langchain.chat_models import ChatOpenAI
8
  from langchain.chains import create_extraction_chain
9
  from langchain.prompts import PromptTemplate
10
  from dataclasses import dataclass
11
  import uuid
12
  import json
13
- from langchain_huggingface import HuggingFaceEndpoint
14
- from setup.environment import default_model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  os.environ["LANGCHAIN_TRACING_V2"]="true"
17
  os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
@@ -20,202 +46,528 @@ os.environ["LANGCHAIN_PROJECT"]="VELLA"
20
 
21
  @dataclass
22
  class DocumentChunk:
23
- content: str
24
- page_number: int
25
- chunk_id: str
26
- start_char: int
27
- end_char: int
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  class DocumentSummarizer:
30
- def __init__(self, openai_api_key: str, model, embedding, chunk_config, system_prompt):
31
- self.model = model
32
- self.system_prompt = system_prompt
33
- self.openai_api_key = openai_api_key
34
- self.embeddings = HuggingFaceEmbeddings(
35
- model_name=embedding
36
- )
37
- self.text_splitter = RecursiveCharacterTextSplitter(
38
- chunk_size=chunk_config["size"],
39
- chunk_overlap=chunk_config["overlap"]
40
- )
41
- self.chunk_metadata = {} # Store chunk metadata for tracing
42
-
43
- def load_and_split_document(self, pdf_path: str) -> List[DocumentChunk]:
44
- """Load PDF and split into chunks with metadata"""
45
- loader = PyPDFLoader(pdf_path)
46
- pages = loader.load()
47
- chunks = []
48
- char_count = 0
49
-
50
- for page in pages:
51
- text = page.page_content
52
- # Split the page content
53
- page_chunks = self.text_splitter.split_text(text)
54
-
55
- for chunk in page_chunks:
56
- chunk_id = str(uuid.uuid4())
57
- start_char = text.find(chunk)
58
- end_char = start_char + len(chunk)
59
-
60
- doc_chunk = DocumentChunk(
61
- content=chunk,
62
- page_number=page.metadata.get('page') + 1, # 1-based page numbering
63
- chunk_id=chunk_id,
64
- start_char=char_count + start_char,
65
- end_char=char_count + end_char
66
- )
67
- chunks.append(doc_chunk)
68
-
69
- # Store metadata for later retrieval
70
- self.chunk_metadata[chunk_id] = {
71
- 'page': doc_chunk.page_number,
72
- 'start_char': doc_chunk.start_char,
73
- 'end_char': doc_chunk.end_char
74
- }
75
-
76
- char_count += len(text)
77
-
78
- return chunks
79
-
80
- def create_vector_store(self, chunks: List[DocumentChunk]) -> Chroma:
81
- """Create vector store with metadata"""
82
- texts = [chunk.content for chunk in chunks]
83
- metadatas = [{
84
- 'chunk_id': chunk.chunk_id,
85
- 'page': chunk.page_number,
86
- 'start_char': chunk.start_char,
87
- 'end_char': chunk.end_char
88
- } for chunk in chunks]
89
-
90
- vector_store = Chroma.from_texts(
91
- texts=texts,
92
- metadatas=metadatas,
93
- embedding=self.embeddings
94
- )
95
- return vector_store
96
-
97
- def generate_summary_with_sources(
98
- self,
99
- vector_store: Chroma,
100
- query: str = "Summarize the main points of this document"
101
- ) -> List[Dict]:
102
- """Generate summary with source citations, returning structured JSON data"""
103
- # Retrieve relevant chunks with metadata
104
- relevant_docs = vector_store.similarity_search_with_score(query, k=5)
105
-
106
- # Prepare context and track sources
107
- contexts = []
108
- sources = []
109
-
110
- for doc, score in relevant_docs:
111
- chunk_id = doc.metadata['chunk_id']
112
- context = doc.page_content
113
- contexts.append(context)
114
-
115
- sources.append({
116
- 'content': context,
117
- 'page': doc.metadata['page'],
118
- 'chunk_id': chunk_id,
119
- 'relevance_score': score
120
- })
121
-
122
- prompt = PromptTemplate(
123
- template=self.system_prompt,
124
- input_variables=["context"]
125
- )
126
- llm = ""
127
-
128
- if (self.model == default_model):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  llm = ChatOpenAI(
130
  temperature=0,
131
  model_name="gpt-4o-mini",
132
  api_key=self.openai_api_key
133
  )
134
- else:
135
- llm = HuggingFaceEndpoint(
136
- repo_id=self.model,
137
- task="text-generation",
138
- max_new_tokens=1100,
139
- do_sample=False,
140
- huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN")
141
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
-
144
- response = llm.predict(prompt.format(context="\n\n".join(contexts)))
145
-
146
- # Split the response into paragraphs
147
- summaries = [p.strip() for p in response.split('\n\n') if p.strip()]
148
-
149
- # Create structured output
150
- structured_output = []
151
- for idx, summary in enumerate(summaries):
152
- # Associate each summary with the most relevant source
153
- structured_output.append({
154
- "content": summary,
155
- "source": {
156
- "page": sources[min(idx, len(sources)-1)]['page'],
157
- "text": sources[min(idx, len(sources)-1)]['content'][:200] + "...",
158
- "relevance_score": sources[min(idx, len(sources)-1)]['relevance_score']
159
- }
160
- })
161
-
162
- return structured_output
163
-
164
- def get_source_context(self, chunk_id: str, window: int = 100) -> Dict:
165
- """Get extended context around a specific chunk"""
166
- metadata = self.chunk_metadata.get(chunk_id)
167
- if not metadata:
168
- return None
169
-
170
- return {
171
- 'page': metadata['page'],
172
- 'start_char': metadata['start_char'],
173
- 'end_char': metadata['end_char']
174
- }
175
-
176
- def get_llm_summary_answer_by_cursor(serializer, listaPDFs):
177
- # By Luan
178
- allPdfsChunks = []
179
-
180
- # Initialize summarizer
181
- summarizer = DocumentSummarizer(
182
- openai_api_key=os.environ.get("OPENAI_API_KEY"),
183
- embedding=serializer["hf_embedding"],
184
- chunk_config={"size": serializer["chunk_size"], "overlap": serializer["chunk_overlap"]},
185
- system_prompt=serializer["system_prompt"],
186
- model=serializer["model"]
187
- )
188
-
189
- # Load and process document
190
- for pdf in listaPDFs:
191
- pdf_path = pdf
192
- chunks = summarizer.load_and_split_document(pdf_path)
193
- allPdfsChunks = allPdfsChunks + chunks
194
-
195
- vector_store = summarizer.create_vector_store(allPdfsChunks)
196
-
197
- # Generate structured summary
198
- structured_summaries = summarizer.generate_summary_with_sources(vector_store)
199
-
200
- # Print or return the structured data
201
- # print(structured_summaries)
202
- json_data = json.dumps(structured_summaries)
203
- print("\n\n")
204
- print(json_data)
205
- return structured_summaries
206
- # If you need to send to frontend, you can just return structured_summaries
207
- # It will be in the format:
208
- # [
209
- # {
210
- # "content": "Summary point 1...",
211
- # "source": {
212
- # "page": 1,
213
- # "text": "Source text...",
214
- # "relevance_score": 0.95
215
- # }
216
- # },
217
- # ...
218
- # ]
219
-
220
- if __name__ == "__main__":
221
- get_llm_summary_answer_by_cursor()
 
1
+
2
  import os
3
+ from typing import List, Dict, Tuple, Optional
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain_community.document_loaders import PyPDFLoader
6
+ from langchain_huggingface import HuggingFaceEmbeddings
7
+ from langchain_community.vectorstores import Chroma
8
+ from langchain_community.chat_models import ChatOpenAI
9
  from langchain.chains import create_extraction_chain
10
  from langchain.prompts import PromptTemplate
11
  from dataclasses import dataclass
12
  import uuid
13
  import json
14
+ from anthropic import Anthropic
15
+ import numpy as np
16
+ from rank_bm25 import BM25Okapi
17
+ import logging
18
+ from cohere import Client
19
+
20
def reciprocal_rank_fusion(result_lists, weights=None):
    """Merge several scored result lists into one list ordered by fused score.

    Note that, despite the name, positions (ranks) are not used: the fused
    score of a document is the sum, over every list it appears in, of that
    list's weight multiplied by the document's score in that list.

    Args:
        result_lists: Sequence of lists of ``(doc_id, score)`` pairs.
        weights: One multiplier per result list; defaults to 1.0 for each.

    Returns:
        List of ``(doc_id, fused_score)`` tuples, highest score first.
    """
    if weights is None:
        weights = [1.0] * len(result_lists)

    totals = {}
    for position, scored_docs in enumerate(result_lists):
        for doc_id, score in scored_docs:
            totals[doc_id] = totals.get(doc_id, 0) + weights[position] * score

    # Highest fused score first; ties keep first-seen order (stable sort).
    return sorted(totals.items(), key=lambda pair: pair[1], reverse=True)
41
 
42
  os.environ["LANGCHAIN_TRACING_V2"]="true"
43
  os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
 
46
 
47
@dataclass
class DocumentChunk:
    """One piece of text produced by splitting a PDF page, with its location."""

    content: str  # text of the chunk
    page_number: int  # 1-based page the chunk was taken from
    chunk_id: str  # UUID string assigned when the chunk is created
    start_char: int  # start offset: position in the page plus the length of all earlier pages
    end_char: int  # start_char plus the length of the chunk
54
+
55
@dataclass
class RetrievalConfig:
    """Tunable settings for the hybrid (embedding + BM25) retrieval step."""

    num_chunks: int = 5  # how many results each retriever returns and how many fused results are kept
    embedding_weight: float = 0.5  # multiplier applied to embedding-based scores during fusion
    bm25_weight: float = 0.5  # multiplier applied to normalised BM25 scores during fusion
    context_window: int = 3  # NOTE(review): not read anywhere in the visible code
    chunk_overlap: int = 200  # NOTE(review): stored only; the text splitter gets its overlap separately
    chunk_size: int = 1000  # NOTE(review): stored only; the text splitter gets its size separately
63
+
64
@dataclass
class ContextualizedChunk(DocumentChunk):
    """A DocumentChunk enriched with a short description of where it sits in the document."""

    context: str = ""  # generated situating text; empty when generation failed
    embedding: Optional[np.ndarray] = None  # NOTE(review): never assigned in the visible code
    bm25_score: Optional[float] = None  # NOTE(review): never assigned in the visible code
69
 
70
class DocumentSummarizer:
    """Split PDFs into chunks, index them in Chroma and summarise them with citations.

    Retrieval is a vector similarity search followed by a Cohere rerank; the
    summary text is produced by an OpenAI chat model.
    """

    def __init__(self, openai_api_key: str, cohere_api_key: str, embedding_model, chunk_size, chunk_overlap, num_k_rerank, model_cohere_rerank):
        """Build the embedding model, text splitter and Cohere client.

        Args:
            openai_api_key: Key used when creating the OpenAI chat model.
            cohere_api_key: Key used to create the Cohere client.
            embedding_model: Hugging Face embedding model name.
            chunk_size: Maximum chunk length handed to the text splitter.
            chunk_overlap: Overlap between consecutive chunks.
            num_k_rerank: Number of chunks kept after reranking.
            model_cohere_rerank: Name of the Cohere rerank model.
        """
        self.openai_api_key = openai_api_key
        self.cohere_client = Client(cohere_api_key)
        self.embeddings = HuggingFaceEmbeddings(
            model_name=embedding_model
        )
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap
        )
        self.chunk_metadata = {}  # Store chunk metadata for tracing
        self.num_k_rerank = num_k_rerank
        self.model_cohere_rerank = model_cohere_rerank

    @staticmethod
    def _locate_chunks(text, page_chunks):
        """Return the ``(start, end)`` offsets of each chunk inside ``text``.

        Chunks are searched for from just after the previous chunk's start, so
        repeated passages map to successive occurrences instead of always to
        the first one. A chunk that cannot be found verbatim gets the current
        scan position as an approximate start rather than a bogus ``-1``.
        """
        spans = []
        cursor = 0
        for chunk in page_chunks:
            start = text.find(chunk, cursor)
            if start == -1:
                # Not found ahead of the cursor: accept any earlier match.
                start = text.find(chunk)
            if start == -1:
                start = cursor
            elif start >= cursor:
                # Overlapping chunks start before the previous one ends, so
                # only step past the previous start, not its end.
                cursor = start + 1
            spans.append((start, start + len(chunk)))
        return spans

    def load_and_split_document(self, pdf_path: str) -> List[DocumentChunk]:
        """Load a PDF and split every page into chunks with location metadata.

        Args:
            pdf_path: Path of the PDF to load.

        Returns:
            The chunks of all pages, in page order. Offsets are cumulative
            over the text of the preceding pages. Each chunk's location is
            also recorded in ``self.chunk_metadata`` under its ``chunk_id``.
        """
        loader = PyPDFLoader(pdf_path)
        pages = loader.load()
        chunks = []
        char_count = 0

        for page in pages:
            text = page.page_content
            # 1-based page numbering; a missing 'page' entry is treated as page 0.
            page_number = page.metadata.get('page', 0) + 1
            page_chunks = self.text_splitter.split_text(text)

            for chunk, (start_char, end_char) in zip(page_chunks, self._locate_chunks(text, page_chunks)):
                chunk_id = str(uuid.uuid4())
                doc_chunk = DocumentChunk(
                    content=chunk,
                    page_number=page_number,
                    chunk_id=chunk_id,
                    start_char=char_count + start_char,
                    end_char=char_count + end_char
                )
                chunks.append(doc_chunk)

                # Store metadata for later retrieval
                self.chunk_metadata[chunk_id] = {
                    'page': doc_chunk.page_number,
                    'start_char': doc_chunk.start_char,
                    'end_char': doc_chunk.end_char
                }

            char_count += len(text)

        return chunks

    def create_vector_store(self, chunks: List[DocumentChunk]) -> Chroma:
        """Create a Chroma vector store holding the chunk texts and their metadata."""
        texts = [chunk.content for chunk in chunks]
        metadatas = [{
            'chunk_id': chunk.chunk_id,
            'page': chunk.page_number,
            'start_char': chunk.start_char,
            'end_char': chunk.end_char
        } for chunk in chunks]

        vector_store = Chroma.from_texts(
            texts=texts,
            metadatas=metadatas,
            embedding=self.embeddings
        )
        return vector_store

    def rerank_chunks(
        self,
        chunks: List[Dict],
        query: str,
        k: int = 5
    ) -> List[Dict]:
        """Rerank chunks using Cohere's reranking model.

        Args:
            chunks: List of dictionaries containing chunks and their metadata;
                each must have a ``'content'`` key.
            query: Original search query.
            k: Number of top chunks to return.

        Returns:
            The reranked chunks with ``'relevance_score'`` replaced by the
            reranker's score. If reranking fails, the first ``k`` chunks in
            their original order.
        """
        if not chunks:
            # Nothing to rank; avoid a pointless API call.
            return []

        try:
            documents = [chunk['content'] for chunk in chunks]

            response = self.cohere_client.rerank(
                query=query,
                documents=documents,
                top_n=k,
                model=self.model_cohere_rerank
            )

            # Newer SDK responses carry the hits in `.results`; older ones
            # were directly iterable. Support both.
            hits = getattr(response, "results", response)

            return [
                {**chunks[hit.index], 'relevance_score': hit.relevance_score}
                for hit in hits
            ]

        except Exception as e:
            logging.error(f"Reranking failed: {str(e)}")
            return chunks[:k]  # Fallback to original ordering

    def generate_summary_with_sources(
        self,
        vector_store: Chroma,
        query: str = "Summarize the main points of this document",
        initial_k: int = 20
    ) -> List[Dict]:
        """Generate a summary with source citations using reranking.

        Args:
            vector_store: Store to retrieve candidate chunks from.
            query: Query used for both retrieval and reranking.
            initial_k: Number of candidates fetched before reranking.

        Returns:
            One dict per paragraph of the model's answer, each with the
            paragraph under ``"content"`` and a ``"source"`` dict (page, first
            200 characters of the chunk, relevance score). Paragraph *i* is
            paired with reranked source *i*; extra paragraphs reuse the last
            source. Empty when nothing was retrieved.
        """
        # Retrieve more initial chunks than needed so the reranker has a choice.
        relevant_docs = vector_store.similarity_search_with_score(query, k=initial_k)

        chunks = [
            {
                'content': doc.page_content,
                'page': doc.metadata['page'],
                'chunk_id': doc.metadata['chunk_id'],
                'relevance_score': score
            }
            for doc, score in relevant_docs
        ]

        reranked_chunks = self.rerank_chunks(chunks, query, k=self.num_k_rerank)

        contexts = [chunk['content'] for chunk in reranked_chunks]
        sources = [
            {
                'content': chunk['content'],
                'page': chunk['page'],
                'chunk_id': chunk['chunk_id'],
                'relevance_score': chunk['relevance_score']
            }
            for chunk in reranked_chunks
        ]

        if not sources:
            # Without any source there is nothing to cite or summarise.
            return []

        prompt_template = """
Based on the following context, provide multiple key points from the document.
For each point, create a new paragraph.
Each paragraph should be a complete, self-contained insight.

Context: {context}

Key points:
"""

        prompt = PromptTemplate(
            template=prompt_template,
            input_variables=["context"]
        )

        llm = ChatOpenAI(
            temperature=0,
            model_name="gpt-4o-mini",
            api_key=self.openai_api_key
        )

        response = llm.predict(prompt.format(context="\n\n".join(contexts)))

        # Split the response into paragraphs
        summaries = [p.strip() for p in response.split('\n\n') if p.strip()]

        structured_output = []
        last_source = len(sources) - 1
        for idx, summary in enumerate(summaries):
            source = sources[min(idx, last_source)]
            structured_output.append({
                "content": summary,
                "source": {
                    "page": source['page'],
                    "text": source['content'][:200] + "...",
                    "relevance_score": source['relevance_score']
                }
            })

        return structured_output

    def get_source_context(self, chunk_id: str, window: int = 100) -> Optional[Dict]:
        """Return the recorded location of a chunk, or None if the id is unknown.

        Args:
            chunk_id: Id assigned by ``load_and_split_document``.
            window: Currently unused.

        Returns:
            Dict with ``'page'``, ``'start_char'`` and ``'end_char'``, or None.
        """
        metadata = self.chunk_metadata.get(chunk_id)
        if not metadata:
            return None

        return {
            'page': metadata['page'],
            'start_char': metadata['start_char'],
            'end_char': metadata['end_char']
        }
270
+
271
class ContextualRetriever:
    """Ask a Claude model for a short description situating each chunk in its document."""

    def __init__(self, config: RetrievalConfig, claude_api_key: str, claude_context_model):
        """Keep the retrieval config and create the Anthropic client and logger."""
        self.logger = logging.getLogger(__name__)
        self.claude_client = Anthropic(api_key=claude_api_key)
        self.claude_context_model = claude_context_model
        self.config = config
        self.bm25 = None

    def generate_context(self, full_text: str, chunk: DocumentChunk) -> str:
        """Return the model's situating text for ``chunk``, or "" if the call fails.

        The whole document and the chunk are sent in a single user message;
        the reply is capped at 100 tokens. Any exception is logged and
        swallowed so one failing chunk does not abort the run.
        """
        try:
            request_text = f"""<document>
{full_text}
</document>
Here is the chunk we want to situate within the whole document
<chunk>
{chunk.content}
</chunk>
Please give a short succinct context to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct context and nothing else."""

            reply = self.claude_client.messages.create(
                model=self.claude_context_model,
                max_tokens=100,
                messages=[{"role": "user", "content": request_text}]
            )
            first_block = reply.content[0]
            return first_block.text
        except Exception as e:
            self.logger.error(f"Context generation failed for chunk {chunk.chunk_id}: {str(e)}")
            return ""

    def contextualize_chunks(self, full_text: str, chunks: List[DocumentChunk]) -> List[ContextualizedChunk]:
        """Return a ContextualizedChunk for every input chunk, in the same order."""
        return [
            ContextualizedChunk(
                content=piece.content,
                page_number=piece.page_number,
                chunk_id=piece.chunk_id,
                start_char=piece.start_char,
                end_char=piece.end_char,
                context=self.generate_context(full_text, piece)
            )
            for piece in chunks
        ]
316
+
317
class EnhancedDocumentSummarizer(DocumentSummarizer):
    """DocumentSummarizer variant using contextualised chunks and hybrid retrieval.

    Chunks are indexed together with their generated context, candidates are
    retrieved with both embeddings and BM25, the two score lists are fused,
    and the fused top results are summarised by an OpenAI chat model.
    """

    def __init__(self, openai_api_key: str, claude_api_key: str, config: RetrievalConfig, embedding_model, chunk_size, chunk_overlap, num_k_rerank, model_cohere_rerank, claude_context_model, system_prompt, gpt_model, gpt_temperature):
        """Initialise the base summarizer and the contextual retriever.

        The Cohere key is read from the ``COHERE_API_KEY`` environment
        variable. ``system_prompt`` is used as the prompt template and is
        formatted with a ``context`` variable.
        """
        super().__init__(openai_api_key, os.environ.get("COHERE_API_KEY"), embedding_model, chunk_size, chunk_overlap, num_k_rerank, model_cohere_rerank)
        self.config = config
        self.contextual_retriever = ContextualRetriever(config, claude_api_key, claude_context_model)
        self.logger = logging.getLogger(__name__)
        self.system_prompt = system_prompt
        self.gpt_model = gpt_model
        self.gpt_temperature = gpt_temperature

    def create_enhanced_vector_store(self, chunks: List[ContextualizedChunk]) -> Tuple[Chroma, BM25Okapi, List[str]]:
        """Create the vector store and BM25 index over contextualised chunks.

        Returns:
            ``(vector_store, bm25, chunk_ids)`` where ``chunk_ids[i]`` is the
            id of the i-th document in the BM25 corpus.

        Raises:
            Exception: Any error from the underlying libraries, after logging.
        """
        try:
            # Index the generated context together with the chunk text.
            texts = [f"{chunk.context} {chunk.content}" for chunk in chunks]

            metadatas = [{
                'chunk_id': chunk.chunk_id,
                'page': chunk.page_number,
                'start_char': chunk.start_char,
                'end_char': chunk.end_char,
                'context': chunk.context
            } for chunk in chunks]

            vector_store = Chroma.from_texts(
                texts=texts,
                metadatas=metadatas,
                embedding=self.embeddings
            )

            # BM25 works on whitespace-tokenised texts.
            tokenized_texts = [text.split() for text in texts]
            bm25 = BM25Okapi(tokenized_texts)

            chunk_ids = [chunk.chunk_id for chunk in chunks]

            return vector_store, bm25, chunk_ids

        except Exception as e:
            self.logger.error(f"Error creating enhanced vector store: {str(e)}")
            raise

    def retrieve_with_rank_fusion(
        self,
        vector_store: Chroma,
        bm25: BM25Okapi,
        chunk_ids: List[str],
        query: str
    ) -> List[Tuple[str, float]]:
        """Combine embedding and BM25 retrieval results.

        Returns:
            ``(chunk_id, fused_score)`` tuples, best first, as produced by
            ``reciprocal_rank_fusion``.

        Raises:
            Exception: Any retrieval error, after logging.
        """
        try:
            embedding_results = vector_store.similarity_search_with_score(
                query,
                k=self.config.num_chunks
            )

            # The store's score is mapped through 1 / (1 + score) so that
            # smaller raw scores yield larger fused contributions.
            embedding_list = [
                (doc.metadata['chunk_id'], 1 / (1 + score))
                for doc, score in embedding_results
            ]

            tokenized_query = query.split()
            bm25_scores = bm25.get_scores(tokenized_query)

            bm25_list = [
                (chunk_ids[i], float(score)) for i, score in enumerate(bm25_scores)
            ]

            # Keep only the top N BM25 results.
            bm25_list = sorted(bm25_list, key=lambda x: x[1], reverse=True)[:self.config.num_chunks]

            # Normalise BM25 scores to at most 1. When no query term matches,
            # every score is 0; skip the division instead of dividing by zero.
            max_bm25 = max((score for _, score in bm25_list), default=0.0)
            if max_bm25 > 0:
                bm25_list = [(doc_id, score / max_bm25) for doc_id, score in bm25_list]

            result_lists = [embedding_list, bm25_list]
            weights = [self.config.embedding_weight, self.config.bm25_weight]

            combined_results = reciprocal_rank_fusion(
                result_lists,
                weights=weights
            )

            return combined_results

        except Exception as e:
            self.logger.error(f"Error in rank fusion retrieval: {str(e)}")
            raise

    def generate_enhanced_summary(
        self,
        vector_store: Chroma,
        bm25: BM25Okapi,
        chunk_ids: List[str],
        query: str = "Summarize the main points of this document"
    ) -> List[Dict]:
        """Generate a summary using both vector and BM25 retrieval.

        Returns:
            One dict per paragraph of the model's answer with the paragraph
            under ``"content"`` and a ``"source"`` dict (page, first 200
            characters, context, relevance score, chunk id). Paragraph *i* is
            paired with fused source *i*; extra paragraphs reuse the last
            source. Empty when no source could be retrieved.

        Raises:
            Exception: Any retrieval or model error, after logging.
        """
        try:
            ranked_results = self.retrieve_with_rank_fusion(
                vector_store,
                bm25,
                chunk_ids,
                query
            )

            contexts = []
            sources = []

            # Fetch the stored text and metadata for the top fused results.
            for chunk_id, score in ranked_results[:self.config.num_chunks]:
                results = vector_store.get(
                    where={"chunk_id": chunk_id},
                    include=["documents", "metadatas"]
                )

                if results["documents"]:
                    context = results["documents"][0]
                    metadata = results["metadatas"][0]

                    contexts.append(context)
                    sources.append({
                        'content': context,
                        'page': metadata['page'],
                        'chunk_id': chunk_id,
                        'relevance_score': score,
                        'context': metadata.get('context', '')
                    })

            if not sources:
                # Nothing to cite: indexing into an empty list below would
                # raise, and the model would be summarising an empty context.
                return []

            prompt = PromptTemplate(
                template=self.system_prompt,
                input_variables=["context"]
            )

            llm = ChatOpenAI(
                temperature=self.gpt_temperature,
                model_name=self.gpt_model,
                api_key=self.openai_api_key,
            )

            response = llm.predict(prompt.format(context="\n\n".join(contexts)))

            # Split the response into paragraphs
            summaries = [p.strip() for p in response.split('\n\n') if p.strip()]

            structured_output = []
            last_source = len(sources) - 1
            for idx, summary in enumerate(summaries):
                source = sources[min(idx, last_source)]
                structured_output.append({
                    "content": summary,
                    "source": {
                        "page": source['page'],
                        "text": source['content'][:200] + "...",
                        "context": source['context'],
                        "relevance_score": source['relevance_score'],
                        "chunk_id": source['chunk_id']
                    }
                })

            return structured_output

        except Exception as e:
            self.logger.error(f"Error generating enhanced summary: {str(e)}")
            raise
492
+
493
def get_llm_summary_answer_by_cursor_complete(serializer, listaPDFs):
    """Summarise one or more PDFs with contextual, hybrid retrieval.

    Each PDF is split into chunks and every chunk is contextualised against
    the full text of the PDF it came from. All contextualised chunks are then
    indexed together and summarised with ``serializer["user_message"]`` as
    the retrieval query.

    Args:
        serializer: Mapping holding the request parameters read below.
        listaPDFs: Non-empty sequence of PDF paths.

    Returns:
        Dict with the structured summaries under ``"resultado"`` and an echo
        of the parameters under ``"parametros-utilizados"``.

    Raises:
        ValueError: If ``listaPDFs`` is empty.
    """
    if not listaPDFs:
        raise ValueError("listaPDFs must contain at least one PDF path")

    # Configuration
    config = RetrievalConfig(
        num_chunks=serializer["num_chunks_retrieval"],
        embedding_weight=serializer["embedding_weight"],
        bm25_weight=serializer["bm25_weight"],
        context_window=serializer["context_window"],
        chunk_overlap=serializer["chunk_overlap"],
        chunk_size=serializer["chunk_size"]
    )

    # Initialize enhanced summarizer
    summarizer = EnhancedDocumentSummarizer(
        openai_api_key=os.environ.get("OPENAI_API_KEY"),
        claude_api_key=os.environ.get("CLAUDE_API_KEY"),
        config=config,
        embedding_model=serializer["hf_embedding"],
        chunk_overlap=serializer["chunk_overlap"],
        chunk_size=serializer["chunk_size"],
        num_k_rerank=serializer["num_k_rerank"],
        model_cohere_rerank=serializer["model_cohere_rerank"],
        claude_context_model=serializer["claude_context_model"],
        system_prompt=serializer["system_prompt"],
        gpt_model=serializer["model"],
        gpt_temperature=serializer["gpt_temperature"]
    )

    # Contextualise each PDF's chunks against that PDF's own full text, so a
    # chunk is never situated within a different document.
    contextualized_chunks = []
    for pdf_path in listaPDFs:
        chunks = summarizer.load_and_split_document(pdf_path)

        pages = PyPDFLoader(pdf_path).load()
        full_text = " ".join(page.page_content for page in pages)

        contextualized_chunks.extend(
            summarizer.contextual_retriever.contextualize_chunks(full_text, chunks)
        )

    # Create enhanced vector store and BM25 index
    vector_store, bm25, chunk_ids = summarizer.create_enhanced_vector_store(contextualized_chunks)

    # Generate enhanced summary
    structured_summaries = summarizer.generate_enhanced_summary(
        vector_store,
        bm25,
        chunk_ids,
        serializer["user_message"]
    )

    # Output results as JSON
    json_output = json.dumps(structured_summaries, indent=2)
    print("\nStructured Summaries:")
    print(json_output)
    return {
        "resultado": structured_summaries,
        "parametros-utilizados": {
            "num_chunks_retrieval": serializer["num_chunks_retrieval"],
            "embedding_weight": serializer["embedding_weight"],
            "bm25_weight": serializer["bm25_weight"],
            "context_window": serializer["context_window"],
            "chunk_overlap": serializer["chunk_overlap"],
            "num_k_rerank": serializer["num_k_rerank"],
            "model_cohere_rerank": serializer["model_cohere_rerank"],
            "more_initial_chunks_for_reranking": serializer["more_initial_chunks_for_reranking"],
            "claude_context_model": serializer["claude_context_model"],
            "gpt_temperature": serializer["gpt_temperature"],
            "user_message": serializer["user_message"],
            "model": serializer["model"],
            "hf_embedding": serializer["hf_embedding"],
            "chunk_size": serializer["chunk_size"],
            "system_prompt": serializer["system_prompt"],
        }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
_utils/resumo_simples_cursor.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Dict, Tuple
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.document_loaders import PyPDFLoader
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ from langchain.vectorstores import Chroma
7
+ from langchain.chat_models import ChatOpenAI
8
+ from langchain.chains import create_extraction_chain
9
+ from langchain.prompts import PromptTemplate
10
+ from dataclasses import dataclass
11
+ import uuid
12
+ import json
13
+ from langchain_huggingface import HuggingFaceEndpoint
14
+ from setup.environment import default_model
15
+
16
# Tracing configuration, set at import time via environment variables
# (presumably consumed by LangChain/LangSmith; confirm against their docs).
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
# NOTE(review): the value returned by this lookup is discarded, so the line
# has no effect; the API key must already be present in the environment.
os.environ.get("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_PROJECT"]="VELLA"
20
+
21
@dataclass
class DocumentChunk:
    """One piece of text produced by splitting a PDF page, with its location."""

    content: str  # text of the chunk
    page_number: int  # 1-based page the chunk was taken from
    chunk_id: str  # UUID string assigned when the chunk is created
    start_char: int  # start offset: position in the page plus the length of all earlier pages
    end_char: int  # start_char plus the length of the chunk
28
+
29
class DocumentSummarizer:
    """Split PDFs into chunks, index them in Chroma and summarise them with citations.

    The summary is produced either by an OpenAI chat model (when ``model``
    equals ``default_model``) or by a Hugging Face endpoint for the given
    repository id.
    """

    def __init__(self, openai_api_key: str, model, embedding, chunk_config, system_prompt):
        """Build the embedding model and text splitter.

        Args:
            openai_api_key: Key used when creating the OpenAI chat model.
            model: ``default_model`` or a Hugging Face repository id.
            embedding: Hugging Face embedding model name.
            chunk_config: Dict with ``"size"`` and ``"overlap"`` for the splitter.
            system_prompt: Prompt template formatted with a ``context`` variable.
        """
        self.model = model
        self.system_prompt = system_prompt
        self.openai_api_key = openai_api_key
        self.embeddings = HuggingFaceEmbeddings(
            model_name=embedding
        )
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_config["size"],
            chunk_overlap=chunk_config["overlap"]
        )
        self.chunk_metadata = {}  # Store chunk metadata for tracing

    @staticmethod
    def _locate_chunks(text, page_chunks):
        """Return the ``(start, end)`` offsets of each chunk inside ``text``.

        Chunks are searched for from just after the previous chunk's start, so
        repeated passages map to successive occurrences instead of always to
        the first one. A chunk that cannot be found verbatim gets the current
        scan position as an approximate start rather than a bogus ``-1``.
        """
        spans = []
        cursor = 0
        for chunk in page_chunks:
            start = text.find(chunk, cursor)
            if start == -1:
                # Not found ahead of the cursor: accept any earlier match.
                start = text.find(chunk)
            if start == -1:
                start = cursor
            elif start >= cursor:
                # Overlapping chunks start before the previous one ends, so
                # only step past the previous start, not its end.
                cursor = start + 1
            spans.append((start, start + len(chunk)))
        return spans

    def load_and_split_document(self, pdf_path: str) -> List[DocumentChunk]:
        """Load a PDF and split every page into chunks with location metadata.

        Args:
            pdf_path: Path of the PDF to load.

        Returns:
            The chunks of all pages, in page order. Offsets are cumulative
            over the text of the preceding pages. Each chunk's location is
            also recorded in ``self.chunk_metadata`` under its ``chunk_id``.
        """
        loader = PyPDFLoader(pdf_path)
        pages = loader.load()
        chunks = []
        char_count = 0

        for page in pages:
            text = page.page_content
            # 1-based page numbering; a missing 'page' entry is treated as page 0.
            page_number = page.metadata.get('page', 0) + 1
            page_chunks = self.text_splitter.split_text(text)

            for chunk, (start_char, end_char) in zip(page_chunks, self._locate_chunks(text, page_chunks)):
                chunk_id = str(uuid.uuid4())
                doc_chunk = DocumentChunk(
                    content=chunk,
                    page_number=page_number,
                    chunk_id=chunk_id,
                    start_char=char_count + start_char,
                    end_char=char_count + end_char
                )
                chunks.append(doc_chunk)

                # Store metadata for later retrieval
                self.chunk_metadata[chunk_id] = {
                    'page': doc_chunk.page_number,
                    'start_char': doc_chunk.start_char,
                    'end_char': doc_chunk.end_char
                }

            char_count += len(text)

        return chunks

    def create_vector_store(self, chunks: List[DocumentChunk]) -> Chroma:
        """Create a Chroma vector store holding the chunk texts and their metadata."""
        texts = [chunk.content for chunk in chunks]
        metadatas = [{
            'chunk_id': chunk.chunk_id,
            'page': chunk.page_number,
            'start_char': chunk.start_char,
            'end_char': chunk.end_char
        } for chunk in chunks]

        vector_store = Chroma.from_texts(
            texts=texts,
            metadatas=metadatas,
            embedding=self.embeddings
        )
        return vector_store

    def generate_summary_with_sources(
        self,
        vector_store: Chroma,
        query: str = "Summarize the main points of this document"
    ) -> List[Dict]:
        """Generate a summary with source citations as structured data.

        Returns:
            One dict per paragraph of the model's answer, each with the
            paragraph under ``"content"`` and a ``"source"`` dict (page, first
            200 characters of the chunk, relevance score). Paragraph *i* is
            paired with retrieved source *i*; extra paragraphs reuse the last
            source. Empty when nothing was retrieved.
        """
        # Retrieve relevant chunks with metadata
        relevant_docs = vector_store.similarity_search_with_score(query, k=5)

        sources = [
            {
                'content': doc.page_content,
                'page': doc.metadata['page'],
                'chunk_id': doc.metadata['chunk_id'],
                'relevance_score': score
            }
            for doc, score in relevant_docs
        ]
        contexts = [source['content'] for source in sources]

        if not sources:
            # Nothing to cite: indexing into an empty list below would raise,
            # and the model would be summarising an empty context.
            return []

        prompt = PromptTemplate(
            template=self.system_prompt,
            input_variables=["context"]
        )

        if self.model == default_model:
            llm = ChatOpenAI(
                temperature=0,
                model_name="gpt-4o-mini",
                api_key=self.openai_api_key
            )
        else:
            llm = HuggingFaceEndpoint(
                repo_id=self.model,
                task="text-generation",
                max_new_tokens=1100,
                do_sample=False,
                huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN")
            )

        response = llm.predict(prompt.format(context="\n\n".join(contexts)))

        # Split the response into paragraphs
        summaries = [p.strip() for p in response.split('\n\n') if p.strip()]

        structured_output = []
        last_source = len(sources) - 1
        for idx, summary in enumerate(summaries):
            source = sources[min(idx, last_source)]
            structured_output.append({
                "content": summary,
                "source": {
                    "page": source['page'],
                    "text": source['content'][:200] + "...",
                    "relevance_score": source['relevance_score']
                }
            })

        return structured_output

    def get_source_context(self, chunk_id: str, window: int = 100) -> Dict:
        """Return the recorded location of a chunk, or None if the id is unknown.

        Args:
            chunk_id: Id assigned by ``load_and_split_document``.
            window: Currently unused.

        Returns:
            Dict with ``'page'``, ``'start_char'`` and ``'end_char'``, or None.
        """
        metadata = self.chunk_metadata.get(chunk_id)
        if not metadata:
            return None

        return {
            'page': metadata['page'],
            'start_char': metadata['start_char'],
            'end_char': metadata['end_char']
        }
175
+
176
def get_llm_summary_answer_by_cursor(serializer, listaPDFs):
    """Summarise the given PDFs and return the structured summaries.

    All PDFs are split into chunks, indexed in a single vector store and
    summarised with the summarizer's default query. The result is also
    printed as JSON.

    Returns a list that can be sent to the frontend as is, shaped like:
        [
            {
                "content": "Summary point 1...",
                "source": {
                    "page": 1,
                    "text": "Source text...",
                    "relevance_score": 0.95
                }
            },
            ...
        ]
    """
    # By Luan
    summarizer = DocumentSummarizer(
        openai_api_key=os.environ.get("OPENAI_API_KEY"),
        embedding=serializer["hf_embedding"],
        chunk_config={"size": serializer["chunk_size"], "overlap": serializer["chunk_overlap"]},
        system_prompt=serializer["system_prompt"],
        model=serializer["model"]
    )

    # Gather the chunks of every PDF into one list before indexing.
    collected_chunks = []
    for pdf_path in listaPDFs:
        collected_chunks.extend(summarizer.load_and_split_document(pdf_path))

    index = summarizer.create_vector_store(collected_chunks)
    structured_summaries = summarizer.generate_summary_with_sources(index)

    serialized = json.dumps(structured_summaries)
    print("\n\n")
    print(serialized)
    return structured_summaries
219
+
220
if __name__ == "__main__":
    # NOTE(review): get_llm_summary_answer_by_cursor requires `serializer` and
    # `listaPDFs`; called like this it raises TypeError. Supply arguments or
    # remove this entry point.
    get_llm_summary_answer_by_cursor()
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
resumos/serializer.py CHANGED
@@ -26,4 +26,35 @@ class ResumoCursorSerializer(serializers.Serializer):
26
  model = serializers.CharField(required=False, default=default_model)
27
  hf_embedding = serializers.CharField(required=False, default="all-MiniLM-L6-v2")
28
  chunk_size = serializers.IntegerField(required=False, default=1000)
29
- chunk_overlap = serializers.IntegerField(required=False, default=200)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  model = serializers.CharField(required=False, default=default_model)
27
  hf_embedding = serializers.CharField(required=False, default="all-MiniLM-L6-v2")
28
  chunk_size = serializers.IntegerField(required=False, default=1000)
29
+ chunk_overlap = serializers.IntegerField(required=False, default=200)
30
+
31
+
32
# Default prompt template used by ResumoCursorCompeltoSerializer.system_prompt.
# "{context}" is a placeholder; presumably it is filled with document text
# before the prompt is sent to the model -- TODO confirm against the caller.
system_prompt = """
Based on the following context, provide multiple key points from the document.
For each point, create a new paragraph.
Each paragraph should be a complete, self-contained insight.
Include any relevant context provided.

Context: {context}

Key points:
"""
# Default value for ResumoCursorCompeltoSerializer.user_message.
user_message = "What are the main points of this document?"
43
class ResumoCursorCompeltoSerializer(ResumoCursorSerializer):
    """Request options for the complete cursor summary.

    Extends ``ResumoCursorSerializer`` with prompt overrides plus retrieval,
    rerank and model settings. Every field declared here has a default.

    NOTE(review): the class name is misspelled ("Compelto"); it is kept
    because other modules import it under this name.
    """
    # files = serializers.ListField(child=serializers.FileField(), required=True)
    # Prompt overrides; defaults are the module-level constants of the same name.
    system_prompt = serializers.CharField(required=False, default=system_prompt)
    user_message = serializers.CharField(required=False, default=user_message)
    # model = serializers.CharField(required=False, default=default_model)
    # hf_embedding = serializers.CharField(required=False, default="all-MiniLM-L6-v2")
    # chunk_size = serializers.IntegerField(required=False, default=1000)
    # chunk_overlap = serializers.IntegerField(required=False, default=200)
    # Retrieval settings -- presumably chunk count and the relative weights of
    # embedding vs. BM25 scores; verify against the code that consumes them.
    num_chunks_retrieval = serializers.IntegerField(default=5)
    embedding_weight = serializers.FloatField(default=0.5)
    bm25_weight = serializers.FloatField(default=0.5)
    context_window = serializers.IntegerField(default=3)
    # Re-declares the parent's chunk_overlap field with the same default (200).
    chunk_overlap = serializers.IntegerField(default=200)
    # Rerank settings (the default model name suggests Cohere rerank -- TODO confirm usage).
    num_k_rerank = serializers.IntegerField(default=5)
    model_cohere_rerank = serializers.CharField(required=False, default="rerank-english-v2.0")
    more_initial_chunks_for_reranking = serializers.IntegerField(default=20)
    # Model identifier and temperature handed to downstream LLM calls.
    claude_context_model = serializers.CharField(required=False, default="claude-3-haiku-20240307")
    gpt_temperature = serializers.FloatField(default=0)
resumos/views.py CHANGED
@@ -2,9 +2,10 @@ from rest_framework.views import APIView
2
  import tempfile, os
3
  from rest_framework.response import Response
4
 
5
- from _utils.resumo_completo_cursor import get_llm_summary_answer_by_cursor
 
6
  from _utils.utils import DEFAULT_SYSTEM_PROMPT
7
- from .serializer import ResumoPDFSerializer, ResumoCursorSerializer
8
  from _utils.main import get_llm_answer_summary, get_llm_answer_summary_with_embedding
9
  from setup.environment import default_model
10
  from rest_framework.parsers import MultiPartParser
@@ -74,7 +75,7 @@ class ResumoEmbeddingView(APIView):
74
 
75
  return Response({"resposta": resposta_llm})
76
 
77
- class ResumoCompletoCursorView(APIView):
78
  parser_classes = [MultiPartParser]
79
 
80
  @extend_schema(
@@ -98,6 +99,38 @@ class ResumoCompletoCursorView(APIView):
98
 
99
  resposta_llm = get_llm_summary_answer_by_cursor(data, listaPDFs)
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  for file in listaPDFs:
102
  os.remove(file)
103
 
 
2
  import tempfile, os
3
  from rest_framework.response import Response
4
 
5
+ from _utils.resumo_completo_cursor import get_llm_summary_answer_by_cursor_complete
6
+ from _utils.resumo_simples_cursor import get_llm_summary_answer_by_cursor
7
  from _utils.utils import DEFAULT_SYSTEM_PROMPT
8
+ from .serializer import ResumoCursorCompeltoSerializer, ResumoPDFSerializer, ResumoCursorSerializer
9
  from _utils.main import get_llm_answer_summary, get_llm_answer_summary_with_embedding
10
  from setup.environment import default_model
11
  from rest_framework.parsers import MultiPartParser
 
75
 
76
  return Response({"resposta": resposta_llm})
77
 
78
+ class ResumoSimplesCursorView(APIView):
79
  parser_classes = [MultiPartParser]
80
 
81
  @extend_schema(
 
99
 
100
  resposta_llm = get_llm_summary_answer_by_cursor(data, listaPDFs)
101
 
102
+ for file in listaPDFs:
103
+ os.remove(file)
104
+
105
+ return Response({"resposta": resposta_llm})
106
+
107
+ class ResumoSimplesCursorCompletoView(APIView):
108
+ parser_classes = [MultiPartParser]
109
+
110
+ @extend_schema(
111
+ request=ResumoCursorCompeltoSerializer,
112
+ )
113
+ def post(self, request):
114
+ serializer = ResumoCursorCompeltoSerializer(data=request.data)
115
+ if serializer.is_valid(raise_exception=True):
116
+ print('\n\n\n')
117
+ print('serializer.validated_data: ', serializer.validated_data)
118
+ print('\n\n\n')
119
+ listaPDFs = []
120
+ data = serializer.validated_data
121
+ print('\nserializer.validated_data: ', serializer.validated_data)
122
+
123
+ for file in serializer.validated_data['files']:
124
+ file.seek(0)
125
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: # Create a temporary file to save the uploaded PDF
126
+ for chunk in file.chunks(): # Write the uploaded file content to the temporary file
127
+ temp_file.write(chunk)
128
+ temp_file_path = temp_file.name # Get the path of the temporary file
129
+ listaPDFs.append(temp_file_path)
130
+ print('listaPDFs: ', listaPDFs)
131
+
132
+ resposta_llm = get_llm_summary_answer_by_cursor_complete(data, listaPDFs)
133
+
134
  for file in listaPDFs:
135
  os.remove(file)
136
 
setup/urls.py CHANGED
@@ -5,7 +5,7 @@ from drf_spectacular.views import SpectacularSwaggerView, SpectacularAPIView
5
 
6
 
7
  from pdfs.views import getPDF
8
- from resumos.views import ResumoView, ResumoCompletoCursorView
9
  from modelos_usuarios.views import ListCreateModeloUsuarioView, CreateUpdateDeleteModeloUsuarioView, ListModelosPorUsuarioView
10
 
11
  router = routers.DefaultRouter()
@@ -18,7 +18,8 @@ urlpatterns = [
18
  path('', include(router.urls)),
19
  path('pdf', getPDF, name='upload-pdf'),
20
  path('resumo', ResumoView.as_view(), name='summary-pdf'),
21
- path('resumo/cursor', ResumoCompletoCursorView.as_view(), name='summary-cursor-pdf'),
 
22
  path("modelo", ListCreateModeloUsuarioView.as_view()),
23
  path("modelo/<int:pk>", CreateUpdateDeleteModeloUsuarioView.as_view()),
24
  path("usuario/<int:user_id>/modelos", ListModelosPorUsuarioView.as_view())
 
5
 
6
 
7
  from pdfs.views import getPDF
8
+ from resumos.views import ResumoView, ResumoSimplesCursorView, ResumoSimplesCursorCompletoView
9
  from modelos_usuarios.views import ListCreateModeloUsuarioView, CreateUpdateDeleteModeloUsuarioView, ListModelosPorUsuarioView
10
 
11
  router = routers.DefaultRouter()
 
18
  path('', include(router.urls)),
19
  path('pdf', getPDF, name='upload-pdf'),
20
  path('resumo', ResumoView.as_view(), name='summary-pdf'),
21
+ path('resumo/cursor', ResumoSimplesCursorView.as_view(), name='summary-cursor-pdf'),
22
+ path('resumo/cursor-completo', ResumoSimplesCursorCompletoView.as_view(), name='summary-cursor-completo-pdf'),
23
  path("modelo", ListCreateModeloUsuarioView.as_view()),
24
  path("modelo/<int:pk>", CreateUpdateDeleteModeloUsuarioView.as_view()),
25
  path("usuario/<int:user_id>/modelos", ListModelosPorUsuarioView.as_view())