luanpoppe committed on
Commit
b700f35
·
1 Parent(s): 3f199c2

feat: adicionando funcionalidade de múltiplos pdfs

Browse files
endpoint_teste/serializer.py CHANGED
@@ -14,7 +14,8 @@ class TesteSerializer(serializers.Serializer):
14
  pdf_url = serializers.CharField(required=False)
15
 
16
  class PDFUploadSerializer(serializers.Serializer):
17
- file = serializers.FileField()
 
18
  system_prompt = serializers.CharField(required=True)
19
  user_message = serializers.CharField(required=True)
20
  model = serializers.CharField(required=False)
 
14
  pdf_url = serializers.CharField(required=False)
15
 
16
  class PDFUploadSerializer(serializers.Serializer):
17
+ # file = serializers.FileField()
18
+ files = serializers.ListField(child=serializers.FileField(), required=True)
19
  system_prompt = serializers.CharField(required=True)
20
  user_message = serializers.CharField(required=True)
21
  model = serializers.CharField(required=False)
endpoint_teste/views.py CHANGED
@@ -59,40 +59,36 @@ def getTeste(request):
59
  @api_view(["POST"])
60
  def getPDF(request):
61
  if request.method == "POST":
62
- print('\n\n\n')
63
- print("CHEGOU AQUI")
64
  serializer = PDFUploadSerializer(data=request.data)
65
  if serializer.is_valid(raise_exception=True):
66
- # Access the uploaded file
 
67
  data = request.data
68
  print('data: ', data)
69
- pdf_file = serializer.validated_data['file']
70
- pdf_file.seek(0)
71
-
72
  embedding = serializer.validated_data.get("embedding", "gpt")
73
  model = serializer.validated_data.get("model", default_model)
74
- # print(dir(pdf_file))
75
- # print('pdf_file: ', pdf_file.read())
76
- # pdf_content = pdf_file.read()
77
- # Save the file or process it as needed
78
- # For example, you can save it to a specific location
79
- # with open(f'endpoint_teste/media/uploads/{pdf_file.name}', 'wb+') as destination:
80
- # for chunk in pdf_file.chunks():
81
- # destination.write(chunk)
82
- # return Response({"message": "File uploaded successfully."})
83
 
84
- # Create a temporary file to save the uploaded PDF
85
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
86
- # Write the uploaded file content to the temporary file
87
- for chunk in pdf_file.chunks():
88
- temp_file.write(chunk)
89
- temp_file_path = temp_file.name # Get the path of the temporary file
90
- print('temp_file_path: ', temp_file_path)
 
 
 
 
 
 
91
 
92
  resposta_llm = None
93
- resposta_llm = get_llm_answer(data["system_prompt"], data["user_message"], temp_file_path, model=model, embedding=embedding)
 
94
 
95
- os.remove(temp_file_path)
 
 
96
 
97
  return Response({
98
  "Resposta": resposta_llm
 
59
  @api_view(["POST"])
60
  def getPDF(request):
61
  if request.method == "POST":
 
 
62
  serializer = PDFUploadSerializer(data=request.data)
63
  if serializer.is_valid(raise_exception=True):
64
+ listaPDFs = []
65
+ print('\n\n')
66
  data = request.data
67
  print('data: ', data)
 
 
 
68
  embedding = serializer.validated_data.get("embedding", "gpt")
69
  model = serializer.validated_data.get("model", default_model)
 
 
 
 
 
 
 
 
 
70
 
71
+ # pdf_file = serializer.validated_data['file']
72
+ for file in serializer.validated_data['files']:
73
+ print("file: ", file)
74
+ file.seek(0)
75
+ # Create a temporary file to save the uploaded PDF
76
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
77
+ # Write the uploaded file content to the temporary file
78
+ for chunk in file.chunks():
79
+ temp_file.write(chunk)
80
+ temp_file_path = temp_file.name # Get the path of the temporary file
81
+ listaPDFs.append(temp_file_path)
82
+ # print('temp_file_path: ', temp_file_path)
83
+ print('listaPDFs: ', listaPDFs)
84
 
85
  resposta_llm = None
86
+ # resposta_llm = get_llm_answer(data["system_prompt"], data["user_message"], temp_file_path, model=model, embedding=embedding)
87
+ resposta_llm = get_llm_answer(data["system_prompt"], data["user_message"], listaPDFs, model=model, embedding=embedding)
88
 
89
+ for file in listaPDFs:
90
+ os.remove(file)
91
+ # os.remove(temp_file_path)
92
 
93
  return Response({
94
  "Resposta": resposta_llm
langchain_backend/main.py CHANGED
@@ -19,7 +19,6 @@ def get_llm_answer(system_prompt, user_prompt, pdf_url, model, embedding):
19
  embedding_function=embedding_object
20
  )
21
 
22
-
23
  print('model: ', model)
24
  print('embedding: ', embedding)
25
  pages = []
 
19
  embedding_function=embedding_object
20
  )
21
 
 
22
  print('model: ', model)
23
  print('embedding: ', embedding)
24
  pages = []
langchain_backend/utils.py CHANGED
@@ -16,11 +16,18 @@ embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-m
16
 
17
  allIds = []
18
 
19
- def getPDF(file_path):
20
  documentId = 0
21
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
22
- loader = PyPDFLoader(file_path, extract_images=False)
23
- pages = loader.load_and_split(text_splitter)
 
 
 
 
 
 
 
24
  for page in pages:
25
  print('\n')
26
  print('allIds: ', allIds)
 
16
 
17
  allIds = []
18
 
19
+ def getPDF(file_paths):
20
  documentId = 0
21
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
22
+ pages = []
23
+ for file in file_paths:
24
+ loader = PyPDFLoader(file, extract_images=False)
25
+ pagesDoc = loader.load_and_split(text_splitter)
26
+ pages = pages + pagesDoc
27
+
28
+
29
+ # loader = PyPDFLoader(file_paths, extract_images=False)
30
+ # pages = loader.load_and_split(text_splitter)
31
  for page in pages:
32
  print('\n')
33
  print('allIds: ', allIds)