"""Django REST views for running RAGAS evaluations.

``RagasView`` accepts uploaded PDF files and evaluates them with
``test_ragas``; ``RagasFromTextView`` builds a RAGAS dataset from a CSV
test set plus the project's LLM summarizer and scores the answers.
"""

import os
import tempfile

from drf_spectacular.utils import extend_schema
from rest_framework.parsers import MultiPartParser
from rest_framework.response import Response
from rest_framework.views import APIView

from _utils.ragas import test_ragas
from _utils.resumo_completo_cursor import (
    get_llm_summary_answer_by_cursor_complete,
)

from .serializer import RagasFromTextSerializer, RagasSerializer

# CSV test set consumed by RagasFromTextView.  Overridable through the
# RAGAS_TESTSET_CSV environment variable so the view is not tied to one
# developer's machine; the default preserves the original hard-coded path.
_DEFAULT_TESTSET_CSV = (
    "D:/repositorios/projetos-pessoais/"
    "projeto-y-backend-hugginf-face-teste-01/vella-backend/"
    "_utils/files/ragas_testset.csv"
)


class RagasView(APIView):
    """Receive PDF uploads and run the RAGAS evaluation over them."""

    parser_classes = [MultiPartParser]

    @extend_schema(request=RagasSerializer)
    def post(self, request):
        """Persist each uploaded PDF to a temp file, evaluate, clean up.

        Returns:
            Response with ``{"msg": <result of test_ragas>}``.
        """
        serializer = RagasSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        pdf_paths = []
        try:
            for uploaded in serializer.validated_data["files"]:
                # Rewind in case the stream was already read upstream.
                uploaded.seek(0)
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    for chunk in uploaded.chunks():
                        temp_file.write(chunk)
                    pdf_paths.append(temp_file.name)

            result = test_ragas(serializer, pdf_paths)
        finally:
            # Always remove the temp files, even if evaluation raised;
            # the originals leaked on any exception before this point.
            for path in pdf_paths:
                try:
                    os.remove(path)
                except OSError:
                    pass  # best-effort cleanup

        return Response({"msg": result})


class RagasFromTextView(APIView):
    """Score LLM answers against a CSV test set with RAGAS metrics."""

    def post(self, request):
        """Build a RAGAS dataset from the test set and evaluate it.

        For each question (one hard-coded seed plus every ``user_input``
        row of the CSV) the project LLM is asked for an answer given the
        matching ``reference`` context; faithfulness is then computed.

        Returns:
            Response with ``{"resposta": <results table as text>}``.
        """
        serializer = RagasFromTextSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        # Heavy third-party imports stay local so the module imports
        # quickly when this endpoint is unused.
        import pandas as pd
        from datasets import Dataset
        from ragas import evaluate
        from ragas.metrics import faithfulness

        csv_path = os.environ.get("RAGAS_TESTSET_CSV", _DEFAULT_TESTSET_CSV)
        df_pandas = pd.read_csv(csv_path)

        # Seed question/reference pair, followed by every CSV row.
        data = {
            "user_input": ["What is the capital of France?"],
            "response": [],
            "retrieved_contexts": [],
        }
        reference = [
            "Paris is the capital of France. It is a major European city "
            "known for its culture."
        ]
        data["user_input"].extend(df_pandas["user_input"])
        reference.extend(df_pandas["reference"])

        # Query the LLM per question, collecting the answer text and the
        # contexts it retrieved.
        for question, contexto in zip(data["user_input"], reference):
            serializer.validated_data["user_message"] = question
            resposta_llm = get_llm_summary_answer_by_cursor_complete(
                serializer.validated_data, contexto=contexto
            )
            data["response"].append(resposta_llm["texto_completo"])
            data["retrieved_contexts"].append(
                [item["source"]["text"] for item in resposta_llm["resultado"]]
            )

        dataset = Dataset.from_dict(data)
        # Only faithfulness is active; the other metrics were disabled in
        # the original implementation.
        results = evaluate(dataset, [faithfulness])
        return Response({"resposta": results.to_pandas().to_string()})