# NOTE(review): the original file began with three stray lines — "Spaces:",
# "Running", "Running" — which look like Hugging Face Space status text pasted
# in by accident; they are not valid Python and were commented out here.
from rest_framework.views import APIView
import tempfile, os
from rest_framework.response import Response
from _utils.ragas import test_ragas
from _utils.resumo_completo_cursor import (
    get_llm_summary_answer_by_cursor_complete,
)
from .serializer import (
    RagasFromTextSerializer,
    RagasSerializer,
)
from rest_framework.parsers import MultiPartParser
from drf_spectacular.utils import extend_schema
class RagasView(APIView):
    """Accept uploaded PDF files, run the RAGAS evaluation over them and return the result.

    POST multipart/form-data with a ``files`` list of PDFs (validated by
    ``RagasSerializer``). Each upload is spilled to a named temporary file so
    ``test_ragas`` can read it by filesystem path; the temp files are always
    removed afterwards, even when evaluation fails.
    """

    parser_classes = [MultiPartParser]

    def post(self, request):
        serializer = RagasSerializer(data=request.data)
        # raise_exception=True raises ValidationError on bad input, so no
        # explicit branch on the return value is needed.
        serializer.is_valid(raise_exception=True)

        temp_paths: list[str] = []
        try:
            for file in serializer.validated_data["files"]:
                file.seek(0)
                # delete=False so the path stays valid after the `with` block;
                # cleanup is handled explicitly in the `finally` below.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    # Stream the upload in chunks to avoid loading it all in memory.
                    for chunk in file.chunks():
                        temp_file.write(chunk)
                    temp_paths.append(temp_file.name)

            result = test_ragas(serializer, temp_paths)
        finally:
            # Remove every temp file even if test_ragas (or a write) raised;
            # a failure to delete one file must not abort the rest.
            for path in temp_paths:
                try:
                    os.remove(path)
                except OSError:
                    pass

        return Response({"msg": result})
class RagasFromTextView(APIView):
    """Build a RAGAS dataset from a CSV test set plus fresh LLM answers and return the scores.

    POST a payload validated by ``RagasFromTextSerializer``. For each question
    (one hard-coded seed pair plus every row of the CSV test set) the LLM is
    asked to answer against the reference context; the answers and retrieved
    contexts are assembled into a ``datasets.Dataset`` and evaluated with RAGAS.
    """

    # Machine-specific default kept for backward compatibility; override the
    # location with the RAGAS_TESTSET_CSV environment variable.
    DEFAULT_TESTSET_CSV = (
        "D:/repositorios/projetos-pessoais/projeto-y-backend-hugginf-face-teste-01/"
        "vella-backend/_utils/files/ragas_testset.csv"
    )

    def post(self, request):
        serializer = RagasFromTextSerializer(data=request.data)
        # raise_exception=True raises ValidationError on bad input.
        serializer.is_valid(raise_exception=True)

        # Heavy dependencies imported lazily so they are only loaded when this
        # endpoint is actually hit.
        import os
        import pandas as pd
        from datasets import Dataset
        from ragas import evaluate
        from ragas.metrics import faithfulness

        csv_path = os.environ.get("RAGAS_TESTSET_CSV", self.DEFAULT_TESTSET_CSV)
        df_pandas = pd.read_csv(csv_path)

        # Seed the evaluation set with one known question/reference pair, then
        # extend both lists row-by-row from the CSV so indices stay aligned.
        data = {
            "user_input": [
                "What is the capital of France?",
            ],
            "response": [],
            "retrieved_contexts": [],
        }
        reference = [
            "Paris is the capital of France. It is a major European city known for its culture."
        ]
        data["user_input"].extend(df_pandas["user_input"])
        reference.extend(df_pandas["reference"])

        # Ask the LLM once per question; collect its full answer and the text
        # of every retrieved source.
        for i in range(len(reference)):
            serializer.validated_data["user_message"] = data["user_input"][i]
            resposta_llm = get_llm_summary_answer_by_cursor_complete(
                serializer.validated_data, contexto=reference[i]
            )
            data["response"].append(resposta_llm["texto_completo"])
            data["retrieved_contexts"].append(
                [x["source"]["text"] for x in resposta_llm["resultado"]]
            )

        dataset = Dataset.from_dict(data)

        # Only faithfulness is evaluated for now; answer_relevancy,
        # answer_correctness, context_precision and context_recall were
        # deliberately disabled in the original code.
        results = evaluate(dataset, [faithfulness])

        return Response({"resposta": results.to_pandas().to_string()})