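"""DRF views for running RAGAS evaluations: one endpoint accepts uploaded PDF
files, the other builds an evaluation dataset from a CSV test set plus live
LLM answers and scores it."""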
import os
import tempfile

from drf_spectacular.utils import extend_schema
from rest_framework.parsers import MultiPartParser
from rest_framework.response import Response
from rest_framework.views import APIView

from _utils.ragas import test_ragas
from _utils.resumo_completo_cursor import get_llm_summary_answer_by_cursor_complete
from .serializer import RagasFromTextSerializer, RagasSerializer
class RagasView(APIView):
    """Receives uploaded PDF files and runs the RAGAS evaluation over them."""

    parser_classes = [MultiPartParser]

    @extend_schema(request=RagasSerializer)
    def post(self, request):
        serializer = RagasSerializer(data=request.data)
        listaPDFs = []

        if serializer.is_valid(raise_exception=True):
            for file in serializer.validated_data["files"]:
                file.seek(0)
                # Write each uploaded PDF to a temporary file on disk so the
                # evaluation pipeline can read it from a path.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
                    for chunk in file.chunks():
                        temp_file.write(chunk)
                    listaPDFs.append(temp_file.name)

            result = test_ragas(serializer, listaPDFs)

            # Clean up the temporary files once the evaluation has finished.
            for file_path in listaPDFs:
                os.remove(file_path)

            return Response({"msg": result})
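
# A minimal client-side sketch of how this endpoint might be called. The URL
# below is an assumption (it depends on how RagasView is mounted in urls.py),
# and RagasSerializer may require additional fields passed via `data=`:
#
#     import requests
#
#     with open("doc.pdf", "rb") as f:
#         resp = requests.post(
#             "http://localhost:8000/ragas/",  # hypothetical route
#             files=[("files", ("doc.pdf", f, "application/pdf"))],
#         )
#     print(resp.json()["msg"])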
class RagasFromTextView(APIView):
    """Builds a RAGAS dataset from a CSV test set plus freshly generated LLM
    answers, then evaluates it with the selected metrics."""

    def post(self, request):
        serializer = RagasFromTextSerializer(data=request.data)
        if serializer.is_valid(raise_exception=True):
            # Heavy dependencies are imported lazily so they are only loaded
            # when this endpoint is actually hit.
            import pandas as pd
            from datasets import Dataset
            from ragas import evaluate
            from ragas.metrics import (
                answer_correctness,
                answer_relevancy,
                context_precision,
                context_recall,
                faithfulness,
            )

            # The RAGAS metrics call the OpenAI API under the hood; the key
            # is read from the OPENAI_API_KEY environment variable.
            df_pandas = pd.read_csv(
                "D:/repositorios/projetos-pessoais/projeto-y-backend-hugginf-face-teste-01/vella-backend/_utils/files/ragas_testset.csv"
            )

            # Seed the dataset with one hand-written sample, then append the
            # questions and reference answers from the CSV test set. The two
            # lists stay aligned: index i of data["user_input"] corresponds
            # to index i of reference.
            data = {
                "user_input": [
                    "What is the capital of France?",
                ],
                "response": [],
                "retrieved_contexts": [],
            }
            reference = [
                "Paris is the capital of France. It is a major European city known for its culture."
            ]
            for x in df_pandas["user_input"]:
                data["user_input"].append(x)
            for x in df_pandas["reference"]:
                reference.append(x)

            # Generate an LLM answer for every question, collecting both the
            # response text and the contexts retrieved while answering.
            for i in range(len(reference)):
                serializer.validated_data["user_message"] = data["user_input"][i]
                resposta_llm = get_llm_summary_answer_by_cursor_complete(
                    serializer.validated_data, contexto=reference[i]
                )
                data["response"].append(resposta_llm["texto_completo"])
                lista_reference_contexts = [
                    x["source"]["text"] for x in resposta_llm["resultado"]
                ]
                data["retrieved_contexts"].append(lista_reference_contexts)

            # Convert the collected samples to a Hugging Face Dataset.
            dataset = Dataset.from_dict(data)

            # Only faithfulness is evaluated for now; re-enable the other
            # imported metrics here as needed.
            metrics = [
                faithfulness,
                # answer_relevancy,
                # answer_correctness,
                # context_precision,
                # context_recall,
            ]

            # Evaluate the dataset with the selected metrics.
            results = evaluate(dataset, metrics)
            # results.to_pandas().to_csv("./result.csv")
            return Response({"resposta": results.to_pandas().to_string()})
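
# A sketch of how these views might be wired into URL routing; the module
# layout and route names below are assumptions, not part of this file:
#
#     from django.urls import path
#     from .views import RagasFromTextView, RagasView
#
#     urlpatterns = [
#         path("ragas/", RagasView.as_view()),
#         path("ragas-from-text/", RagasFromTextView.as_view()),
#     ]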