import os
import tempfile

from drf_spectacular.utils import extend_schema
from rest_framework.parsers import MultiPartParser
from rest_framework.response import Response
from rest_framework.views import APIView

from _utils.ragas import test_ragas
from _utils.resumo_completo_cursor import (
    get_llm_summary_answer_by_cursor_complete,
)
from .serializer import (
    RagasFromTextSerializer,
    RagasSerializer,
)


class RagasView(APIView):
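    """Receive uploaded PDFs, save them to temporary files and run the
    RAGAS evaluation pipeline (test_ragas) on them."""
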
    parser_classes = [MultiPartParser]

    @extend_schema(
        request=RagasSerializer,
    )
    def post(self, request):
        serializer = RagasSerializer(data=request.data)
        print("\n\n\n")
        print("\n\n\n")
        print("serializer.data: ", serializer)
        listaPDFs = []
        if serializer.is_valid(raise_exception=True):
            for file in serializer.validated_data["files"]:
                file.seek(0)
                # Save the uploaded PDF to a temporary file on disk
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    # Write the uploaded file content to the temporary file in chunks
                    for chunk in file.chunks():
                        temp_file.write(chunk)
                    # Keep the temporary file path for the RAGAS evaluation
                    listaPDFs.append(temp_file.name)

            result = test_ragas(serializer, listaPDFs)

            # Remove the temporary PDF files once the evaluation is done
            for pdf_path in listaPDFs:
                os.remove(pdf_path)

            return Response({"msg": result})


class RagasFromTextView(APIView):
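    """Build a RAGAS evaluation dataset from a local CSV test set and answers
    generated by get_llm_summary_answer_by_cursor_complete, then evaluate it."""
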
    def post(self, request):
        serializer = RagasFromTextSerializer(data=request.data)
        if serializer.is_valid(raise_exception=True):
            from datasets import Dataset
            import pandas as pd
            from ragas import evaluate
            from ragas.metrics import (
                faithfulness,
                answer_relevancy,
                answer_correctness,
                context_precision,
                context_recall,
            )

            os.environ.get("OPENAI_API_KEY")

            # Load the RAGAS test set (questions and reference answers) from a local CSV
            df_pandas = pd.read_csv(
                "D:/repositorios/projetos-pessoais/projeto-y-backend-hugginf-face-teste-01/vella-backend/_utils/files/ragas_testset.csv"
            )
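            # Seed the evaluation data with one known example question and its reference answer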
            data = {
                "user_input": [
                    "What is the capital of France?",
                ],
                "response": [],
                "retrieved_contexts": [],
            }

            reference = [
                "Paris is the capital of France. It is a major European city known for its culture."
            ]

            # Append the questions and ground-truth answers from the CSV test set
            for x in df_pandas["user_input"]:
                data["user_input"].append(x)

            for x in df_pandas["reference"]:
                reference.append(x)

            print("reference: ", reference)

            # For each question, generate an answer with the LLM and collect the
            # contexts it used, so RAGAS can score the responses against the references
            for i in range(len(reference)):
                serializer.validated_data["user_message"] = data["user_input"][i]
                resposta_llm = get_llm_summary_answer_by_cursor_complete(
                    serializer.validated_data, contexto=reference[i]
                )
                data["response"].append(resposta_llm["texto_completo"])
                # Texts of the source chunks the LLM used for this answer
                lista_reference_contexts = [
                    x["source"]["text"] for x in resposta_llm["resultado"]
                ]
                data["retrieved_contexts"].append(lista_reference_contexts)

            # Convert the data to a Hugging Face Dataset
            dataset = Dataset.from_dict(data)

            # Metrics to evaluate (only faithfulness is enabled for now)
            metrics = [
                faithfulness,
                # answer_relevancy,
                # answer_correctness,
                # context_precision,
                # context_recall,
            ]

            # Evaluate the dataset using the selected metrics
            results = evaluate(dataset, metrics)

            # results.to_pandas().to_csv("./result.csv")
            return Response({"resposta": results.to_pandas().to_string()})