File size: 4,130 Bytes
c4e8aa2
e5b2dcd
fb00050
e5b2dcd
c4e8aa2
e5b2dcd
2068870
c4e8aa2
fb00050
 
 
16d8bf6
 
 
 
fb00050
 
 
 
 
 
 
e5b2dcd
 
 
 
 
 
 
 
 
 
 
fb00050
e5b2dcd
 
c4e8aa2
fb00050
20e8be4
 
 
 
 
 
c4e8aa2
 
 
20e8be4
e5b2dcd
c4e8aa2
 
 
 
 
 
20e8be4
 
 
 
93b4c8c
20e8be4
 
 
e5b2dcd
 
 
c4e8aa2
 
fb00050
 
 
 
 
 
 
 
 
 
 
 
 
b2c63d2
 
 
c4e8aa2
20e8be4
 
 
 
c4e8aa2
 
 
 
 
 
 
20e8be4
c4e8aa2
 
 
fb00050
 
 
 
7ac6acd
fb00050
b2c63d2
fb00050
 
 
 
 
 
 
 
 
c4e8aa2
 
 
20e8be4
fb00050
93b4c8c
7ac6acd
16d8bf6
df4431b
e5b2dcd
b2c63d2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import os
import time

import fitz  # PyMuPDF
import gradio as gr
import openai

# Read the OpenAI API key from the environment instead of embedding it in the
# source: a key committed to a repository is readable by anyone with access
# to the file and must be treated as compromised (revoke and rotate it).
# If OPENAI_API_KEY is unset this is None.
openai.api_key = os.environ.get("OPENAI_API_KEY")

def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_file: Either the raw PDF content as ``bytes`` or an object with a
            ``read()`` method that returns that content.

    Returns:
        The text of all pages concatenated in page order. If reading or
        parsing fails, the exception message is returned instead of the
        text; no exception propagates to the caller.
    """
    try:
        data = pdf_file if isinstance(pdf_file, bytes) else pdf_file.read()
        # The context manager closes the document even when a page fails to
        # parse, so the underlying buffer is always released.
        with fitz.open(stream=data, filetype="pdf") as document:
            return "".join(page.get_text() for page in document)
    except Exception as e:
        # Existing contract: callers always receive a string, so the failure
        # is reported as text rather than raised.
        return str(e)

def generate_summary(text):
    """Request a summary of *text* from the chat model and return it.

    Sends a system message and a user message (the summarization prompt
    followed by *text*) to ``gpt-3.5-turbo`` through
    ``openai.ChatCompletion.create`` and returns the content of the first
    choice with surrounding whitespace stripped.
    """
    conversation = [
        {"role": "system", "content": "You are a summarization assistant."},
        {"role": "user", "content": f"Please summarize the following text: {text}"},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content'].strip()

def evaluate_text_against_rubric(rubric_text, text):
    """Evaluate *text* against every non-empty line of *rubric_text*.

    Each non-blank line of the rubric (split on ``'\\n'`` and stripped) is
    treated as one criterion.

    Args:
        rubric_text: The rubric, one criterion per line.
        text: The text to evaluate. Only its first 1000 characters are sent
            to ``generate_summary``.

    Returns:
        An error message string when the rubric has no criteria or *text* is
        empty. Otherwise a dict mapping ``"Criteria <n>"`` (1-based) to a dict
        with the keys ``"Criterion"``, ``"Score"``, ``"Comment"`` and
        ``"Example"``. If the summary request fails, every entry gets score 0
        and the error message as its comment.
    """
    start_time = time.perf_counter()

    # Split rubric into criteria, dropping blank lines.
    criteria = [line.strip() for line in rubric_text.split('\n') if line.strip()]

    if not criteria:
        return "No valid criteria found in the rubric."

    if not text:
        return "No text provided for evaluation."

    # The summary depends only on the text, never on the criterion, so it is
    # requested once instead of once per criterion (each request is a remote
    # API call).
    try:
        summary = generate_summary(text[:1000])
        error = None
    except Exception as e:
        summary = ""
        error = str(e)

    evaluations = {}
    for number, criterion in enumerate(criteria, start=1):
        if error is None:
            entry = {
                "Criterion": criterion,
                "Score": 3,  # Dummy score for now
                "Comment": f"Evaluation based on criterion: {criterion}",
                "Example": summary,
            }
        else:
            entry = {
                "Criterion": criterion,
                "Score": 0,
                "Comment": f"Error during evaluation: {error}",
                "Example": "",
            }
        evaluations[f'Criteria {number}'] = entry

    end_time = time.perf_counter()
    print(f"Evaluation took {end_time - start_time} seconds")

    return evaluations

def evaluate(rubric_pdf, rubric_text, text, *, max_chars=2000):
    """Evaluate *text* against a rubric and format the result for display.

    The rubric is taken from *rubric_pdf* when one is given. If the PDF
    yields no text (or only whitespace) and *rubric_text* is non-empty, the
    pasted rubric text is used instead, so a text-less PDF does not discard
    a rubric the user also typed in.

    Args:
        rubric_pdf: PDF content accepted by ``extract_text_from_pdf``, or
            ``None`` when no file was supplied.
        rubric_text: Rubric entered as plain text; may be empty or ``None``.
        text: The text to evaluate.
        max_chars: Maximum accepted length of *text* in characters.

    Returns:
        A human-readable string: either an error message (missing rubric,
        missing text, text too long, or an error reported by
        ``evaluate_text_against_rubric``) or one formatted section per
        criterion.
    """
    rubric = ""
    if rubric_pdf is not None:
        rubric = extract_text_from_pdf(rubric_pdf)
    # Fall back to the pasted rubric when there is no usable PDF text.
    if not rubric.strip() and rubric_text:
        rubric = rubric_text

    if not rubric:
        return "No rubric provided."

    if not text:
        return "No text provided for evaluation."

    if len(text) > max_chars:
        return f"The text provided exceeds the {max_chars} character limit."

    evaluation = evaluate_text_against_rubric(rubric, text)

    if isinstance(evaluation, str):  # If it's an error message
        return evaluation

    return "".join(
        f"{label}:\n"
        f"  Criterion: {details['Criterion']}\n"
        f"  Score: {details['Score']}\n"
        f"  Comment: {details['Comment']}\n"
        f"  Example: {details['Example']}\n\n"
        for label, details in evaluation.items()
    )

# Build the Gradio UI: rubric inputs, the text to evaluate, a trigger button
# and a box for the formatted results.
with gr.Blocks() as interface:
    gr.Markdown("# PDF Text Evaluator")
    gr.Markdown("Upload a rubric as a PDF or paste the rubric text, then paste text for evaluation.")

    # Rubric can be supplied either as an uploaded file or as pasted text.
    rubric_pdf_input = gr.File(
        label="Upload Rubric PDF (optional)",
        type="binary",
    )
    rubric_text_input = gr.Textbox(
        lines=10,
        placeholder="Or enter your rubric text here...",
        label="Rubric Text (optional)",
    )
    text_input = gr.Textbox(
        lines=10,
        placeholder="Paste the text to be evaluated here (max 2000 characters)...",
        label="Text to Evaluate",
    )

    evaluate_button = gr.Button("Evaluate")

    output = gr.Textbox(label="Evaluation Results")

    def evaluate_button_clicked(rubric_pdf, rubric_text, text):
        """Click handler: forward the three input values to ``evaluate``."""
        return evaluate(rubric_pdf, rubric_text, text)

    # Wire the button: the three inputs feed the handler, whose return value
    # is shown in the results box.
    evaluate_button.click(
        evaluate_button_clicked,
        inputs=[rubric_pdf_input, rubric_text_input, text_input],
        outputs=output,
    )

# Launch the interface only when this file is executed as a script, so that
# importing the module (for tests or to reuse its helpers) does not start a
# web server as a side effect.
if __name__ == "__main__":
    interface.launch()