# Dependencies:
# gradio==3.3.1
# transformers==4.27.1
# torch==2.0.1
# pymupdf==1.21.1
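# A minimal install command matching the pins above (assumes a pip-based environment):
#   pip install gradio==3.3.1 transformers==4.27.1 torch==2.0.1 pymupdf==1.21.1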

import gradio as gr
from transformers import pipeline
import fitz  # PyMuPDF

# Load a summarization pipeline from Hugging Face; no model is pinned here, so
# transformers falls back to its default summarization checkpoint (downloaded on
# first run). Pass model="..." to pin a specific checkpoint.
summarizer = pipeline("summarization")

def extract_text_from_pdf(pdf_file):
    # Accepts either raw bytes (Gradio's "binary" File type) or a file-like object.
    text = ""
    try:
        if isinstance(pdf_file, bytes):
            document = fitz.open(stream=pdf_file, filetype="pdf")
        else:
            document = fitz.open(stream=pdf_file.read(), filetype="pdf")
        for page_num in range(len(document)):
            page = document.load_page(page_num)
            text += page.get_text()
        document.close()
    except Exception as e:
        # Return a recognizable error string so the caller can detect the failure
        return f"Error reading PDF: {e}"
    return text
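
# Hypothetical standalone usage of extract_text_from_pdf outside the Gradio UI
# ("rubric.pdf" is an illustrative path, not part of this app):
#   with open("rubric.pdf", "rb") as f:
#       rubric_text = extract_text_from_pdf(f.read())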

def evaluate_text_against_rubric(rubric_text, text):
    # Treat each non-empty line of the rubric as a separate criterion
    criteria = [criterion.strip() for criterion in rubric_text.split('\n') if criterion.strip()]
    
    if not criteria:
        return "No valid criteria found in the rubric."
    
    if not text:
        return "No text provided for evaluation."
    
    evaluations = {}
    for i, criterion in enumerate(criteria):
        try:
            # truncation=True keeps long inputs within the model's input limit;
            # the summary is a placeholder and does not yet depend on the criterion
            summary = summarizer(text, max_length=50, min_length=25, do_sample=False, truncation=True)[0]['summary_text']
            evaluations[f'Criteria {i+1}'] = {
                "Criterion": criterion,
                "Score": 3,  # Dummy score for now
                "Comment": f"Evaluation based on criterion: {criterion}",
                "Example": summary
            }
        except Exception as e:
            evaluations[f'Criteria {i+1}'] = {
                "Criterion": criterion,
                "Score": 0,
                "Comment": f"Error during evaluation: {str(e)}",
                "Example": ""
            }
    
    return evaluations
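
# Shape of the dictionary returned above (values are illustrative only):
#   {
#       "Criteria 1": {"Criterion": "...", "Score": 3, "Comment": "...", "Example": "..."},
#       "Criteria 2": {...},
#   }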

def evaluate(rubric_pdf, rubric_text, text):
    rubric = ""
    if rubric_pdf is not None:
        rubric = extract_text_from_pdf(rubric_pdf)
        # Bail out early if PDF parsing failed instead of treating the error as a rubric
        if rubric.startswith("Error reading PDF:"):
            return rubric
    elif rubric_text:
        rubric = rubric_text
    
    if not rubric:
        return "No rubric provided."
    
    if not text:
        return "No text provided for evaluation."
    
    evaluation = evaluate_text_against_rubric(rubric, text)
    
    if isinstance(evaluation, str):  # If it's an error message
        return evaluation
    
    evaluation_text = ""
    for criterion, details in evaluation.items():
        evaluation_text += f"{criterion}:\n"
        evaluation_text += f"  Criterion: {details['Criterion']}\n"
        evaluation_text += f"  Score: {details['Score']}\n"
        evaluation_text += f"  Comment: {details['Comment']}\n"
        evaluation_text += f"  Example: {details['Example']}\n\n"
    
    return evaluation_text

# Create Gradio interface
with gr.Blocks() as interface:
    gr.Markdown("# PDF Text Evaluator")
    gr.Markdown("Upload a rubric as a PDF or paste the rubric text, then paste text for evaluation.")
    
    rubric_pdf_input = gr.File(label="Upload Rubric PDF (optional)", type="binary")
    rubric_text_input = gr.Textbox(lines=10, placeholder="Or enter your rubric text here...", label="Rubric Text (optional)")
    text_input = gr.Textbox(lines=10, placeholder="Paste the text to be evaluated here...", label="Text to Evaluate")
    
    evaluate_button = gr.Button("Evaluate")
    
    output = gr.Textbox(label="Evaluation Results")
    
    # evaluate() already has the right signature, so wire it to the button directly
    evaluate_button.click(evaluate, inputs=[rubric_pdf_input, rubric_text_input, text_input], outputs=output)

# Launch the interface
interface.launch()
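
# Optional: Gradio can expose a temporary public URL instead of localhost only,
# e.g. interface.launch(share=True).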