import hmac
import os

import pandas as pd
import streamlit as st
from transformers import pipeline


@st.cache_resource
def _load_pipeline(model_name):
    """Build the text-classification pipeline for ``model_name``.

    Streamlit re-executes this script from the top on every widget
    interaction, so an uncached ``pipeline(...)`` call at module level would
    reload the model each time. ``st.cache_resource`` keeps the constructed
    pipeline and hands the same object back on later reruns.
    """
    return pipeline("text-classification", model=model_name)


# Module-level handles used by evaluate_explanation().
model1 = _load_pipeline("vectara/hallucination_evaluation_model")
model2 = _load_pipeline("sileod/deberta-v3-base-tasksource-nli")


# Predefined demo inputs, keyed by the option label shown in the
# "Select an example type:" radio widget. Each entry carries the question,
# the explanation to evaluate, and a ground-truth label.
examples = {
    'good': {
        'question': "What causes rainbows to appear in the sky?",
        'explanation': (
            "Rainbows appear when sunlight is refracted, dispersed, and "
            "reflected inside water droplets in the atmosphere, resulting in "
            "a spectrum of light appearing in the sky."
        ),
        'ground_truth': "Correct",
    },
    'bad': {
        'question': "What causes rainbows to appear in the sky?",
        'explanation': (
            "Rainbows happen because light in the sky gets mixed up and "
            "sometimes shows colors when it's raining or when there is water "
            "around."
        ),
        'ground_truth': "Incorrect",
    },
}


def evaluate_explanation(question, explanation):
    """Run ``explanation`` through both module-level classifiers.

    Args:
        question: The question the explanation is meant to answer. It is
            accepted for interface compatibility but is not forwarded to
            either model.
        explanation: Text passed unchanged to ``model1`` and ``model2``.

    Returns:
        A ``(results1, results2)`` tuple holding the raw return values of
        ``model1(explanation)`` and ``model2(explanation)``.
    """
    # NOTE(review): only the explanation is scored. Confirm whether these
    # checkpoints are meant to receive a text pair (e.g. question/explanation)
    # rather than a single string.
    results1 = model1(explanation)
    results2 = model2(explanation)
    return results1, results2


def compare_vectors(v1: list, v2: list) -> float:
    """Return the absolute gap between the first ``'score'`` of each result.

    Args:
        v1: Indexable result whose first element is a mapping with a
            ``'score'`` entry.
        v2: Second result, same layout as ``v1``.

    Returns:
        ``abs(v1[0]['score'] - v2[0]['score'])``. Only the first element of
        each argument is inspected; any further elements are ignored.

    Raises:
        IndexError: If either argument is empty.
        KeyError: If a first element has no ``'score'`` entry.
    """
    diff = abs(v1[0]['score'] - v2[0]['score'])
    return diff


# Page heading; rendered on every run, before and after the password gate.
st.title('Dual Model Evaluation of Explanations')


def check_password():
    """Render the password prompt and record a successful login.

    Draws a masked text input and a "Submit" button. On a correct submission
    ``st.session_state['password_correct']`` is set to ``True``; otherwise an
    error message is shown. The expected password is read from the
    ``PASSWORD`` environment variable. Returns ``None``.
    """

    def password_entered():
        """Validate the submitted password; runs as the button's callback."""
        # Read the value from session state via the widget key. A closure
        # over a local from the enclosing call could hold the value from an
        # earlier script run instead of what was just submitted.
        entered = st.session_state.get("password_input", "")
        expected = os.getenv('PASSWORD')

        # Fail closed when PASSWORD is unset or empty, so an empty submission
        # can never unlock the demo. compare_digest avoids leaking how many
        # leading characters matched; bytes are used because it rejects
        # non-ASCII str arguments.
        if expected and hmac.compare_digest(
            entered.encode("utf-8"), expected.encode("utf-8")
        ):
            st.session_state['password_correct'] = True
            # Do not keep the plaintext password in session state.
            del st.session_state["password_input"]
        else:
            st.error("Incorrect Password, please try again.")

    st.text_input("Enter Password:", type="password", key="password_input")
    submit_button = st.button("Submit", on_click=password_entered)

    # The callback has already run by the time the button reports a click,
    # so reaching this point with the flag unset means the attempt failed.
    if submit_button and not st.session_state.get('password_correct', False):
        st.error("Please enter a valid password to access the demo.")


# Gate the whole demo behind the password check: until the flag is set only
# the prompt is rendered; afterwards the evaluation UI below is shown.
if not st.session_state.get('password_correct', False):
    check_password()
else:
    st.sidebar.success("Password Verified. Proceed with the demo.")

    input_type = st.radio("Choose input type:", ('Use predefined example', 'Enter your own'))
    if input_type == 'Use predefined example':
        example_type = st.radio("Select an example type:", ('good', 'bad'))
        selected_example = examples[example_type]
        question = selected_example['question']
        explanation = selected_example['explanation']
        ground_truth = selected_example['ground_truth']
    else:
        # Strip surrounding whitespace so a field holding only spaces is
        # treated as empty by the completeness check below.
        question = st.text_input('Enter your question:', '').strip()
        explanation = st.text_input('Enter your explanation:', '').strip()
        ground_truth = st.text_input('Enter ground truth:', '').strip()

    if st.button('Evaluate Explanation'):
        if question and explanation and ground_truth:
            results1, results2 = evaluate_explanation(question, explanation)
            diff = compare_vectors(results1, results2)
            st.write('### Model 1 Results')
            st.write(results1)
            st.write('### Model 2 Results')
            st.write(results2)
            st.write(f'### Score Difference: {diff}')
        else:
            st.error('Please enter a question, explanation, and ground truth to evaluate.')