import os
import torch
import streamlit as st
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
import plotly.graph_objects as go
import plotly.express as px
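# Rescale a raw model output to a z-score so that 0 reads as "neutral" on the dial.
# The default mean/sd constants are hard-coded in the app; presumably they describe the
# distribution of the model's raw predictions (an assumption, not documented in the code).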
def z_score(y, mean=.04853076, sd=.9409466):
    return (y - mean) / sd
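# Build a single Plotly gauge ("indicator") trace. `domain` positions the dial within the
# figure and `value_range` sets the axis limits; the delta is measured against a reference
# of 0 and the threshold line marks the current value, so the dial shows deviation from neutral.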
def indicator_plot(value, title, value_range, domain):
    plot = go.Indicator(
        mode="gauge+delta",
        value=value,
        domain=domain,
        title=title,
        delta={
            'reference': 0,
            'decreasing': {'color': "#ec4899"},
            'increasing': {'color': "#36def1"}
        },
        gauge={
            'axis': {'range': value_range, 'tickwidth': 1, 'tickcolor': "black"},
            'bar': {'color': "#4361ee"},
            'bgcolor': "white",
            'borderwidth': 2,
            'bordercolor': "#efefef",
            'steps': [
                {'range': [value_range[0], 0], 'color': '#efefef'},
                {'range': [0, value_range[1]], 'color': '#efefef'}
            ],
            'threshold': {
                'line': {'color': "#4361ee", 'width': 8},
                'thickness': 0.75,
                'value': value
            }
        }
    )
    return plot
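# Scatter plot of human vs. machine ratings, faceted by model (sentiment vs. desirability)
# with an OLS trendline fitted per trace. `group_var` optionally colors points by one of
# the covariates (Content Domain, Language, or Rater Group).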
def scatter_plot(df, group_var):
    colors = ['#36def1', '#4361ee'] if group_var else ['#4361ee']
    plot = px.scatter(
        df,
        x='Machine-ratings',
        y='Human-ratings',
        color=group_var,
        facet_col='x_group',
        facet_col_wrap=2,
        trendline='ols',
        trendline_scope='trace',
        hover_data={
            'Text': df.text,
            'Language': False,
            'x_group': False,
            'Human-ratings': ':.2f',
            'Machine-ratings': ':.2f',
            'Study': df.study,
            'Instrument': df.instrument,
        },
        width=400,
        height=400,
        color_discrete_sequence=colors
    )
    plot.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1]))
    plot.update_layout(
        legend={
            'orientation': 'h',
            'yanchor': 'bottom',
            'y': -.30
        })
    plot.update_xaxes(title_standoff=0)
    return plot
# data import and wrangling
covariate_columns = {
    'content_domain': 'Content Domain',
    'language': 'Language',
    'rater_group': 'Rater Group',
}
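# Reshape to long format: melt the two model-score columns into a single `x` column, with
# `x_group` identifying which model produced the score, so both models can be shown side by
# side as facets in one scatter plot; then map values and column names to their display labels.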
df = (
    pd
    .read_feather(path='data.feather')
    .query('partition == "test" | partition == "dev"')
    .melt(
        value_vars=['sentiment_model', 'desirability_model'],
        var_name='x_group',
        value_name='x',
        id_vars=['mean_z', 'text', 'content_domain', 'language', 'rater_group', 'study', 'instrument']
    )
    .replace(
        to_replace={
            'en': 'English',
            'de': 'German',
            'other': 'Other',
            'personality': 'Personality',
            'laypeople': 'Laypeople',
            'students': 'Students',
            'sentiment_model': 'Sentiment Model',
            'desirability_model': 'Desirability Model'
        }
    )
    .rename(columns=covariate_columns)
    .rename(
        columns={
            'mean_z': 'Human-ratings',
            'x': 'Machine-ratings',
        }
    )
)
st.markdown(""" | |
# NLP for Item Desirability Ratings | |
This web application accompanies the paper "*Expanding the Methodological Toolbox: Machine-Based Item Desirability Ratings as an Alternative to Human-Based Ratings*". | |
## What is this research about? | |
Researchers use personality scales to measure people's traits and behaviors, but biases can affect the accuracy of these scales. | |
Socially desirable responding is a common bias that can skew results. To overcome this, researchers gather item desirability ratings, e.g., to ensure that questions are neutral. | |
Recently, advancements in natural language processing have made it possible to use machines to estimate social desirability ratings, | |
which can provide a viable alternative to human ratings and help researchers, scale developers, and practitioners improve the accuracy of personality scales. | |
""") | |
# demo
st.markdown("""
## Try it yourself!
Use the text field below to enter a statement that might be part of a psychological questionnaire (e.g., "I love a good fight.").
The left dial indicates how socially desirable it might be to endorse this item.
The right dial indicates sentiment (i.e., valence) as estimated by regular sentiment analysis (using the `cardiffnlp/twitter-xlm-roberta-base-sentiment` model).
""")
## desirability model
with st.spinner('Processing...'):
    if os.environ.get('item-desirability'):
        model_path = 'magnolia-psychometrics/item-desirability'
    else:
        model_path = '/nlp/nlp/models/finetuned/twitter-xlm-roberta-base-regressive-desirability-ft-4'

    auth_token = os.environ.get('item-desirability') or True

    if 'tokenizer' not in globals():
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path=model_path,
            use_fast=True,
            use_auth_token=auth_token
        )

    if 'model' not in globals():
        model = AutoModelForSequenceClassification.from_pretrained(
            pretrained_model_name_or_path=model_path,
            num_labels=1,
            ignore_mismatched_sizes=True,
            use_auth_token=auth_token
        )

    ## sentiment model
    if 'classifier' not in globals():
        sentiment_model = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'
        classifier = pipeline(
            "sentiment-analysis",
            model=sentiment_model,
            tokenizer=sentiment_model,
            use_fast=False,
            top_k=3
        )
input_text = st.text_input(
    label='Estimate item desirability:',
    value='I love a good fight.',
    placeholder='Enter item text'
)
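# On input: the sentiment pipeline returns scores for the positive/neutral/negative labels,
# which are collapsed into a single signed score (positive minus negative, in [-1, 1]).
# The desirability model has a single-label regression head, so its raw output is read out
# directly and rescaled to a z-score for the gauge.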
if input_text:
    classifier_output = classifier(input_text)
    classifier_output_dict = {x['label']: x['score'] for x in classifier_output[0]}
    classifier_score = classifier_output_dict['positive'] - classifier_output_dict['negative']

    inputs = tokenizer(input_text, padding=True, return_tensors='pt')
    with torch.no_grad():
        score = model(**inputs).logits.squeeze().tolist()
    z = z_score(score)

    p1 = indicator_plot(
        value=classifier_score,
        title='Item Sentiment',
        value_range=[-1, 1],
        domain={'x': [.55, 1], 'y': [0, 1]}
    )

    p2 = indicator_plot(
        value=z,
        title='Item Desirability',
        value_range=[-4, 4],
        domain={'x': [0, .45], 'y': [0, 1]},
    )

    fig = go.Figure()
    fig.add_trace(p1)
    fig.add_trace(p2)

    fig.update_layout(
        title=dict(text=f'"{input_text}"', font=dict(size=36), yref='paper'),
        paper_bgcolor="white",
        font={'color': "black", 'family': "Arial"}
    )

    st.plotly_chart(fig, theme=None, use_container_width=True)
st.markdown(""" | |
Item sentiment: Absolute differences between positive and negative sentiment. | |
Item desirability: z-transformed values, 0 indicated "neutral". | |
""") | |
## plot
st.markdown("""
## Explore the data
The figures show how accurately the sentiment model (left) and the desirability model (right) predict human-rated item desirability, using `test`-partition data only.
""")
show_covariates = st.checkbox('Show covariates', value=True)

if show_covariates:
    option = st.selectbox('Group by', options=list(covariate_columns.values()))
else:
    option = None

plot = scatter_plot(df, option)
st.plotly_chart(plot, theme=None, use_container_width=True)