Spaces:

magnolia-psychometrics
/

item-desirability-demo

Runtime error

App Files Files Community

bjorn-hommel committed on Jun 23, 2023

Commit

228ea6c

•

1 Parent(s): aed0724

refactor and database integration

Browse files

Files changed (5) hide show

app.py +19 -146
demo_section.py +314 -0
utils.py → explore_data_section.py +57 -32
public_creds.json +11 -0
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -3,56 +3,33 @@ import torch
 import dash
 import streamlit as st
 import pandas as pd
 import utils
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from transformers import pipeline
 from dotenv import load_dotenv
 import plotly.graph_objects as go
-load_dotenv()
-# data import and wrangling
-covariate_columns = {
-        'content_domain': 'Content Domain',
-        'language': 'Language',
-        'rater_group': 'Rater Group',
-    }
-if 'df' not in st.session_state:
-    st.session_state.df = (
-        pd
-        .read_feather(path='data.feather').query('partition == "test" | partition == "dev"')
-        .melt(
-            value_vars=['sentiment_model', 'desirability_model'],
-            var_name='x_group',
-            value_name='x',
-            id_vars=['mean_z', 'text', 'content_domain', 'language', 'rater_group', 'study', 'instrument']
-            )
-        .replace(
-            to_replace={
-                'en': 'English',
-                'de': 'German',
-                'other': 'Other',
-                'personality': 'Personality',
-                'laypeople': 'Laypeople',
-                'students': 'Students',
-                'sentiment_model': 'Sentiment Model',
-                'desirability_model': 'Desirability Model'
-            }
-        )
-        .rename(columns=covariate_columns)
-        .rename(
-            columns={
-                'mean_z': 'Human-ratings',
-                'x': 'Machine-ratings',
-            }
-        )
-    )
 st.markdown("""
-    # NLP for Item Desirability Ratings
     This web application accompanies the paper "*Expanding the Methodological Toolbox: Machine-Based Item Desirability Ratings as an Alternative to Human-Based Ratings*".
     ## What is this research about?
     Researchers use personality scales to measure people's traits and behaviors, but biases can affect the accuracy of these scales.
@@ -61,111 +38,7 @@ st.markdown("""
     which can provide a viable alternative to human ratings and help researchers, scale developers, and practitioners improve the accuracy of personality scales.
 """)
-# demo
-st.markdown("""
-    ## Try it yourself!
-    Use the text field below to enter a statement that might be part of a psychological questionnaire (e.g., "I love a good fight.").
-    The left dial indicates how socially desirable it might be to endorse this item.
-    The right dial indicates sentiment (i.e., valence) as estimated by regular sentiment analysis (using the `cardiffnlp/twitter-xlm-roberta-base-sentiment` model).
-""")
-## desirability model
-with st.spinner('Processing...'):
-    if os.environ.get('item-desirability'):
-        model_path = 'magnolia-psychometrics/item-desirability'
-    else:
-        model_path = os.getenv('model_path')
-    auth_token = os.environ.get('item-desirability') or True
-    if 'tokenizer' not in st.session_state:
-        st.session_state.tokenizer = AutoTokenizer.from_pretrained(
-            pretrained_model_name_or_path=model_path,
-            use_fast=True,
-            use_auth_token=auth_token
-        )
-    if 'model' not in st.session_state:
-        st.session_state.model = AutoModelForSequenceClassification.from_pretrained(
-            pretrained_model_name_or_path=model_path,
-            num_labels=1,
-            ignore_mismatched_sizes=True,
-            use_auth_token=auth_token
-        )
-    ## sentiment model
-    if 'classifier' not in st.session_state:
-        st.session_state.sentiment_model = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'
-        st.session_state.classifier = pipeline(
-            task='sentiment-analysis',
-            model=st.session_state.sentiment_model,
-            tokenizer=st.session_state.sentiment_model,
-            use_fast=False,
-            top_k=3
-        )
-    input_text = st.text_input(
-        label='Estimate item desirability:',
-        value='I love a good fight.',
-        placeholder='Enter item text'
-    )
-    if input_text:
-        classifier_output = st.session_state.classifier(input_text)
-        classifier_output_dict = {x['label']: x['score'] for x in classifier_output[0]}
-        classifier_score = classifier_output_dict['positive'] - classifier_output_dict['negative']
-        inputs = st.session_state.tokenizer(text=input_text, padding=True, return_tensors='pt')
-        with torch.no_grad():
-            score = st.session_state.model(**inputs).logits.squeeze().tolist()
-            z = utils.z_score(score)
-        p1 = utils.indicator_plot(
-            value=classifier_score,
-            title=f'Item Sentiment',
-            value_range=[-1, 1],
-            domain={'x': [.55, 1], 'y': [0, 1]}
-        )
-        p2 = utils.indicator_plot(
-            value=z,
-            title=f'Item Desirability',
-            value_range=[-4, 4],
-            domain={'x': [0, .45], 'y': [0, 1]},
-        )
-        fig = go.Figure()
-        fig.add_trace(p1)
-        fig.add_trace(p2)
-        fig.update_layout(
-            title=dict(text=f'"{input_text}"', font=dict(size=36),yref='paper'),
-            paper_bgcolor = "white",
-            font = {'color': "black", 'family': "Arial"})
-        st.plotly_chart(fig, theme=None, use_container_width=True)
-        st.markdown("""
-            Item sentiment: Absolute differences between positive and negative sentiment.
-            Item desirability: z-transformed values, 0 indicated "neutral".
-        """)
-## plot
-st.markdown("""
-    ## Explore the data
-    Figures show the accuarcy in precitions of human-rated item desirability by the sentiment model (left) and the desirability model (right), using `test`-partition data only.
-""")
-show_covariates = st.checkbox('Show covariates', value=True)
-if show_covariates:
-    option = st.selectbox('Group by', options=list(covariate_columns.values()))
-else:
-    option = None
-plot = utils.scatter_plot(st.session_state.df, option)
-st.plotly_chart(plot, theme=None, use_container_width=True)

 import dash
 import streamlit as st
 import pandas as pd
+import json
+import random
 import utils
+import firebase_admin
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from transformers import pipeline
+from firebase_admin import credentials, firestore
 from dotenv import load_dotenv
 import plotly.graph_objects as go
+import demo_section
+import explore_data_section
+load_dotenv()
+if 'collect_data' not in st.session_state:
+    st.session_state.collect_data = True
+if 'user_id' not in st.session_state:
+    st.session_state.user_id = random.randint(1, 9999999)
 st.markdown("""
+    # Machine-Based Item Desirability Ratings
     This web application accompanies the paper "*Expanding the Methodological Toolbox: Machine-Based Item Desirability Ratings as an Alternative to Human-Based Ratings*".
+    *Hommel, B. E. (2023). Expanding the methodological toolbox: Machine-based item desirability ratings as an alternative to human-based ratings. Personality and Individual Differences, 213, 112307. https://doi.org/10.1016/j.paid.2023.112307*
     ## What is this research about?
     Researchers use personality scales to measure people's traits and behaviors, but biases can affect the accuracy of these scales.
     which can provide a viable alternative to human ratings and help researchers, scale developers, and practitioners improve the accuracy of personality scales.
 """)
+st.divider()
+demo_section.show()
+st.divider()
+explore_data_section.show()

demo_section.py ADDED Viewed

	@@ -0,0 +1,314 @@

+import os
+import torch
+import json
+import time
+import random
+import streamlit as st
+import firebase_admin
+import logging
+from firebase_admin import credentials, firestore
+from dotenv import load_dotenv
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from transformers import pipeline
+import plotly.graph_objects as go
+logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
+load_dotenv()
+def load_credentials():
+    """Assemble the Firebase service-account credentials.
+
+    The non-secret fields are read from ``public_creds.json``; the
+    ``private_key_id`` and ``private_key`` fields are taken from the
+    environment variables of the same name and merged in.
+
+    Returns:
+        dict | None: The merged credentials, or ``None`` (after logging the
+        reason) if the file cannot be read or parsed, or if either secret is
+        missing from the environment.
+    """
+    try:
+        with open('public_creds.json', encoding='utf-8') as f:
+            credentials_dict = json.load(f)
+        secret = {
+            'private_key_id': os.environ.get('private_key_id'),
+            'private_key': os.environ.get('private_key')
+        }
+        missing = [name for name, value in secret.items() if not value]
+        if missing:
+            # Fail here with a clear message instead of handing a dict with
+            # empty secrets on to the Firebase SDK.
+            raise KeyError(f'missing environment variable(s): {", ".join(missing)}')
+        credentials_dict.update(secret)
+        return credentials_dict
+    except Exception as e:
+        logging.error(f'Error while loading credentials: {e}')
+        return None
+def connect_to_db(credentials_dict):
+    """Return a Firestore client built from ``credentials_dict``.
+
+    The Firebase app is initialized only when ``firebase_admin`` has no app
+    registered yet. Any failure is logged and reported as ``None``.
+
+    Args:
+        credentials_dict: Service-account data passed to
+            ``credentials.Certificate``.
+
+    Returns:
+        The object returned by ``firestore.client()``, or ``None`` on error.
+    """
+    try:
+        certificate = credentials.Certificate(credentials_dict)
+        app_already_registered = bool(firebase_admin._apps)
+        if not app_already_registered:
+            firebase_admin.initialize_app(certificate)
+        logging.info('Established connection to db!')
+        client = firestore.client()
+    except Exception as e:
+        logging.error(f'Error while connecting to db: {e}')
+        client = None
+    return client
+def get_statements_from_db(db):
+    """Fetch the ``statements`` field of the ``ItemDesirability/Items`` document.
+
+    Args:
+        db: Firestore client (anything exposing ``collection().document().get()``).
+
+    Returns:
+        The value stored under ``statements``, or ``None`` if anything goes
+        wrong; the error is logged.
+    """
+    try:
+        snapshot = db.collection('ItemDesirability').document('Items').get()
+        statements = snapshot.to_dict()['statements']
+        logging.info(f'Retrieved {len(statements)} statements from db!')
+    except Exception as e:
+        logging.error(f'Error while retrieving items from db: {e}')
+        return None
+    return statements
+def update_db(db, payload):
+    """Store ``payload`` in the ``Data`` array of ``ItemDesirability/Responses``.
+
+    If the document already exists the payload is added with
+    ``firestore.ArrayUnion``; otherwise the document is created with a
+    one-element ``Data`` list.
+
+    Args:
+        db: Firestore client.
+        payload: Record to store.
+
+    Returns:
+        bool: ``True`` when the write went through, ``False`` if any error
+        occurred (the error is logged).
+    """
+    try:
+        responses = db.collection('ItemDesirability').document('Responses')
+        if responses.get().exists:
+            responses.update({'Data': firestore.ArrayUnion([payload])})
+        else:
+            responses.set({'Data': [payload]})
+        logging.info('Sent payload to db!')
+    except Exception as e:
+        logging.error(f'Error while sending payload to db: {e}')
+        return False
+    return True
+def pick_random(input_list):
+    """Return a random element of ``input_list``.
+
+    Returns ``None`` (and logs the error) when ``random.choice`` fails, e.g.
+    for an empty list or ``None``.
+    """
+    try:
+        chosen = random.choice(input_list)
+    except Exception as e:
+        logging.error(f'Error while picking random statement: {e}')
+        chosen = None
+    return chosen
+def z_score(y, mean=.04853076, sd=.9409466):
+    """Standardize ``y`` as ``(y - mean) / sd``.
+
+    NOTE(review): the default ``mean``/``sd`` are presumably the statistics of
+    the desirability model's reference data -- confirm their origin.
+    """
+    deviation = y - mean
+    return deviation / sd
+def score_text(input_text):
+    """Score ``input_text`` with the models kept in ``st.session_state``.
+
+    Returns:
+        tuple: ``(sentiment, desirability)`` where ``sentiment`` is the
+        classifier's ``positive`` score minus its ``negative`` score, and
+        ``desirability`` is the model's logit output passed through
+        ``z_score``.
+    """
+    label_scores = {
+        entry['label']: entry['score']
+        for entry in st.session_state.classifier(input_text)[0]
+    }
+    sentiment = label_scores['positive'] - label_scores['negative']
+    encoded = st.session_state.tokenizer(text=input_text, padding=True, return_tensors='pt')
+    # Inference only: no gradients are needed for the forward pass.
+    with torch.no_grad():
+        logits = st.session_state.model(**encoded).logits
+    desirability = z_score(logits.squeeze().tolist())
+    return sentiment, desirability
+def indicator_plot(value, title, value_range, domain):
+    """Build a Plotly gauge indicator trace for a single score.
+
+    Args:
+        value: Score to display; also used as the gauge's threshold marker.
+        title: Title shown with the gauge.
+        value_range: ``[low, high]`` limits of the gauge axis.
+        domain: Plotly ``domain`` dict placing the trace inside the figure.
+
+    Returns:
+        The configured ``go.Indicator`` trace.
+    """
+    low, high = value_range[0], value_range[1]
+    accent = "#4361ee"
+    neutral_fill = '#efefef'
+    delta_style = {
+        'reference': 0,
+        'decreasing': {'color': "#ec4899"},
+        'increasing': {'color': "#36def1"},
+    }
+    gauge_style = {
+        'axis': {'range': value_range, 'tickwidth': 1, 'tickcolor': "black"},
+        'bar': {'color': accent},
+        'bgcolor': "white",
+        'borderwidth': 2,
+        'bordercolor': "#efefef",
+        # Two steps split at 0, both drawn in the same neutral fill.
+        'steps': [
+            {'range': [low, 0], 'color': neutral_fill},
+            {'range': [0, high], 'color': neutral_fill},
+        ],
+        'threshold': {
+            'line': {'color': accent, 'width': 8},
+            'thickness': 0.75,
+            'value': value,
+        },
+    }
+    return go.Indicator(
+        mode="gauge+delta",
+        value=value,
+        domain=domain,
+        title=title,
+        delta=delta_style,
+        gauge=gauge_style,
+    )
+def show_scores(sentiment, desirability, input_text):
+    """Render both scores as side-by-side gauges with a short legend.
+
+    The sentiment gauge occupies the left part of the figure (x domain
+    0-0.45) and the desirability gauge the right part (x domain 0.55-1).
+
+    Args:
+        sentiment: Value for the sentiment gauge (axis range -1..1).
+        desirability: Value for the desirability gauge (axis range -4..4).
+        input_text: The scored text, shown in quotes as the figure title.
+    """
+    sentiment_gauge = indicator_plot(
+        value=sentiment,
+        title='Item Sentiment',
+        value_range=[-1, 1],
+        domain={'x': [0, .45], 'y': [0, 1]},
+    )
+    desirability_gauge = indicator_plot(
+        value=desirability,
+        title='Item Desirability',
+        value_range=[-4, 4],
+        domain={'x': [.55, 1], 'y': [0, 1]},
+    )
+    fig = go.Figure()
+    for gauge in (sentiment_gauge, desirability_gauge):
+        fig.add_trace(gauge)
+    figure_title = dict(text=f'"{input_text}"', font=dict(size=36), yref='paper')
+    fig.update_layout(
+        title=figure_title,
+        paper_bgcolor="white",
+        font={'color': "black", 'family': "Arial"},
+    )
+    st.plotly_chart(fig, theme=None, use_container_width=True)
+    st.markdown("""
+        Item sentiment: Absolute differences between positive and negative sentiment.
+        Item desirability: z-transformed values, 0 indicated "neutral".
+    """)
+def update_statement_placeholder(placeholder):
+    """Render the statement currently awaiting a rating into ``placeholder``.
+
+    Reads ``st.session_state.current_statement``. When no statement is
+    available the placeholder is cleared instead of raising.
+
+    Args:
+        placeholder: Streamlit container (created with ``st.empty()``) whose
+            content is replaced.
+    """
+    statement = st.session_state.current_statement
+    if not statement:
+        # Nothing to rate (e.g. the statements could not be retrieved).
+        placeholder.empty()
+        return
+    # Upper-case only the first character: str.capitalize() would also
+    # lower-case the rest of the sentence ("I", proper nouns, acronyms).
+    display_text = statement[:1].upper() + statement[1:]
+    # NOTE(review): the text is rendered with unsafe_allow_html=True, so the
+    # statements stored in the database must be trusted content.
+    placeholder.markdown(
+        body=f"""
+            Is it socially desirable or undesirable to endorse the following statement?
+            ### <center>\"{display_text}\"</center>
+        """,
+        unsafe_allow_html=True
+    )
+def _connect_with_retry(max_attempts=3):
+    """Ensure ``st.session_state.db`` holds a client, trying up to ``max_attempts`` times."""
+    if 'db' not in st.session_state:
+        st.session_state.db = None
+    if st.session_state.db is not None:
+        return
+    # Credentials are only needed while there is no connection yet, so they
+    # are not reloaded on every Streamlit rerun.
+    credentials_dict = load_credentials()
+    for attempt in range(1, max_attempts + 1):
+        st.session_state.db = connect_to_db(credentials_dict)
+        if st.session_state.db is not None:
+            return
+        if attempt < max_attempts:
+            # Only wait when another attempt actually follows.
+            logging.info('Retrying to connect to db...')
+            time.sleep(1)
+def _load_statements_with_retry(max_attempts=3):
+    """Fetch the statements once per session and pick the first one to rate."""
+    if 'statements' not in st.session_state:
+        st.session_state.statements = None
+    if 'current_statement' not in st.session_state:
+        st.session_state.current_statement = None
+    # Without a connection every attempt would fail; skip the retries (and
+    # their sleeps) entirely.
+    if st.session_state.db is None or st.session_state.statements is not None:
+        return
+    for attempt in range(1, max_attempts + 1):
+        st.session_state.statements = get_statements_from_db(st.session_state.db)
+        if st.session_state.statements is not None:
+            # Pick a statement only once there is something to pick from.
+            st.session_state.current_statement = pick_random(st.session_state.statements)
+            return
+        if attempt < max_attempts:
+            logging.info('Retrying to retrieve statements from db...')
+            time.sleep(1)
+def _load_models():
+    """Load tokenizer, desirability model and sentiment pipeline into the session (once each)."""
+    if os.environ.get('item-desirability'):
+        model_path = 'magnolia-psychometrics/item-desirability'
+    else:
+        model_path = os.getenv('model_path')
+    auth_token = os.environ.get('item-desirability') or True
+    if 'tokenizer' not in st.session_state:
+        st.session_state.tokenizer = AutoTokenizer.from_pretrained(
+            pretrained_model_name_or_path=model_path,
+            use_fast=True,
+            use_auth_token=auth_token
+        )
+    if 'model' not in st.session_state:
+        st.session_state.model = AutoModelForSequenceClassification.from_pretrained(
+            pretrained_model_name_or_path=model_path,
+            num_labels=1,
+            ignore_mismatched_sizes=True,
+            use_auth_token=auth_token
+        )
+    ## sentiment model
+    if 'classifier' not in st.session_state:
+        st.session_state.sentiment_model = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'
+        st.session_state.classifier = pipeline(
+            task='sentiment-analysis',
+            model=st.session_state.sentiment_model,
+            tokenizer=st.session_state.sentiment_model,
+            use_fast=False,
+            top_k=3
+        )
+def show():
+    """Render the "Try it yourself!" section of the app.
+
+    Connects to the database and loads the statements (both with retries),
+    optionally shows a rating form for one statement, and scores the text
+    entered by the user. When the rating form is active, the rating, the
+    entered text and both scores are written to the database before the
+    scores are displayed.
+
+    Reads ``st.session_state.user_id``, which is set outside this function.
+    """
+    _connect_with_retry()
+    _load_statements_with_retry()
+    st.markdown("""
+        ## Try it yourself!
+        Use the text field below to enter a statement that might be part of a psychological questionnaire (e.g., "I love a good fight.").
+        The left dial indicates sentiment (i.e., valence) as estimated by regular sentiment analysis (using the `cardiffnlp/twitter-xlm-roberta-base-sentiment` model).
+        The right dial indicates how socially desirable it might be to endorse this item.
+    """)
+    if st.session_state.db:
+        collect_data = st.checkbox(
+            label='I want to support and help improve this research.',
+            value=True
+        )
+    else:
+        collect_data = False
+    # The rating form needs a connection, the user's consent AND a statement
+    # to rate; without a statement the placeholder could not be rendered.
+    rating_enabled = bool(
+        st.session_state.db and collect_data and st.session_state.current_statement
+    )
+    statement_placeholder = None
+    selected_rating = None
+    research_suitability = None
+    rating_options = ['[Please select]', 'Very undesirable', 'Undesirable', 'Neutral', 'Desirable', 'Very desirable']
+    suitability_options = ['No, I\'m just playing around', 'Yes, my input can help improve this research']
+    if rating_enabled:
+        st.divider()
+        statement_placeholder = st.empty()
+        update_statement_placeholder(statement_placeholder)
+        selected_rating = st.selectbox(
+            label='Rate the statement above according to whether it is socially desirable or undesirable.',
+            options=rating_options,
+            index=0
+        )
+        research_suitability = st.radio(
+            label='Is your input suitable for research purposes?',
+            options=suitability_options,
+            horizontal=True
+        )
+        st.divider()
+    with st.spinner('Loading the model might take a couple of seconds...'):
+        st.markdown("### Estimate item desirability")
+        _load_models()
+        input_text = st.text_input(
+            label='Item text/statement:',
+            value='I love a good fight.',
+            placeholder='Enter item text'
+        )
+        if st.button(label='Evaluate Item Text', type="primary", use_container_width=True):
+            if not input_text.strip():
+                # Do not score (or store) an empty statement.
+                st.error('Please enter an item text!')
+            elif rating_enabled and selected_rating == rating_options[0]:
+                st.error('Please rate the statement presented above!')
+            else:
+                sentiment, desirability = score_text(input_text)
+                if rating_enabled:
+                    payload = {
+                        'user_id': st.session_state.user_id,
+                        'statement': st.session_state.current_statement,
+                        'rating': rating_options.index(selected_rating),
+                        'suitability': suitability_options.index(research_suitability),
+                        'input_text': input_text,
+                        'sentiment': sentiment,
+                        'desirability': desirability,
+                    }
+                    update_success = update_db(
+                        db=st.session_state.db,
+                        payload=payload
+                    )
+                    if update_success:
+                        # Present a fresh statement for the next rating.
+                        st.session_state.current_statement = pick_random(st.session_state.statements)
+                        update_statement_placeholder(statement_placeholder)
+                show_scores(sentiment, desirability, input_text)

utils.py → explore_data_section.py RENAMED Viewed

@@ -1,42 +1,51 @@
-from plotly.subplots import make_subplots
 import plotly.graph_objects as go
 import plotly.express as px
-def z_score(y, mean=.04853076, sd=.9409466):
-    return (y - mean) / sd
-def indicator_plot(value, title, value_range, domain):
-    plot = go.Indicator(
-        mode = "gauge+delta",
-        value = value,
-        domain = domain,
-        title = title,
-        delta = {
-            'reference': 0,
-            'decreasing': {'color': "#ec4899"},
-            'increasing': {'color': "#36def1"}
-            },
-        gauge = {
-            'axis': {'range': value_range, 'tickwidth': 1, 'tickcolor': "black"},
-            'bar': {'color': "#4361ee"},
-            'bgcolor': "white",
-            'borderwidth': 2,
-            'bordercolor': "#efefef",
-            'steps': [
-                {'range': [value_range[0], 0], 'color': '#efefef'},
-                {'range': [0, value_range[1]], 'color': '#efefef'}
-            ],
-            'threshold': {
-                'line': {'color': "#4361ee", 'width': 8},
-                'thickness': 0.75,
-                'value': value
             }
-        }
     )
-    return plot
 def scatter_plot(df, group_var):
     colors = ['#36def1', '#4361ee'] if group_var else ['#4361ee']
@@ -73,4 +82,20 @@ def scatter_plot(df, group_var):
         })
     plot.update_xaxes(title_standoff = 0)
-    return plot

+import streamlit as st
+import pandas as pd
 import plotly.graph_objects as go
 import plotly.express as px
+covariate_columns = {
+    'content_domain': 'Content Domain',
+    'language': 'Language',
+    'rater_group': 'Rater Group',
+}
+id_vars = [
+    'mean_z', 'text', 'content_domain', 'language',
+    'rater_group', 'study', 'instrument'
+]
+if 'df' not in st.session_state:
+    st.session_state.df = (
+        pd
+        .read_feather(path='data.feather')
+        .query('partition == "test" | partition == "dev"')
+        .melt(
+            value_vars=['sentiment_model', 'desirability_model'],
+            var_name='x_group',
+            value_name='x',
+            id_vars=id_vars
+            )
+        .replace(
+            to_replace={
+                'en': 'English',
+                'de': 'German',
+                'other': 'Other',
+                'personality': 'Personality',
+                'laypeople': 'Laypeople',
+                'students': 'Students',
+                'sentiment_model': 'Sentiment Model',
+                'desirability_model': 'Desirability Model'
+            }
+        )
+        .rename(columns=covariate_columns)
+        .rename(
+            columns={
+                'mean_z': 'Human-ratings',
+                'x': 'Machine-ratings',
             }
+        )
     )
 def scatter_plot(df, group_var):
     colors = ['#36def1', '#4361ee'] if group_var else ['#4361ee']
         })
     plot.update_xaxes(title_standoff = 0)
+    return plot
+def show():
+    """Render the "Explore the data" section.
+
+    Shows a short description, a checkbox toggling covariate grouping (with a
+    select box for the grouping column when enabled) and the scatter plot
+    built from ``st.session_state.df``. Nothing is plotted if the data frame
+    is not in the session state.
+    """
+    # NOTE(review): the text says "test-partition data only", while the data
+    # frame loaded in this module keeps both "test" and "dev" rows -- confirm
+    # which one is intended.
+    st.markdown("""
+        ## Explore the data
+        Figures show the accuracy of predictions of human-rated item desirability by the sentiment model (left) and the desirability model (right), using `test`-partition data only.
+    """)
+    show_covariates = st.checkbox('Show covariates', value=True)
+    if show_covariates:
+        option = st.selectbox('Group by', options=list(covariate_columns.values()))
+    else:
+        option = None
+    if 'df' in st.session_state:
+        plot = scatter_plot(st.session_state.df, option)
+        st.plotly_chart(plot, theme=None, use_container_width=True)

public_creds.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+    "type": "service_account",
+    "project_id": "huggingfacespaces",
+    "client_email": "firebase-adminsdk-1nwag@huggingfacespaces.iam.gserviceaccount.com",
+    "client_id": "106819644534694903759",
+    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+    "token_uri": "https://oauth2.googleapis.com/token",
+    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+    "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/firebase-adminsdk-1nwag%40huggingfacespaces.iam.gserviceaccount.com",
+    "universe_domain": "googleapis.com"
+}

requirements.txt CHANGED Viewed

@@ -5,4 +5,5 @@ dash==2.10.2
 statsmodels==0.14.0
 sentencepiece==0.1.99
 altair==4.2.2
 python-dotenv

 statsmodels==0.14.0
 sentencepiece==0.1.99
 altair==4.2.2
+firebase_admin==6.1.0
 python-dotenv