Spaces:
Runtime error
Runtime error
bjorn-hommel
committed on
Commit
•
28183db
1
Parent(s):
06f5189
refactor
Browse files- app.py +290 -29
- db.py +71 -0
- demo_section.py +0 -312
- logo-130x130.svg +35 -0
- modeling.py +65 -0
- plots.py +105 -0
- explore_data_section.py → utils.py +1 -57
app.py
CHANGED
@@ -1,42 +1,303 @@
|
|
1 |
-
import
|
2 |
-
import
|
3 |
-
import
|
4 |
import streamlit as st
|
5 |
import pandas as pd
|
6 |
-
|
7 |
-
import random
|
8 |
-
import firebase_admin
|
9 |
-
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
10 |
-
from transformers import pipeline
|
11 |
-
from firebase_admin import credentials, firestore
|
12 |
from dotenv import load_dotenv
|
13 |
-
import plotly.graph_objects as go
|
14 |
|
15 |
-
import
|
16 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
-
|
|
|
|
|
|
|
19 |
|
20 |
-
|
21 |
-
|
22 |
|
23 |
-
|
24 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
-
## What is this research about?
|
34 |
-
Researchers use personality scales to measure people's traits and behaviors, but biases can affect the accuracy of these scales.
|
35 |
-
Socially desirable responding is a common bias that can skew results. To overcome this, researchers gather item desirability ratings, e.g., to ensure that questions are neutral.
|
36 |
-
Recently, advancements in natural language processing have made it possible to use machines to estimate social desirability ratings,
|
37 |
-
which can provide a viable alternative to human ratings and help researchers, scale developers, and practitioners improve the accuracy of personality scales.
|
38 |
-
""")
|
39 |
|
|
|
40 |
|
41 |
-
|
42 |
-
|
|
|
1 |
+
import time
|
2 |
+
import random
|
3 |
+
import logging
|
4 |
import streamlit as st
|
5 |
import pandas as pd
|
6 |
+
|
|
|
|
|
|
|
|
|
|
|
7 |
from dotenv import load_dotenv
|
|
|
8 |
|
9 |
+
import utils
|
10 |
+
import db
|
11 |
+
import modeling
|
12 |
+
import plots
|
13 |
+
|
14 |
+
def set_if_not_in_session_state(key, value):
    """Seed a session-state entry with *value* unless the key already exists."""
    if key in st.session_state:
        return
    st.session_state[key] = value
|
18 |
+
|
19 |
+
def initialize():
    """Set up logging, load environment variables, and seed session state.

    Safe to call on every Streamlit rerun: already-present session-state
    entries are left untouched. Ends by establishing the db connection.
    """
    load_dotenv()
    logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)

    # Default value for every session-state key the app relies on.
    defaults = {
        'selected_rating': 0,
        'collect_data': None,
        'gender_value': None,
        'expert_value': None,
        'show_launch': True,
        'user_id': random.randint(1, 999_999_999),  # anonymous per-session id
        'statements': None,
        'current_statement': None,
        'db': None,
    }

    for key, value in defaults.items():
        set_if_not_in_session_state(key, value)

    connect_to_database()
|
31 |
+
|
32 |
+
def connect_to_database():
    """Establish the Firestore connection (up to 3 attempts), then load statements.

    Bug fix: the original used ``while``/``else``, and a ``while`` loop's
    ``else`` branch runs whenever the loop exits without ``break`` — i.e.
    also after all three connection attempts have failed — so
    ``retrieve_statements()`` was invoked against a missing connection.
    Statements are now only retrieved when a connection actually exists.
    """
    if st.session_state.db is not None:
        return

    credentials_dict = db.load_credentials()

    for _ in range(3):
        st.session_state.db = db.connect_to_db(credentials_dict)
        if st.session_state.db is not None:
            break
        logging.info('Retrying to connect to db...')
        time.sleep(1)

    if st.session_state.db is not None:
        retrieve_statements()
|
46 |
+
|
47 |
+
def retrieve_statements():
    """Fetch the statement pool from the database (up to 3 attempts).

    On success a random statement is promoted to ``current_statement``.
    """
    for _ in range(3):
        if st.session_state.statements is not None:
            break
        st.session_state.statements = db.get_statements_from_db(st.session_state.db)
        st.session_state.current_statement = db.pick_random(st.session_state.statements)
        if st.session_state.statements is None:
            logging.info('Retrying to retrieve statements from db...')
            time.sleep(1)
|
58 |
+
|
59 |
+
def get_user_consent():
    """Render the data-collection consent prompt; return True iff the user agrees."""
    st.markdown("""
    ### Support Future Research
    Additionally, we kindly ask for your agreement to collect anonymous data from your app usage in order to improve future research.
    You may choose to agree or decline this data collection.
    """)

    consent_options = ['Yes, I agree and want to support and help improve this research', 'No']
    consent_choice = st.radio(
        label='You may choose to agree or decline this data collection.',
        options=consent_options,
        horizontal=True,
        label_visibility='collapsed'
    )
    # The first option is the affirmative one.
    return consent_choice == consent_options[0]
|
74 |
+
|
75 |
+
|
76 |
+
def get_user_info():
    """Ask for gender and research background; return their option indices.

    Index 0 corresponds to the placeholder entry of each select box,
    i.e. "not yet answered".

    Returns:
        tuple[int, int]: ``(expert_value, gender_value)``.
    """
    gender_options = ['[Please select]', 'Female', 'Male', 'Other']
    gender_choice = st.selectbox(
        label='Please select your gender',
        options=gender_options,
    )

    expert_options = [
        '[PLEASE SELECT]',
        'No, I do not have a background in social or behavioral sciences',
        'Yes, I have either studied social or behavioral sciences or I am currently a student in this field',
        'Yes, I have either worked as a researcher in the field of social or behavioral sciences or I have had past experience as a researcher in this area'
    ]
    expert_choice = st.selectbox(
        label='Please indicate whether you have any experience or educational background in social or behavioral sciences (e.g., psychology)',
        options=expert_options,
    )

    return expert_options.index(expert_choice), gender_options.index(gender_choice)
|
97 |
+
|
98 |
+
def get_user_rating(placeholder):
    """Render the statement-rating widgets inside *placeholder*.

    Persists the chosen indices in session state: ``item_rating`` (0 means
    "not rated yet"), ``suitability_rating``, and the ``collect_data_optout``
    flag.
    """
    with placeholder:
        with st.container():
            st.markdown(f"""
            ### How desirable is the following statement?
            To support future research, rate the following statement according to whether it is socially desirable or undesirable.
            Is it socially desirable or undesirable to endorse the following statement?
            #### <center>\"{st.session_state.current_statement.capitalize()}\"</center>
            """, unsafe_allow_html=True)

            rating_options = ['[Please select]', 'Very undesirable', 'Undesirable', 'Neutral', 'Desirable', 'Very desirable']

            # key='selection' lets handle_demo_input() reset the widget.
            chosen_rating = st.selectbox(
                label='Rate the statement above according to whether it is socially desirable or undesirable.',
                options=rating_options,
                key='selection'
            )

            suitability_options = ['No, I\'m just playing around', 'Yes, my input can help improve this research']
            suitability_choice = st.radio(
                label='Is your input suitable for research purposes?',
                options=suitability_options,
                horizontal=True
            )

            st.session_state.collect_data_optout = st.checkbox(
                label='Don\'t ask me to rate further statements.',
                value=False
            )

            st.session_state.item_rating = rating_options.index(chosen_rating)
            st.session_state.suitability_rating = suitability_options.index(suitability_choice)
|
132 |
|
133 |
+
def handle_acceptance(collect_data_value, expert_value, gender_value, message):
    """Process the disclaimer button; persist the user's choices and rerun.

    If data collection was agreed to, both demographic questions must be
    answered (index > 0); otherwise an error is rendered into *message*.
    """
    if not st.button(label='Accept Disclaimer', type='primary', use_container_width=True):
        return

    # Index 0 is the "[Please select]" placeholder in both select boxes.
    demographics_missing = expert_value == 0 or gender_value == 0
    if collect_data_value and demographics_missing:
        message.error('Please answer the questions above!')
        return

    st.session_state.expert_value = expert_value
    st.session_state.gender_value = gender_value
    st.session_state.show_launch = False
    st.session_state.collect_data = collect_data_value
    # NOTE(review): st.experimental_rerun() is deprecated in newer Streamlit
    # releases (st.rerun()); kept as-is for compatibility with this app's pin.
    st.experimental_rerun()
|
143 |
|
144 |
+
def show_launch(placeholder):
    """Render the disclaimer / consent page inside *placeholder*."""
    with placeholder:
        with st.container():
            st.divider()
            st.markdown("""
            ## Before Using the App
            ### Disclaimer
            This application is provided as-is, without any warranty or guarantee of any kind, expressed or implied. It is intended for educational, non-commercial use only.
            The developers of this app shall not be held liable for any damages or losses incurred from its use. By using this application, you agree to the terms and conditions
            outlined herein and acknowledge that any commercial use or reliance on its functionality is strictly prohibited.
            """)

            # Consent can only be requested when a db connection exists.
            collect_data_value = get_user_consent() if st.session_state.db else False

            # Demographics are only collected from consenting users.
            expert_value, gender_value = (0, 0)
            if collect_data_value:
                expert_value, gender_value = get_user_info()

            message = st.empty()

            handle_acceptance(collect_data_value, expert_value, gender_value, message)
|
167 |
+
|
168 |
+
def show_summary(placeholder):
    """Render the research-summary text inside *placeholder*."""
    summary_text = """
    ## What is the focus of this research?
    Certain biases can affect how people respond to surveys and psychological questionnaires.
    For example, survey respondents may attempt to conceal socially undesirable traits (e.g.,
    being ill-tempered) and endorse statements that cast them in a favorable manner (e.g.,
    being cooperative).

    Developers of psychological questionnaires hence sometimes aim to ensure that questions
    are neutral, or that a subset of questions is equally (un)desirable. In the past, human
    judges have been tasked with quantifying item desirability. In contrast, the research
    underlying this web application demonstrates that large language models (LLMs) can
    achieve this too!
    """
    with placeholder:
        with st.container():
            st.markdown(summary_text)
|
184 |
+
|
185 |
+
def handle_demo_input():
    """Score the entered item text and, when consented, persist the response.

    Runs as the 'Evaluate Item Text' button callback. When data collection is
    active, the presented statement must have been rated (item_rating > 0)
    before anything is scored or stored; the caller surfaces the error.
    """
    if not st.session_state.collect_data:
        # No consent: score the text only, nothing is written to the db.
        st.session_state.sentiment, st.session_state.desirability = modeling.score_text(st.session_state.input_text)
        return

    if st.session_state.item_rating <= 0:
        # Statement not rated yet; show_demo() displays the error message.
        return

    st.session_state.sentiment, st.session_state.desirability = modeling.score_text(st.session_state.input_text)

    payload = {
        'user_id': st.session_state.user_id,
        'gender_value': st.session_state.gender_value,
        'expert_value': st.session_state.expert_value,
        'statement': st.session_state.current_statement,
        'rating': st.session_state.item_rating,
        'suitability': st.session_state.suitability_rating,
        'input_text': st.session_state.input_text,
        'sentiment': st.session_state.sentiment,
        'desirability': st.session_state.desirability,
    }
    write_to_db_success = db.write_to_db(st.session_state.db, payload)

    if st.session_state.collect_data_optout:
        st.session_state.collect_data = False

    if write_to_db_success:
        # Queue a fresh statement and reset the rating widget.
        st.session_state.current_statement = db.pick_random(st.session_state.statements)
        st.session_state.selection = '[Please select]'
|
215 |
+
|
216 |
+
|
217 |
+
def show_demo(placeholder):
    """Render the interactive item-scoring demo inside *placeholder*."""
    with placeholder:
        with st.container():
            st.divider()
            st.markdown("""
            ## Try it yourself!
            Use the text field below to enter a statement that might be part of a psychological
            questionnaire (e.g., "I love a good fight."). Your input will be processed by
            language models, returning a machine-based estimate of item sentiment (i.e., valence)
            and desirability.

            """)
            modeling.load_model()

            # Show the dials for the previous evaluation, if one exists.
            if 'sentiment' in st.session_state and 'desirability' in st.session_state:
                plots.show_scores(
                    sentiment=st.session_state.sentiment,
                    desirability=st.session_state.desirability,
                    input_text=st.session_state.input_text
                )

            st.session_state.input_text = st.text_input(
                label='Item text/statement:',
                value='I love a good fight.',
                placeholder='Enter item text'
            )

            user_rating_placeholder = st.empty()

            if st.session_state.collect_data:
                get_user_rating(user_rating_placeholder)

            clicked = st.button(
                label='Evaluate Item Text',
                on_click=handle_demo_input,
                type='primary',
                use_container_width=True
            )
            # handle_demo_input() already ran as the on_click callback; here we
            # only surface the "missing rating" error for consenting users.
            if clicked and st.session_state.collect_data and st.session_state.item_rating == 0:
                st.error('Please rate the statement presented above!')
|
257 |
+
|
258 |
+
def show_data(placeholder):
    """Render the data-exploration section inside *placeholder*.

    Shows a scatter plot of model predictions vs. human-rated item
    desirability, optionally grouped by a covariate.
    """
    with placeholder:
        with st.container():
            st.divider()
            # Fixed user-facing typos: 'accuarcy in precitions' -> 'accuracy in predictions'.
            st.markdown("""
            ## Explore the data
            Figures show the accuracy in predictions of human-rated item desirability by the sentiment model (left) and the desirability model (right), using `test`-partition data only.
            """)

            show_covariates = st.checkbox('Show covariates', value=True)
            if show_covariates:
                option = st.selectbox('Group by', options=list(utils.covariate_columns.values()))
            else:
                option = None

            # The dataframe is loaded elsewhere; skip plotting until it exists.
            if 'df' in st.session_state:
                plot = plots.scatter_plot(st.session_state.df, option)
                st.plotly_chart(plot, theme=None, use_container_width=True)
|
276 |
+
|
277 |
+
def main():
    """Top-level page layout: header plus launch/summary/demo/data sections."""
    st.markdown("""
    # Machine-Based Item Desirability Ratings
    This web application demonstrates how item desirability ratings can be obtained with natural language processing ("AI") and accompanies the paper "*Expanding the Methodological Toolbox: Machine-Based Item Desirability Ratings as an Alternative to Human-Based Ratings*".

    *Hommel, B. E. (2023). Expanding the methodological toolbox: Machine-based item desirability ratings as an alternative to human-based ratings. Personality and Individual Differences, 213, 112307. https://doi.org/10.1016/j.paid.2023.112307*

    <small>https://www.magnolia-psychometrics.com/</small>
    """, unsafe_allow_html=True)

    placeholder_launch = st.empty()
    placeholder_summary = st.empty()
    placeholder_demo = st.empty()
    placeholder_data = st.empty()

    if st.session_state.show_launch is True:
        show_launch(placeholder_launch)
    else:
        placeholder_launch = st.empty()  # clears the launch section
        show_summary(placeholder_summary)
        show_demo(placeholder_demo)
        show_data(placeholder_data)
|
298 |
|
|
|
|
|
|
|
|
|
|
|
|
|
299 |
|
300 |
+
if __name__ == '__main__':
    # Entry point: seed session state and db connection, then render the app.
    initialize()
    main()
|
db.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import random
|
4 |
+
import logging
|
5 |
+
import firebase_admin
|
6 |
+
from firebase_admin import credentials, firestore
|
7 |
+
|
8 |
+
public_creds_path = 'public_creds.json'
|
9 |
+
|
10 |
+
def load_credentials():
    """Assemble the Firebase service-account credentials.

    Public fields are read from ``public_creds_path``; the secret fields
    (``private_key_id``, ``private_key``) come from environment variables.

    Returns:
        dict | None: the complete credentials dict, or None on any failure
        (missing file, missing env vars, malformed JSON).
    """
    try:
        with open(public_creds_path) as f:
            credentials_dict = json.load(f)

        private_key = os.environ.get('private_key')
        if private_key is None:
            # Fail explicitly instead of via AttributeError on None.replace().
            raise KeyError('environment variable "private_key" is not set')

        credentials_dict.update({
            'private_key_id': os.environ.get('private_key_id'),
            # Env vars store the key with literal "\n"; restore real newlines.
            'private_key': private_key.replace(r'\n', '\n'),
        })
        return credentials_dict
    except Exception as e:
        logging.error(f'Error while loading credentials: {e}')
        return None
|
23 |
+
|
24 |
+
def connect_to_db(credentials_dict):
    """Initialize the Firebase app (once) and return a Firestore client.

    Returns None when initialization or client creation fails.
    """
    try:
        cert = credentials.Certificate(credentials_dict)
        # firebase_admin may only be initialized once per process.
        if not firebase_admin._apps:
            firebase_admin.initialize_app(cert)
        logging.info('Established connection to db!')
        return firestore.client()
    except Exception as e:
        logging.error(f'Error while connecting to db: {e}')
        return None
|
34 |
+
|
35 |
+
def get_statements_from_db(db):
    """Return the statement list stored in ItemDesirability/Items, or None on failure."""
    try:
        items_doc = db.collection('ItemDesirability').document('Items')
        statements = items_doc.get().to_dict()['statements']
        logging.info(f'Retrieved {len(statements)} statements from db!')
        return statements
    except Exception as e:
        logging.error(f'Error while retrieving items from db: {e}')
        return None
|
44 |
+
|
45 |
+
def pick_random(input_list):
    """Return one uniformly random element of *input_list*.

    Returns None (and logs the error) when the input is empty or not a
    sequence, mirroring the app's "soft failure" convention.
    """
    try:
        chosen = random.choice(input_list)
    except Exception as e:
        logging.error(f'Error while picking random statement: {e}')
        return None
    return chosen
|
51 |
+
|
52 |
+
def write_to_db(db, payload):
    """Append *payload* to the ItemDesirability/Responses document.

    Creates the document on first write; afterwards appends via ArrayUnion
    so concurrent writers do not clobber each other's entries.

    Returns:
        bool: True on success, False on any error (which is logged).
    """
    try:
        collection_ref = db.collection('ItemDesirability')
        doc_ref = collection_ref.document('Responses')
        doc = doc_ref.get()

        if doc.exists:
            doc_ref.update({
                'Data': firestore.ArrayUnion([payload])
            })
        else:
            doc_ref.set({
                'Data': [payload]
            })
        # Fixed: removed stray f-string prefix on a placeholder-free literal.
        logging.info('Sent payload to db!')
        return True
    except Exception as e:
        logging.error(f'Error while sending payload to db: {e}')
        return False
|
demo_section.py
DELETED
@@ -1,312 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import torch
|
3 |
-
import json
|
4 |
-
import time
|
5 |
-
import random
|
6 |
-
import streamlit as st
|
7 |
-
import firebase_admin
|
8 |
-
import logging
|
9 |
-
from firebase_admin import credentials, firestore
|
10 |
-
from dotenv import load_dotenv
|
11 |
-
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
12 |
-
from transformers import pipeline
|
13 |
-
import plotly.graph_objects as go
|
14 |
-
|
15 |
-
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
|
16 |
-
|
17 |
-
load_dotenv()
|
18 |
-
|
19 |
-
def load_credentials():
|
20 |
-
try:
|
21 |
-
with open('public_creds.json') as f:
|
22 |
-
credentials_dict = json.load(f)
|
23 |
-
secret = {
|
24 |
-
'private_key_id': os.environ.get('private_key_id'),
|
25 |
-
'private_key': os.environ.get('private_key').replace(r'\n', '\n')
|
26 |
-
}
|
27 |
-
credentials_dict.update(secret)
|
28 |
-
return credentials_dict
|
29 |
-
except Exception as e:
|
30 |
-
logging.error(f'Error while loading credentials: {e}')
|
31 |
-
return None
|
32 |
-
|
33 |
-
def connect_to_db(credentials_dict):
|
34 |
-
try:
|
35 |
-
cred = credentials.Certificate(credentials_dict)
|
36 |
-
if not firebase_admin._apps:
|
37 |
-
firebase_admin.initialize_app(cred)
|
38 |
-
logging.info('Established connection to db!')
|
39 |
-
return firestore.client()
|
40 |
-
except Exception as e:
|
41 |
-
logging.error(f'Error while connecting to db: {e}')
|
42 |
-
return None
|
43 |
-
|
44 |
-
def get_statements_from_db(db):
|
45 |
-
try:
|
46 |
-
document = db.collection('ItemDesirability').document('Items')
|
47 |
-
statements = document.get().to_dict()['statements']
|
48 |
-
logging.info(f'Retrieved {len(statements)} statements from db!')
|
49 |
-
return statements
|
50 |
-
except Exception as e:
|
51 |
-
logging.error(f'Error while retrieving items from db: {e}')
|
52 |
-
return None
|
53 |
-
|
54 |
-
def update_db(db, payload):
|
55 |
-
|
56 |
-
try:
|
57 |
-
collection_ref = db.collection('ItemDesirability')
|
58 |
-
doc_ref = collection_ref.document('Responses')
|
59 |
-
doc = doc_ref.get()
|
60 |
-
|
61 |
-
if doc.exists:
|
62 |
-
doc_ref.update({
|
63 |
-
'Data': firestore.ArrayUnion([payload])
|
64 |
-
})
|
65 |
-
else:
|
66 |
-
doc_ref.set({
|
67 |
-
'Data': [payload]
|
68 |
-
})
|
69 |
-
logging.info(f'Sent payload to db!')
|
70 |
-
return True
|
71 |
-
except Exception as e:
|
72 |
-
logging.error(f'Error while sending payload to db: {e}')
|
73 |
-
return False
|
74 |
-
|
75 |
-
def pick_random(input_list):
|
76 |
-
try:
|
77 |
-
return random.choice(input_list)
|
78 |
-
except Exception as e:
|
79 |
-
logging.error(f'Error while picking random statement: {e}')
|
80 |
-
return None
|
81 |
-
|
82 |
-
def z_score(y, mean=.04853076, sd=.9409466):
|
83 |
-
return (y - mean) / sd
|
84 |
-
|
85 |
-
def score_text(input_text):
|
86 |
-
classifier_output = st.session_state.classifier(input_text)
|
87 |
-
classifier_output_dict = {x['label']: x['score'] for x in classifier_output[0]}
|
88 |
-
sentiment = classifier_output_dict['positive'] - classifier_output_dict['negative']
|
89 |
-
|
90 |
-
inputs = st.session_state.tokenizer(text=input_text, padding=True, return_tensors='pt')
|
91 |
-
|
92 |
-
with torch.no_grad():
|
93 |
-
score = st.session_state.model(**inputs).logits.squeeze().tolist()
|
94 |
-
desirability = z_score(score)
|
95 |
-
|
96 |
-
return sentiment, desirability
|
97 |
-
|
98 |
-
def indicator_plot(value, title, value_range, domain):
|
99 |
-
|
100 |
-
plot = go.Indicator(
|
101 |
-
mode = "gauge+delta",
|
102 |
-
value = value,
|
103 |
-
domain = domain,
|
104 |
-
title = title,
|
105 |
-
delta = {
|
106 |
-
'reference': 0,
|
107 |
-
'decreasing': {'color': "#ec4899"},
|
108 |
-
'increasing': {'color': "#36def1"}
|
109 |
-
},
|
110 |
-
gauge = {
|
111 |
-
'axis': {'range': value_range, 'tickwidth': 1, 'tickcolor': "black"},
|
112 |
-
'bar': {'color': "#4361ee"},
|
113 |
-
'bgcolor': "white",
|
114 |
-
'borderwidth': 2,
|
115 |
-
'bordercolor': "#efefef",
|
116 |
-
'steps': [
|
117 |
-
{'range': [value_range[0], 0], 'color': '#efefef'},
|
118 |
-
{'range': [0, value_range[1]], 'color': '#efefef'}
|
119 |
-
],
|
120 |
-
'threshold': {
|
121 |
-
'line': {'color': "#4361ee", 'width': 8},
|
122 |
-
'thickness': 0.75,
|
123 |
-
'value': value
|
124 |
-
}
|
125 |
-
}
|
126 |
-
)
|
127 |
-
|
128 |
-
return plot
|
129 |
-
|
130 |
-
def show_scores(sentiment, desirability, input_text):
|
131 |
-
p1 = indicator_plot(
|
132 |
-
value=sentiment,
|
133 |
-
title=f'Item Sentiment',
|
134 |
-
value_range=[-1, 1],
|
135 |
-
domain={'x': [0, .45], 'y': [0, 1]},
|
136 |
-
)
|
137 |
-
|
138 |
-
p2 = indicator_plot(
|
139 |
-
value=desirability,
|
140 |
-
title=f'Item Desirability',
|
141 |
-
value_range=[-4, 4],
|
142 |
-
domain={'x': [.55, 1], 'y': [0, 1]}
|
143 |
-
)
|
144 |
-
|
145 |
-
fig = go.Figure()
|
146 |
-
fig.add_trace(p1)
|
147 |
-
fig.add_trace(p2)
|
148 |
-
|
149 |
-
fig.update_layout(
|
150 |
-
title=dict(text=f'"{input_text}"', font=dict(size=36),yref='paper'),
|
151 |
-
paper_bgcolor = "white",
|
152 |
-
font = {'color': "black", 'family': "Arial"})
|
153 |
-
|
154 |
-
st.plotly_chart(fig, theme=None, use_container_width=True)
|
155 |
-
|
156 |
-
st.markdown("""
|
157 |
-
Item sentiment: Absolute differences between positive and negative sentiment.
|
158 |
-
Item desirability: z-transformed values, 0 indicated "neutral".
|
159 |
-
""")
|
160 |
-
|
161 |
-
def update_statement_placeholder(placeholder):
|
162 |
-
|
163 |
-
placeholder.markdown(
|
164 |
-
body=f"""
|
165 |
-
Is it socially desirable or undesirable to endorse the following statement?
|
166 |
-
### <center>\"{st.session_state.current_statement.capitalize()}\"</center>
|
167 |
-
""",
|
168 |
-
unsafe_allow_html=True
|
169 |
-
)
|
170 |
-
|
171 |
-
def show():
|
172 |
-
credentials_dict = load_credentials()
|
173 |
-
connection_attempts = 0
|
174 |
-
|
175 |
-
if 'db' not in st.session_state:
|
176 |
-
st.session_state.db = None
|
177 |
-
|
178 |
-
while st.session_state.db is None and connection_attempts < 3:
|
179 |
-
st.session_state.db = connect_to_db(credentials_dict)
|
180 |
-
if st.session_state.db is None:
|
181 |
-
logging.info('Retrying to connect to db...')
|
182 |
-
connection_attempts += 1
|
183 |
-
time.sleep(1)
|
184 |
-
|
185 |
-
|
186 |
-
retrieval_attempts = 0
|
187 |
-
|
188 |
-
if 'statements' not in st.session_state:
|
189 |
-
st.session_state.statements = None
|
190 |
-
|
191 |
-
if 'current_statement' not in st.session_state:
|
192 |
-
st.session_state.current_statement = None
|
193 |
-
|
194 |
-
while st.session_state.statements is None and retrieval_attempts < 3:
|
195 |
-
st.session_state.statements = get_statements_from_db(st.session_state.db)
|
196 |
-
st.session_state.current_statement = pick_random(st.session_state.statements)
|
197 |
-
if st.session_state.statements is None:
|
198 |
-
logging.info('Retrying to retrieve statements from db...')
|
199 |
-
retrieval_attempts += 1
|
200 |
-
time.sleep(1)
|
201 |
-
|
202 |
-
st.markdown("""
|
203 |
-
## Try it yourself!
|
204 |
-
Use the text field below to enter a statement that might be part of a psychological questionnaire (e.g., "I love a good fight.").
|
205 |
-
The left dial indicates how socially desirable it might be to endorse this item.
|
206 |
-
The right dial indicates sentiment (i.e., valence) as estimated by regular sentiment analysis (using the `cardiffnlp/twitter-xlm-roberta-base-sentiment` model).
|
207 |
-
""")
|
208 |
-
|
209 |
-
if st.session_state.db:
|
210 |
-
collect_data = st.checkbox(
|
211 |
-
label='I want to support and help improve this research.',
|
212 |
-
value=True
|
213 |
-
)
|
214 |
-
else:
|
215 |
-
collect_data = False
|
216 |
-
|
217 |
-
if st.session_state.db and collect_data:
|
218 |
-
|
219 |
-
statement_placeholder = st.empty()
|
220 |
-
update_statement_placeholder(statement_placeholder)
|
221 |
-
|
222 |
-
rating_options = ['[Please select]', 'Very undesirable', 'Undesirable', 'Neutral', 'Desirable', 'Very desirable']
|
223 |
-
|
224 |
-
selected_rating = st.selectbox(
|
225 |
-
label='Rate the statement above according to whether it is socially desirable or undesirable.',
|
226 |
-
options=rating_options,
|
227 |
-
index=0
|
228 |
-
)
|
229 |
-
|
230 |
-
suitability_options = ['No, I\'m just playing around', 'Yes, my input can help improve this research']
|
231 |
-
research_suitability = st.radio(
|
232 |
-
label='Is your input suitable for research purposes?',
|
233 |
-
options=suitability_options,
|
234 |
-
horizontal=True
|
235 |
-
)
|
236 |
-
|
237 |
-
with st.spinner('Loading the model might take a couple of seconds...'):
|
238 |
-
|
239 |
-
st.markdown("### Estimate item desirability")
|
240 |
-
|
241 |
-
if os.environ.get('item-desirability'):
|
242 |
-
model_path = 'magnolia-psychometrics/item-desirability'
|
243 |
-
else:
|
244 |
-
model_path = os.getenv('model_path')
|
245 |
-
|
246 |
-
auth_token = os.environ.get('item-desirability') or True
|
247 |
-
|
248 |
-
if 'tokenizer' not in st.session_state:
|
249 |
-
st.session_state.tokenizer = AutoTokenizer.from_pretrained(
|
250 |
-
pretrained_model_name_or_path=model_path,
|
251 |
-
use_fast=True,
|
252 |
-
use_auth_token=auth_token
|
253 |
-
)
|
254 |
-
|
255 |
-
if 'model' not in st.session_state:
|
256 |
-
st.session_state.model = AutoModelForSequenceClassification.from_pretrained(
|
257 |
-
pretrained_model_name_or_path=model_path,
|
258 |
-
num_labels=1,
|
259 |
-
ignore_mismatched_sizes=True,
|
260 |
-
use_auth_token=auth_token
|
261 |
-
)
|
262 |
-
|
263 |
-
## sentiment model
|
264 |
-
if 'classifier' not in st.session_state:
|
265 |
-
st.session_state.sentiment_model = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'
|
266 |
-
st.session_state.classifier = pipeline(
|
267 |
-
task='sentiment-analysis',
|
268 |
-
model=st.session_state.sentiment_model,
|
269 |
-
tokenizer=st.session_state.sentiment_model,
|
270 |
-
use_fast=False,
|
271 |
-
top_k=3
|
272 |
-
)
|
273 |
-
|
274 |
-
input_text = st.text_input(
|
275 |
-
label='Item text/statement:',
|
276 |
-
value='I love a good fight.',
|
277 |
-
placeholder='Enter item text'
|
278 |
-
)
|
279 |
-
|
280 |
-
if st.button(label='Evaluate Item Text', type="primary"):
|
281 |
-
if collect_data and st.session_state.db:
|
282 |
-
if selected_rating != rating_options[0]:
|
283 |
-
item_rating = rating_options.index(selected_rating)
|
284 |
-
suitability_rating = suitability_options.index(research_suitability)
|
285 |
-
sentiment, desirability = score_text(input_text)
|
286 |
-
|
287 |
-
payload = {
|
288 |
-
'user_id': st.session_state.user_id,
|
289 |
-
'statement': st.session_state.current_statement,
|
290 |
-
'rating': item_rating,
|
291 |
-
'suitability': suitability_rating,
|
292 |
-
'input_text': input_text,
|
293 |
-
'sentiment': sentiment,
|
294 |
-
'desirability': desirability,
|
295 |
-
}
|
296 |
-
|
297 |
-
update_success = update_db(
|
298 |
-
db=st.session_state.db,
|
299 |
-
payload=payload
|
300 |
-
)
|
301 |
-
|
302 |
-
if update_success:
|
303 |
-
st.session_state.current_statement = pick_random(st.session_state.statements)
|
304 |
-
update_statement_placeholder(statement_placeholder)
|
305 |
-
|
306 |
-
show_scores(sentiment, desirability, input_text)
|
307 |
-
|
308 |
-
else:
|
309 |
-
st.error('Please rate the statement presented above!')
|
310 |
-
else:
|
311 |
-
sentiment, desirability = score_text(input_text)
|
312 |
-
show_scores(sentiment, desirability, input_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
logo-130x130.svg
ADDED
modeling.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
import torch
|
4 |
+
import streamlit as st
|
5 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
6 |
+
from transformers import pipeline
|
7 |
+
|
8 |
+
sentiment_model_path = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'
|
9 |
+
|
10 |
+
def load_model():
|
11 |
+
|
12 |
+
keys = ['tokenizer', 'model', 'classifier']
|
13 |
+
|
14 |
+
if any(st.session_state.get(key) is None for key in keys):
|
15 |
+
with st.spinner('Loading the model might take a couple of seconds...'):
|
16 |
+
try:
|
17 |
+
if os.environ.get('item-desirability'):
|
18 |
+
model_path = 'magnolia-psychometrics/item-desirability'
|
19 |
+
else:
|
20 |
+
model_path = os.getenv('model_path')
|
21 |
+
|
22 |
+
auth_token = os.environ.get('item-desirability') or True
|
23 |
+
|
24 |
+
st.session_state.tokenizer = AutoTokenizer.from_pretrained(
|
25 |
+
pretrained_model_name_or_path=model_path,
|
26 |
+
use_fast=True,
|
27 |
+
use_auth_token=auth_token
|
28 |
+
)
|
29 |
+
|
30 |
+
st.session_state.model = AutoModelForSequenceClassification.from_pretrained(
|
31 |
+
pretrained_model_name_or_path=model_path,
|
32 |
+
num_labels=1,
|
33 |
+
ignore_mismatched_sizes=True,
|
34 |
+
use_auth_token=auth_token
|
35 |
+
)
|
36 |
+
|
37 |
+
st.session_state.classifier = pipeline(
|
38 |
+
task='sentiment-analysis',
|
39 |
+
model=sentiment_model_path,
|
40 |
+
tokenizer=sentiment_model_path,
|
41 |
+
use_fast=False,
|
42 |
+
top_k=3
|
43 |
+
)
|
44 |
+
|
45 |
+
logging.info('Loaded models and tokenizer!')
|
46 |
+
|
47 |
+
except Exception as e:
|
48 |
+
logging.error(f'Error while loading models/tokenizer: {e}')
|
49 |
+
|
50 |
+
def z_score(y, mean=.04853076, sd=.9409466):
|
51 |
+
return (y - mean) / sd
|
52 |
+
|
53 |
+
def score_text(input_text):
|
54 |
+
with st.spinner('Predicting...'):
|
55 |
+
classifier_output = st.session_state.classifier(input_text)
|
56 |
+
classifier_output_dict = {x['label']: x['score'] for x in classifier_output[0]}
|
57 |
+
sentiment = classifier_output_dict['positive'] - classifier_output_dict['negative']
|
58 |
+
|
59 |
+
inputs = st.session_state.tokenizer(text=input_text, padding=True, return_tensors='pt')
|
60 |
+
|
61 |
+
with torch.no_grad():
|
62 |
+
score = st.session_state.model(**inputs).logits.squeeze().tolist()
|
63 |
+
desirability = z_score(score)
|
64 |
+
|
65 |
+
return sentiment, desirability
|
plots.py
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import plotly.graph_objects as go
|
3 |
+
import plotly.express as px
|
4 |
+
|
5 |
+
def indicator_plot(value, title, value_range, domain):
|
6 |
+
|
7 |
+
plot = go.Indicator(
|
8 |
+
mode = 'gauge+delta',
|
9 |
+
value = value,
|
10 |
+
domain = domain,
|
11 |
+
title = title,
|
12 |
+
delta = {
|
13 |
+
'reference': 0,
|
14 |
+
'decreasing': {'color': '#ec4899'},
|
15 |
+
'increasing': {'color': '#36def1'}
|
16 |
+
},
|
17 |
+
gauge = {
|
18 |
+
'axis': {'range': value_range, 'tickwidth': 1, 'tickcolor': 'black'},
|
19 |
+
'bar': {'color': '#4361ee'},
|
20 |
+
'bgcolor': 'white',
|
21 |
+
'borderwidth': 2,
|
22 |
+
'bordercolor': '#efefef',
|
23 |
+
'steps': [
|
24 |
+
{'range': [value_range[0], 0], 'color': '#efefef'},
|
25 |
+
{'range': [0, value_range[1]], 'color': '#efefef'}
|
26 |
+
],
|
27 |
+
'threshold': {
|
28 |
+
'line': {'color': '#4361ee', 'width': 8},
|
29 |
+
'thickness': 0.75,
|
30 |
+
'value': value
|
31 |
+
}
|
32 |
+
}
|
33 |
+
)
|
34 |
+
|
35 |
+
return plot
|
36 |
+
|
37 |
+
def scatter_plot(df, group_var):
|
38 |
+
|
39 |
+
colors = ['#36def1', '#4361ee'] if group_var else ['#4361ee']
|
40 |
+
|
41 |
+
plot = px.scatter(
|
42 |
+
df,
|
43 |
+
x='Machine-ratings',
|
44 |
+
y='Human-ratings',
|
45 |
+
color=group_var,
|
46 |
+
facet_col='x_group',
|
47 |
+
facet_col_wrap=2,
|
48 |
+
trendline='ols',
|
49 |
+
trendline_scope='trace',
|
50 |
+
hover_data={
|
51 |
+
'Text': df.text,
|
52 |
+
'Language': False,
|
53 |
+
'x_group': False,
|
54 |
+
'Human-ratings': ':.2f',
|
55 |
+
'Machine-ratings': ':.2f',
|
56 |
+
'Study': df.study,
|
57 |
+
'Instrument': df.instrument,
|
58 |
+
},
|
59 |
+
width=400,
|
60 |
+
height=400,
|
61 |
+
color_discrete_sequence=colors
|
62 |
+
)
|
63 |
+
|
64 |
+
plot.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1]))
|
65 |
+
plot.update_layout(
|
66 |
+
legend={
|
67 |
+
'orientation':'h',
|
68 |
+
'yanchor': 'bottom',
|
69 |
+
'y': -.30
|
70 |
+
})
|
71 |
+
plot.update_xaxes(title_standoff = 0)
|
72 |
+
|
73 |
+
return plot
|
74 |
+
|
75 |
+
def show_scores(sentiment, desirability, input_text):
|
76 |
+
with st.container():
|
77 |
+
p1 = indicator_plot(
|
78 |
+
value=sentiment,
|
79 |
+
title=f'Item Sentiment',
|
80 |
+
value_range=[-1, 1],
|
81 |
+
domain={'x': [0, .45], 'y': [0, .5]},
|
82 |
+
)
|
83 |
+
|
84 |
+
p2 = indicator_plot(
|
85 |
+
value=desirability,
|
86 |
+
title=f'Item Desirability',
|
87 |
+
value_range=[-4, 4],
|
88 |
+
domain={'x': [.55, 1], 'y': [0, .5]}
|
89 |
+
)
|
90 |
+
|
91 |
+
fig = go.Figure()
|
92 |
+
fig.add_trace(p1)
|
93 |
+
fig.add_trace(p2)
|
94 |
+
|
95 |
+
fig.update_layout(
|
96 |
+
title=dict(text=f'"{input_text}"', font=dict(size=36),yref='paper'),
|
97 |
+
paper_bgcolor = 'white',
|
98 |
+
font = {'color': 'black', 'family': 'Arial'})
|
99 |
+
|
100 |
+
st.plotly_chart(fig, theme=None, use_container_width=True)
|
101 |
+
|
102 |
+
st.markdown("""
|
103 |
+
Item sentiment: Absolute differences between positive and negative sentiment.
|
104 |
+
Item desirability: z-transformed values, 0 indicated "neutral".
|
105 |
+
""")
|
explore_data_section.py → utils.py
RENAMED
@@ -1,7 +1,5 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
-
import plotly.graph_objects as go
|
4 |
-
import plotly.express as px
|
5 |
|
6 |
covariate_columns = {
|
7 |
'content_domain': 'Content Domain',
|
@@ -44,58 +42,4 @@ if 'df' not in st.session_state:
|
|
44 |
'x': 'Machine-ratings',
|
45 |
}
|
46 |
)
|
47 |
-
)
|
48 |
-
|
49 |
-
def scatter_plot(df, group_var):
|
50 |
-
|
51 |
-
colors = ['#36def1', '#4361ee'] if group_var else ['#4361ee']
|
52 |
-
|
53 |
-
plot = px.scatter(
|
54 |
-
df,
|
55 |
-
x='Machine-ratings',
|
56 |
-
y='Human-ratings',
|
57 |
-
color=group_var,
|
58 |
-
facet_col='x_group',
|
59 |
-
facet_col_wrap=2,
|
60 |
-
trendline='ols',
|
61 |
-
trendline_scope='trace',
|
62 |
-
hover_data={
|
63 |
-
'Text': df.text,
|
64 |
-
'Language': False,
|
65 |
-
'x_group': False,
|
66 |
-
'Human-ratings': ':.2f',
|
67 |
-
'Machine-ratings': ':.2f',
|
68 |
-
'Study': df.study,
|
69 |
-
'Instrument': df.instrument,
|
70 |
-
},
|
71 |
-
width=400,
|
72 |
-
height=400,
|
73 |
-
color_discrete_sequence=colors
|
74 |
-
)
|
75 |
-
|
76 |
-
plot.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1]))
|
77 |
-
plot.update_layout(
|
78 |
-
legend={
|
79 |
-
'orientation':'h',
|
80 |
-
'yanchor': 'bottom',
|
81 |
-
'y': -.30
|
82 |
-
})
|
83 |
-
plot.update_xaxes(title_standoff = 0)
|
84 |
-
|
85 |
-
return plot
|
86 |
-
|
87 |
-
def show():
|
88 |
-
st.markdown("""
|
89 |
-
## Explore the data
|
90 |
-
Figures show the accuarcy in precitions of human-rated item desirability by the sentiment model (left) and the desirability model (right), using `test`-partition data only.
|
91 |
-
""")
|
92 |
-
|
93 |
-
show_covariates = st.checkbox('Show covariates', value=True)
|
94 |
-
if show_covariates:
|
95 |
-
option = st.selectbox('Group by', options=list(covariate_columns.values()))
|
96 |
-
else:
|
97 |
-
option = None
|
98 |
-
|
99 |
-
if 'df' in st.session_state:
|
100 |
-
plot = scatter_plot(st.session_state.df, option)
|
101 |
-
st.plotly_chart(plot, theme=None, use_container_width=True)
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
|
|
|
|
3 |
|
4 |
covariate_columns = {
|
5 |
'content_domain': 'Content Domain',
|
|
|
42 |
'x': 'Machine-ratings',
|
43 |
}
|
44 |
)
|
45 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|