Spaces:
Runtime error
Runtime error
bjorn-hommel
committed on
Commit
•
51082bd
1
Parent(s):
daea372
handling local env; added state-management
Browse files
- .gitignore +1 -0
- README.md +1 -1
- app.py +49 -39
- requirements.txt +2 -1
.gitignore
CHANGED
@@ -1,2 +1,3 @@
|
|
|
|
1 |
tmp.ipynb
|
2 |
__pycache__
|
|
|
1 |
+
.env
|
2 |
tmp.ipynb
|
3 |
__pycache__
|
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🎭
|
|
4 |
colorFrom: blue
|
5 |
colorTo: yellow
|
6 |
sdk: streamlit
|
7 |
-
python_version: 3.
|
8 |
sdk_version: 1.17.0
|
9 |
app_file: app.py
|
10 |
pinned: false
|
|
|
4 |
colorFrom: blue
|
5 |
colorTo: yellow
|
6 |
sdk: streamlit
|
7 |
+
python_version: 3.10.6
|
8 |
sdk_version: 1.17.0
|
9 |
app_file: app.py
|
10 |
pinned: false
|
app.py
CHANGED
@@ -5,10 +5,12 @@ import streamlit as st
|
|
5 |
import pandas as pd
|
6 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
7 |
from transformers import pipeline
|
|
|
8 |
from plotly.subplots import make_subplots
|
9 |
import plotly.graph_objects as go
|
10 |
import plotly.express as px
|
11 |
|
|
|
12 |
|
13 |
def z_score(y, mean=.04853076, sd=.9409466):
|
14 |
return (y - mean) / sd
|
@@ -90,35 +92,37 @@ covariate_columns = {
|
|
90 |
'rater_group': 'Rater Group',
|
91 |
}
|
92 |
|
93 |
-
df
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
)
|
102 |
-
.replace(
|
103 |
-
to_replace={
|
104 |
-
'en': 'English',
|
105 |
-
'de': 'German',
|
106 |
-
'other': 'Other',
|
107 |
-
'personality': 'Personality',
|
108 |
-
'laypeople': 'Laypeople',
|
109 |
-
'students': 'Students',
|
110 |
-
'sentiment_model': 'Sentiment Model',
|
111 |
-
'desirability_model': 'Desirability Model'
|
112 |
-
}
|
113 |
-
)
|
114 |
-
.rename(columns=covariate_columns)
|
115 |
-
.rename(
|
116 |
-
columns={
|
117 |
-
'mean_z': 'Human-ratings',
|
118 |
-
'x': 'Machine-ratings',
|
119 |
-
}
|
120 |
)
|
121 |
-
|
122 |
|
123 |
st.markdown("""
|
124 |
# NLP for Item Desirability Ratings
|
@@ -145,19 +149,19 @@ with st.spinner('Processing...'):
|
|
145 |
if os.environ.get('item-desirability'):
|
146 |
model_path = 'magnolia-psychometrics/item-desirability'
|
147 |
else:
|
148 |
-
model_path = '
|
149 |
|
150 |
auth_token = os.environ.get('item-desirability') or True
|
151 |
|
152 |
-
if 'tokenizer' not in
|
153 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
154 |
pretrained_model_name_or_path=model_path,
|
155 |
use_fast=True,
|
156 |
use_auth_token=auth_token
|
157 |
)
|
158 |
|
159 |
-
if 'model' not in
|
160 |
-
model = AutoModelForSequenceClassification.from_pretrained(
|
161 |
pretrained_model_name_or_path=model_path,
|
162 |
num_labels=1,
|
163 |
ignore_mismatched_sizes=True,
|
@@ -165,9 +169,15 @@ with st.spinner('Processing...'):
|
|
165 |
)
|
166 |
|
167 |
## sentiment model
|
168 |
-
if 'classifier' not in
|
169 |
-
sentiment_model = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'
|
170 |
-
classifier = pipeline(
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
input_text = st.text_input(
|
173 |
label='Estimate item desirability:',
|
@@ -177,14 +187,14 @@ with st.spinner('Processing...'):
|
|
177 |
|
178 |
if input_text:
|
179 |
|
180 |
-
classifier_output = classifier(input_text)
|
181 |
classifier_output_dict = {x['label']: x['score'] for x in classifier_output[0]}
|
182 |
classifier_score = classifier_output_dict['positive'] - classifier_output_dict['negative']
|
183 |
|
184 |
-
inputs = tokenizer(input_text, padding=True, return_tensors='pt')
|
185 |
|
186 |
with torch.no_grad():
|
187 |
-
score = model(**inputs).logits.squeeze().tolist()
|
188 |
z = z_score(score)
|
189 |
|
190 |
p1 = indicator_plot(
|
@@ -231,6 +241,6 @@ if show_covariates:
|
|
231 |
else:
|
232 |
option = None
|
233 |
|
234 |
-
plot = scatter_plot(df, option)
|
235 |
|
236 |
st.plotly_chart(plot, theme=None, use_container_width=True)
|
|
|
5 |
import pandas as pd
|
6 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
7 |
from transformers import pipeline
|
8 |
+
from dotenv import load_dotenv
|
9 |
from plotly.subplots import make_subplots
|
10 |
import plotly.graph_objects as go
|
11 |
import plotly.express as px
|
12 |
|
13 |
+
load_dotenv()
|
14 |
|
15 |
def z_score(y, mean=.04853076, sd=.9409466):
|
16 |
return (y - mean) / sd
|
|
|
92 |
'rater_group': 'Rater Group',
|
93 |
}
|
94 |
|
95 |
+
if 'df' not in st.session_state:
|
96 |
+
st.session_state.df = (
|
97 |
+
pd
|
98 |
+
.read_feather(path='data.feather').query('partition == "test" | partition == "dev"')
|
99 |
+
.melt(
|
100 |
+
value_vars=['sentiment_model', 'desirability_model'],
|
101 |
+
var_name='x_group',
|
102 |
+
value_name='x',
|
103 |
+
id_vars=['mean_z', 'text', 'content_domain', 'language', 'rater_group', 'study', 'instrument']
|
104 |
+
)
|
105 |
+
.replace(
|
106 |
+
to_replace={
|
107 |
+
'en': 'English',
|
108 |
+
'de': 'German',
|
109 |
+
'other': 'Other',
|
110 |
+
'personality': 'Personality',
|
111 |
+
'laypeople': 'Laypeople',
|
112 |
+
'students': 'Students',
|
113 |
+
'sentiment_model': 'Sentiment Model',
|
114 |
+
'desirability_model': 'Desirability Model'
|
115 |
+
}
|
116 |
+
)
|
117 |
+
.rename(columns=covariate_columns)
|
118 |
+
.rename(
|
119 |
+
columns={
|
120 |
+
'mean_z': 'Human-ratings',
|
121 |
+
'x': 'Machine-ratings',
|
122 |
+
}
|
123 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
)
|
125 |
+
|
126 |
|
127 |
st.markdown("""
|
128 |
# NLP for Item Desirability Ratings
|
|
|
149 |
if os.environ.get('item-desirability'):
|
150 |
model_path = 'magnolia-psychometrics/item-desirability'
|
151 |
else:
|
152 |
+
model_path = os.getenv('model_path')
|
153 |
|
154 |
auth_token = os.environ.get('item-desirability') or True
|
155 |
|
156 |
+
if 'tokenizer' not in st.session_state:
|
157 |
+
st.session_state.tokenizer = AutoTokenizer.from_pretrained(
|
158 |
pretrained_model_name_or_path=model_path,
|
159 |
use_fast=True,
|
160 |
use_auth_token=auth_token
|
161 |
)
|
162 |
|
163 |
+
if 'model' not in st.session_state:
|
164 |
+
st.session_state.model = AutoModelForSequenceClassification.from_pretrained(
|
165 |
pretrained_model_name_or_path=model_path,
|
166 |
num_labels=1,
|
167 |
ignore_mismatched_sizes=True,
|
|
|
169 |
)
|
170 |
|
171 |
## sentiment model
|
172 |
+
if 'classifier' not in st.session_state:
|
173 |
+
st.session_state.sentiment_model = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'
|
174 |
+
st.session_state.classifier = pipeline(
|
175 |
+
task='sentiment-analysis',
|
176 |
+
model=st.session_state.sentiment_model,
|
177 |
+
tokenizer=st.session_state.sentiment_model,
|
178 |
+
use_fast=False,
|
179 |
+
top_k=3
|
180 |
+
)
|
181 |
|
182 |
input_text = st.text_input(
|
183 |
label='Estimate item desirability:',
|
|
|
187 |
|
188 |
if input_text:
|
189 |
|
190 |
+
classifier_output = st.session_state.classifier(input_text)
|
191 |
classifier_output_dict = {x['label']: x['score'] for x in classifier_output[0]}
|
192 |
classifier_score = classifier_output_dict['positive'] - classifier_output_dict['negative']
|
193 |
|
194 |
+
inputs = st.session_state.tokenizer(text=input_text, padding=True, return_tensors='pt')
|
195 |
|
196 |
with torch.no_grad():
|
197 |
+
score = st.session_state.model(**inputs).logits.squeeze().tolist()
|
198 |
z = z_score(score)
|
199 |
|
200 |
p1 = indicator_plot(
|
|
|
241 |
else:
|
242 |
option = None
|
243 |
|
244 |
+
plot = scatter_plot(st.session_state.df, option)
|
245 |
|
246 |
st.plotly_chart(plot, theme=None, use_container_width=True)
|
requirements.txt
CHANGED
@@ -3,4 +3,5 @@ transformers
|
|
3 |
plotly
|
4 |
dash
|
5 |
statsmodels
|
6 |
-
sentencepiece
|
|
|
|
3 |
plotly
|
4 |
dash
|
5 |
statsmodels
|
6 |
+
sentencepiece
|
7 |
+
python-dotenv
|