Huertas97 committed on
Commit
f564c9e
·
1 Parent(s): 9574797

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -29
app.py CHANGED
@@ -18,8 +18,13 @@ st.set_page_config(
18
 
19
  @st.cache(show_spinner=False, allow_output_mutation=True, suppress_st_warning=True)
20
  def load_models():
21
- spanish_model = spacy.load("./spacy-models/toy_output_es_blank/model-best/")
22
- english_model = spacy.load("./spacy-models/toy_output_en_blank/model-best/")
 
 
 
 
 
23
  models = {"English": english_model, "Spanish": spanish_model}
24
  return models
25
 
@@ -52,57 +57,72 @@ def process_text(doc, selected_multi_ner):
52
  # Side bar
53
  selected_language = st.sidebar.selectbox("Select a language", options=["English", "Spanish"])
54
  selected_multi_ner = st.sidebar.radio('Do you want to break down the Entities detected by type of leetspeak?', ['Yes', 'No'])
 
55
 
56
  models = load_models()
57
  selected_model = models[selected_language]
58
 
59
  import base64
60
 
61
- LOGO_IMAGE = "aida_logo.png"
62
 
63
  st.markdown(
64
  """
65
  <style>
66
- .container {
67
- display: flex;
68
-
69
- }
70
  .logo-img {
71
- float:right;
72
- margin-top: 2.2em;
73
- margin-left: -10em;
 
 
 
74
  }
 
 
 
 
 
 
75
  </style>
76
  """,
77
  unsafe_allow_html=True
78
  )
79
 
80
 
81
- col1, col2 = st.columns([4, 1])
82
- with col1:
83
- st.markdown("""
84
- <style>
85
- .big-font {
86
- font-size:3em;
87
- font-weight: bold;
88
- }
89
- </style>
90
- """, unsafe_allow_html=True)
91
-
92
- st.markdown('<p class="big-font">Welcome to <font color="#4B8BBE">Leet</font><font color="#FFE873">Speak</font><font color="#ff73a2">-NER</font></p>', unsafe_allow_html=True)
93
- with col2:
94
  # st.image('./aida_logo.png')
95
  st.markdown(
96
  f"""
97
- <div class="container">
98
  <img class="logo-img" src="data:image/png;base64,{base64.b64encode(open(LOGO_IMAGE, "rb").read()).decode()}">
99
- </div>
100
  """,
101
  unsafe_allow_html=True
102
  )
 
 
 
 
 
 
 
 
 
 
 
103
 
104
 
 
 
 
 
 
 
 
 
105
 
 
 
 
106
  with st.expander("Project Description", expanded=False):
107
  st.write("""
108
  Developed in Applied Intelligence and Data Analysis ([AI+DA](http://aida.etsisi.upm.es/)) group at Polytech University of Madrid (UPM).
@@ -110,9 +130,29 @@ with st.expander("Project Description", expanded=False):
110
 
111
  Currently, two languages are supported: English and Spanish. Additionally, you can select whether the detected entities are broken down into the three types of camouflaged words: Canonical Leetspeak, Punctuation Camouflaged, Inversion Camouflaged.
112
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
 
114
 
115
-
 
 
116
 
117
 
118
  st.subheader("Input Text")
@@ -135,8 +175,9 @@ with st.form("my_form"):
135
 
136
 
137
  st.subheader("Output")
138
- doc = selected_model(text_input)
139
- tokens = process_text(doc, selected_multi_ner)
 
140
 
141
- annotated_text(*tokens)
142
 
 
18
 
19
  @st.cache(show_spinner=False, allow_output_mutation=True, suppress_st_warning=True)
20
  def load_models():
21
+ if selected_for == "Accuracy":
22
+ spanish_model = spacy.load("./spacy-models/output_full_ES_roberta-base-bne/model-best")
23
+ english_model = spacy.load("./spacy-models/output_full_EN_roberta_base/model-best")
24
+
25
+ if selected_for == "Efficiency":
26
+ spanish_model = spacy.load("./spacy-models/toy_output_es_blank/model-best")
27
+ english_model = spacy.load("./spacy-models/toy_output_en_blank/model-best/")
28
  models = {"English": english_model, "Spanish": spanish_model}
29
  return models
30
 
 
57
  # Side bar
58
  selected_language = st.sidebar.selectbox("Select a language", options=["English", "Spanish"])
59
  selected_multi_ner = st.sidebar.radio('Do you want to break down the Entities detected by type of leetspeak?', ['Yes', 'No'])
60
+ selected_for = st.sidebar.radio('Select for:', ['Efficiency', 'Accuracy'])
61
 
62
  models = load_models()
63
  selected_model = models[selected_language]
64
 
65
  import base64
66
 
67
+ LOGO_IMAGE = "LeetSpeak-NER-cropped.png"
68
 
69
  st.markdown(
70
  """
71
  <style>
 
 
 
 
72
  .logo-img {
73
+
74
+
75
+ margin-top: auto;
76
+ margin-left: 30%;
77
+ width: 30%;
78
+
79
  }
80
+ .logo-img-2 {
81
+ margin-top: 10%;
82
+ margin-left: 20%;
83
+ width: 35%;
84
+
85
+ }
86
  </style>
87
  """,
88
  unsafe_allow_html=True
89
  )
90
 
91
 
92
+ col1, col2= st.columns([2, 2])
93
+ with col1:
 
 
 
 
 
 
 
 
 
 
 
94
  # st.image('./aida_logo.png')
95
  st.markdown(
96
  f"""
 
97
  <img class="logo-img" src="data:image/png;base64,{base64.b64encode(open(LOGO_IMAGE, "rb").read()).decode()}">
 
98
  """,
99
  unsafe_allow_html=True
100
  )
101
+
102
+ with col2:
103
+ # st.image('./aida_logo.png')
104
+ st.markdown(
105
+ f"""
106
+ <img class="logo-img-2" src="data:image/png;base64,{base64.b64encode(open("aida_logo.png", "rb").read()).decode()}">
107
+ """,
108
+ unsafe_allow_html=True
109
+ )
110
+
111
+ # st.image([LOGO_IMAGE,"aida_logo.png"], width=100)
112
 
113
 
114
+ st.markdown("""
115
+ <style>
116
+ .big-font {
117
+ font-size:3em;
118
+ font-weight: bold;
119
+ }
120
+ </style>
121
+ """, unsafe_allow_html=True)
122
 
123
+ st.markdown('<p class="big-font">Welcome to <font color="#4B8BBE">Leet</font><font color=" #FFD43B">Speak</font><font color="#ff73a2">-NER</font></p>', unsafe_allow_html=True)
124
+
125
+
126
  with st.expander("Project Description", expanded=False):
127
  st.write("""
128
  Developed in Applied Intelligence and Data Analysis ([AI+DA](http://aida.etsisi.upm.es/)) group at Polytech University of Madrid (UPM).
 
130
 
131
  Currently, two languages are supported: English and Spanish. Additionally, you can select whether the detected entities are broken down into the three types of camouflaged words: Canonical Leetspeak, Punctuation Camouflaged, Inversion Camouflaged.
132
  """)
133
+
134
+ with st.expander("Try some of these examples", expanded=False):
135
+ st.write("""
136
+ ENGLISH:
137
+ - Desperately dominated by fam1ly sitüatløns, he leaves her.
138
+ - You might as well come out to investigate a strang3 n'o?i+se or something.
139
+ - But one other thing that we have to re;think is the way that we dy£ our #c!l.o|th?£+s.
140
+ - And he wanted Baltimore to get that same kind of att£ntløn from the outside, but )i)n)t)r)o)s)p)e)c)t)i)o)n from the inside about what was going on with us.
141
+
142
+
143
+ SPANISH
144
+ - _d+i%o"s mío!
145
+ - se asocian con el m13;d0 y el d'o'lor. g£rønlmo solía decir
146
+ - Con las nuevas tecnologías digitales, los agrlcultør£s pueden manejar mejor el uso de sus tierras, su energía y su agua, y prepararse para el mal clima.
147
+ - En el tiempo transcurrido entre mi período de escuela %s%3%c%_%n%d%a%r%1%a y el mo'm3n'to de empezar a enseñar vimos surgir el fenómeno de in't£r'net
148
+ - Las pre0c_pac1on3s van desde inquietudes por las ramificaciones desestabilizadoras de una estrategia de salida de la FC, hasta aprehensión por pérdidas de capital en la rápidamente creciente cartera de valores de la Fed (actualmente de $3 billones y en camino a los $4 billones para finales de este año).
149
+ """)
150
 
151
+ # - Why do all these _r_e_p_o_r_t_e_r_s, who get praise and money for doing what Assange has done, maintain a cow;ardly silence (at best) while a fellow publisher faces threats of extradition, banning, and espionage charges (which can incur the death penalty), not to mention calls for his as'sa'ss1nat'i'on?
152
 
153
+ # - Cada uno de estos es un crimen de guerra, un crimen contra la humanidad y, en el caso de los asesinatos masivos de la campaña de Anfal, y tal vez también en el caso de los árabes de los pantanos, el crimen más serio de todos, ge'no'ci'dio.
154
+
155
+ # - No quiere decir que debamos iniciar una campaña por los derechos de los lns£ctøs
156
 
157
 
158
  st.subheader("Input Text")
 
175
 
176
 
177
  st.subheader("Output")
178
+ with st.spinner('Wait for it...'):
179
+ doc = selected_model(text_input)
180
+ tokens = process_text(doc, selected_multi_ner)
181
 
182
+ annotated_text(*tokens)
183