Spaces: hm-auch
Runtime error
hm-auch committed
Commit • 072c906 • 1 parent: b642a67
update classifier and demonstrator-code
- README.md +1 -1
- app.py +53 -34
- gradio_queue.db +0 -0
- gradio_queue.db-journal +0 -0
- {result/model → model_1}/keras_metadata.pb +0 -0
- {result/model → model_1}/saved_model.pb +0 -0
- {result/model → model_1}/variables/variables.data-00000-of-00001 +0 -0
- {result/model → model_1}/variables/variables.index +0 -0
- model_2/keras_metadata.pb +3 -0
- model_2/saved_model.pb +3 -0
- model_2/variables/variables.data-00000-of-00001 +3 -0
- model_2/variables/variables.index +0 -0
README.md
CHANGED
@@ -1,5 +1,5 @@
 ---
-title:
+title: Hassrede
 emoji: 😻
 colorFrom: yellow
 colorTo: red
app.py
CHANGED
@@ -1,13 +1,9 @@
 import transformers
 
 import gradio as gr
+import numpy as np
 import tensorflow as tf
 
-MODEL_DIRECTORY = './result/model'
-PRETRAINED_MODEL_NAME = 'dbmdz/bert-base-german-cased'
-TOKENIZER = transformers.BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
-MAX_SEQUENCE_LENGTH = 300
-
 def encode(sentences, tokenizer, sequence_length):
     return tokenizer.batch_encode_plus(
         sentences,
@@ -19,39 +15,62 @@ def encode(sentences, tokenizer, sequence_length):
         return_tensors='tf'
     )
 
-
-
-def inference(sentence):
-    encoded_sentence = encode([sentence], TOKENIZER, MAX_SEQUENCE_LENGTH)
-    return hs_detection_model.predict(encoded_sentence.values())
+hs_detection_model_1 = tf.keras.models.load_model('./model_1', compile=True)
+hs_detection_model_2 = tf.keras.models.load_model('./model_2', compile=True)
 
+def model_inference(sentence):
+    encoded_model1_sentence = encode([sentence], transformers.BertTokenizer.from_pretrained('dbmdz/bert-base-german-cased'), 300)
+    encoded_model2_sentence = encode([sentence], transformers.BertTokenizer.from_pretrained('dbmdz/bert-base-german-uncased'), 512)
+    predictions_1 = hs_detection_model_1.predict(encoded_model1_sentence.values()).flatten()
+    predictions_2 = hs_detection_model_2.predict(encoded_model2_sentence.values()).flatten()
+    return {'Hassrede': float(predictions_1[0])}, {'Hassrede': float(predictions_2[0])}
 
-title = "HS-Detector Demonstrator"
+title = "HS-Detector Demonstrator (deutsch)"
 description = """
+<div style="float: none; overflow: hidden;">
+<div style="display:block; width:100%;">
 <center>
-<
-<
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<
+<div style="width:50%; float: left; display: inline-block;">
+<h2>Ausgangsmodell</h2>
+<p>Modell: Bert ('dbmdz/bert-base-german-cased')</p>
+<p>Dataset: germeval18_hasoc19_rp21_combi_dataset <br/> (77.161 Einträge mit einem Hassrede-Anteil von 17,7%)</p>
+<p>Fine-Tuning Parameter: 2 Epochen, 300 Token pro Eintrag, 2e-5 LR</p>
+
+Evaluationsergebnisse:
+Balanced Accuracy: 0.756
+(Accuracy: 0.880)
+Binary F1-Score: 0.625
+Binary Precision: 0.699
+Binary Recall: 0.565
+MCC score: 0.559
+AUROC score: 0.756
+</div>
+<div style="width:50%; float: left; display: inline-block;">
+<h2>Challenger-Modell</h2>
+<p>Modell: Bert ('dbmdz/bert-base-german-uncased')</p>
+<p>Dataset: germeval18_hasoc19_rp21_combi_dataset_no-url_no-address <br/> (~77.161 Einträge mit einem Hassrede-Anteil von 17,7%)</p>
+<p>Fine-Tuning Parameter: 2 Epochen, 512 Token pro Eintrag, 2e-5 LR</p>
+
+Evaluationsergebnisse:
+Balanced Accuracy: 0.749
+(Accuracy: 0.867)
+Binary F1-Score: 0.602
+Binary Precision: 0.642
+Binary Recall: 0.567
+MCC score: 0.524
+AUROC score: 0.749
+</div>
 </center>
+</div>
+</div>
 """
+# <p>Dataset: germeval18_hasoc19_rp21_glasebach22_combi_dataset_no-addr.csv <br/> (84.239 Einträge mit einem Hassrede-Anteil von 18,2%)</p>
+article = """Die Eingaben werden nicht geloggt. Klassifikator einfach ausprobieren.
+Unter dem Button 'Ersteller' kann inspiziert werden, welche Satz-Bestandteile für die Modelle vermutlich entscheident waren.
+Dabei werden automatisiert Satzteile verändert und die Auswirkungen auf die jeweils abgefragten Predictions beobachtet."""
 
-
-
-
-
-ui.launch()
+input_sentence_text = gr.inputs.Textbox(lines=5, placeholder="Geben Sie hier den Satz ein, der von den Modellen auf Hassrede geprüft werden soll.")
+output_predictions = [gr.outputs.Label(label="Prediction of initial model", num_top_classes=1), gr.outputs.Label(label="Prediction of challenging model", num_top_classes=1)]
+ui = gr.Interface(fn=model_inference, inputs=input_sentence_text, outputs=output_predictions, title=title, article=article, description=description, interpretation="default",
+                  flagging_options=["incorrect", "ambiguous", "other"])
+ui.launch(enable_queue=True)
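As a quick sanity check of the new two-model flow in app.py, model_inference can be called directly before ui.launch(enable_queue=True) runs. The snippet below is a hypothetical addition rather than part of this commit; the sample sentence and the probabilities in the comments are purely illustrative, and it assumes it sits inside app.py after model_inference is defined, with ./model_1 and ./model_2 available locally.

# Hypothetical sanity check (not part of this commit): query both fine-tuned
# models once before the Gradio UI starts. Assumes this code lives in app.py
# after model_inference() is defined and both SavedModels are checked out.
sample_sentence = "Das ist ein harmloser Beispielsatz."   # illustrative input
initial_pred, challenger_pred = model_inference(sample_sentence)
print("Ausgangsmodell:   ", initial_pred)      # e.g. {'Hassrede': 0.03}
print("Challenger-Modell:", challenger_pred)   # e.g. {'Hassrede': 0.05}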
gradio_queue.db
ADDED
File without changes

gradio_queue.db-journal
ADDED
Binary file (512 Bytes)
{result/model → model_1}/keras_metadata.pb
RENAMED
File without changes

{result/model → model_1}/saved_model.pb
RENAMED
File without changes

{result/model → model_1}/variables/variables.data-00000-of-00001
RENAMED
File without changes

{result/model → model_1}/variables/variables.index
RENAMED
File without changes
model_2/keras_metadata.pb
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8af43660950e7ee4747371bb148060c46c696cf2141dcccfa48b02fe15d51f6
+size 154814

model_2/saved_model.pb
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f32916473730c6bcf6302fc51828059364bc2d8ccc5d19fac4f633ac47f15073
+size 6564579

model_2/variables/variables.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:338d6f22e3fa2f63a391c300d0ba96774d08ff685986f1006c55177d11656004
+size 1319386304

model_2/variables/variables.index
ADDED
Binary file (40.7 kB)
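The three model_2 entries above are Git LFS pointer files (the version/oid/size triplets); the actual challenger weights, about 1.3 GB for variables.data-00000-of-00001, live in LFS storage and have to be materialized before app.py's tf.keras.models.load_model('./model_2', compile=True) can load them. A minimal sketch of how one might verify that locally, assuming a plain checkout of the Space; the helper below is hypothetical and not part of the repository.

# Hypothetical check (not part of this commit): a file that is still a bare
# Git LFS pointer starts with the LFS spec line and is only a few hundred
# bytes, far smaller than the sizes recorded in the pointer files above.
import os

LFS_POINTER_PREFIX = b'version https://git-lfs.github.com/spec/v1'

def looks_like_lfs_pointer(path):
    with open(path, 'rb') as f:
        return f.read(len(LFS_POINTER_PREFIX)) == LFS_POINTER_PREFIX

for path, expected_size in [
    ('model_2/keras_metadata.pb', 154814),
    ('model_2/saved_model.pb', 6564579),
    ('model_2/variables/variables.data-00000-of-00001', 1319386304),
]:
    status = 'still an LFS pointer' if looks_like_lfs_pointer(path) else 'materialized'
    print(f'{path}: {status}, {os.path.getsize(path)} bytes (expected {expected_size})')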