ACMCMC committed · Commit 39e3879 · 1 Parent(s): 0123a49
App update to include tokenization ratio
app.py CHANGED
```diff
@@ -7,9 +7,12 @@ from unidecode import unidecode
 
 
 from transformers import AutoModelForCausalLM, AutoTokenizer
+import logging
 
+logging.basicConfig(level=logging.INFO)
 
-model = AutoModelForCausalLM.from_pretrained("gpt2")
+# Load the model and tokenizer
+# model = AutoModelForCausalLM.from_pretrained("gpt2")
 tokenizer = AutoTokenizer.from_pretrained("gpt2")
 
 
```
```diff
@@ -33,8 +36,8 @@ def calculate_perplexity(text_logits: torch.Tensor):
     return perplexity.item()
 
 
-# Function to calculate
-def process_homoglyphed_text(homoglyphed_text, unhomoglyphed_text):
+# Function to calculate PPL using an LLM
+def process_homoglyphed_text_perplexity(homoglyphed_text, unhomoglyphed_text):
     # # Tokenize the texts
     # unhomoglyphed_text_tokens = tokenizer(unhomoglyphed_text, return_tensors="pt")[
     #     "input_ids"
```
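`calculate_perplexity` itself is untouched by this commit, so its body is not shown. For reference, a minimal sketch of how GPT-2 perplexity is typically computed with `transformers` (an illustration under the assumption that the commented-out `model` line is loaded, not the Space's actual implementation):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

def gpt2_perplexity(text: str) -> float:
    # Encode the text into token IDs
    input_ids = tokenizer(text, return_tensors="pt")["input_ids"]
    with torch.no_grad():
        # With labels equal to the inputs, the model returns the mean cross-entropy loss
        loss = model(input_ids, labels=input_ids).loss
    # Perplexity is the exponential of the mean negative log-likelihood
    return torch.exp(loss).item()
```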
```diff
@@ -55,17 +58,24 @@ def process_homoglyphed_text(homoglyphed_text, unhomoglyphed_text):
         )["perplexities"]
     )
 
+    num_tokens_unhomoglyphed = len(tokenizer(unhomoglyphed_text)["input_ids"])
+    num_tokens_homoglyphed = len(tokenizer(homoglyphed_text)["input_ids"])
+
     print(
         f"Unhomoglyphed text perplexity: {unhomoglyphed_text_perplexity}, homoglyphed text perplexity: {homoglyphed_text_perplexity}"
     )
 
-    # If the version without homoglyphs is more than 1.5 of the perplexity of the version with homoglyphs, trigger the alarm
-    difference_ratio = unhomoglyphed_text_perplexity / homoglyphed_text_perplexity
-    print(f"Difference ratio: {difference_ratio}")
-    alarm_triggered = difference_ratio > 1.5
+    difference_ratio_tokens = 1.0 / (
+        num_tokens_unhomoglyphed / num_tokens_homoglyphed
+    )  # Same as inverse of the ratio
+
+    # If the version without homoglyphs is more than 1.5 of the perplexity of the version with homoglyphs, trigger the alarm. Also trigger the alarm if the number of tokens is more than 1.2 times the number of tokens in the homoglyphed text
+    difference_ratio_ppl = unhomoglyphed_text_perplexity / homoglyphed_text_perplexity
+    print(f"Difference ratio: {difference_ratio_ppl}")
+    alarm_triggered = difference_ratio_ppl > 1.5 or difference_ratio_tokens > 1.3
 
     # Return the burstiness for both texts
-    return alarm_triggered, difference_ratio
+    return alarm_triggered, difference_ratio_tokens, difference_ratio_ppl
 
 
 def unhomoglyphize_text(homoglyphed_text):
```
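The new token-count check (the "tokenization ratio" of the commit message) exploits the fact that GPT-2's byte-level BPE vocabulary is built around ASCII text: a homoglyph such as a Cyrillic letter falls outside the common merges and is split into several byte-fallback tokens, inflating the count. A small sketch of the effect (the example strings are my own):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")

clean = "Hello world"
spoofed = "Нello wоrld"  # Cyrillic Н (U+041D) and о (U+043E) as homoglyphs

num_tokens_clean = len(tokenizer(clean)["input_ids"])
num_tokens_spoofed = len(tokenizer(spoofed)["input_ids"])

# The same quantity as the diff's difference_ratio_tokens:
# homoglyphed token count over unhomoglyphed token count
ratio = num_tokens_spoofed / num_tokens_clean
print(num_tokens_clean, num_tokens_spoofed, ratio)  # the alarm fires when ratio > 1.3
```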
```diff
@@ -83,20 +93,24 @@ def unhomoglyphize_text(homoglyphed_text):
         'alias': 'GREEK',
         'homoglyphs': [{'c': 'ᴛ', 'n': 'LATIN LETTER SMALL CAPITAL T'}]}]
     """
-    for confusable in confusables:
-        # Check if the character is in ASCII
-        if ord(confusable["character"]) < 128:
-            continue
-        homoglyph = confusable["homoglyphs"][0]
-        unhomoglyphed_text = unhomoglyphed_text.replace(
-            confusable["character"], homoglyph["c"]
-        )
-    # Finally, remove any diacritics (this is not done by the homoglyphs library)
-    unhomoglyphed_text = unidecode(unhomoglyphed_text)
-    return unhomoglyphed_text
-
-
-def process_user_text(user_text, markdown_comment = None):
+    try:
+        for confusable in confusables:
+            # Check if the character is in ASCII
+            if ord(confusable["character"]) < 128:
+                continue
+            homoglyph = confusable["homoglyphs"][0]
+            unhomoglyphed_text = unhomoglyphed_text.replace(
+                confusable["character"], homoglyph["c"]
+            )
+        # Finally, remove any diacritics (this is not done by the homoglyphs library)
+        unhomoglyphed_text = unidecode(unhomoglyphed_text)
+        return unhomoglyphed_text
+    except Exception as e:
+        logging.exception("Could not unhomoglyphize text")
+        return homoglyphed_text
+
+
+def process_user_text(user_text, markdown_comment=None):
     # The Markdown comment is not used, but it's here to keep the interface consistent
 
     # If the user text doesn't contain homoglyphs, don't trigger the alarm
```
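The `confusables` list consumed here has the shape produced by the `confusable_homoglyphs` package, whose `is_confusable(..., greedy=True, preferred_aliases=[...])` helper returns exactly the `character`/`alias`/`homoglyphs` dicts quoted in the docstring. A standalone sketch of the same normalization, assuming that package (the example string is my own):

```python
from confusable_homoglyphs import confusables
from unidecode import unidecode

text = "Нello wоrld"  # Cyrillic homoglyphs standing in for Latin H and o

# False when nothing is confusable, else a list of
# {'character', 'alias', 'homoglyphs': [{'c', 'n'}, ...]} dicts
found = confusables.is_confusable(text, greedy=True, preferred_aliases=["latin"])

normalized = text
for confusable in found or []:
    if ord(confusable["character"]) < 128:
        continue  # already plain ASCII, nothing to replace
    # Swap the suspicious character for its first suggested look-alike
    normalized = normalized.replace(
        confusable["character"], confusable["homoglyphs"][0]["c"]
    )
# Strip leftover diacritics, which the homoglyphs data doesn't cover
print(unidecode(normalized))
```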
```diff
@@ -105,18 +119,23 @@ def process_user_text(user_text, markdown_comment = None):
             user_text, preferred_aliases=["latin"]
         )
     ):
-        unhomoglyphed_text = user_text
-    else:
-        unhomoglyphed_text = unhomoglyphize_text(user_text)
+        is_dangerous = False
+        unhomoglyphed_text = user_text
+    else:
+        is_dangerous = True
+        unhomoglyphed_text = unhomoglyphize_text(user_text)
 
     print(f"Unhomoglyphed text: {unhomoglyphed_text}")
 
-    alarm_triggered, difference_ratio = process_homoglyphed_text(
-        homoglyphed_text=user_text, unhomoglyphed_text=unhomoglyphed_text
+    alarm_triggered, difference_ratio_tokens, difference_ratio_ppl = (
+        process_homoglyphed_text_perplexity(
+            homoglyphed_text=user_text, unhomoglyphed_text=unhomoglyphed_text
+        )
     )
     return (
-        alarm_triggered,
-        difference_ratio,
+        is_dangerous,
+        difference_ratio_tokens,
+        difference_ratio_ppl,
        "# 🚨 Alarm triggered" if alarm_triggered else "# ✅ All good",
     )
 
```
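With this change, `process_user_text` returns four values in the order the interface expects. A hypothetical call, mirroring what the Gradio app does under the hood (the input string is my own):

```python
is_dangerous, ratio_tokens, ratio_ppl, verdict = process_user_text("Нello wоrld")
print(is_dangerous)  # True: the text contains non-Latin homoglyphs
print(verdict)       # "# 🚨 Alarm triggered" or "# ✅ All good"
```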
```diff
@@ -157,8 +176,10 @@ demo = gr.Interface(
     outputs=[
         # A checkbox: is dangerous or not
         gr.Checkbox(label="Is dangerous"),
+        # The number of the difference ratio in tokens
+        gr.Number(label="Difference ratio (Tokens)"),
         # The number of the difference ratio
-        gr.Number(label="Difference ratio"),
+        gr.Number(label="Difference ratio (PPL)"),
         # Just an emoji: alarm triggered or not
         gr.Markdown(label="Alarm triggered", show_label=False),
     ],
```
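The four output components line up positionally with the tuple returned by `process_user_text`. For context, a minimal sketch of the surrounding `gr.Interface` call; the input side is not part of this diff, so the `Textbox` below is an assumption:

```python
import gradio as gr

demo = gr.Interface(
    fn=process_user_text,
    # Assumed input; markdown_comment defaults to None in process_user_text
    inputs=[gr.Textbox(label="User text")],
    outputs=[
        gr.Checkbox(label="Is dangerous"),
        gr.Number(label="Difference ratio (Tokens)"),
        gr.Number(label="Difference ratio (PPL)"),
        gr.Markdown(label="Alarm triggered", show_label=False),
    ],
)
demo.launch()
```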