ACMCMC committed
Commit 39e3879 · 1 Parent(s): 0123a49

App update to include tokenization ratio

Files changed (1)
  1. app.py +51 -30
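The new check in this commit compares GPT-2 token counts: the submitted text is tokenized as-is and again after homoglyphs are mapped back to ASCII, and the ratio between the two counts becomes an extra alarm signal. A minimal sketch of that idea, separate from the app code (the example strings are hypothetical; only the tokenizer call, the ratio, and the 1.3 threshold are taken from the diff below):

# Sketch of the token-count signal added in this commit; example strings are hypothetical.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")

homoglyphed_text = "Tоkеns"    # hypothetical input with Cyrillic look-alike characters
unhomoglyphed_text = "Tokens"  # the same text after mapping homoglyphs back to ASCII

num_tokens_homoglyphed = len(tokenizer(homoglyphed_text)["input_ids"])
num_tokens_unhomoglyphed = len(tokenizer(unhomoglyphed_text)["input_ids"])

# Non-ASCII look-alikes tend to split into several byte-level tokens, so this ratio
# is usually above 1 for homoglyphed text. It equals the inverse form used in the diff:
# 1.0 / (num_tokens_unhomoglyphed / num_tokens_homoglyphed).
difference_ratio_tokens = num_tokens_homoglyphed / num_tokens_unhomoglyphed
print(difference_ratio_tokens, difference_ratio_tokens > 1.3)
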
app.py CHANGED
@@ -7,9 +7,12 @@ from unidecode import unidecode
 
 
 from transformers import AutoModelForCausalLM, AutoTokenizer
+import logging
 
+logging.basicConfig(level=logging.INFO)
 
+# Load the model and tokenizer
-model = AutoModelForCausalLM.from_pretrained("gpt2")
+# model = AutoModelForCausalLM.from_pretrained("gpt2")
 tokenizer = AutoTokenizer.from_pretrained("gpt2")
 
 
@@ -33,8 +36,8 @@ def calculate_perplexity(text_logits: torch.Tensor):
     return perplexity.item()
 
 
-# Function to calculate burstiness using an LLM
-def process_homoglyphed_text(homoglyphed_text, unhomoglyphed_text):
+# Function to calculate PPL using an LLM
+def process_homoglyphed_text_perplexity(homoglyphed_text, unhomoglyphed_text):
     # # Tokenize the texts
     # unhomoglyphed_text_tokens = tokenizer(unhomoglyphed_text, return_tensors="pt")[
     #     "input_ids"
@@ -55,17 +58,24 @@ def process_homoglyphed_text(homoglyphed_text, unhomoglyphed_text):
         )["perplexities"]
     )
 
+    num_tokens_unhomoglyphed = len(tokenizer(unhomoglyphed_text)["input_ids"])
+    num_tokens_homoglyphed = len(tokenizer(homoglyphed_text)["input_ids"])
+
     print(
         f"Unhomoglyphed text perplexity: {unhomoglyphed_text_perplexity}, homoglyphed text perplexity: {homoglyphed_text_perplexity}"
     )
 
-    # If the version without homoglyphs is more than 1.5 of the perplexity of the version with homoglyphs, trigger the alarm
-    difference_ratio = unhomoglyphed_text_perplexity / homoglyphed_text_perplexity
-    print(f"Difference ratio: {difference_ratio}")
-    alarm_triggered = difference_ratio > 1.5
+    difference_ratio_tokens = 1.0 / (
+        num_tokens_unhomoglyphed / num_tokens_homoglyphed
+    )  # Same as inverse of the ratio
+
+    # If the version without homoglyphs is more than 1.5 of the perplexity of the version with homoglyphs, trigger the alarm. Also trigger the alarm if the number of tokens is more than 1.2 times the number of tokens in the homoglyphed text
+    difference_ratio_ppl = unhomoglyphed_text_perplexity / homoglyphed_text_perplexity
+    print(f"Difference ratio: {difference_ratio_ppl}")
+    alarm_triggered = difference_ratio_ppl > 1.5 or difference_ratio_tokens > 1.3
 
     # Return the burstiness for both texts
-    return alarm_triggered, difference_ratio
+    return alarm_triggered, difference_ratio_tokens, difference_ratio_ppl
 
 
 def unhomoglyphize_text(homoglyphed_text):
@@ -83,20 +93,24 @@ def unhomoglyphize_text(homoglyphed_text):
       'alias': 'GREEK',
      'homoglyphs': [{'c': 'ᴛ', 'n': 'LATIN LETTER SMALL CAPITAL T'}]}]
    """
-    for confusable in confusables:
-        # Check if the character is in ASCII
-        if ord(confusable["character"]) < 128:
-            continue
-        homoglyph = confusable["homoglyphs"][0]
-        unhomoglyphed_text = unhomoglyphed_text.replace(
-            confusable["character"], homoglyph["c"]
-        )
-    # Finally, remove any diacritics (this is not done by the homoglyphs library)
-    unhomoglyphed_text = unidecode(unhomoglyphed_text)
-    return unhomoglyphed_text
-
-
-def process_user_text(user_text, markdown_comment = None):
+    try:
+        for confusable in confusables:
+            # Check if the character is in ASCII
+            if ord(confusable["character"]) < 128:
+                continue
+            homoglyph = confusable["homoglyphs"][0]
+            unhomoglyphed_text = unhomoglyphed_text.replace(
+                confusable["character"], homoglyph["c"]
+            )
+        # Finally, remove any diacritics (this is not done by the homoglyphs library)
+        unhomoglyphed_text = unidecode(unhomoglyphed_text)
+        return unhomoglyphed_text
+    except Exception as e:
+        logging.exception("Could not unhomoglyphize text")
+        return homoglyphed_text
+
+
+def process_user_text(user_text, markdown_comment=None):
     # The Markdown comment is not used, but it's here to keep the interface consistent
 
     # If the user text doesn't contain homoglyphs, don't trigger the alarm
@@ -105,18 +119,23 @@ def process_user_text(user_text, markdown_comment = None):
             user_text, preferred_aliases=["latin"]
         )
     ):
-        return False, 0.0, "# ✅ All good"
-
-    unhomoglyphed_text = unhomoglyphize_text(user_text)
+        is_dangerous = False
+        unhomoglyphed_text = user_text
+    else:
+        is_dangerous = True
+        unhomoglyphed_text = unhomoglyphize_text(user_text)
 
     print(f"Unhomoglyphed text: {unhomoglyphed_text}")
 
-    alarm_triggered, difference_ratio = process_homoglyphed_text(
-        homoglyphed_text=user_text, unhomoglyphed_text=unhomoglyphed_text
+    alarm_triggered, difference_ratio_tokens, difference_ratio_ppl = (
+        process_homoglyphed_text_perplexity(
+            homoglyphed_text=user_text, unhomoglyphed_text=unhomoglyphed_text
+        )
     )
     return (
-        True,
-        difference_ratio,
+        is_dangerous,
+        difference_ratio_tokens,
+        difference_ratio_ppl,
         "# 🚨 Alarm triggered" if alarm_triggered else "# ✅ All good",
     )
 
@@ -157,8 +176,10 @@ demo = gr.Interface(
     outputs=[
         # A checkbox: is dangerous or not
         gr.Checkbox(label="Is dangerous"),
+        # The number of the difference ratio in tokens
+        gr.Number(label="Difference ratio (Tokens)"),
         # The number of the difference ratio
-        gr.Number(label="Difference ratio"),
+        gr.Number(label="Difference ratio (PPL)"),
         # Just an emoji: alarm triggered or not
         gr.Markdown(label="Alarm triggered", show_label=False),
     ],
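
With this change, process_user_text returns four values (is_dangerous, difference_ratio_tokens, difference_ratio_ppl, and a Markdown string), matching the four outputs above (Checkbox, two Numbers, Markdown). The alarm itself now combines both ratios; a condensed, self-contained sketch of the post-commit rule, with placeholder numbers (only the 1.5 and 1.3 thresholds come from the diff):

# Placeholder values; only the thresholds mirror the committed code.
unhomoglyphed_text_perplexity = 40.0  # hypothetical perplexity of the cleaned text
homoglyphed_text_perplexity = 25.0    # hypothetical perplexity of the submitted text
num_tokens_unhomoglyphed = 10         # hypothetical GPT-2 token counts
num_tokens_homoglyphed = 14

difference_ratio_ppl = unhomoglyphed_text_perplexity / homoglyphed_text_perplexity   # 1.6
difference_ratio_tokens = 1.0 / (num_tokens_unhomoglyphed / num_tokens_homoglyphed)  # 1.4
alarm_triggered = difference_ratio_ppl > 1.5 or difference_ratio_tokens > 1.3
print(alarm_triggered)  # True: either ratio alone would cross its threshold here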