ACMCMC committed · Commit 39e3879 · 1 Parent(s): 0123a49
App update to include tokenization ratio
app.py CHANGED
```diff
@@ -7,9 +7,12 @@ from unidecode import unidecode
 
 
 from transformers import AutoModelForCausalLM, AutoTokenizer
+import logging
 
+logging.basicConfig(level=logging.INFO)
 
-model = AutoModelForCausalLM.from_pretrained("gpt2")
+# Load the model and tokenizer
+# model = AutoModelForCausalLM.from_pretrained("gpt2")
 tokenizer = AutoTokenizer.from_pretrained("gpt2")
 
 
```
```diff
@@ -33,8 +36,8 @@ def calculate_perplexity(text_logits: torch.Tensor):
     return perplexity.item()
 
 
-# Function to calculate
-def process_homoglyphed_text(homoglyphed_text, unhomoglyphed_text):
+# Function to calculate PPL using an LLM
+def process_homoglyphed_text_perplexity(homoglyphed_text, unhomoglyphed_text):
     # # Tokenize the texts
     # unhomoglyphed_text_tokens = tokenizer(unhomoglyphed_text, return_tensors="pt")[
     #     "input_ids"
```
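`calculate_perplexity` itself is untouched by this commit, so its body is not shown. For reference, a minimal sketch of how GPT-2 perplexity is typically computed with `transformers` (an illustration under the assumption that the commented-out `model` line is loaded, not the Space's actual implementation):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

def gpt2_perplexity(text: str) -> float:
    # Encode the text into token IDs
    input_ids = tokenizer(text, return_tensors="pt")["input_ids"]
    with torch.no_grad():
        # With labels equal to the inputs, the model returns the mean cross-entropy loss
        loss = model(input_ids, labels=input_ids).loss
    # Perplexity is the exponential of the mean negative log-likelihood
    return torch.exp(loss).item()
```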
```diff
@@ -55,17 +58,24 @@ def process_homoglyphed_text(homoglyphed_text, unhomoglyphed_text):
         )["perplexities"]
     )
 
+    num_tokens_unhomoglyphed = len(tokenizer(unhomoglyphed_text)["input_ids"])
+    num_tokens_homoglyphed = len(tokenizer(homoglyphed_text)["input_ids"])
+
     print(
         f"Unhomoglyphed text perplexity: {unhomoglyphed_text_perplexity}, homoglyphed text perplexity: {homoglyphed_text_perplexity}"
     )
 
-    # If the version without homoglyphs is more than 1.5 of the perplexity of the version with homoglyphs, trigger the alarm
-    difference_ratio = unhomoglyphed_text_perplexity / homoglyphed_text_perplexity
-    print(f"Difference ratio: {difference_ratio}")
-    alarm_triggered = difference_ratio > 1.5
+    difference_ratio_tokens = 1.0 / (
+        num_tokens_unhomoglyphed / num_tokens_homoglyphed
+    )  # Same as inverse of the ratio
+
+    # If the version without homoglyphs is more than 1.5 of the perplexity of the version with homoglyphs, trigger the alarm. Also trigger the alarm if the number of tokens is more than 1.2 times the number of tokens in the homoglyphed text
+    difference_ratio_ppl = unhomoglyphed_text_perplexity / homoglyphed_text_perplexity
+    print(f"Difference ratio: {difference_ratio_ppl}")
+    alarm_triggered = difference_ratio_ppl > 1.5 or difference_ratio_tokens > 1.3
 
     # Return the burstiness for both texts
-    return alarm_triggered, difference_ratio
+    return alarm_triggered, difference_ratio_tokens, difference_ratio_ppl
 
 
 def unhomoglyphize_text(homoglyphed_text):
```
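The new token-count check (the "tokenization ratio" of the commit message) exploits the fact that GPT-2's byte-level BPE vocabulary is built around ASCII text: a homoglyph such as a Cyrillic letter falls outside the common merges and is split into several byte-fallback tokens, inflating the count. A small sketch of the effect (the example strings are my own):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")

clean = "Hello world"
spoofed = "Нello wоrld"  # Cyrillic Н (U+041D) and о (U+043E) as homoglyphs

num_tokens_clean = len(tokenizer(clean)["input_ids"])
num_tokens_spoofed = len(tokenizer(spoofed)["input_ids"])

# The same quantity as the diff's difference_ratio_tokens:
# homoglyphed token count over unhomoglyphed token count
ratio = num_tokens_spoofed / num_tokens_clean
print(num_tokens_clean, num_tokens_spoofed, ratio)  # the alarm fires when ratio > 1.3
```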
```diff
@@ -83,20 +93,24 @@ def unhomoglyphize_text(homoglyphed_text):
         'alias': 'GREEK',
         'homoglyphs': [{'c': 'ᴛ', 'n': 'LATIN LETTER SMALL CAPITAL T'}]}]
     """
-    for confusable in confusables:
-        # Check if the character is in ASCII
-        if ord(confusable["character"]) < 128:
-            continue
-        homoglyph = confusable["homoglyphs"][0]
-        unhomoglyphed_text = unhomoglyphed_text.replace(
-            confusable["character"], homoglyph["c"]
-        )
-    # Finally, remove any diacritics (this is not done by the homoglyphs library)
-    unhomoglyphed_text = unidecode(unhomoglyphed_text)
-    return unhomoglyphed_text
-
-
-def process_user_text(user_text, markdown_comment = None):
+    try:
+        for confusable in confusables:
+            # Check if the character is in ASCII
+            if ord(confusable["character"]) < 128:
+                continue
+            homoglyph = confusable["homoglyphs"][0]
+            unhomoglyphed_text = unhomoglyphed_text.replace(
+                confusable["character"], homoglyph["c"]
+            )
+        # Finally, remove any diacritics (this is not done by the homoglyphs library)
+        unhomoglyphed_text = unidecode(unhomoglyphed_text)
+        return unhomoglyphed_text
+    except Exception as e:
+        logging.exception("Could not unhomoglyphize text")
+        return homoglyphed_text
+
+
+def process_user_text(user_text, markdown_comment=None):
     # The Markdown comment is not used, but it's here to keep the interface consistent
 
     # If the user text doesn't contain homoglyphs, don't trigger the alarm
```
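The `confusables` list consumed here has the shape produced by the `confusable_homoglyphs` package, whose `is_confusable(..., greedy=True, preferred_aliases=[...])` helper returns exactly the `character`/`alias`/`homoglyphs` dicts quoted in the docstring. A standalone sketch of the same normalization, assuming that package (the example string is my own):

```python
from confusable_homoglyphs import confusables
from unidecode import unidecode

text = "Нello wоrld"  # Cyrillic homoglyphs standing in for Latin H and o

# False when nothing is confusable, else a list of
# {'character', 'alias', 'homoglyphs': [{'c', 'n'}, ...]} dicts
found = confusables.is_confusable(text, greedy=True, preferred_aliases=["latin"])

normalized = text
for confusable in found or []:
    if ord(confusable["character"]) < 128:
        continue  # already plain ASCII, nothing to replace
    # Swap the suspicious character for its first suggested look-alike
    normalized = normalized.replace(
        confusable["character"], confusable["homoglyphs"][0]["c"]
    )
# Strip leftover diacritics, which the homoglyphs data doesn't cover
print(unidecode(normalized))
```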
```diff
@@ -105,18 +119,23 @@ def process_user_text(user_text, markdown_comment = None):
             user_text, preferred_aliases=["latin"]
         )
     ):
-        unhomoglyphed_text = user_text
-    else:
-        unhomoglyphed_text = unhomoglyphize_text(user_text)
+        is_dangerous = False
+        unhomoglyphed_text = user_text
+    else:
+        is_dangerous = True
+        unhomoglyphed_text = unhomoglyphize_text(user_text)
 
     print(f"Unhomoglyphed text: {unhomoglyphed_text}")
 
-    alarm_triggered, difference_ratio = process_homoglyphed_text(
-        homoglyphed_text=user_text, unhomoglyphed_text=unhomoglyphed_text
+    alarm_triggered, difference_ratio_tokens, difference_ratio_ppl = (
+        process_homoglyphed_text_perplexity(
+            homoglyphed_text=user_text, unhomoglyphed_text=unhomoglyphed_text
+        )
     )
     return (
-        alarm_triggered,
-        difference_ratio,
+        is_dangerous,
+        difference_ratio_tokens,
+        difference_ratio_ppl,
        "# 🚨 Alarm triggered" if alarm_triggered else "# ✅ All good",
     )
 
```
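With this change, `process_user_text` returns four values in the order the interface expects. A hypothetical call, mirroring what the Gradio app does under the hood (the input string is my own):

```python
is_dangerous, ratio_tokens, ratio_ppl, verdict = process_user_text("Нello wоrld")
print(is_dangerous)  # True: the text contains non-Latin homoglyphs
print(verdict)       # "# 🚨 Alarm triggered" or "# ✅ All good"
```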
```diff
@@ -157,8 +176,10 @@ demo = gr.Interface(
     outputs=[
         # A checkbox: is dangerous or not
         gr.Checkbox(label="Is dangerous"),
+        # The number of the difference ratio in tokens
+        gr.Number(label="Difference ratio (Tokens)"),
         # The number of the difference ratio
-        gr.Number(label="Difference ratio"),
+        gr.Number(label="Difference ratio (PPL)"),
         # Just an emoji: alarm triggered or not
         gr.Markdown(label="Alarm triggered", show_label=False),
     ],
```
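The four output components line up positionally with the tuple returned by `process_user_text`. For context, a minimal sketch of the surrounding `gr.Interface` call; the input side is not part of this diff, so the `Textbox` below is an assumption:

```python
import gradio as gr

demo = gr.Interface(
    fn=process_user_text,
    # Assumed input; markdown_comment defaults to None in process_user_text
    inputs=[gr.Textbox(label="User text")],
    outputs=[
        gr.Checkbox(label="Is dangerous"),
        gr.Number(label="Difference ratio (Tokens)"),
        gr.Number(label="Difference ratio (PPL)"),
        gr.Markdown(label="Alarm triggered", show_label=False),
    ],
)
demo.launch()
```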