Straive-Kripa committed on
Commit
a1d685e
·
verified ·
1 Parent(s): ec54400

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -24,10 +24,10 @@ def anonymize_text_including_proper_nouns_and_addresses(text):
24
  lookup_table = {}
25
  counters = Counter()
26
  pii_config = {
27
- 'phone_number': (r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b', '__PHONE__'),
28
- 'text_address': (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '__EMAIL]__'),
29
- 'social_security_number': (r'\b\d{3}-\d{2}-\d{4}\b', '__SSN]__'),
30
- 'website': (r'\b(?:http://|https://)?(?:www\.)?[a-zA-Z0-9./]+\.[a-z]{2,}\b', '__WEBSITE]__')
31
  }
32
 
33
  # First, handle regex-based PII detection and anonymization
@@ -55,7 +55,7 @@ def anonymize_text_including_proper_nouns_and_addresses(text):
55
  if label in ['ORG', 'PERSON', 'GPE', 'LOC', 'FAC']: # Optionally include'PROPN' for more restriction
56
  original = ent_or_token.text
57
  counters[label] += 1
58
- placeholder_with_counter = f"[__{label}__{counters[label]}"
59
  if original in anonymized_text: # Check if the text still contains the original
60
  anonymized_text = anonymized_text.replace(original, placeholder_with_counter, 1)
61
  lookup_table[placeholder_with_counter] = original
 
24
  lookup_table = {}
25
  counters = Counter()
26
  pii_config = {
27
+ 'phone_number': (r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b', 'PHONE__'),
28
+ 'text_address': (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', 'EMAIL__'),
29
+ 'social_security_number': (r'\b\d{3}-\d{2}-\d{4}\b', 'SSN__'),
30
+ 'website': (r'\b(?:http://|https://)?(?:www\.)?[a-zA-Z0-9./]+\.[a-z]{2,}\b', 'WEBSITE__')
31
  }
32
 
33
  # First, handle regex-based PII detection and anonymization
 
55
  if label in ['ORG', 'PERSON', 'GPE', 'LOC', 'FAC']: # Optionally include'PROPN' for more restriction
56
  original = ent_or_token.text
57
  counters[label] += 1
58
+ placeholder_with_counter = f"{label}__{counters[label]}"
59
  if original in anonymized_text: # Check if the text still contains the original
60
  anonymized_text = anonymized_text.replace(original, placeholder_with_counter, 1)
61
  lookup_table[placeholder_with_counter] = original