Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -24,10 +24,10 @@ def anonymize_text_including_proper_nouns_and_addresses(text):
|
|
24 |
lookup_table = {}
|
25 |
counters = Counter()
|
26 |
pii_config = {
|
27 |
-
'phone_number': (r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b', '
|
28 |
-
'text_address': (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '
|
29 |
-
'social_security_number': (r'\b\d{3}-\d{2}-\d{4}\b', '
|
30 |
-
'website': (r'\b(?:http://|https://)?(?:www\.)?[a-zA-Z0-9./]+\.[a-z]{2,}\b', '
|
31 |
}
|
32 |
|
33 |
# First, handle regex-based PII detection and anonymization
|
@@ -55,7 +55,7 @@ def anonymize_text_including_proper_nouns_and_addresses(text):
|
|
55 |
if label in ['ORG', 'PERSON', 'GPE', 'LOC', 'FAC']: # Optionally include'PROPN' for more restriction
|
56 |
original = ent_or_token.text
|
57 |
counters[label] += 1
|
58 |
-
placeholder_with_counter = f"
|
59 |
if original in anonymized_text: # Check if the text still contains the original
|
60 |
anonymized_text = anonymized_text.replace(original, placeholder_with_counter, 1)
|
61 |
lookup_table[placeholder_with_counter] = original
|
|
|
24 |
lookup_table = {}
|
25 |
counters = Counter()
|
26 |
pii_config = {
|
27 |
+
'phone_number': (r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b', 'PHONE__'),
|
28 |
+
'text_address': (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', 'EMAIL__'),
|
29 |
+
'social_security_number': (r'\b\d{3}-\d{2}-\d{4}\b', 'SSN__'),
|
30 |
+
'website': (r'\b(?:http://|https://)?(?:www\.)?[a-zA-Z0-9./]+\.[a-z]{2,}\b', 'WEBSITE__')
|
31 |
}
|
32 |
|
33 |
# First, handle regex-based PII detection and anonymization
|
|
|
55 |
if label in ['ORG', 'PERSON', 'GPE', 'LOC', 'FAC']: # Optionally include'PROPN' for more restriction
|
56 |
original = ent_or_token.text
|
57 |
counters[label] += 1
|
58 |
+
placeholder_with_counter = f"{label}__{counters[label]}"
|
59 |
if original in anonymized_text: # Check if the text still contains the original
|
60 |
anonymized_text = anonymized_text.replace(original, placeholder_with_counter, 1)
|
61 |
lookup_table[placeholder_with_counter] = original
|