# Spaces: Build error
import gradio as gr | |
import spacy | |
import re | |
# Load the small English spaCy pipeline once, at import time, so that every
# call to anonymize() reuses the same module-level model object.
nlp = spacy.load("en_core_web_sm")
# Regex-detectable PII: maps a category name to a (pattern, placeholder) pair.
# anonymize() applies the entries in insertion order, so e-mail addresses are
# masked before the looser website pattern gets a chance to match their
# domain part.
pii_config = {
    # Ten digits in 3-3-4 groups, optionally separated by '-', '.' or
    # whitespace.
    'phone_number': (
        r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b',
        '[PHONE]',
    ),
    # E-mail addresses. The top-level-domain class is [A-Za-z]; inside a
    # character class '|' is a literal pipe, not alternation, so it must not
    # appear there or strings such as "a@b.c|m" are matched as addresses.
    'text_address': (
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
        '[EMAIL]',
    ),
    # US social security numbers written as 3-2-4 digit groups with dashes.
    'social_security_number': (
        r'\b\d{3}-\d{2}-\d{4}\b',
        '[SSN]',
    ),
    # Host names with an optional http(s):// scheme and optional "www."
    # prefix, ending in a lower-case suffix of at least two letters.
    'website': (
        r'\b(?:http://|https://)?(?:www\.)?[a-zA-Z0-9./]+\.[a-z]{2,}\b',
        '[WEBSITE]',
    ),
}
def anonymize(text):
    """Return *text* with detected PII replaced by bracketed placeholders.

    Two passes are applied:

    1. Every ``(pattern, placeholder)`` pair in ``pii_config`` is substituted
       with ``re.sub``, in the dictionary's insertion order.
    2. The result is run through the spaCy pipeline ``nlp``; each entity
       labelled PERSON, ORG, GPE or LOC is replaced by ``[<LABEL>]``.

    Args:
        text: The input string to anonymize.

    Returns:
        The anonymized string.
    """
    # Pass 1: structured PII that a regular expression can recognise.
    for pattern, placeholder in pii_config.values():
        text = re.sub(pattern, placeholder, text)

    # Pass 2: named entities found by spaCy in the already-masked text.
    masked_labels = ('PERSON', 'ORG', 'GPE', 'LOC')
    label_by_text = {}
    for ent in nlp(text).ents:
        if ent.label_ in masked_labels:
            # The first label seen for a given surface form wins.
            label_by_text.setdefault(ent.text, ent.label_)

    if not label_by_text:
        # Nothing to mask; also avoids building an empty alternation below.
        return text

    # Replace all entity mentions in ONE pass instead of chained
    # str.replace() calls:
    #   * longest surface forms come first in the alternation, so "New York"
    #     is consumed before a shorter entity such as "York" can split it;
    #   * the lookarounds require a match not to be glued to a word
    #     character, so an entity "Al" does not rewrite the word "Also";
    #   * placeholders inserted by this pass are never re-scanned.
    alternation = '|'.join(
        re.escape(surface)
        for surface in sorted(label_by_text, key=len, reverse=True)
    )
    entity_re = re.compile(rf'(?<!\w)(?:{alternation})(?!\w)')
    return entity_re.sub(
        lambda match: f'[{label_by_text[match.group(0)]}]',
        text,
    )
# Expose anonymize() through a Gradio interface with a single text input and
# a single text output, then start serving it.
demo = gr.Interface(fn=anonymize, inputs=["text"], outputs=["text"])

demo.launch()