File size: 902 Bytes
1694b5c
c51d7db
133b5ab
1694b5c
c51d7db
 
 
133b5ab
 
 
 
 
 
1694b5c
133b5ab
 
 
c51d7db
 
 
 
 
 
 
133b5ab
1694b5c
133b5ab
 
 
 
1694b5c
133b5ab
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import gradio as gr
import spacy
import re

# Load the spaCy "en_core_web_sm" pipeline once at import time; anonymize()
# reuses this shared instance for named-entity recognition on every call.
nlp = spacy.load("en_core_web_sm")

# Regex-based PII detectors. Each entry maps a PII kind to a
# (pattern, placeholder) pair; every match of the pattern is replaced by the
# placeholder. Entries are applied in insertion order.
pii_config = {
    # 10-digit numbers grouped 3-3-4, optionally separated by '-', '.' or
    # whitespace.
    'phone_number': (r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b', '[PHONE]'),
    # E-mail addresses (the key name is kept as-is for compatibility).
    # The TLD class is [A-Za-z]: a '|' inside a character class is a literal
    # pipe, not alternation, so it must not appear there.
    'text_address': (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[EMAIL]'),
    # Dash-separated 3-2-4 digit groups.
    'social_security_number': (r'\b\d{3}-\d{2}-\d{4}\b', '[SSN]'),
    # Scheme and "www." are both optional, so any dotted token ending in two
    # or more lowercase letters matches (e.g. "file.txt"), not only URLs.
    'website': (r'\b(?:http://|https://)?(?:www\.)?[a-zA-Z0-9./]+\.[a-z]{2,}\b', '[WEBSITE]'),
}

def anonymize(text):
    """Replace PII in ``text`` with bracketed placeholders.

    Two passes are applied in order:

    1. Every pattern in ``pii_config`` is substituted with its placeholder
       (e.g. ``[PHONE]``), in the dict's insertion order.
    2. spaCy NER runs on the result; the text of each PERSON, ORG, GPE or
       LOC entity is replaced with ``[<LABEL>]`` wherever it occurs as a
       whole word.

    Args:
        text: The input string. ``None`` or an empty string yields ``""``.

    Returns:
        The anonymized string.
    """
    if not text:
        return ""

    for pattern, placeholder in pii_config.values():
        text = re.sub(pattern, placeholder, text)

    # Placeholders inserted above must survive the NER pass untouched, so
    # record where they sit in the text that NER is about to see.
    keep = "|".join(
        re.escape(placeholder)
        for _, placeholder in pii_config.values()
        if placeholder
    )
    protected = [m.span() for m in re.finditer(keep, text)] if keep else []

    # Map each entity's surface text to the label of its first occurrence.
    labels = {}
    for ent in nlp(text).ents:
        if ent.label_ not in ('PERSON', 'ORG', 'GPE', 'LOC'):
            continue
        # Skip entities that are just a piece of one of our own placeholders.
        if any(lo <= ent.start_char and ent.end_char <= hi
               for lo, hi in protected):
            continue
        labels.setdefault(ent.text, ent.label_)

    if not labels:
        return text

    # One regex pass instead of chained str.replace calls:
    #   * longest names first, so "Paris Hilton" wins over "Paris";
    #   * lookarounds restrict matches to whole words, so an entity "Al"
    #     does not corrupt "Also";
    #   * placeholders are matched first and emitted unchanged, so they and
    #     the labels inserted here are never rewritten.
    names = "|".join(
        re.escape(name) for name in sorted(labels, key=len, reverse=True)
    )
    entity_part = rf"(?<!\w)(?P<ent>{names})(?!\w)"
    combined = rf"(?P<keep>{keep})|{entity_part}" if keep else entity_part

    def _swap(match):
        name = match.group("ent")
        if name is None:
            return match.group(0)
        return f"[{labels[name]}]"

    return re.sub(combined, _swap, text)

# Minimal Gradio UI: a single text input is passed to anonymize() and the
# returned string is shown in a single text output.
demo = gr.Interface(fn=anonymize, inputs=["text"], outputs=["text"])

# Start the Gradio app.
demo.launch()