Spaces:
Running
Running
File size: 2,111 Bytes
0601dad cd45d79 dfd722b b1deccc 74f30e9 0601dad 48b9215 74f30e9 48b9215 0601dad 74f30e9 b97cc67 74f30e9 b97cc67 74f30e9 b1deccc 0c18028 dfd722b b1deccc dfd722b b1deccc dfd722b b1deccc dfd722b 6f102ed b1deccc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
"""
utils.py - Utility functions for the project.
"""
import logging
import re
def postprocess(text: str) -> str:
    """
    postprocess - remove common values in scraped dataset

    Every known name is replaced in two forms: its exact-case spelling
    (matched anywhere in the text) and its all-lowercase spelling (matched
    as a whole word only).

    Args:
        text (str): the text to postprocess

    Returns:
        str: the text with the known names swapped for generic stand-ins
    """
    replacements = {
        "ENA": "COMPANY",
        "Enron": "COMPANY",
        "Enron Corporation": "COMPANY",
        "Sony Pictures Entertainment": "COMPANY",
        "Columbia Pictures": "COMPANY",
        "Sony": "COMPANY",
        "Columbia": "COMPANY",
        "Hillary": "Jane",
        "Clinton": "Smith",
        "Amy": "Jane",
        "Sara": "Jane",
        "Harambe": "Jane",
        "Pascal": "PERSON",
    }
    # Longest names first: otherwise "Enron" is rewritten before
    # "Enron Corporation" gets a chance to match, leaving "COMPANY Corporation".
    for name in sorted(replacements, key=len, reverse=True):
        stand_in = replacements[name]
        # Exact-case hits are proper nouns; substring matching is kept so that
        # compounds built on a name are still scrubbed.
        text = text.replace(name, stand_in)
        # Lowercase hits are only trusted as whole words; a plain substring
        # replace would corrupt ordinary words ("arena", "dreamy", ...).
        text = re.sub(rf"\b{re.escape(name.lower())}\b", stand_in, text)
    return text
def clear(text, verbose=False, **kwargs):
    """
    clear - reset a text field, for use with buttons

    Args:
        text: the current text; only used for the optional log message
        verbose (bool, optional): log the text being cleared. Defaults to False.
        **kwargs: accepted and ignored

    Returns:
        str: always an empty string
    """
    cleared = ""
    if not verbose:
        return cleared
    logging.info(f"Clearing text: {text}")
    return cleared
def make_email_link(
    subject: str = "Email subject - This was generated by Postbot",
    link_text: str = "click to open in your email client",
    body: str = None,
    tag_placeholder: str = "PLACEHOLDER",
):
    """
    email_link - generate an email link

    The subject and body are percent-encoded before being placed in the
    mailto URL, so characters such as quotes, "&", "#" and "%" cannot break
    the surrounding href attribute or the URL's query string.

    Args:
        subject (str, optional): the subject of the email. Defaults to "Email subject - This was generated by Postbot".
        link_text (str, optional): the text of the link, inserted verbatim. Defaults to "click to open in your email client".
        body (str, optional): the body of the email. Defaults to None.
        tag_placeholder (str, optional): the placeholder for the tag. Defaults to "PLACEHOLDER".

    Returns:
        str: the email link, in the form of an html link
    """
    # local import: keeps this function self-contained without touching the
    # module's import block
    from urllib.parse import quote

    if body is None:
        body = "hmm - no body. replace me"
    # replace anything that looks like an HTML tag (<...>) with the placeholder
    body = re.sub(r"<[^>]*>", tag_placeholder, body)
    # replace all newline chars with a whitespace
    body = body.replace("\n", " ")
    # safe="" so that "/" is encoded too; nothing in a header value needs to
    # stay literal
    subject_param = quote(subject, safe="")
    body_param = quote(body, safe="")
    return f'<a href="mailto:%20?subject={subject_param}&body={body_param}">{link_text}</a>'
|