import re | |
import string | |
def remove_articles(text): | |
regex = re.compile(r"\b(a|an|the)\b", re.UNICODE) | |
return re.sub(regex, " ", text) | |
def white_space_fix(text): | |
return " ".join(text.split()) | |
def remove_punc(text): | |
exclude = set(string.punctuation) | |
return "".join(ch for ch in text if ch not in exclude) | |
def lower(text): | |
return text.lower() | |