from spacy.tokens import Doc, Span | |
from spacy import Language | |
import re | |
def year_matcher(doc: Doc):
    """Label the last unlabelled four-digit token of ``doc`` as a ``YEAR`` entity.

    Every token whose ``ent_type`` is 0 (i.e. it carries no entity label yet)
    and whose text contains four consecutive digits is a candidate. Only the
    final candidate in document order is kept; it is appended to the entities
    already present on the document and the combined list is written back
    with ``doc.set_ents``.

    Args:
        doc: The document to scan. It is modified in place.

    Returns:
        The same ``doc`` object, with at most one additional ``YEAR`` span.
    """
    # Snapshot the existing entities before scanning so they are preserved.
    spans = [*doc.ents]

    latest_year = None
    for tok in doc:
        # Tokens that already belong to an entity are never relabelled.
        if tok.ent_type != 0:
            continue
        # Unanchored search: any run of four digits inside the token counts.
        if not re.search(r'\d{4}', tok.text):
            continue
        # Later matches overwrite earlier ones, so only the last one survives.
        latest_year = Span(doc, tok.i, tok.i + 1, 'YEAR')

    if latest_year is not None:
        spans.append(latest_year)

    doc.set_ents(spans)
    return doc