# File size: 479 Bytes
# Revision: 90bf6af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from spacy.tokens import Doc, Span
from spacy import Language
import re


@Language.component("year_matcher")
def year_matcher(doc: Doc):
    """Label the last unlabelled year-like token in ``doc`` as a ``YEAR`` entity.

    A token is a candidate when it carries no entity type yet
    (``ent_type == 0``) and its text contains four consecutive digits
    anywhere (the pattern is not anchored, so longer digit runs or mixed
    tokens also qualify). Of all candidates, only the final one in document
    order is turned into a single-token ``YEAR`` span; the entities already
    present on the document are kept.

    Args:
        doc: The document to annotate; its entities are re-set in place.

    Returns:
        The same ``doc`` object after ``doc.set_ents`` has been applied.
    """
    last_year_index = None
    for tok in doc:
        # Tokens that already carry an entity type are never relabelled.
        if tok.ent_type != 0:
            continue
        if re.search(r'\d{4}', tok.text) is not None:
            # Later candidates overwrite earlier ones: only the last is kept.
            last_year_index = tok.i

    updated_ents = [*doc.ents]
    if last_year_index is not None:
        updated_ents.append(
            Span(doc, last_year_index, last_year_index + 1, 'YEAR')
        )

    # Always re-set the entities, even when no year was found.
    doc.set_ents(updated_ents)
    return doc