Spaces:
Runtime error
Runtime error
Commit
·
c407b1b
1
Parent(s):
0d10901
Update app.py
Browse files
app.py
CHANGED
@@ -1,62 +1,155 @@
|
|
1 |
#!/usr/bin/env python3
|
2 |
# -*- coding: utf-8 -*-
|
|
|
|
|
3 |
import streamlit as st
|
4 |
from spacy import displacy
|
5 |
|
6 |
-
import span_marker
|
7 |
-
import spacy
|
8 |
-
import spacy_udpipe
|
9 |
-
|
10 |
-
|
11 |
spacy_udpipe.download("ar")
|
12 |
nlp = spacy_udpipe.load("ar")
|
13 |
nlp.add_pipe("span_marker",
|
14 |
config={"model": "iahlt/span-marker-xlm-roberta-base-ar"})
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
def get_html(html: str):
|
17 |
"""Convert HTML so it can be rendered."""
|
18 |
-
WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
|
19 |
# Newlines seem to mess with the rendering
|
20 |
html = html.replace("\n", " ")
|
21 |
-
|
|
|
|
|
|
|
22 |
|
23 |
def page_init():
|
24 |
st.header("Named Entity Recognition Demo")
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
if __name__ == '__main__':
|
28 |
page_init()
|
29 |
-
|
30 |
-
displacy_options = {}
|
31 |
|
32 |
sample_text = """
|
33 |
-
|
34 |
""".strip()
|
35 |
|
36 |
text = st.text_area("Text", sample_text, height=200, max_chars=1000)
|
37 |
btn = st.button("Annotate")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
if text and btn:
|
39 |
doc = nlp(text)
|
40 |
html = displacy.render(
|
41 |
doc,
|
42 |
style="ent",
|
43 |
-
options=
|
44 |
manual=False,
|
45 |
)
|
46 |
-
style = "<style>mark.entity { display: inline-block }</style>"
|
47 |
-
st.write(f"{style}{get_html(html)}", unsafe_allow_html=True)
|
48 |
-
else:
|
49 |
-
st.write("")
|
50 |
|
51 |
-
|
52 |
-
|
53 |
-
<style>
|
54 |
-
textarea {
|
55 |
-
direction: rtl;
|
56 |
-
}
|
57 |
-
</style>
|
58 |
-
""",
|
59 |
-
unsafe_allow_html=True,
|
60 |
-
)
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
|
|
|
|
|
1 |
#!/usr/bin/env python3
|
2 |
# -*- coding: utf-8 -*-
|
3 |
+
import requests
|
4 |
+
import spacy_udpipe
|
5 |
import streamlit as st
|
6 |
from spacy import displacy
|
7 |
|
|
|
|
|
|
|
|
|
|
|
8 |
@st.cache_resource
def _load_pipeline():
    """Build the Arabic spaCy pipeline once per server process.

    Streamlit re-executes this script on every widget interaction, so
    constructing the pipeline at module level would repeat the UDPipe
    download check and the SpanMarker model load on each rerun.
    ``st.cache_resource`` keeps a single shared instance instead.

    Returns:
        The UDPipe-backed Arabic pipeline with the ``span_marker``
        component appended.
    """
    spacy_udpipe.download("ar")
    pipeline = spacy_udpipe.load("ar")
    pipeline.add_pipe("span_marker",
                      config={"model": "iahlt/span-marker-xlm-roberta-base-ar"})
    return pipeline


# Module-level name kept so the rest of the script can keep calling nlp(text).
nlp = _load_pipeline()
|
12 |
|
13 |
+
|
14 |
+
# Entity label -> highlight colour handed to displaCy through the "colors"
# option. Alternative spellings of the same label share one colour.
DEFAULT_LABEL_COLORS = {
    # organisations
    "ORG": "#17A2B8",
    "ORGS": "#17A2B8",
    "ORGANIZATION": "#17A2B8",
    # products
    "PRODUCT": "#FA9F42",
    "COMMERCIAL_ITEM": "#FA9F42",
    "DUC": "#FA9F42",
    # places
    "GPE": "#FFC107",
    "LOC": "#28A745",
    "LOCATION": "#28A745",
    # people and titles
    "PERSON": "#0069B4",
    "PER": "#0069B4",
    "PERS": "#0069B4",
    "TTL": "#FA8B1B",
    "TITLE": "#FA8B1B",
    # groups, facilities, events
    "NORP": "#c887fb",
    "FAC": "#721817",
    "EVENT": "#2B4162",
    "EVE": "#2B4162",
    # law, language, works of art
    "LAW": "#C880B7",
    "LANGUAGE": "#437F97",
    "ANG": "#437F97",
    "WORK_OF_ART": "#0B6E4F",
    "WOA": "#0B6E4F",
    # temporal expressions
    "DATE": "#849324",
    "TIME": "#849324",
    "TIMEX": "#849324",
    # numeric expressions
    "MONEY": "#6C757D",
    "QUANTITY": "#FD151B",
    "ORDINAL": "#FD151B",
    "CARDINAL": "#FD151B",
    "PERCENT": "#F1D302",
    # catch-alls
    "MISC": "#e7d2e4",
    "OTHER": "#ff8197",
}
|
49 |
+
|
50 |
def get_html(html: str) -> str:
    """Wrap rendered markup in a bordered right-to-left box.

    Args:
        html: Markup to embed (here, the output of ``displacy.render``).

    Returns:
        A ``<style>`` rule for ``mark.entity`` followed by ``html`` (with
        newlines replaced by spaces) inside a styled ``<div>``.
    """
    # `direction: rtl` has to live inside the style attribute. Previously a
    # stray quote closed the attribute before it, so the rule was dropped and
    # left as junk tokens in the tag.
    WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; direction: rtl">{}</div>"""
    # Newlines seem to mess with the rendering
    html = html.replace("\n", " ")
    style = "<style>mark.entity { display: inline-block }</style>"
    html = WRAPPER.format(html)
    return f"{style}{html}"
|
58 |
+
|
59 |
|
60 |
def page_init():
    """Render the page heading of the demo."""
    title = "Named Entity Recognition Demo"
    st.header(title)
|
62 |
+
|
63 |
+
|
64 |
+
@st.cache_data
def get_html_from_server(text):
    """Annotate ``text`` with the remote IAHLT NER service and return HTML.

    The result is cached by Streamlit per distinct ``text``.

    Args:
        text: Raw user input to annotate.

    Returns:
        The displaCy entity markup for ``text``, wrapped by ``get_html``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            service answers with an HTTP error status.
    """
    import re

    endpoint = "https://ne-api.iahlt.org/api/arabic/ner/"

    def get_entities(text):
        """Fetch entities and convert them to displaCy's manual format."""
        text = text.strip()
        if text == "":
            # Same dict shape as the normal path, so the renderer always
            # receives one document dict.
            return {"text": "", "ents": []}
        # Let requests build the query string: characters such as & # + %
        # in the user's text are percent-encoded instead of corrupting the URL.
        response = requests.get(endpoint, params={"text": text}, timeout=30)
        response.raise_for_status()
        # NOTE(review): assumes the payload also carries the "text" key that
        # displaCy's manual mode needs — confirm against the service.
        answer = response.json()
        # Drop "O" (outside) spans and rename entity_group -> label.
        answer["ents"] = [
            {
                "start": ent["start"],
                "end": ent["end"],
                "label": ent["entity_group"],
            }
            for ent in answer["ents"]
            if ent["entity_group"] != "O"
        ]
        return answer

    def render_entities(text):
        """Render the fetched entities and force right-to-left direction."""
        entities = get_entities(text)
        html = displacy.render(entities,
                               style="ent",
                               options={"direction": "rtl", "colors": DEFAULT_LABEL_COLORS},
                               manual=True)
        # Only flip the CSS direction declaration. A blanket
        # replace("ltr", "rtl") would also rewrite any "ltr" inside the
        # user's own text.
        return re.sub(r"direction:\s*ltr", "direction: rtl", html)

    return get_html(render_entities(text))
|
95 |
+
|
96 |
|
97 |
if __name__ == '__main__':
    # Script body: draw the input form, then on "Annotate" show the local
    # pipeline's entities and the remote service's entities one after the other.
    page_init()

    sample_text = """
تمكن البطل الملاكم "محمد عيسى" القناص من الفوز في مباراته ببطولة دبي وذلك بعد انهائه النزال بالضربة القاضية. حيث يواصل البطل محمد عيسى مسيرته بتسلَّق الرُّتَب والألقاب ليصل لملاكمة الاحتراف.
""".strip()

    text = st.text_area("Text", sample_text, height=200, max_chars=1000)
    btn = st.button("Annotate")
    # CSS fixes relative to the previous version:
    #  * `font-color` is not a CSS property; the text colour is set by `color`.
    #  * Font family names use a space. The `+` belongs only to the URL
    #    encoding in the stylesheet link, so 'Scheherazade+New' never matched
    #    the loaded font.
    # NOTE(review): 'David Libre' is not requested by the stylesheet link
    # below, so it only applies where installed locally — confirm whether it
    # should be loaded as well.
    style = """
    <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Scheherazade+New">
    <style>
    .stTextArea textarea {
        font-size: 20px;
        color: black;
        font-family: 'Scheherazade New';
        direction: rtl;
    }
    .entities {
        font-size: 16px;
        font-family: 'David Libre';
        direction: rtl;
    }
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    </style>
    """
    st.write(style, unsafe_allow_html=True)

    if text and btn:
        # Local pipeline (UDPipe tokenisation + SpanMarker entities).
        doc = nlp(text)
        html = displacy.render(
            doc,
            style="ent",
            options={"direction": "rtl", "colors": DEFAULT_LABEL_COLORS},
            manual=False,
        )

        nemo_html = get_html(html)
        # Remote service results for the same text.
        iahlt_html = get_html_from_server(text)

        html = f"""
        <div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
            <div>
                <h3>Nemo model results</h3>
                {nemo_html}
            </div>
        </div>
        <div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
            <div>
                <h3>IAHLT results</h3>
                {iahlt_html}
            </div>
        </div>
        """
        st.write(html, unsafe_allow_html=True)

    else:
        st.write("")
|