awacke1 committed on
Commit
157ac27
1 Parent(s): 49434bf

Create app.backup.py

Files changed (1)
  1. app.backup.py +268 -0
app.backup.py ADDED
@@ -0,0 +1,268 @@
import gradio as gr
import pandas as pd
import json
from collections import defaultdict

# Create tokenizer and NER pipeline for the biomedical model
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")  # https://huggingface.co/d4data/biomedical-ner-all?text=asthma
model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")
pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
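
# The pipeline returns one dict per aggregated entity span. A minimal sketch of the
# expected shape (illustrative values only, assuming the model loads as above):
#
#   pipe("Patient reports chest pain and wheezing.")
#   # -> [{"entity_group": "Sign_symptom", "word": "chest pain",
#   #      "score": 0.99, "start": 16, "end": 26}, ...]
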
# Matplotlib for the entity graph; use the headless Agg backend for server use
import matplotlib.pyplot as plt
plt.switch_backend("Agg")

import os
from traceback import format_tb  # needed by the error handlers below

# Load terminology datasets (expected alongside this script):
basedir = os.path.dirname(__file__)

dataLOINC = pd.read_csv('LoincTableCore.csv')
dataPanels = pd.read_csv('PanelsAndForms-ACW1208Labeled.csv')
dataSNOMED = pd.read_csv('sct2_TextDefinition_Full-en_US1000124_20220901.txt', sep='\t')
dataOMS = pd.read_csv('SnomedOMS.csv')
dataICD10 = pd.read_csv('ICD10Diagnosis.csv')

# Load examples from JSON: maps example note text to its label
with open("examples.json", "r") as f:
    example_json = json.load(f)
EXAMPLES = {x["text"]: x["label"] for x in example_json}
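
# Expected examples.json shape (an assumption inferred from the keys used above):
# [
#   {"text": "some clinical note text ...", "label": "some label"},
#   ...
# ]
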
def MatchLOINC(name):
    pd.set_option("display.max_rows", None)
    data = dataLOINC
    swith = data.loc[data['COMPONENT'].str.contains(name, case=False, na=False)]
    return swith

def MatchLOINCPanelsandForms(name):
    data = dataPanels
    # Match on the assessment question (LoincName); switch to 'ParentName' to match on the assessment name instead
    swith = data.loc[data['LoincName'].str.contains(name, case=False, na=False)]
    return swith

def MatchSNOMED(name):
    data = dataSNOMED
    swith = data.loc[data['term'].str.contains(name, case=False, na=False)]
    return swith

def MatchOMS(name):
    data = dataOMS
    swith = data.loc[data['SNOMED CT'].str.contains(name, case=False, na=False)]
    return swith

def MatchICD10(name):
    data = dataICD10
    swith = data.loc[data['Description'].str.contains(name, case=False, na=False)]
    return swith
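
# Usage sketch: each matcher returns the rows whose text column contains the term,
# case-insensitively. For example, MatchICD10("asthma") yields every ICD10 row
# whose Description mentions "asthma".
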
def SaveResult(text, outputfileName):
    savePath = outputfileName
    print("Saving: " + text + " to " + savePath)
    # Append if the file already exists, otherwise create it
    mode = "a" if os.path.exists(savePath) else "w"
    with open(savePath, mode) as f:
        f.write(str(text.replace("\n", " ")))
        f.write('\n')
    return
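
# SaveResult collapses newlines so each call lands as exactly one CSV row, e.g.:
#   SaveResult("LOINC,Sign_symptom,asthma,...", "MedNER_2023_01_12-03.29.22.AM.csv")
# (hypothetical arguments, shown only to illustrate the one-row-per-call contract)
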
def loadFile(filename):
    try:
        basedir = os.path.dirname(__file__)
        loadPath = os.path.join(basedir, filename)

        print("Loading: " + loadPath)

        if os.path.exists(loadPath):
            with open(loadPath, "r") as f:
                contents = f.read()
                print(contents)
                return contents

    except ValueError as err:
        raise ValueError("File load error in loadFile\n" + format_tb(err.__traceback__)[0] + err.args[0] + "\nEnd of error message.") from None

    return ""

def get_today_filename():
    from datetime import datetime
    date = datetime.now().strftime("%Y_%m_%d-%I.%M.%S.%p")
    return f"MedNER_{date}.csv"  # e.g. MedNER_2023_01_12-03.29.22.AM.csv

def get_base(filename):
    # Resolve a filename relative to this script with a portable path join
    basedir = os.path.dirname(__file__)
    return os.path.join(basedir, filename)

def group_by_entity(raw):
    outputFile = get_base(get_today_filename())
    out = defaultdict(int)  # entity-type counts (available for plot_to_figure)

    for ent in raw:
        out[ent["entity_group"]] += 1
        myEntityGroup = ent["entity_group"]
        print("Found entity group type: " + myEntityGroup)

        if myEntityGroup in ['Sign_symptom', 'Detailed_description', 'History', 'Activity', 'Medication']:
            eterm = ent["word"].replace('#', '')
            minlength = 3
            if len(eterm) > minlength:
                print("Found eterm: " + eterm)
                g1 = MatchLOINC(eterm)
                g2 = MatchLOINCPanelsandForms(eterm)
                g3 = MatchSNOMED(eterm)
                g4 = MatchOMS(eterm)
                g5 = MatchICD10(eterm)

                print("Saving to output file " + outputFile)
                # Create harmonisation output format of input to output code, name, text

                try:  # fixed field count per row, output to a labeled CSV dataset for results teaching on scored regret changes to action plan with data inputs
                    # LOINC
                    g11 = g1['LOINC_NUM'].to_string().replace(",", " ").replace("\n", " ")
                    g12 = g1['COMPONENT'].to_string().replace(",", " ").replace("\n", " ")
                    s1 = "LOINC," + myEntityGroup + "," + eterm + ",questions of ," + g12 + "," + g11 + ", Label,Value, Label,Value, Label,Value "
                    if not g1.empty: SaveResult(s1, outputFile)

                    # LOINC Panels
                    g21 = g2['Loinc'].to_string().replace(",", " ").replace("\n", " ")
                    g22 = g2['LoincName'].to_string().replace(",", " ").replace("\n", " ")
                    g23 = g2['ParentLoinc'].to_string().replace(",", " ").replace("\n", " ")
                    g24 = g2['ParentName'].to_string().replace(",", " ").replace("\n", " ")
                    s2 = "LOINC Panel," + myEntityGroup + "," + eterm + ",name of ," + g22 + "," + g21 + "," + g24 + ", and Parent codes of ," + g23 + "," + ", Label,Value "
                    if not g2.empty: SaveResult(s2, outputFile)

                    # SNOMED
                    g31 = g3['conceptId'].to_string().replace(",", " ").replace("\n", " ").replace("\r", " ")
                    g32 = g3['term'].to_string().replace(",", " ").replace("\n", " ").replace("\r", " ")
                    s3 = "SNOMED Concept," + myEntityGroup + "," + eterm + ",terms of ," + g32 + "," + g31 + ", Label,Value, Label,Value, Label,Value "
                    if not g3.empty: SaveResult(s3, outputFile)

                    # OMS
                    g41 = g4['Omaha Code'].to_string().replace(",", " ").replace("\n", " ")
                    g42 = g4['SNOMED CT concept ID'].to_string().replace(",", " ").replace("\n", " ")
                    g43 = g4['SNOMED CT'].to_string().replace(",", " ").replace("\n", " ")
                    g44 = g4['PR'].to_string().replace(",", " ").replace("\n", " ")
                    g45 = g4['S&S'].to_string().replace(",", " ").replace("\n", " ")
                    s4 = "OMS," + myEntityGroup + "," + eterm + ",concepts of ," + g44 + "," + g45 + ", and SNOMED codes of ," + g43 + ", and OMS problem of ," + g42 + ", and OMS Sign Symptom of ," + g41
                    if not g4.empty: SaveResult(s4, outputFile)

                    # ICD10
                    g51 = g5['Code'].to_string().replace(",", " ").replace("\n", " ")
                    g52 = g5['Description'].to_string().replace(",", " ").replace("\n", " ")
                    s5 = "ICD10," + myEntityGroup + "," + eterm + ",descriptions of ," + g52 + "," + g51 + ", Label,Value, Label,Value, Label,Value "
                    if not g5.empty: SaveResult(s5, outputFile)

                except ValueError as err:
                    raise ValueError("Error in group_by_entity\n" + format_tb(err.__traceback__)[0] + err.args[0] + "\nEnd of error message.") from None

    return outputFile
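
# Example of a resulting CSV row (illustrative only, not from a real run):
#   SNOMED Concept,Sign_symptom,asthma,terms of ,<term text>,<conceptId text>, Label,Value, Label,Value, Label,Value
# Every row carries the same field count, so pd.read_csv can load the file back
# as a rectangular dataframe in ner() below.
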
def plot_to_figure(grouped):
    # Bar chart of entity-type counts
    fig = plt.figure()
    plt.bar(x=list(grouped.keys()), height=list(grouped.values()))
    plt.margins(0.2)
    plt.subplots_adjust(bottom=0.4)
    plt.xticks(rotation=90)
    return fig
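
# Usage sketch (not currently wired into the UI):
#   fig = plot_to_figure({"Sign_symptom": 5, "Medication": 2})
#   fig.savefig("entity_counts.png")
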
def ner(text):
    raw = pipe(text)
    ner_content = {
        "text": text,
        "entities": [
            {
                "entity": x["entity_group"],
                "word": x["word"],
                "score": x["score"],
                "start": x["start"],
                "end": x["end"],
            }
            for x in raw
        ],
    }

    outputFile = group_by_entity(raw)
    outputDataframe = pd.read_csv(outputFile)
    return (ner_content, outputDataframe, outputFile)
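
# The three return values line up positionally with the three components in the
# `output` list below: HighlightedText, Dataframe, then File.
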
demo = gr.Blocks()
with demo:
    gr.Markdown(
        """
        # 🩺⚕️ NLP Clinical Ontology Biomedical NER
        """
    )
    input = gr.Textbox(label="Note text", value="")

    with gr.Tab("Biomedical Entity Recognition"):
        output = [
            gr.HighlightedText(label="NER", combine_adjacent=True),
            gr.Dataframe(label="Dataframe"),
            gr.File(label="File"),
        ]
        examples = list(EXAMPLES.keys())
        gr.Examples(examples, inputs=input)
        input.change(fn=ner, inputs=input, outputs=output)

    with gr.Tab("Clinical Terminology Resolution"):
        # Terminology buttons (no click handlers are wired up yet)
        with gr.Row(variant="compact"):
            btnLOINC = gr.Button("LOINC")
            btnPanels = gr.Button("Panels")
            btnSNOMED = gr.Button("SNOMED")
            btnOMS = gr.Button("OMS")
            btnICD10 = gr.Button("ICD10")

        examples = list(EXAMPLES.keys())
        gr.Examples(examples, inputs=input)
        # The input.change handler registered above already covers this tab

demo.launch(debug=True)
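
# Running `python app.backup.py` serves the demo locally; debug=True keeps the
# server in the foreground and surfaces pipeline errors in the console.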