cecilemacaire committed on
Commit
ff77171
1 Parent(s): a63edff

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +43 -1
README.md CHANGED
@@ -94,7 +94,49 @@ outputs = model.generate(inputs.to("cuda:0"), max_new_tokens=40, do_sample=True,
94
  pred = tokenizer.decode(outputs[0], skip_special_tokens=True)
95
  ```
96
 
97
- ## Linking and viewing the predicted sequence of tokens to the corresponding ARASAAC pictograms
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  ## Information
100
 
 
94
  pred = tokenizer.decode(outputs[0], skip_special_tokens=True)
95
  ```
96
 
97
+ ## Linking the predicted sequence of tokens to the corresponding ARASAAC pictograms
98
+
99
+ ```python
100
+ import pandas as pd
101
+
102
def process_output_trad(pred):
    """Split the decoded prediction string into whitespace-separated tokens.

    Args:
        pred: The decoded model output as a single string.

    Returns:
        A list with one entry per whitespace-delimited token; empty when
        ``pred`` contains no non-whitespace characters.
    """
    tokens = pred.split()
    return tokens
104
+
105
def read_lexicon(lexicon):
    """Load the tab-separated lexicon and add a normalized keyword column.

    Args:
        lexicon: Path (or file-like object) handed to ``pandas.read_csv``.

    Returns:
        The loaded DataFrame with an extra ``keyword_no_cat`` column derived
        from ``lemma``: everything from the first ``' #'`` onwards is dropped,
        surrounding whitespace is stripped, and remaining spaces become
        underscores.
    """
    table = pd.read_csv(lexicon, sep='\t')
    # Keep only the part of the lemma that precedes the ' #' suffix.
    keyword = table['lemma'].str.split(' #').str[0]
    keyword = keyword.str.strip()
    table['keyword_no_cat'] = keyword.str.replace(' ', '_')
    return table
109
+
110
def get_id_picto_from_predicted_lemma(df_lexicon, lemma):
    """Look up the pictogram id recorded for a predicted lemma.

    Args:
        df_lexicon: DataFrame with ``keyword_no_cat`` and ``id_picto`` columns.
        lemma: The token to look up; compared for exact equality against
            ``keyword_no_cat``.

    Returns:
        A ``(id_picto, lemma)`` tuple. When several rows match, the first
        one wins; when none match, the id is ``0``.
    """
    is_match = df_lexicon['keyword_no_cat'] == lemma
    candidates = df_lexicon.loc[is_match, 'id_picto'].tolist()
    if not candidates:
        # 0 is the "no pictogram found" marker.
        return (0, lemma)
    return (candidates[0], lemma)
113
+
114
# Load the lexicon table from the tab-separated file.
lexicon = read_lexicon("lexicon.csv")

# Tokenize the decoded model output held in `pred`.
sentence_to_map = process_output_trad(pred)

# Build one (id_picto, lemma) pair per predicted token.
pictogram_ids = [
    get_id_picto_from_predicted_lemma(lexicon, token)
    for token in sentence_to_map
]
117
+ ```
118
+
119
+ ## Viewing the predicted sequence of ARASAAC pictograms
120
+
121
+ ```python
122
def generate_html(ids):
    """Render (picto_id, lemma) pairs as an HTML page of ARASAAC pictograms.

    Args:
        ids: Iterable of ``(picto_id, lemma)`` pairs. Pairs whose
            ``picto_id`` equals ``0`` are skipped.

    Returns:
        A string holding a complete ``<html><body>…</body></html>`` document
        with one ``<figure>`` (image plus caption) per retained pair.
    """
    # Imported locally so the function does not rely on a module-level name
    # `html`, which the surrounding script rebinds to the generated page.
    from html import escape

    parts = ['<html><body>']
    for picto_id, lemma in ids:
        if picto_id == 0:  # ignore invalid IDs
            continue
        img_url = f"https://static.arasaac.org/pictograms/{picto_id}/{picto_id}_500.png"
        # The lemma is free text; escape it so characters such as <, >, & or
        # quotes cannot break the attribute value or inject markup.
        safe_lemma = escape(str(lemma), quote=True)
        parts.append(f'''
        <figure style="display:inline-block; margin:1px;">
            <img src="{img_url}" alt="{safe_lemma}" width="200" height="200" />
            <figcaption>{safe_lemma}</figcaption>
        </figure>
        ''')
    parts.append('</body></html>')
    # Join once instead of growing the string with += in the loop.
    return ''.join(parts)
135
+
136
# Render the mapped pictograms and save the page to disk.
html = generate_html(pictogram_ids)
# Pin the encoding: without it open() uses the platform's locale encoding,
# so lemmas with non-ASCII characters could fail to encode or be garbled.
with open("pictograms.html", "w", encoding="utf-8") as file:
    file.write(html)
139
+ ```
140
 
141
  ## Information
142