BounharAbdelaziz
committed on
Commit
·
2cabce8
1
Parent(s):
c591ceb
changed path to /home/user/app for accessing json files
Browse files
app.py
CHANGED
@@ -13,6 +13,7 @@ from constants import *
|
|
13 |
if __name__ == "__main__":
|
14 |
|
15 |
with gr.Blocks() as app:
|
|
|
16 |
base_path = os.path.dirname(__file__)
|
17 |
local_image_path = os.path.join(base_path, 'open_arabic_lid_arena.png')
|
18 |
|
|
|
13 |
if __name__ == "__main__":
|
14 |
|
15 |
with gr.Blocks() as app:
|
16 |
+
|
17 |
base_path = os.path.dirname(__file__)
|
18 |
local_image_path = os.path.join(base_path, 'open_arabic_lid_arena.png')
|
19 |
|
utils.py
CHANGED
@@ -13,8 +13,16 @@ from sklearn.metrics import (
|
|
13 |
matthews_corrcoef
|
14 |
)
|
15 |
import numpy as np
|
16 |
-
|
17 |
from constants import *
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
def predict_label(text, model, language_mapping_dict, use_mapping=False):
|
20 |
"""
|
@@ -182,48 +190,64 @@ def run_eval_one_vs_all(data_test, TARGET_LANG='Morocco'):
|
|
182 |
|
183 |
return out
|
184 |
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
base_path = os.path.dirname(__file__)
|
189 |
-
json_file_path = os.path.join(base_path, DIALECT_CONFUSION_LEADERBOARD_FILE)
|
190 |
|
191 |
-
|
|
|
|
|
|
|
192 |
|
193 |
-
# Load leaderboard data
|
194 |
try:
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
|
|
|
|
|
|
219 |
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
with open(json_file_path, "w") as f:
|
225 |
-
json.dump(data, f, indent=4)
|
226 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
def handle_evaluation(model_path, model_path_bin, use_mapping=False):
|
228 |
|
229 |
# download model and get the model path
|
@@ -340,59 +364,75 @@ def process_results_file(file, uploaded_model_name, base_path_save="./atlasia/su
|
|
340 |
|
341 |
return create_leaderboard_display_multilingual(df_multilingual, target_label, default_metrics), status_message
|
342 |
|
343 |
-
def update_darija_multilingual_leaderboard(result_df, model_name, MULTI_DIALECTS_LEADERBOARD_FILE):
|
|
|
344 |
|
345 |
-
#
|
346 |
-
|
347 |
-
|
|
|
348 |
|
349 |
-
print(f"[INFO] Loading leaderboard data (json file) from: {json_file_path}")
|
350 |
-
|
351 |
-
# Load leaderboard data
|
352 |
try:
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
|
|
|
|
|
|
|
|
|
|
364 |
|
365 |
-
#
|
366 |
-
|
367 |
-
|
368 |
-
'
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
387 |
|
388 |
-
#
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
|
|
|
|
|
|
|
|
396 |
|
397 |
|
398 |
def load_leaderboard_one_vs_all(DIALECT_CONFUSION_LEADERBOARD_FILE):
|
|
|
13 |
matthews_corrcoef
|
14 |
)
|
15 |
import numpy as np
|
|
|
16 |
from constants import *
|
17 |
+
from pathlib import Path
|
18 |
+
import logging
|
19 |
+
|
20 |
+
|
21 |
+
def get_repo_file_path(filename, repo_root="/home/user/app"):
    """Return the full path to a file located under the repository root.

    Args:
        filename: File name (or relative path) to resolve under the root.
        repo_root: Directory treated as the repository root. Defaults to
            "/home/user/app", the location this module has always used;
            pass another directory to run against a different checkout.

    Returns:
        pathlib.Path: ``repo_root / filename``. The path is not checked for
        existence.
    """
    return Path(repo_root) / filename
|
26 |
|
27 |
def predict_label(text, model, language_mapping_dict, use_mapping=False):
|
28 |
"""
|
|
|
190 |
|
191 |
return out
|
192 |
|
193 |
+
|
194 |
+
def update_darija_one_vs_all_leaderboard(result_df, model_name, target_lang, DIALECT_CONFUSION_LEADERBOARD_FILE="darija_leaderboard_dialect_confusion.json"):
    """Store a model's per-dialect false-positive rates in the one-vs-all leaderboard file.

    The JSON file holds a list of single-key dicts shaped like
    ``{target_lang: {dialect: {model_name: false_positive_rate}}}``. The entry
    for ``target_lang`` is created on first use and the value for
    ``model_name`` is overwritten if it already exists.

    Args:
        result_df: Object iterated with ``.iterrows()``; each row must provide
            'dialect' and 'false_positive_rate'. Rows whose dialect is
            'Other' are skipped.
        model_name: Key under which the rates are stored.
        target_lang: Key of the leaderboard entry to update.
        DIALECT_CONFUSION_LEADERBOARD_FILE: File name resolved through
            ``get_repo_file_path``.

    Raises:
        PermissionError: If the file cannot be read or written.
        json.JSONDecodeError: If the file is non-empty but not valid JSON
            (the file is left untouched in that case).
    """
    file_path = get_repo_file_path(DIALECT_CONFUSION_LEADERBOARD_FILE)

    # Log file information for debugging. stat() is called once so the file
    # disappearing between an exists() check and the stat cannot crash us.
    print(f"Attempting to access file at: {file_path}")
    try:
        permissions = oct(os.stat(file_path).st_mode)[-3:]
    except FileNotFoundError:
        permissions = None
    print(f"File exists: {permissions is not None}")
    print(f"File permissions: {permissions}" if permissions is not None else "File does not exist")

    try:
        # Try to read existing data; a missing file simply means "no entries yet".
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                raw = f.read()
        except FileNotFoundError:
            raw = ""
        except PermissionError:
            print(f"Permission denied reading file: {file_path}")
            raise

        # An empty file (e.g. left by an interrupted earlier run) is treated as
        # an empty leaderboard instead of failing inside the JSON parser.
        data = json.loads(raw) if raw.strip() else []

        # Process the results for each dialect/country
        country_data = None
        for _, row in result_df.iterrows():
            dialect = row['dialect']
            if dialect == 'Other':
                continue

            # The target entry is the same for every row: resolve it once, and
            # only when there is at least one row to record.
            if country_data is None:
                target_entry = next((item for item in data if target_lang in item), None)
                if target_entry is None:
                    target_entry = {target_lang: {}}
                    data.append(target_entry)
                country_data = target_entry[target_lang]

            country_data.setdefault(dialect, {})[model_name] = float(row['false_positive_rate'])

        # Serialise before opening the file: opening with "w" truncates it, so
        # a serialisation error must not happen after that point.
        payload = json.dumps(data, indent=4)

        # Try to write the updated data
        try:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(payload)
            print(f"Successfully wrote to file: {file_path}")
        except PermissionError:
            print(f"Permission denied writing to file: {file_path}")
            raise

    except Exception as e:
        print(f"Error handling file {file_path}: {str(e)}")
        raise
|
250 |
+
|
251 |
def handle_evaluation(model_path, model_path_bin, use_mapping=False):
|
252 |
|
253 |
# download model and get the model path
|
|
|
364 |
|
365 |
return create_leaderboard_display_multilingual(df_multilingual, target_label, default_metrics), status_message
|
366 |
|
367 |
+
def update_darija_multilingual_leaderboard(result_df, model_name, MULTI_DIALECTS_LEADERBOARD_FILE="darija_leaderboard_multi_dialects.json"):
    """Store a model's per-country metrics in the multi-dialect leaderboard file.

    The JSON file holds a list of single-key dicts shaped like
    ``{country: {model_name: metrics}}``. A country entry is created on first
    use and the metrics for ``model_name`` are overwritten if already present.

    Args:
        result_df: Object iterated with ``.iterrows()``; each row must provide
            'country', 'samples' and every metric column listed in
            ``float_metric_names`` below. Rows whose country is 'Other' are
            skipped.
        model_name: Key under which the metrics are stored.
        MULTI_DIALECTS_LEADERBOARD_FILE: File name resolved through
            ``get_repo_file_path``.

    Raises:
        PermissionError: If the file cannot be read or written.
        json.JSONDecodeError: If the file is non-empty but not valid JSON
            (the file is left untouched in that case).
    """
    file_path = get_repo_file_path(MULTI_DIALECTS_LEADERBOARD_FILE)

    # Columns copied verbatim (as floats) into the stored metrics dict.
    float_metric_names = (
        'f1_score',
        'precision',
        'recall',
        'macro_f1_score',
        'micro_f1_score',
        'weighted_f1_score',
        'specificity',
        'false_positive_rate',
        'false_negative_rate',
        'negative_predictive_value',
        'balanced_accuracy',
        'matthews_correlation',
    )

    # Log file information for debugging. stat() is called once so the file
    # disappearing between an exists() check and the stat cannot crash us.
    print(f"Attempting to access file at: {file_path}")
    try:
        permissions = oct(os.stat(file_path).st_mode)[-3:]
    except FileNotFoundError:
        permissions = None
    print(f"File exists: {permissions is not None}")
    print(f"File permissions: {permissions}" if permissions is not None else "File does not exist")

    try:
        # Try to read existing data; a missing file simply means "no entries yet".
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                raw = f.read()
        except FileNotFoundError:
            raw = ""
        except PermissionError:
            print(f"Permission denied reading file: {file_path}")
            raise

        # An empty file (e.g. left by an interrupted earlier run) is treated as
        # an empty leaderboard instead of failing inside the JSON parser.
        data = json.loads(raw) if raw.strip() else []

        # Process the results for each dialect/country
        for _, row in result_df.iterrows():
            country = row['country']
            if country == 'Other':
                continue

            metrics = {name: float(row[name]) for name in float_metric_names}
            metrics['n_test_samples'] = int(row['samples'])

            # Find the entry for this country, creating it on first use.
            country_entry = next((item for item in data if country in item), None)
            if country_entry is None:
                country_entry = {country: {}}
                data.append(country_entry)

            country_entry[country][model_name] = metrics

        # Serialise before opening the file: opening with "w" truncates it, so
        # a serialisation error must not happen after that point.
        payload = json.dumps(data, indent=4)

        # Try to write the updated data
        try:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(payload)
            print(f"Successfully wrote to file: {file_path}")
        except PermissionError:
            print(f"Permission denied writing to file: {file_path}")
            raise

    except Exception as e:
        print(f"Error handling file {file_path}: {str(e)}")
        raise
|
436 |
|
437 |
|
438 |
def load_leaderboard_one_vs_all(DIALECT_CONFUSION_LEADERBOARD_FILE):
|