BounharAbdelaziz committed on
Commit 2cabce8 · 1 Parent(s): c591ceb

changed path to /home/user/app for accessing json files

Files changed (2):
  1. app.py +1 -0
  2. utils.py +126 -86
app.py CHANGED
@@ -13,6 +13,7 @@ from constants import *
 if __name__ == "__main__":
 
     with gr.Blocks() as app:
+
         base_path = os.path.dirname(__file__)
         local_image_path = os.path.join(base_path, 'open_arabic_lid_arena.png')
 
utils.py CHANGED
@@ -13,8 +13,16 @@ from sklearn.metrics import (
     matthews_corrcoef
 )
 import numpy as np
-
 from constants import *
+from pathlib import Path
+import logging
+
+
+def get_repo_file_path(filename):
+    """Get the full path to a file in the repository root"""
+    repo_path = Path("/home/user/app")
+    file_path = repo_path / filename
+    return file_path
 
 def predict_label(text, model, language_mapping_dict, use_mapping=False):
     """
@@ -182,48 +190,64 @@ def run_eval_one_vs_all(data_test, TARGET_LANG='Morocco'):
 
     return out
 
-def update_darija_one_vs_all_leaderboard(result_df, model_name, target_lang, DIALECT_CONFUSION_LEADERBOARD_FILE="darija_leaderboard_binary.json"):
-
-    # use base path to ensure correct saving
-    base_path = os.path.dirname(__file__)
-    json_file_path = os.path.join(base_path, DIALECT_CONFUSION_LEADERBOARD_FILE)
+
+def update_darija_one_vs_all_leaderboard(result_df, model_name, target_lang, DIALECT_CONFUSION_LEADERBOARD_FILE="darija_leaderboard_dialect_confusion.json"):
+    file_path = get_repo_file_path(DIALECT_CONFUSION_LEADERBOARD_FILE)
 
-    print(f"[INFO] Loading leaderboard data (json file) from: {json_file_path}")
+    # Log file information for debugging
+    print(f"Attempting to access file at: {file_path}")
+    print(f"File exists: {file_path.exists()}")
+    print(f"File permissions: {oct(os.stat(file_path).st_mode)[-3:]}" if file_path.exists() else "File does not exist")
 
-    # Load leaderboard data
     try:
-        with open(json_file_path, "r") as f:
-            data = json.load(f)
-    except FileNotFoundError:
-        data = []
-
-    # Process the results for each dialect/country
-    for _, row in result_df.iterrows():
-        dialect = row['dialect']
-        # Skip 'Other' class, it is considered as the null space
-        if dialect == 'Other':
-            continue
-
-        # Find existing target_lang entry or create a new one
-        target_entry = next((item for item in data if target_lang in item), None)
-        if target_entry is None:
-            target_entry = {target_lang: {}}
-            data.append(target_entry)
-
-        # Get the country-specific data for this target language
-        country_data = target_entry[target_lang]
-
-        # Initialize the dialect/country entry if it doesn't exist
-        if dialect not in country_data:
-            country_data[dialect] = {}
+        # Try to read existing data
+        if file_path.exists():
+            try:
+                with open(file_path, "r") as f:
+                    data = json.load(f)
+            except PermissionError:
+                print(f"Permission denied reading file: {file_path}")
+                raise
+        else:
+            data = []
+            # Try to create the file
+            try:
+                file_path.touch()
+            except PermissionError:
+                print(f"Permission denied creating file: {file_path}")
+                raise
+
+        # Process the results for each dialect/country
+        for _, row in result_df.iterrows():
+            dialect = row['dialect']
+            if dialect == 'Other':
+                continue
+
+            target_entry = next((item for item in data if target_lang in item), None)
+            if target_entry is None:
+                target_entry = {target_lang: {}}
+                data.append(target_entry)
 
-        # Update the model metrics under the model name for the given dialect
-        country_data[dialect][model_name] = float(row['false_positive_rate'])
-
-    # Save updated leaderboard data
-    with open(json_file_path, "w") as f:
-        json.dump(data, f, indent=4)
+            country_data = target_entry[target_lang]
+
+            if dialect not in country_data:
+                country_data[dialect] = {}
 
+            country_data[dialect][model_name] = float(row['false_positive_rate'])
+
+        # Try to write the updated data
+        try:
+            with open(file_path, "w") as f:
+                json.dump(data, f, indent=4)
+            print(f"Successfully wrote to file: {file_path}")
+        except PermissionError:
+            print(f"Permission denied writing to file: {file_path}")
+            raise
+
+    except Exception as e:
+        print(f"Error handling file {file_path}: {str(e)}")
+        raise
+
 def handle_evaluation(model_path, model_path_bin, use_mapping=False):
 
     # download model and get the model path
@@ -340,59 +364,75 @@ def process_results_file(file, uploaded_model_name, base_path_save="./atlasia/su
 
     return create_leaderboard_display_multilingual(df_multilingual, target_label, default_metrics), status_message
 
-def update_darija_multilingual_leaderboard(result_df, model_name, MULTI_DIALECTS_LEADERBOARD_FILE):
+def update_darija_multilingual_leaderboard(result_df, model_name, MULTI_DIALECTS_LEADERBOARD_FILE="darija_leaderboard_multi_dialects.json"):
+    file_path = get_repo_file_path(MULTI_DIALECTS_LEADERBOARD_FILE)
 
-    # use base path to ensure correct saving
-    base_path = os.path.dirname(__file__)
-    json_file_path = os.path.join(base_path, MULTI_DIALECTS_LEADERBOARD_FILE)
+    # Log file information for debugging
+    print(f"Attempting to access file at: {file_path}")
+    print(f"File exists: {file_path.exists()}")
+    print(f"File permissions: {oct(os.stat(file_path).st_mode)[-3:]}" if file_path.exists() else "File does not exist")
 
-    print(f"[INFO] Loading leaderboard data (json file) from: {json_file_path}")
-
-    # Load leaderboard data
    try:
-        with open(json_file_path, "r") as f:
-            data = json.load(f)
-    except FileNotFoundError:
-        data = []
-
-    # Process the results for each dialect/country
-    for _, row in result_df.iterrows():
-        country = row['country']
-        # skip 'Other' class, it is considered as the null space
-        if country == 'Other':
-            continue
+        # Try to read existing data
+        if file_path.exists():
+            try:
+                with open(file_path, "r") as f:
+                    data = json.load(f)
+            except PermissionError:
+                print(f"Permission denied reading file: {file_path}")
+                raise
+        else:
+            data = []
+            # Try to create the file
+            try:
+                file_path.touch()
+            except PermissionError:
+                print(f"Permission denied creating file: {file_path}")
+                raise
 
-        # Create metrics dictionary directly
-        metrics = {
-            'f1_score': float(row['f1_score']),
-            'precision': float(row['precision']),
-            'recall': float(row['recall']),
-            'macro_f1_score': float(row['macro_f1_score']),
-            'micro_f1_score': float(row['micro_f1_score']),
-            'weighted_f1_score': float(row['weighted_f1_score']),
-            'specificity': float(row['specificity']),
-            'false_positive_rate': float(row['false_positive_rate']),
-            'false_negative_rate': float(row['false_negative_rate']),
-            'negative_predictive_value': float(row['negative_predictive_value']),
-            'balanced_accuracy': float(row['balanced_accuracy']),
-            'matthews_correlation': float(row['matthews_correlation']),
-            'n_test_samples': int(row['samples'])
-        }
-
-        # Find existing country entry or create new one
-        country_entry = next((item for item in data if country in item), None)
-        if country_entry is None:
-            country_entry = {country: {}}
-            data.append(country_entry)
+        # Process the results for each dialect/country
+        for _, row in result_df.iterrows():
+            country = row['country']
+            if country == 'Other':
+                continue
+
+            metrics = {
+                'f1_score': float(row['f1_score']),
+                'precision': float(row['precision']),
+                'recall': float(row['recall']),
+                'macro_f1_score': float(row['macro_f1_score']),
+                'micro_f1_score': float(row['micro_f1_score']),
+                'weighted_f1_score': float(row['weighted_f1_score']),
+                'specificity': float(row['specificity']),
+                'false_positive_rate': float(row['false_positive_rate']),
+                'false_negative_rate': float(row['false_negative_rate']),
+                'negative_predictive_value': float(row['negative_predictive_value']),
+                'balanced_accuracy': float(row['balanced_accuracy']),
+                'matthews_correlation': float(row['matthews_correlation']),
+                'n_test_samples': int(row['samples'])
+            }
+
+            country_entry = next((item for item in data if country in item), None)
+            if country_entry is None:
+                country_entry = {country: {}}
+                data.append(country_entry)
 
-        # Update the model metrics directly under the model name
-        if country not in country_entry:
-            country_entry[country] = {}
-        country_entry[country][model_name] = metrics
-
-    # Save updated leaderboard data
-    with open(json_file_path, "w") as f:
-        json.dump(data, f, indent=4)
+            if country not in country_entry:
+                country_entry[country] = {}
+            country_entry[country][model_name] = metrics
+
+        # Try to write the updated data
+        try:
+            with open(file_path, "w") as f:
+                json.dump(data, f, indent=4)
+            print(f"Successfully wrote to file: {file_path}")
+        except PermissionError:
+            print(f"Permission denied writing to file: {file_path}")
+            raise
+
+    except Exception as e:
+        print(f"Error handling file {file_path}: {str(e)}")
+        raise
 
 
 def load_leaderboard_one_vs_all(DIALECT_CONFUSION_LEADERBOARD_FILE):
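For context, a minimal standalone sketch (not part of the commit) of the path-resolution change described in the commit message: leaderboard JSON files are now looked up under the fixed directory /home/user/app instead of os.path.dirname(__file__). The script below only mirrors the get_repo_file_path helper added in utils.py and checks that the two leaderboard files named in the diff resolve there; it assumes /home/user/app really is the Space's working directory.

# Illustrative sketch, assuming the app is deployed under /home/user/app as hardcoded above.
from pathlib import Path

REPO_PATH = Path("/home/user/app")

def get_repo_file_path(filename):
    """Resolve a file at the repository root of the deployed Space."""
    return REPO_PATH / filename

if __name__ == "__main__":
    # Quick check that the leaderboard JSON files referenced in the diff are reachable.
    for name in ("darija_leaderboard_dialect_confusion.json",
                 "darija_leaderboard_multi_dialects.json"):
        path = get_repo_file_path(name)
        print(f"{name} -> {path} (exists: {path.exists()})")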