BounharAbdelaziz committed on
Commit
1656d75
·
1 Parent(s): 2cabce8

saving through HfApi

Browse files
Files changed (2) hide show
  1. constants.py +1 -0
  2. utils.py +117 -137
constants.py CHANGED
@@ -2,6 +2,7 @@ from datasets import load_dataset
2
 
3
 
4
  # Constants values
 
5
  DATA_PATH = "atlasia/Arabic-LID-Leaderboard"
6
  DIALECT_CONFUSION_LEADERBOARD_FILE = "darija_leaderboard_dialect_confusion.json"
7
  MULTI_DIALECTS_LEADERBOARD_FILE = "darija_leaderboard_multi_dialects.json"
 
2
 
3
 
4
  # Constants values
5
+ LEADERBOARD_PATH = "atlasia/Open-Arabic-Dialect-Identification-Leaderboard"
6
  DATA_PATH = "atlasia/Arabic-LID-Leaderboard"
7
  DIALECT_CONFUSION_LEADERBOARD_FILE = "darija_leaderboard_dialect_confusion.json"
8
  MULTI_DIALECTS_LEADERBOARD_FILE = "darija_leaderboard_multi_dialects.json"
utils.py CHANGED
@@ -14,15 +14,8 @@ from sklearn.metrics import (
14
  )
15
  import numpy as np
16
  from constants import *
 
17
  from pathlib import Path
18
- import logging
19
-
20
-
21
- def get_repo_file_path(filename):
22
- """Get the full path to a file in the repository root"""
23
- repo_path = Path("/home/user/app")
24
- file_path = repo_path / filename
25
- return file_path
26
 
27
  def predict_label(text, model, language_mapping_dict, use_mapping=False):
28
  """
@@ -190,64 +183,7 @@ def run_eval_one_vs_all(data_test, TARGET_LANG='Morocco'):
190
 
191
  return out
192
 
193
-
194
- def update_darija_one_vs_all_leaderboard(result_df, model_name, target_lang, DIALECT_CONFUSION_LEADERBOARD_FILE="darija_leaderboard_dialect_confusion.json"):
195
- file_path = get_repo_file_path(DIALECT_CONFUSION_LEADERBOARD_FILE)
196
-
197
- # Log file information for debugging
198
- print(f"Attempting to access file at: {file_path}")
199
- print(f"File exists: {file_path.exists()}")
200
- print(f"File permissions: {oct(os.stat(file_path).st_mode)[-3:]}" if file_path.exists() else "File does not exist")
201
-
202
- try:
203
- # Try to read existing data
204
- if file_path.exists():
205
- try:
206
- with open(file_path, "r") as f:
207
- data = json.load(f)
208
- except PermissionError:
209
- print(f"Permission denied reading file: {file_path}")
210
- raise
211
- else:
212
- data = []
213
- # Try to create the file
214
- try:
215
- file_path.touch()
216
- except PermissionError:
217
- print(f"Permission denied creating file: {file_path}")
218
- raise
219
-
220
- # Process the results for each dialect/country
221
- for _, row in result_df.iterrows():
222
- dialect = row['dialect']
223
- if dialect == 'Other':
224
- continue
225
-
226
- target_entry = next((item for item in data if target_lang in item), None)
227
- if target_entry is None:
228
- target_entry = {target_lang: {}}
229
- data.append(target_entry)
230
-
231
- country_data = target_entry[target_lang]
232
-
233
- if dialect not in country_data:
234
- country_data[dialect] = {}
235
-
236
- country_data[dialect][model_name] = float(row['false_positive_rate'])
237
-
238
- # Try to write the updated data
239
- try:
240
- with open(file_path, "w") as f:
241
- json.dump(data, f, indent=4)
242
- print(f"Successfully wrote to file: {file_path}")
243
- except PermissionError:
244
- print(f"Permission denied writing to file: {file_path}")
245
- raise
246
-
247
- except Exception as e:
248
- print(f"Error handling file {file_path}: {str(e)}")
249
- raise
250
-
251
  def handle_evaluation(model_path, model_path_bin, use_mapping=False):
252
 
253
  # download model and get the model path
@@ -364,76 +300,6 @@ def process_results_file(file, uploaded_model_name, base_path_save="./atlasia/su
364
 
365
  return create_leaderboard_display_multilingual(df_multilingual, target_label, default_metrics), status_message
366
 
367
- def update_darija_multilingual_leaderboard(result_df, model_name, MULTI_DIALECTS_LEADERBOARD_FILE="darija_leaderboard_multi_dialects.json"):
368
- file_path = get_repo_file_path(MULTI_DIALECTS_LEADERBOARD_FILE)
369
-
370
- # Log file information for debugging
371
- print(f"Attempting to access file at: {file_path}")
372
- print(f"File exists: {file_path.exists()}")
373
- print(f"File permissions: {oct(os.stat(file_path).st_mode)[-3:]}" if file_path.exists() else "File does not exist")
374
-
375
- try:
376
- # Try to read existing data
377
- if file_path.exists():
378
- try:
379
- with open(file_path, "r") as f:
380
- data = json.load(f)
381
- except PermissionError:
382
- print(f"Permission denied reading file: {file_path}")
383
- raise
384
- else:
385
- data = []
386
- # Try to create the file
387
- try:
388
- file_path.touch()
389
- except PermissionError:
390
- print(f"Permission denied creating file: {file_path}")
391
- raise
392
-
393
- # Process the results for each dialect/country
394
- for _, row in result_df.iterrows():
395
- country = row['country']
396
- if country == 'Other':
397
- continue
398
-
399
- metrics = {
400
- 'f1_score': float(row['f1_score']),
401
- 'precision': float(row['precision']),
402
- 'recall': float(row['recall']),
403
- 'macro_f1_score': float(row['macro_f1_score']),
404
- 'micro_f1_score': float(row['micro_f1_score']),
405
- 'weighted_f1_score': float(row['weighted_f1_score']),
406
- 'specificity': float(row['specificity']),
407
- 'false_positive_rate': float(row['false_positive_rate']),
408
- 'false_negative_rate': float(row['false_negative_rate']),
409
- 'negative_predictive_value': float(row['negative_predictive_value']),
410
- 'balanced_accuracy': float(row['balanced_accuracy']),
411
- 'matthews_correlation': float(row['matthews_correlation']),
412
- 'n_test_samples': int(row['samples'])
413
- }
414
-
415
- country_entry = next((item for item in data if country in item), None)
416
- if country_entry is None:
417
- country_entry = {country: {}}
418
- data.append(country_entry)
419
-
420
- if country not in country_entry:
421
- country_entry[country] = {}
422
- country_entry[country][model_name] = metrics
423
-
424
- # Try to write the updated data
425
- try:
426
- with open(file_path, "w") as f:
427
- json.dump(data, f, indent=4)
428
- print(f"Successfully wrote to file: {file_path}")
429
- except PermissionError:
430
- print(f"Permission denied writing to file: {file_path}")
431
- raise
432
-
433
- except Exception as e:
434
- print(f"Error handling file {file_path}: {str(e)}")
435
- raise
436
-
437
 
438
  def load_leaderboard_one_vs_all(DIALECT_CONFUSION_LEADERBOARD_FILE):
439
  current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -589,4 +455,118 @@ def create_html_image(image_path):
589
 
590
  def render_fixed_columns(df):
591
  """ A function to render HTML table with fixed 'model' column for better visibility """
592
- return NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  )
15
  import numpy as np
16
  from constants import *
17
+ from huggingface_hub import HfApi, login
18
  from pathlib import Path
 
 
 
 
 
 
 
 
19
 
20
  def predict_label(text, model, language_mapping_dict, use_mapping=False):
21
  """
 
183
 
184
  return out
185
 
186
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  def handle_evaluation(model_path, model_path_bin, use_mapping=False):
188
 
189
  # download model and get the model path
 
300
 
301
  return create_leaderboard_display_multilingual(df_multilingual, target_label, default_metrics), status_message
302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
 
304
  def load_leaderboard_one_vs_all(DIALECT_CONFUSION_LEADERBOARD_FILE):
305
  current_dir = os.path.dirname(os.path.abspath(__file__))
 
455
 
456
def render_fixed_columns(df):
    """Render an HTML table with a fixed 'model' column for better visibility.

    Not implemented yet.

    Raises:
        NotImplementedError: always. The previous version *returned* the
            exception class, which handed callers a class object instead of
            signalling that the feature is missing.
    """
    raise NotImplementedError("render_fixed_columns is not implemented yet")
459
+
460
def update_repo_file(api, repo_id, filename, data):
    """Serialize `data` as JSON into the app directory and push it to the Space.

    Args:
        api: object exposing `upload_file` (an `HfApi` instance in this file).
        repo_id: id of the repository receiving the file.
        filename: file name, used both locally and as the path inside the repo.
        data: JSON-serializable payload.
    """
    # The JSON is first written under the app directory, then uploaded from there.
    local_copy = Path("/home/user/app") / filename
    with local_copy.open("w") as handle:
        json.dump(data, handle, indent=4)

    # Push the freshly written file to the repository (always as a Space).
    upload_kwargs = {
        "path_or_fileobj": str(local_copy),
        "path_in_repo": filename,
        "repo_id": repo_id,
        "repo_type": "space",
        "commit_message": f"Update {filename}",
    }
    api.upload_file(**upload_kwargs)
478
+
479
def update_darija_one_vs_all_leaderboard(result_df, model_name, target_lang, DIALECT_CONFUSION_LEADERBOARD_FILE="darija_leaderboard_dialect_confusion.json"):
    """Merge one model's dialect-confusion results into the stored leaderboard.

    The leaderboard JSON is a list of single-key dicts shaped like
    `{target_lang: {dialect: {model_name: false_positive_rate}}}`. The current
    file is downloaded from `LEADERBOARD_PATH`, updated in memory and pushed
    back through `update_repo_file`.

    Args:
        result_df: DataFrame with `dialect` and `false_positive_rate` columns.
            Rows whose dialect is 'Other' are ignored.
        model_name: key under which this model's scores are stored.
        target_lang: language/country the one-vs-all evaluation targeted.
        DIALECT_CONFUSION_LEADERBOARD_FILE: name of the JSON file in the repo.

    Raises:
        Exception: any download, parse or upload error is printed and re-raised.
            Only a file that does not exist yet starts a fresh leaderboard;
            other failures must not wipe the existing entries.
    """
    # Local import so the block does not depend on a new top-level import.
    from huggingface_hub.utils import EntryNotFoundError

    # Initialize Hugging Face API
    api = HfApi()

    try:
        # Download the existing file. `HfApi` has no `fetch_file_content`;
        # `hf_hub_download` is the supported call. The repo type matches the
        # one `update_repo_file` uploads to ("space").
        try:
            local_copy = api.hf_hub_download(
                repo_id=LEADERBOARD_PATH,
                filename=DIALECT_CONFUSION_LEADERBOARD_FILE,
                repo_type="space",
            )
        except EntryNotFoundError:
            # First submission: the leaderboard file has not been created yet.
            data = []
        else:
            with open(local_copy, "r", encoding="utf-8") as f:
                data = json.load(f)

        # Process the results
        for _, row in result_df.iterrows():
            dialect = row['dialect']
            if dialect == 'Other':
                continue

            # Find (or lazily create) the entry holding this target language.
            target_entry = next((item for item in data if target_lang in item), None)
            if target_entry is None:
                target_entry = {target_lang: {}}
                data.append(target_entry)

            country_data = target_entry[target_lang]
            country_data.setdefault(dialect, {})[model_name] = float(row['false_positive_rate'])

        # Update the file in the repository
        update_repo_file(api, LEADERBOARD_PATH, DIALECT_CONFUSION_LEADERBOARD_FILE, data)

    except Exception as e:
        print(f"Error updating repository: {str(e)}")
        raise
519
+
520
def update_darija_multilingual_leaderboard(result_df, model_name, MULTI_DIALECTS_LEADERBOARD_FILE="darija_leaderboard_multi_dialects.json"):
    """Merge one model's per-country metrics into the stored leaderboard.

    The leaderboard JSON is a list of single-key dicts shaped like
    `{country: {model_name: {metric_name: value}}}`. The current file is
    downloaded from `LEADERBOARD_PATH`, updated in memory and pushed back
    through `update_repo_file`.

    Args:
        result_df: DataFrame with a `country` column, a `samples` column and
            one column per metric stored below. Rows whose country is 'Other'
            are ignored.
        model_name: key under which this model's metrics are stored.
        MULTI_DIALECTS_LEADERBOARD_FILE: name of the JSON file in the repo.

    Raises:
        Exception: any download, parse or upload error is printed and re-raised.
            Only a file that does not exist yet starts a fresh leaderboard;
            other failures must not wipe the existing entries.
    """
    # Local import so the block does not depend on a new top-level import.
    from huggingface_hub.utils import EntryNotFoundError

    # Metrics copied from each row as floats, in the order they are stored.
    float_metrics = (
        'f1_score',
        'precision',
        'recall',
        'macro_f1_score',
        'micro_f1_score',
        'weighted_f1_score',
        'specificity',
        'false_positive_rate',
        'false_negative_rate',
        'negative_predictive_value',
        'balanced_accuracy',
        'matthews_correlation',
    )

    # Initialize Hugging Face API
    api = HfApi()

    try:
        # Download the existing file. `HfApi` has no `fetch_file_content`;
        # `hf_hub_download` is the supported call. The repo type matches the
        # one `update_repo_file` uploads to ("space").
        try:
            local_copy = api.hf_hub_download(
                repo_id=LEADERBOARD_PATH,
                filename=MULTI_DIALECTS_LEADERBOARD_FILE,
                repo_type="space",
            )
        except EntryNotFoundError:
            # First submission: the leaderboard file has not been created yet.
            data = []
        else:
            with open(local_copy, "r", encoding="utf-8") as f:
                data = json.load(f)

        # Process the results
        for _, row in result_df.iterrows():
            country = row['country']
            if country == 'Other':
                continue

            metrics = {name: float(row[name]) for name in float_metrics}
            metrics['n_test_samples'] = int(row['samples'])

            # Find (or create) the entry holding this country. Either way the
            # entry is guaranteed to contain the `country` key afterwards.
            country_entry = next((item for item in data if country in item), None)
            if country_entry is None:
                country_entry = {country: {}}
                data.append(country_entry)

            country_entry[country][model_name] = metrics

        # Update the file in the repository
        update_repo_file(api, LEADERBOARD_PATH, MULTI_DIALECTS_LEADERBOARD_FILE, data)

    except Exception as e:
        print(f"Error updating repository: {str(e)}")
        raise