Spaces:

atlasia
/

Open-Arabic-Dialect-Identification-Leaderboard

Running

App Files Community

BounharAbdelaziz committed 5 days ago

Commit

1656d75

1 Parent(s): 2cabce8

saving through HfApi

Browse files

Files changed (2) hide show

constants.py +1 -0
utils.py +117 -137

constants.py CHANGED Viewed

@@ -2,6 +2,7 @@ from datasets import load_dataset
 # Constants values
 DATA_PATH = "atlasia/Arabic-LID-Leaderboard"
 DIALECT_CONFUSION_LEADERBOARD_FILE = "darija_leaderboard_dialect_confusion.json"
 MULTI_DIALECTS_LEADERBOARD_FILE = "darija_leaderboard_multi_dialects.json"

 # Constants values
+LEADERBOARD_PATH = "atlasia/Open-Arabic-Dialect-Identification-Leaderboard"
 DATA_PATH = "atlasia/Arabic-LID-Leaderboard"
 DIALECT_CONFUSION_LEADERBOARD_FILE = "darija_leaderboard_dialect_confusion.json"
 MULTI_DIALECTS_LEADERBOARD_FILE = "darija_leaderboard_multi_dialects.json"

utils.py CHANGED Viewed

@@ -14,15 +14,8 @@ from sklearn.metrics import (
 )
 import numpy as np
 from constants import *
 from pathlib import Path
-import logging
-def get_repo_file_path(filename):
-    """Get the full path to a file in the repository root"""
-    repo_path = Path("/home/user/app")
-    file_path = repo_path / filename
-    return file_path
 def predict_label(text, model, language_mapping_dict, use_mapping=False):
     """
@@ -190,64 +183,7 @@ def run_eval_one_vs_all(data_test, TARGET_LANG='Morocco'):
     return out
-def update_darija_one_vs_all_leaderboard(result_df, model_name, target_lang, DIALECT_CONFUSION_LEADERBOARD_FILE="darija_leaderboard_dialect_confusion.json"):
-    file_path = get_repo_file_path(DIALECT_CONFUSION_LEADERBOARD_FILE)
-    # Log file information for debugging
-    print(f"Attempting to access file at: {file_path}")
-    print(f"File exists: {file_path.exists()}")
-    print(f"File permissions: {oct(os.stat(file_path).st_mode)[-3:]}" if file_path.exists() else "File does not exist")
-    try:
-        # Try to read existing data
-        if file_path.exists():
-            try:
-                with open(file_path, "r") as f:
-                    data = json.load(f)
-            except PermissionError:
-                print(f"Permission denied reading file: {file_path}")
-                raise
-        else:
-            data = []
-            # Try to create the file
-            try:
-                file_path.touch()
-            except PermissionError:
-                print(f"Permission denied creating file: {file_path}")
-                raise
-        # Process the results for each dialect/country
-        for _, row in result_df.iterrows():
-            dialect = row['dialect']
-            if dialect == 'Other':
-                continue
-            target_entry = next((item for item in data if target_lang in item), None)
-            if target_entry is None:
-                target_entry = {target_lang: {}}
-                data.append(target_entry)
-            country_data = target_entry[target_lang]
-            if dialect not in country_data:
-                country_data[dialect] = {}
-            country_data[dialect][model_name] = float(row['false_positive_rate'])
-        # Try to write the updated data
-        try:
-            with open(file_path, "w") as f:
-                json.dump(data, f, indent=4)
-            print(f"Successfully wrote to file: {file_path}")
-        except PermissionError:
-            print(f"Permission denied writing to file: {file_path}")
-            raise
-    except Exception as e:
-        print(f"Error handling file {file_path}: {str(e)}")
-        raise
 def handle_evaluation(model_path, model_path_bin, use_mapping=False):
     # download model and get the model path
@@ -364,76 +300,6 @@ def process_results_file(file, uploaded_model_name, base_path_save="./atlasia/su
     return create_leaderboard_display_multilingual(df_multilingual, target_label, default_metrics), status_message
-def update_darija_multilingual_leaderboard(result_df, model_name, MULTI_DIALECTS_LEADERBOARD_FILE="darija_leaderboard_multi_dialects.json"):
-    file_path = get_repo_file_path(MULTI_DIALECTS_LEADERBOARD_FILE)
-    # Log file information for debugging
-    print(f"Attempting to access file at: {file_path}")
-    print(f"File exists: {file_path.exists()}")
-    print(f"File permissions: {oct(os.stat(file_path).st_mode)[-3:]}" if file_path.exists() else "File does not exist")
-    try:
-        # Try to read existing data
-        if file_path.exists():
-            try:
-                with open(file_path, "r") as f:
-                    data = json.load(f)
-            except PermissionError:
-                print(f"Permission denied reading file: {file_path}")
-                raise
-        else:
-            data = []
-            # Try to create the file
-            try:
-                file_path.touch()
-            except PermissionError:
-                print(f"Permission denied creating file: {file_path}")
-                raise
-        # Process the results for each dialect/country
-        for _, row in result_df.iterrows():
-            country = row['country']
-            if country == 'Other':
-                continue
-            metrics = {
-                'f1_score': float(row['f1_score']),
-                'precision': float(row['precision']),
-                'recall': float(row['recall']),
-                'macro_f1_score': float(row['macro_f1_score']),
-                'micro_f1_score': float(row['micro_f1_score']),
-                'weighted_f1_score': float(row['weighted_f1_score']),
-                'specificity': float(row['specificity']),
-                'false_positive_rate': float(row['false_positive_rate']),
-                'false_negative_rate': float(row['false_negative_rate']),
-                'negative_predictive_value': float(row['negative_predictive_value']),
-                'balanced_accuracy': float(row['balanced_accuracy']),
-                'matthews_correlation': float(row['matthews_correlation']),
-                'n_test_samples': int(row['samples'])
-            }
-            country_entry = next((item for item in data if country in item), None)
-            if country_entry is None:
-                country_entry = {country: {}}
-                data.append(country_entry)
-            if country not in country_entry:
-                country_entry[country] = {}
-            country_entry[country][model_name] = metrics
-        # Try to write the updated data
-        try:
-            with open(file_path, "w") as f:
-                json.dump(data, f, indent=4)
-            print(f"Successfully wrote to file: {file_path}")
-        except PermissionError:
-            print(f"Permission denied writing to file: {file_path}")
-            raise
-    except Exception as e:
-        print(f"Error handling file {file_path}: {str(e)}")
-        raise
 def load_leaderboard_one_vs_all(DIALECT_CONFUSION_LEADERBOARD_FILE):
     current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -589,4 +455,118 @@ def create_html_image(image_path):
 def render_fixed_columns(df):
     """ A function to render HTML table with fixed 'model' column for better visibility """
-    return NotImplementedError

 )
 import numpy as np
 from constants import *
+from huggingface_hub import HfApi, login
 from pathlib import Path
 def predict_label(text, model, language_mapping_dict, use_mapping=False):
     """
     return out
 def handle_evaluation(model_path, model_path_bin, use_mapping=False):
     # download model and get the model path
     return create_leaderboard_display_multilingual(df_multilingual, target_label, default_metrics), status_message
 def load_leaderboard_one_vs_all(DIALECT_CONFUSION_LEADERBOARD_FILE):
     current_dir = os.path.dirname(os.path.abspath(__file__))
 def render_fixed_columns(df):
     """ A function to render HTML table with fixed 'model' column for better visibility """
+    # NOTE(review): this returns the NotImplementedError class object to the
+    # caller instead of raising it, and `df` is unused — confirm whether
+    # `raise NotImplementedError` was intended.
+    return NotImplementedError
+def update_repo_file(api, repo_id, filename, data, app_dir="/home/user/app", repo_type="space"):
+    """Write `data` as JSON into the app directory and upload it to the Hub repo.
+
+    Args:
+        api: Client exposing `upload_file` (callers in this file pass an `HfApi`).
+        repo_id: Hub repository id that receives the file.
+        filename: File name, used both under `app_dir` and as `path_in_repo`.
+        data: JSON-serializable object to store.
+        app_dir: Local directory the file is written to before uploading.
+            Defaults to the previously hard-coded "/home/user/app".
+        repo_type: Repository type passed to `upload_file`. Defaults to the
+            previously hard-coded "space".
+
+    Raises:
+        TypeError, ValueError: If `data` cannot be serialized to JSON.
+        OSError: If the local file cannot be written.
+        Any exception raised by `api.upload_file` propagates unchanged.
+    """
+    local_path = Path(app_dir) / filename
+    # Serialize before opening the file: opening in "w" mode truncates it, so a
+    # serialization error after that point would leave a corrupted JSON file.
+    payload = json.dumps(data, indent=4)
+    with open(local_path, "w", encoding="utf-8") as f:
+        f.write(payload)
+    # Upload the file back to the repository
+    api.upload_file(
+        path_or_fileobj=str(local_path),
+        path_in_repo=filename,
+        repo_id=repo_id,
+        repo_type=repo_type,
+        commit_message=f"Update {filename}",
+    )
+def update_darija_one_vs_all_leaderboard(result_df, model_name, target_lang, DIALECT_CONFUSION_LEADERBOARD_FILE="darija_leaderboard_dialect_confusion.json"):
+    """Record a model's per-dialect false-positive rates in the one-vs-all leaderboard.
+
+    Downloads the current leaderboard JSON from the `LEADERBOARD_PATH` Space,
+    stores each row's `false_positive_rate` under
+    `{target_lang: {dialect: {model_name: rate}}}` inside the top-level list,
+    then saves the result through `update_repo_file`.
+
+    Args:
+        result_df: DataFrame with `dialect` and `false_positive_rate` columns;
+            rows whose dialect is 'Other' are skipped.
+        model_name: Key under which the model's scores are stored.
+        target_lang: Key of the leaderboard entry that is updated.
+        DIALECT_CONFUSION_LEADERBOARD_FILE: Name of the leaderboard JSON file
+            in the repository.
+
+    Raises:
+        Exception: Any download, parsing or upload error is printed and
+            re-raised. Only a missing leaderboard file is tolerated, in which
+            case an empty leaderboard is started.
+    """
+    # Local import keeps this function independent of the module's import block.
+    from huggingface_hub.utils import EntryNotFoundError
+    # Initialize Hugging Face API
+    api = HfApi()
+    try:
+        try:
+            # HfApi has no `fetch_file_content`; calling it raised AttributeError,
+            # which the former bare `except:` swallowed, so every submission
+            # restarted from an empty leaderboard. Download the file instead,
+            # from the Space (the same repo_type the upload uses).
+            local_copy = api.hf_hub_download(
+                repo_id=LEADERBOARD_PATH,
+                filename=DIALECT_CONFUSION_LEADERBOARD_FILE,
+                repo_type="space",
+            )
+            with open(local_copy, "r", encoding="utf-8") as f:
+                data = json.load(f)
+        except EntryNotFoundError:
+            # No leaderboard file in the repository yet: start a new one.
+            # Other failures (network, auth, invalid JSON) propagate so that
+            # existing results are never overwritten by an empty list.
+            data = []
+        # The lookup does not depend on the row, so do it once; the entry is
+        # still created only when at least one row is actually recorded.
+        target_entry = next((item for item in data if target_lang in item), None)
+        for _, row in result_df.iterrows():
+            dialect = row['dialect']
+            if dialect == 'Other':
+                continue
+            if target_entry is None:
+                target_entry = {target_lang: {}}
+                data.append(target_entry)
+            target_entry[target_lang].setdefault(dialect, {})[model_name] = float(row['false_positive_rate'])
+        # Update the file in the repository
+        update_repo_file(api, LEADERBOARD_PATH, DIALECT_CONFUSION_LEADERBOARD_FILE, data)
+    except Exception as e:
+        print(f"Error updating repository: {str(e)}")
+        raise
+def update_darija_multilingual_leaderboard(result_df, model_name, MULTI_DIALECTS_LEADERBOARD_FILE="darija_leaderboard_multi_dialects.json"):
+    """Record a model's per-country metrics in the multi-dialect leaderboard.
+
+    Downloads the current leaderboard JSON from the `LEADERBOARD_PATH` Space,
+    stores one metrics dict per row of `result_df` under
+    `{country: {model_name: metrics}}` inside the top-level list, then saves
+    the result through `update_repo_file`.
+
+    Args:
+        result_df: DataFrame with a `country` column, a `samples` column and
+            one column per metric read below; rows whose country is 'Other'
+            are skipped.
+        model_name: Key under which the model's metrics are stored.
+        MULTI_DIALECTS_LEADERBOARD_FILE: Name of the leaderboard JSON file in
+            the repository.
+
+    Raises:
+        Exception: Any download, parsing or upload error is printed and
+            re-raised. Only a missing leaderboard file is tolerated, in which
+            case an empty leaderboard is started.
+    """
+    # Local import keeps this function independent of the module's import block.
+    from huggingface_hub.utils import EntryNotFoundError
+    # Columns copied verbatim as floats; the order is the order of the stored keys.
+    float_metrics = (
+        'f1_score',
+        'precision',
+        'recall',
+        'macro_f1_score',
+        'micro_f1_score',
+        'weighted_f1_score',
+        'specificity',
+        'false_positive_rate',
+        'false_negative_rate',
+        'negative_predictive_value',
+        'balanced_accuracy',
+        'matthews_correlation',
+    )
+    # Initialize Hugging Face API
+    api = HfApi()
+    try:
+        try:
+            # HfApi has no `fetch_file_content`; calling it raised AttributeError,
+            # which the former bare `except:` swallowed, so every submission
+            # restarted from an empty leaderboard. Download the file instead,
+            # from the Space (the same repo_type the upload uses).
+            local_copy = api.hf_hub_download(
+                repo_id=LEADERBOARD_PATH,
+                filename=MULTI_DIALECTS_LEADERBOARD_FILE,
+                repo_type="space",
+            )
+            with open(local_copy, "r", encoding="utf-8") as f:
+                data = json.load(f)
+        except EntryNotFoundError:
+            # No leaderboard file in the repository yet: start a new one.
+            # Other failures (network, auth, invalid JSON) propagate so that
+            # existing results are never overwritten by an empty list.
+            data = []
+        # Process the results
+        for _, row in result_df.iterrows():
+            country = row['country']
+            if country == 'Other':
+                continue
+            metrics = {name: float(row[name]) for name in float_metrics}
+            metrics['n_test_samples'] = int(row['samples'])
+            country_entry = next((item for item in data if country in item), None)
+            if country_entry is None:
+                country_entry = {country: {}}
+                data.append(country_entry)
+            # The entry was either found by `country in item` or just created
+            # with that key, so the key is guaranteed to exist here.
+            country_entry[country][model_name] = metrics
+        # Update the file in the repository
+        update_repo_file(api, LEADERBOARD_PATH, MULTI_DIALECTS_LEADERBOARD_FILE, data)
+    except Exception as e:
+        print(f"Error updating repository: {str(e)}")
+        raise