BounharAbdelaziz committed on
Commit 2cabce8 · 1 Parent(s): c591ceb

changed path to /home/user/app for accessing json files

Files changed (2):
  1. app.py +1 -0
  2. utils.py +126 -86
app.py CHANGED
@@ -13,6 +13,7 @@ from constants import *
 if __name__ == "__main__":
 
     with gr.Blocks() as app:
+
         base_path = os.path.dirname(__file__)
         local_image_path = os.path.join(base_path, 'open_arabic_lid_arena.png')
 
utils.py CHANGED
@@ -13,8 +13,16 @@ from sklearn.metrics import (
     matthews_corrcoef
 )
 import numpy as np
-
 from constants import *
+from pathlib import Path
+import logging
+
+
+def get_repo_file_path(filename):
+    """Get the full path to a file in the repository root"""
+    repo_path = Path("/home/user/app")
+    file_path = repo_path / filename
+    return file_path
 
 def predict_label(text, model, language_mapping_dict, use_mapping=False):
     """
@@ -182,48 +190,64 @@ def run_eval_one_vs_all(data_test, TARGET_LANG='Morocco'):
 
     return out
 
-def update_darija_one_vs_all_leaderboard(result_df, model_name, target_lang, DIALECT_CONFUSION_LEADERBOARD_FILE="darija_leaderboard_binary.json"):
-
-    # use base path to ensure correct saving
-    base_path = os.path.dirname(__file__)
-    json_file_path = os.path.join(base_path, DIALECT_CONFUSION_LEADERBOARD_FILE)
+
+def update_darija_one_vs_all_leaderboard(result_df, model_name, target_lang, DIALECT_CONFUSION_LEADERBOARD_FILE="darija_leaderboard_dialect_confusion.json"):
+    file_path = get_repo_file_path(DIALECT_CONFUSION_LEADERBOARD_FILE)
 
-    print(f"[INFO] Loading leaderboard data (json file) from: {json_file_path}")
+    # Log file information for debugging
+    print(f"Attempting to access file at: {file_path}")
+    print(f"File exists: {file_path.exists()}")
+    print(f"File permissions: {oct(os.stat(file_path).st_mode)[-3:]}" if file_path.exists() else "File does not exist")
 
-    # Load leaderboard data
     try:
-        with open(json_file_path, "r") as f:
-            data = json.load(f)
-    except FileNotFoundError:
-        data = []
-
-    # Process the results for each dialect/country
-    for _, row in result_df.iterrows():
-        dialect = row['dialect']
-        # Skip 'Other' class, it is considered as the null space
-        if dialect == 'Other':
-            continue
-
-        # Find existing target_lang entry or create a new one
-        target_entry = next((item for item in data if target_lang in item), None)
-        if target_entry is None:
-            target_entry = {target_lang: {}}
-            data.append(target_entry)
-
-        # Get the country-specific data for this target language
-        country_data = target_entry[target_lang]
-
-        # Initialize the dialect/country entry if it doesn't exist
-        if dialect not in country_data:
-            country_data[dialect] = {}
+        # Try to read existing data
+        if file_path.exists():
+            try:
+                with open(file_path, "r") as f:
+                    data = json.load(f)
+            except PermissionError:
+                print(f"Permission denied reading file: {file_path}")
+                raise
+        else:
+            data = []
+            # Try to create the file
+            try:
+                file_path.touch()
+            except PermissionError:
+                print(f"Permission denied creating file: {file_path}")
+                raise
+
+        # Process the results for each dialect/country
+        for _, row in result_df.iterrows():
+            dialect = row['dialect']
+            if dialect == 'Other':
+                continue
+
+            target_entry = next((item for item in data if target_lang in item), None)
+            if target_entry is None:
+                target_entry = {target_lang: {}}
+                data.append(target_entry)
 
-        # Update the model metrics under the model name for the given dialect
-        country_data[dialect][model_name] = float(row['false_positive_rate'])
-
-    # Save updated leaderboard data
-    with open(json_file_path, "w") as f:
-        json.dump(data, f, indent=4)
+            country_data = target_entry[target_lang]
+
+            if dialect not in country_data:
+                country_data[dialect] = {}
 
+            country_data[dialect][model_name] = float(row['false_positive_rate'])
+
+        # Try to write the updated data
+        try:
+            with open(file_path, "w") as f:
+                json.dump(data, f, indent=4)
+            print(f"Successfully wrote to file: {file_path}")
+        except PermissionError:
+            print(f"Permission denied writing to file: {file_path}")
+            raise
+
+    except Exception as e:
+        print(f"Error handling file {file_path}: {str(e)}")
+        raise
+
 def handle_evaluation(model_path, model_path_bin, use_mapping=False):
 
     # download model and get the model path
@@ -340,59 +364,75 @@ def process_results_file(file, uploaded_model_name, base_path_save="./atlasia/su
 
     return create_leaderboard_display_multilingual(df_multilingual, target_label, default_metrics), status_message
 
-def update_darija_multilingual_leaderboard(result_df, model_name, MULTI_DIALECTS_LEADERBOARD_FILE):
+def update_darija_multilingual_leaderboard(result_df, model_name, MULTI_DIALECTS_LEADERBOARD_FILE="darija_leaderboard_multi_dialects.json"):
+    file_path = get_repo_file_path(MULTI_DIALECTS_LEADERBOARD_FILE)
 
-    # use base path to ensure correct saving
-    base_path = os.path.dirname(__file__)
-    json_file_path = os.path.join(base_path, MULTI_DIALECTS_LEADERBOARD_FILE)
+    # Log file information for debugging
+    print(f"Attempting to access file at: {file_path}")
+    print(f"File exists: {file_path.exists()}")
+    print(f"File permissions: {oct(os.stat(file_path).st_mode)[-3:]}" if file_path.exists() else "File does not exist")
 
-    print(f"[INFO] Loading leaderboard data (json file) from: {json_file_path}")
-
-    # Load leaderboard data
    try:
-        with open(json_file_path, "r") as f:
-            data = json.load(f)
-    except FileNotFoundError:
-        data = []
-
-    # Process the results for each dialect/country
-    for _, row in result_df.iterrows():
-        country = row['country']
-        # skip 'Other' class, it is considered as the null space
-        if country == 'Other':
-            continue
+        # Try to read existing data
+        if file_path.exists():
+            try:
+                with open(file_path, "r") as f:
+                    data = json.load(f)
+            except PermissionError:
+                print(f"Permission denied reading file: {file_path}")
+                raise
+        else:
+            data = []
+            # Try to create the file
+            try:
+                file_path.touch()
+            except PermissionError:
+                print(f"Permission denied creating file: {file_path}")
+                raise
 
-        # Create metrics dictionary directly
-        metrics = {
-            'f1_score': float(row['f1_score']),
-            'precision': float(row['precision']),
-            'recall': float(row['recall']),
-            'macro_f1_score': float(row['macro_f1_score']),
-            'micro_f1_score': float(row['micro_f1_score']),
-            'weighted_f1_score': float(row['weighted_f1_score']),
-            'specificity': float(row['specificity']),
-            'false_positive_rate': float(row['false_positive_rate']),
-            'false_negative_rate': float(row['false_negative_rate']),
-            'negative_predictive_value': float(row['negative_predictive_value']),
-            'balanced_accuracy': float(row['balanced_accuracy']),
-            'matthews_correlation': float(row['matthews_correlation']),
-            'n_test_samples': int(row['samples'])
-        }
-
-        # Find existing country entry or create new one
-        country_entry = next((item for item in data if country in item), None)
-        if country_entry is None:
-            country_entry = {country: {}}
-            data.append(country_entry)
+        # Process the results for each dialect/country
+        for _, row in result_df.iterrows():
+            country = row['country']
+            if country == 'Other':
+                continue
+
+            metrics = {
+                'f1_score': float(row['f1_score']),
+                'precision': float(row['precision']),
+                'recall': float(row['recall']),
+                'macro_f1_score': float(row['macro_f1_score']),
+                'micro_f1_score': float(row['micro_f1_score']),
+                'weighted_f1_score': float(row['weighted_f1_score']),
+                'specificity': float(row['specificity']),
+                'false_positive_rate': float(row['false_positive_rate']),
+                'false_negative_rate': float(row['false_negative_rate']),
+                'negative_predictive_value': float(row['negative_predictive_value']),
+                'balanced_accuracy': float(row['balanced_accuracy']),
+                'matthews_correlation': float(row['matthews_correlation']),
+                'n_test_samples': int(row['samples'])
+            }
+
+            country_entry = next((item for item in data if country in item), None)
+            if country_entry is None:
+                country_entry = {country: {}}
+                data.append(country_entry)
 
-        # Update the model metrics directly under the model name
-        if country not in country_entry:
-            country_entry[country] = {}
-        country_entry[country][model_name] = metrics
-
-    # Save updated leaderboard data
-    with open(json_file_path, "w") as f:
-        json.dump(data, f, indent=4)
+            if country not in country_entry:
+                country_entry[country] = {}
+            country_entry[country][model_name] = metrics
+
+        # Try to write the updated data
+        try:
+            with open(file_path, "w") as f:
+                json.dump(data, f, indent=4)
+            print(f"Successfully wrote to file: {file_path}")
+        except PermissionError:
+            print(f"Permission denied writing to file: {file_path}")
+            raise
+
+    except Exception as e:
+        print(f"Error handling file {file_path}: {str(e)}")
+        raise
 
 
 def load_leaderboard_one_vs_all(DIALECT_CONFUSION_LEADERBOARD_FILE):
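For context, a minimal standalone sketch (not part of the commit) of the path-resolution change described in the commit message: leaderboard JSON files are now looked up under the fixed directory /home/user/app instead of os.path.dirname(__file__). The script below only mirrors the get_repo_file_path helper added in utils.py and checks that the two leaderboard files named in the diff resolve there; it assumes /home/user/app really is the Space's working directory.

# Illustrative sketch, assuming the app is deployed under /home/user/app as hardcoded above.
from pathlib import Path

REPO_PATH = Path("/home/user/app")

def get_repo_file_path(filename):
    """Resolve a file at the repository root of the deployed Space."""
    return REPO_PATH / filename

if __name__ == "__main__":
    # Quick check that the leaderboard JSON files referenced in the diff are reachable.
    for name in ("darija_leaderboard_dialect_confusion.json",
                 "darija_leaderboard_multi_dialects.json"):
        path = get_repo_file_path(name)
        print(f"{name} -> {path} (exists: {path.exists()})")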