import base64
from huggingface_hub import hf_hub_download
import fasttext
import os
import json
import pandas as pd
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    balanced_accuracy_score,
    matthews_corrcoef
)
import numpy as np
from constants import *
from pathlib import Path
import logging


def get_repo_file_path(filename):
    """Get the full path to a file in the repository root."""
    repo_path = Path("/home/user/app")
    file_path = repo_path / filename
    return file_path


def predict_label(text, model, language_mapping_dict, use_mapping=False):
    """
    Runs predictions for a fasttext model.

    Args:
        text (str): The input text to classify.
        model (fasttext.FastText._FastText): The fasttext model to use for prediction.
        language_mapping_dict (dict): A dictionary mapping fasttext labels to human-readable language names.
        use_mapping (bool): Whether to use the language mapping dictionary.

    Returns:
        str: The predicted label for the input text.
    """
    text = str(text).strip().replace('\n', ' ')

    if text == '':
        return 'EMPTY'

    try:
        # Top-1 prediction; fasttext returns a tuple of (labels, probabilities)
        prediction = model.predict(text, 1)
        label = prediction[0][0].replace('__label__', '')
        confidence = prediction[1][0]  # currently unused, kept for debugging

        # Optionally map the raw fasttext label to a human-readable language name
        if use_mapping:
            label = language_mapping_dict.get(label, 'Other')
        return label

    except Exception as e:
        print(f"Error processing text: {text}")
        print(f"Exception: {e}")
        # Return a plain string so downstream metric code keeps working
        return 'Error'


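# Example usage (hypothetical model file and mapping, shown for illustration only):
#   model = fasttext.load_model("model.bin")
#   predict_label("kayn chi atay?", model, {"ary_Arab": "Morocco"}, use_mapping=True)
#   # -> "Morocco" (or the raw fasttext label when use_mapping=False)

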
def compute_classification_metrics(eval_dataset):
    """
    Compute comprehensive classification metrics for each class.

    Args:
        eval_dataset: Dataset (convertible to a pd.DataFrame) containing 'dialect' as true labels and 'preds' as predicted labels.

    Returns:
        pd.DataFrame: DataFrame with detailed metrics for each class.
    """
    data = pd.DataFrame(eval_dataset)

    true_labels = list(data['dialect'])
    predicted_labels = list(data['preds'])

    # Build a consistent label index over everything seen in gold labels or predictions
    labels = sorted(list(set(true_labels + predicted_labels)))
    label_to_index = {label: index for index, label in enumerate(labels)}

    true_indices = [label_to_index[label] for label in true_labels]
    pred_indices = [label_to_index[label] for label in predicted_labels]

    # Per-class scores
    f1_scores = f1_score(true_indices, pred_indices, average=None, labels=range(len(labels)))
    precision_scores = precision_score(true_indices, pred_indices, average=None, labels=range(len(labels)))
    recall_scores = recall_score(true_indices, pred_indices, average=None, labels=range(len(labels)))

    # Aggregate F1 scores
    macro_f1_score = f1_score(true_indices, pred_indices, average='macro')
    weighted_f1_score = f1_score(true_indices, pred_indices, average='weighted')
    micro_f1_score = f1_score(true_indices, pred_indices, average='micro')

    conf_mat = confusion_matrix(true_indices, pred_indices, labels=range(len(labels)))

    # Per-class counts derived from the confusion matrix.
    # Worked example (illustrative): with two labels and conf_mat = [[3, 1], [2, 4]],
    # column sums minus the diagonal give FP = [2, 1], row sums minus the diagonal
    # give FN = [1, 2], the diagonal gives TP = [3, 4], and the remainder gives TN = [4, 3].
    FP = conf_mat.sum(axis=0) - np.diag(conf_mat)
    FN = conf_mat.sum(axis=1) - np.diag(conf_mat)
    TP = np.diag(conf_mat)
    TN = conf_mat.sum() - (FP + FN + TP)

    samples_per_class = np.bincount(true_indices, minlength=len(labels))

    # Per-class rates; division by zero is silenced here and cleaned up below
    with np.errstate(divide='ignore', invalid='ignore'):
        fp_rate = FP / (FP + TN)
        fn_rate = FN / (FN + TP)
        specificity = TN / (TN + FP)
        npv = TN / (TN + FN)

    metrics = [fp_rate, fn_rate, specificity, npv]
    metrics = [np.nan_to_num(m, nan=0.0, posinf=0.0, neginf=0.0) for m in metrics]
    fp_rate, fn_rate, specificity, npv = metrics

    balanced_acc = balanced_accuracy_score(true_indices, pred_indices)
    mcc = matthews_corrcoef(true_indices, pred_indices)

    result_df = pd.DataFrame({
        'country': labels,
        'samples': samples_per_class,
        'f1_score': f1_scores,
        'macro_f1_score': macro_f1_score,
        'weighted_f1_score': weighted_f1_score,
        'micro_f1_score': micro_f1_score,
        'precision': precision_scores,
        'recall': recall_scores,
        'specificity': specificity,
        'false_positive_rate': fp_rate,
        'false_negative_rate': fn_rate,
        'true_positives': TP,
        'false_positives': FP,
        'true_negatives': TN,
        'false_negatives': FN,
        'negative_predictive_value': npv,
        'balanced_accuracy': balanced_acc,
        'matthews_correlation': mcc,
    })

    result_df = result_df.sort_values('samples', ascending=False)

    numeric_cols = result_df.select_dtypes(include=[np.number]).columns
    result_df[numeric_cols] = result_df[numeric_cols].round(4)

    print(f'[INFO] result_df:\n{result_df}')

    return result_df


def make_binary(dialect, target):
    if dialect != target:
        return 'Other'
    return target


def run_eval_one_vs_all(data_test, TARGET_LANG='Morocco'):
    # Binarised copy of the gold labels (TARGET_LANG vs. 'Other');
    # not used by the confusion computation below, which works on the original labels
    df_test_preds = data_test.copy()
    df_test_preds.loc[df_test_preds['dialect'] != TARGET_LANG, 'dialect'] = 'Other'

    # For each true dialect, compute the share of its samples assigned to each predicted label
    dialect_counts = data_test.groupby('dialect')['dialect'].count().reset_index(name='size')
    result_df = pd.merge(dialect_counts, data_test, on='dialect')
    result_df = result_df.groupby(['dialect', 'size', 'preds'])['preds'].count() / result_df.groupby(['dialect', 'size'])['preds'].count()
    result_df.sort_index(ascending=False, level='size', inplace=True)

    # Keep only the rows predicted as TARGET_LANG: the rate at which each dialect is confused with it
    out = result_df.copy()
    out.name = 'false_positive_rate'
    out = out.reset_index()
    out = out[out['preds'] == TARGET_LANG].drop(columns=['preds', 'size'])

    print(f'[INFO] out for TARGET_LANG={TARGET_LANG}:\n{out}')

    return out


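# Illustrative reading of the output (made-up numbers): if TARGET_LANG='Morocco'
# and 40% of the 'Algeria' test samples are predicted as 'Morocco', the returned
# frame contains a row with dialect='Algeria' and false_positive_rate=0.4.

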
def update_darija_one_vs_all_leaderboard(result_df, model_name, target_lang, DIALECT_CONFUSION_LEADERBOARD_FILE="darija_leaderboard_dialect_confusion.json"):
    file_path = get_repo_file_path(DIALECT_CONFUSION_LEADERBOARD_FILE)

    print(f"Attempting to access file at: {file_path}")
    print(f"File exists: {file_path.exists()}")
    print(f"File permissions: {oct(os.stat(file_path).st_mode)[-3:]}" if file_path.exists() else "File does not exist")

    try:
        # Load the existing leaderboard, or create an empty file if it does not exist yet
        if file_path.exists():
            try:
                with open(file_path, "r") as f:
                    data = json.load(f)
            except PermissionError:
                print(f"Permission denied reading file: {file_path}")
                raise
        else:
            data = []
            try:
                file_path.touch()
            except PermissionError:
                print(f"Permission denied creating file: {file_path}")
                raise

        # Record, for each confused dialect, this model's false positive rate against target_lang
        for _, row in result_df.iterrows():
            dialect = row['dialect']
            if dialect == 'Other':
                continue

            target_entry = next((item for item in data if target_lang in item), None)
            if target_entry is None:
                target_entry = {target_lang: {}}
                data.append(target_entry)

            country_data = target_entry[target_lang]

            if dialect not in country_data:
                country_data[dialect] = {}

            country_data[dialect][model_name] = float(row['false_positive_rate'])

        try:
            with open(file_path, "w") as f:
                json.dump(data, f, indent=4)
            print(f"Successfully wrote to file: {file_path}")
        except PermissionError:
            print(f"Permission denied writing to file: {file_path}")
            raise

    except Exception as e:
        print(f"Error handling file {file_path}: {str(e)}")
        raise


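# Illustrative shape of the dialect-confusion leaderboard file (model and
# dialect names are placeholders):
# [
#     {
#         "Morocco": {
#             "Algeria": {"some-org/some-model/model.bin": 0.1234},
#             "Tunisia": {"some-org/some-model/model.bin": 0.0567}
#         }
#     }
# ]

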
def handle_evaluation(model_path, model_path_bin, use_mapping=False):
    # Download the model binary from the Hugging Face Hub
    model_path_hub = hf_hub_download(repo_id=model_path, filename=model_path_bin, cache_dir=None)

    print(f"[INFO] Loading model from Path: {model_path_hub}, using version {model_path_bin}...")
    model = fasttext.load_model(model_path_hub)

    # eval_dataset and language_mapping_dict come from the star import of constants
    print("[INFO] Converting evaluation dataset to Pandas DataFrame...")
    df_eval = pd.DataFrame(eval_dataset)

    print("[INFO] Running predictions...")
    df_eval['preds'] = df_eval['text'].apply(lambda text: predict_label(text, model, language_mapping_dict, use_mapping=use_mapping))

    result_df = run_eval(df_eval)

    model_name = model_path + '/' + model_path_bin

    update_darija_multilingual_leaderboard(result_df, model_name, MULTI_DIALECTS_LEADERBOARD_FILE)

    # One-vs-all dialect confusion for every target language
    for target_lang in all_target_languages:
        result_df_one_vs_all = run_eval_one_vs_all(df_eval, TARGET_LANG=target_lang)
        update_darija_one_vs_all_leaderboard(result_df_one_vs_all, model_name, target_lang, DIALECT_CONFUSION_LEADERBOARD_FILE)

    # Reload the leaderboards for display
    df_multilingual = load_leaderboard_multilingual(MULTI_DIALECTS_LEADERBOARD_FILE)
    df_one_vs_all = load_leaderboard_one_vs_all(DIALECT_CONFUSION_LEADERBOARD_FILE)

    status_message = "**Evaluation now ended! 🤗**"

    return create_leaderboard_display_multilingual(df_multilingual, target_label, default_metrics), status_message


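# Example call (hypothetical Hub repo id and filename, for illustration only):
#   handle_evaluation("some-org/darija-fasttext-classifier", "model.bin", use_mapping=True)

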
def run_eval(df_eval):
    """Run evaluation on a dataset and compute metrics.

    Args:
        df_eval (pd.DataFrame): DataFrame with a 'dialect' column (true labels),
            a 'preds' column (predicted labels), and the raw text columns.

    Returns:
        pd.DataFrame: A DataFrame containing evaluation metrics.
    """
    df_eval_multilingual = df_eval.copy()

    # Keep only the label columns needed for metric computation
    df_eval_multilingual = df_eval_multilingual.drop(columns=['text', 'metadata', 'dataset_source'])

    print("[INFO] Computing metrics...")
    result_df = compute_classification_metrics(df_eval_multilingual)

    return result_df


def process_results_file(file, uploaded_model_name, base_path_save="./atlasia/submissions/", default_language='Morocco'):
    try:
        if file is None:
            return "Please upload a file."

        # Sanitise the submitted model name for use as a directory name
        uploaded_model_name = uploaded_model_name.strip().replace(" ", "_")
        print(f"[INFO] Uploaded model name: {uploaded_model_name}")

        path_saving = os.path.join(base_path_save, uploaded_model_name)
        os.makedirs(path_saving, exist_ok=True)

        saved_file_path = os.path.join(path_saving, 'submission.csv')

        print("[INFO] Loading csv results file...")
        df_eval = pd.read_csv(file.name)

        print(f"[INFO] Saving the file locally in: {saved_file_path}")
        df_eval.to_csv(saved_file_path, index=False)

    except Exception as e:
        return f"Error processing file: {str(e)}"

    print("[INFO] Computing metrics...")
    result_df = compute_classification_metrics(df_eval)

    update_darija_multilingual_leaderboard(result_df, uploaded_model_name, MULTI_DIALECTS_LEADERBOARD_FILE)

    # One-vs-all dialect confusion for every target language
    for target_lang in all_target_languages:
        result_df_one_vs_all = run_eval_one_vs_all(df_eval, TARGET_LANG=target_lang)
        update_darija_one_vs_all_leaderboard(result_df_one_vs_all, uploaded_model_name, target_lang, DIALECT_CONFUSION_LEADERBOARD_FILE)

    # Reload the leaderboards for display
    df_multilingual = load_leaderboard_multilingual(MULTI_DIALECTS_LEADERBOARD_FILE)
    df_one_vs_all = load_leaderboard_one_vs_all(DIALECT_CONFUSION_LEADERBOARD_FILE)

    status_message = "**Evaluation now ended! 🤗**"

    return create_leaderboard_display_multilingual(df_multilingual, target_label, default_metrics), status_message


def update_darija_multilingual_leaderboard(result_df, model_name, MULTI_DIALECTS_LEADERBOARD_FILE="darija_leaderboard_multi_dialects.json"):
    file_path = get_repo_file_path(MULTI_DIALECTS_LEADERBOARD_FILE)

    print(f"Attempting to access file at: {file_path}")
    print(f"File exists: {file_path.exists()}")
    print(f"File permissions: {oct(os.stat(file_path).st_mode)[-3:]}" if file_path.exists() else "File does not exist")

    try:
        # Load the existing leaderboard, or create an empty file if it does not exist yet
        if file_path.exists():
            try:
                with open(file_path, "r") as f:
                    data = json.load(f)
            except PermissionError:
                print(f"Permission denied reading file: {file_path}")
                raise
        else:
            data = []
            try:
                file_path.touch()
            except PermissionError:
                print(f"Permission denied creating file: {file_path}")
                raise

        # Record this model's per-country metrics
        for _, row in result_df.iterrows():
            country = row['country']
            if country == 'Other':
                continue

            metrics = {
                'f1_score': float(row['f1_score']),
                'precision': float(row['precision']),
                'recall': float(row['recall']),
                'macro_f1_score': float(row['macro_f1_score']),
                'micro_f1_score': float(row['micro_f1_score']),
                'weighted_f1_score': float(row['weighted_f1_score']),
                'specificity': float(row['specificity']),
                'false_positive_rate': float(row['false_positive_rate']),
                'false_negative_rate': float(row['false_negative_rate']),
                'negative_predictive_value': float(row['negative_predictive_value']),
                'balanced_accuracy': float(row['balanced_accuracy']),
                'matthews_correlation': float(row['matthews_correlation']),
                'n_test_samples': int(row['samples'])
            }

            country_entry = next((item for item in data if country in item), None)
            if country_entry is None:
                country_entry = {country: {}}
                data.append(country_entry)

            if country not in country_entry:
                country_entry[country] = {}
            country_entry[country][model_name] = metrics

        try:
            with open(file_path, "w") as f:
                json.dump(data, f, indent=4)
            print(f"Successfully wrote to file: {file_path}")
        except PermissionError:
            print(f"Permission denied writing to file: {file_path}")
            raise

    except Exception as e:
        print(f"Error handling file {file_path}: {str(e)}")
        raise


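# Illustrative shape of the multi-dialect leaderboard file (model name and
# metric values are placeholders):
# [
#     {
#         "Morocco": {
#             "some-org/some-model/model.bin": {
#                 "f1_score": 0.91,
#                 "precision": 0.90,
#                 "recall": 0.92,
#                 "...": "...",
#                 "n_test_samples": 1000
#             }
#         }
#     }
# ]

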
def load_leaderboard_one_vs_all(DIALECT_CONFUSION_LEADERBOARD_FILE):
    current_dir = os.path.dirname(os.path.abspath(__file__))
    DIALECT_CONFUSION_LEADERBOARD_FILE = os.path.join(current_dir, DIALECT_CONFUSION_LEADERBOARD_FILE)

    with open(DIALECT_CONFUSION_LEADERBOARD_FILE, "r") as f:
        data = json.load(f)

    rows = []

    for leaderboard_data in data:
        for target_language, results in leaderboard_data.items():
            for language, models in results.items():
                for model_name, false_positive_rate in models.items():
                    row = {
                        'target_language': target_language,
                        'language': language,
                        'model': model_name,
                        'false_positive_rate': false_positive_rate,
                    }
                    rows.append(row)

    df = pd.DataFrame(rows)

    df_pivot = df.pivot(index=['model', 'target_language'], columns='language', values='false_positive_rate').reset_index()

    return df_pivot


def load_leaderboard_multilingual(MULTI_DIALECTS_LEADERBOARD_FILE):
    current_dir = os.path.dirname(os.path.abspath(__file__))
    MULTI_DIALECTS_LEADERBOARD_FILE = os.path.join(current_dir, MULTI_DIALECTS_LEADERBOARD_FILE)

    with open(MULTI_DIALECTS_LEADERBOARD_FILE, "r") as f:
        data = json.load(f)

    rows = []

    for country_data in data:
        for country, models in country_data.items():
            for model_name, metrics in models.items():
                row = {
                    'country': country,
                    'model': model_name,
                }
                row.update(metrics)
                rows.append(row)

    df = pd.DataFrame(rows)
    return df


def create_leaderboard_display_one_vs_all(df, target_language, selected_languages):
    # Filter to the chosen target language
    if target_language:
        df = df[df['target_language'] == target_language]

    # Do not show the target language among the confusion columns
    if target_language in selected_languages:
        selected_languages = [lang for lang in selected_languages if lang != target_language]

    columns_to_show = ['model'] + [language for language in selected_languages if language in df.columns]

    # Sort by the first selected language column
    if selected_languages:
        df = df.sort_values(by=selected_languages[0], ascending=False)

    df = df[columns_to_show]

    numeric_cols = df.select_dtypes(include=['float64']).columns
    df[numeric_cols] = df[numeric_cols].round(4)

    return df, selected_languages


def create_leaderboard_display_multilingual(df, selected_country, selected_metrics):
    # Filter to a single country unless 'All' is selected
    if selected_country and selected_country.upper() != 'ALL':
        df = df[df['country'] == selected_country]
        df = df.drop(columns=['country'])

        columns_to_show = ['model'] + [metric for metric in selected_metrics if metric in df.columns]
    else:
        columns_to_show = ['model', 'country'] + selected_metrics

    # Sort by the first selected metric
    if selected_metrics:
        df = df.sort_values(by=selected_metrics[0], ascending=False)

    df = df[columns_to_show]

    numeric_cols = df.select_dtypes(include=['float64']).columns
    df[numeric_cols] = df[numeric_cols].round(4)

    return df


def update_leaderboard_multilingual(country, selected_metrics):
    if not selected_metrics:
        selected_metrics = metrics
    df = load_leaderboard_multilingual(MULTI_DIALECTS_LEADERBOARD_FILE)
    display_df = create_leaderboard_display_multilingual(df, country, selected_metrics)
    return display_df


def update_leaderboard_one_vs_all(target_language, selected_languages):
    if not selected_languages:
        selected_languages = default_languages
    df = load_leaderboard_one_vs_all(DIALECT_CONFUSION_LEADERBOARD_FILE)
    display_df, selected_languages = create_leaderboard_display_one_vs_all(df, target_language, selected_languages)

    return display_df, selected_languages


def encode_image_to_base64(image_path):
    """Encode the image at image_path as a base64 string."""
    with open(image_path, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode()
    return encoded_string


def create_html_image(image_path):
    """Create the HTML for the logo image from the given image path."""
    img_base64 = encode_image_to_base64(image_path)

    html_string = f"""
    <div style="display: flex; justify-content: center; align-items: center; width: 100%; text-align: center;">
        <div style="max-width: 800px; margin: auto;">
            <img src="data:image/jpeg;base64,{img_base64}"
                 style="max-width: 75%; height: auto; display: block; margin: 0 auto; margin-top: 50px;"
                 alt="Displayed Image">
        </div>
    </div>
    """
    return html_string


def render_fixed_columns(df):
    """Render an HTML table with a fixed 'model' column for better visibility."""
    raise NotImplementedError
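

# A minimal sketch of what such a renderer could look like, using CSS
# "position: sticky" to pin the first ('model') column while the table scrolls
# horizontally. Illustrative only: it is not wired into the app, and the
# markup and styling choices are assumptions.
def _render_fixed_columns_sketch(df):
    header_cells = "".join(f"<th>{col}</th>" for col in df.columns)
    body_rows = "".join(
        "<tr>" + "".join(f"<td>{value}</td>" for value in row) + "</tr>"
        for row in df.itertuples(index=False)
    )
    style = """
    <style>
        .fixed-col-table { border-collapse: collapse; width: 100%; }
        .fixed-col-table th:first-child,
        .fixed-col-table td:first-child { position: sticky; left: 0; background: #fff; }
    </style>
    """
    return (
        style
        + '<div style="overflow-x: auto;"><table class="fixed-col-table">'
        + f"<thead><tr>{header_cells}</tr></thead>"
        + f"<tbody>{body_rows}</tbody>"
        + "</table></div>"
    )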