freyam committed on
Commit 6d2d9db
1 Parent(s): e0db39e

Optimise evaluation logic

.gitignore CHANGED
@@ -5,6 +5,7 @@ flagged/
 check_gender_tagging.py
 *.py[cod]
 *$py.class
+playground.ipynb
 
 # C extensions
 *.so
README.md CHANGED
@@ -11,3 +11,59 @@ license: mit
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+{
+    gender: "307",
+    no gender: "193",
+    equal gender: "2",
+    female pg: "0",
+    male pg: "2",
+    female spg: "0",
+    male spg: "300"
+}
+
+{
+    both_gender_prof_match: "94",
+    count_male_pronoun: "738",
+    count_female_pronoun: "435",
+    count_male_pronoun_profession: "63",
+    count_female_pronoun_profession: "44",
+    total_sentence: "5224"
+}
+
+{
+    "genbit_score": 0.7511277214181936,
+    "percentage_of_female_gender_definition_words": 0.28019425675675674,
+    "percentage_of_male_gender_definition_words": 0.39125844594594594,
+    "percentage_of_non_binary_gender_definition_words": 0.3285472972972973,
+    "percentage_of_trans_gender_definition_words": 1,
+    "percentage_of_cis_gender_definition_words": 0,
+    "additional_metrics": {
+        "avg_bias_ratio": 0.30200560886941735,
+        "avg_bias_conditional": 0.24803272381904817,
+        "avg_bias_ratio_absolute": 0.7634929817138464,
+        "avg_bias_conditional_absolute": 0.7511277214181936,
+        "avg_non_binary_bias_ratio": 0.6588525475408009,
+        "avg_non_binary_bias_conditional": 0.5042607755622402,
+        "avg_non_binary_bias_ratio_absolute": 0.8116124669471863,
+        "avg_non_binary_bias_conditional_absolute": 0.7191749038014791,
+        "avg_trans_cis_bias_ratio": -1.4428077555033436,
+        "avg_trans_cis_bias_conditional": -1.1196624683470209,
+        "avg_trans_cis_bias_ratio_absolute": 1.4428077555033436,
+        "avg_trans_cis_bias_conditional_absolute": 1.1683757280093239,
+        "std_dev_bias_ratio": 0,
+        "std_dev_bias_conditional": 0,
+        "std_dev_non_binary_bias_ratio": 0,
+        "std_dev_non_binary_bias_conditional": 0,
+        "std_dev_trans_cis_bias_ratio": 0,
+        "std_dev_trans_cis_bias_conditional": 0
+    },
+    "statistics": {
+        "frequency_cutoff": 7.433655937499999,
+        "num_words_considered": 1539,
+        "freq_of_female_gender_definition_words": 1327,
+        "freq_of_male_gender_definition_words": 1853,
+        "freq_of_non_binary_gender_definition_words": 1556,
+        "jsd": 0.067655503412491
+    }
+}
app.py CHANGED
@@ -9,7 +9,8 @@ from scripts.gender_divide import *
 
 methodologies = json.load(open("config/methodologies.json", "r"))
 
-MAX_THRESHOLD = 1000
+MAX_THRESHOLD = 5000
+DATASET_CACHE = {}
 
 
 def evaluate(dataset, sampling_method, sampling_size, column, methodology):
@@ -17,7 +18,9 @@ def evaluate(dataset, sampling_method, sampling_size, column, methodology):
         print(
             f"[{dataset.name.split('/')[-1]}::{column}] - {sampling_method} {sampling_size} entries"
         )
-        data = pd.read_csv(dataset.name, usecols=[column])
+        data = DATASET_CACHE.setdefault(dataset.name, pd.read_csv(dataset.name))[
+            [column]
+        ]
 
         if sampling_method == "First":
             data = data.head(sampling_size)
@@ -26,24 +29,25 @@ def evaluate(dataset, sampling_method, sampling_size, column, methodology):
         elif sampling_method == "Random":
             data = data.sample(n=sampling_size, random_state=42)
 
-        result = globals()[methodologies.get(methodology).get("fx")](data)
+        result_json = globals()[methodologies.get(methodology).get("fx")](data)
 
-        return gr.JSON.update(result, visible=True)
+        result_df = pd.DataFrame.from_dict(result_json, orient="index").reset_index()
+        result_df.columns = ["Metric", "Value"]
+
+        return gr.Dataframe.update(result_df, visible=True)
     except Exception as e:
         return gr.JSON.update(
-            {
-                "error": f"An error occurred while processing the dataset. Please check the dataset and try again. Error: {e}"
-            },
+            {"error": f"An error occurred while processing the dataset. {e}"},
             visible=True,
         )
 
 
 def display_dataset_config(dataset):
     try:
-        data = pd.read_csv(dataset.name)
+        data = DATASET_CACHE.setdefault(dataset.name, pd.read_csv(dataset.name))
 
         columns = data.select_dtypes(include=["object"]).columns.tolist()
-        corpus = data[columns[0]].tolist()
+        corpus = data[columns[0]].tolist()[0:5]
 
         return (
             gr.Radio.update(
@@ -59,7 +63,7 @@ def display_dataset_config(dataset):
                 info=f"Determines the number of entries to be analyzed. Due to computational constraints, the maximum number of entries that can be analyzed is {MAX_THRESHOLD}.",
                 minimum=1,
                 maximum=min(data.shape[0], MAX_THRESHOLD),
-                value=min(data.shape[0], MAX_THRESHOLD) // 2,
+                value=min(data.shape[0], MAX_THRESHOLD),
                 visible=True,
                 interactive=True,
             ),
@@ -72,7 +76,7 @@ def display_dataset_config(dataset):
                 interactive=True,
             ),
             gr.DataFrame.update(
-                value=pd.DataFrame({f"Data Corpus: {columns[0]}": corpus}), visible=True
+                value=pd.DataFrame({f"{columns[0]}": corpus}), visible=True
             ),
         )
     except:
@@ -85,12 +89,10 @@ def display_dataset_config(dataset):
 
 
 def update_column_metadata(dataset, column):
-    data = pd.read_csv(dataset.name)
-    corpus = data[column].tolist()
+    data = DATASET_CACHE.setdefault(dataset.name, pd.read_csv(dataset.name))
+    corpus = data[column].tolist()[0:5]
 
-    return gr.Dataframe.update(
-        value=pd.DataFrame({f"Data Corpus: {column}": corpus}), visible=True
-    )
+    return gr.Dataframe.update(value=pd.DataFrame({f"{column}": corpus}), visible=True)
 
 
 def get_methodology_metadata(methodology):
@@ -109,7 +111,11 @@ BiasAware = gr.Blocks(title="BiasAware: Dataset Bias Detection")
 
 with BiasAware:
     gr.Markdown(
-        "# BiasAware: Dataset Bias Detection\n\nBiasAware is a specialized tool for detecting and quantifying biases within datasets used for Natural Language Processing (NLP) tasks. NLP training datasets frequently mirror the inherent biases of their source materials, resulting in AI models that unintentionally perpetuate stereotypes, exhibit underrepresentation, and showcase skewed perspectives."
+        """
+        # BiasAware: Dataset Bias Detection
+
+        BiasAware is a specialized tool for detecting and quantifying biases within datasets used for Natural Language Processing (NLP) tasks. NLP training datasets frequently mirror the inherent biases of their source materials, resulting in AI models that unintentionally perpetuate stereotypes, exhibit underrepresentation, and showcase skewed perspectives.
+        """
    )
 
    with gr.Row():
@@ -119,7 +125,7 @@ with BiasAware:
            dataset_file = gr.File(label="Dataset", file_types=["csv"])
            dataset_examples = gr.Examples(
                [
-                    os.path.join(os.path.dirname(__file__), "data/z_animal.csv"),
+                    os.path.join(os.path.dirname(__file__), "data/imdb_100.csv"),
                    os.path.join(os.path.dirname(__file__), "data/z_employee.csv"),
                    os.path.join(os.path.dirname(__file__), "data/z_sentences.csv"),
                ],
@@ -151,10 +157,7 @@ with BiasAware:
        with gr.Column(scale=4):
            gr.Markdown("## Result")
 
-            result_status = gr.JSON(visible=False)
-            result = gr.DataFrame(
-                row_count=(5, "fixed"), col_count=(3, "fixed"), visible=False
-            )
+            result = gr.DataFrame(visible=False)
 
    dataset_file.change(
        fn=display_dataset_config,
@@ -188,7 +191,7 @@ with BiasAware:
            dataset_column,
            methodology,
        ],
-        outputs=[result_status],
+        outputs=[result],
    )
 
 BiasAware.launch()
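
The heart of the optimisation is visible above: `evaluate`, `display_dataset_config`, and `update_column_metadata` each used to call `pd.read_csv` on every interaction, and now share a module-level `DATASET_CACHE` keyed by file path. Below is a minimal sketch of the same pattern; the `load_dataset` helper is hypothetical, not part of the commit. One caveat worth knowing: `dict.setdefault(key, pd.read_csv(path))` still evaluates `pd.read_csv` on every call, so an explicit membership test like the one below is what actually skips the re-parse on cache hits.

import pandas as pd

# Sketch only: a cache-aware CSV loader. load_dataset is a hypothetical
# name used for illustration; the commit inlines this logic instead.
DATASET_CACHE = {}

def load_dataset(path):
    # Parse the CSV once; later calls reuse the cached DataFrame.
    if path not in DATASET_CACHE:
        DATASET_CACHE[path] = pd.read_csv(path)
    return DATASET_CACHE[path]

df_first = load_dataset("data/imdb_100.csv")  # parses the file
df_again = load_dataset("data/imdb_100.csv")  # served from the cache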
data/amazon_reviews.json DELETED
@@ -1,9 +0,0 @@
-{
-    "gender" : 14500,
-    "no gender" : 195500,
-    "equal gender" : 253,
-    "female pg" : 125,
-    "male pg" : 117,
-    "female spg" : 7196,
-    "male spg" : 6809
-}
data/imdb.json DELETED
@@ -1,9 +0,0 @@
-{
-    "gender" : 36174,
-    "no gender" : 13826,
-    "equal gender" : 2160,
-    "female pg" : 2776,
-    "male pg" : 3440,
-    "female spg" : 6918,
-    "male spg" : 20880
-}
data/imdb_100.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/tweet_eval.json DELETED
@@ -1,9 +0,0 @@
-{
-    "gender" : 10247,
-    "no gender" : 49652,
-    "equal gender" : 141,
-    "female pg" : 37,
-    "male pg" : 42,
-    "female spg" : 2478,
-    "male spg" : 7549
-}
data/z_animal.csv DELETED
@@ -1,11 +0,0 @@
-AnimalID,CommonName,ScientificName,Class,Order,Family,Habitat,ConservationStatus
-1,Lion,Panthera leo,Mammalia,Carnivora,Felidae,Savanna,Vulnerable
-2,Eagle,Aquila chrysaetos,Aves,Accipitriformes,Accipitridae,Mountains,Least Concern
-3,Dolphin,Tursiops truncatus,Mammalia,Cetacea,Delphinidae,Ocean,Least Concern
-4,Elephant,Loxodonta africana,Mammalia,Proboscidea,Elephantidae,Grassland,Vulnerable
-5,Tiger,Panthera tigris,Mammalia,Carnivora,Felidae,Forest,Endangered
-6,Penguin,Spheniscidae,Aves,Sphenisciformes,Spheniscidae,Antarctica,Least Concern
-7,Giraffe,Giraffa camelopardalis,Mammalia,Artiodactyla,Giraffidae,Savanna,Vulnerable
-8,Cheetah,Acinonyx jubatus,Mammalia,Carnivora,Felidae,Grassland,Vulnerable
-9,Panda,Ailuropoda melanoleuca,Mammalia,Carnivora,Ursidae,Forest,Endangered
-10,Kangaroo,Macropus rufus,Mammalia,Diprotodontia,Macropodidae,Grassland,Least Concern
scripts/genbit.py CHANGED
@@ -6,9 +6,9 @@ def eval_genbit(data):
         language_code="en", context_window=5, distance_weight=0.95, percentile_cutoff=80
     )
 
-    data[data.columns[0]] = data[data.columns[0]].to_list()
+    data = data[data.columns[0]].to_list()
 
     genbit_metrics.add_data(data, tokenized=False)
-    genbit_metrics = genbit_metrics.get_metrics(output_word_list=False)
+    genbit_metrics_dict = genbit_metrics.get_metrics(output_word_list=False)
 
-    return genbit_metrics
+    return genbit_metrics_dict
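
For reference, a minimal end-to-end sketch of the refactored flow, assuming Microsoft's genbit package is installed (the `GenBitMetrics` import path is an assumption based on that package, not shown in this commit) and using a made-up two-sentence corpus; real runs would feed far more text.

import pandas as pd
from genbit.genbit_metrics import GenBitMetrics  # assumed import path

genbit_metrics = GenBitMetrics(
    language_code="en", context_window=5, distance_weight=0.95, percentile_cutoff=80
)

# The fix: pass GenBit a plain list of strings instead of writing the list
# back into the DataFrame column and handing over the whole frame.
data = pd.DataFrame({"text": ["She is a doctor.", "He is a nurse."]})
sentences = data[data.columns[0]].to_list()

genbit_metrics.add_data(sentences, tokenized=False)
print(genbit_metrics.get_metrics(output_word_list=False))  # dict of scores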
scripts/gender_divide.py CHANGED
@@ -4,78 +4,47 @@ import json
 gender_lexicons = json.load(open("config/gender_lexicons.json", "r"))
 
 
-def count_male_terms(text, male_terms):
-    pattern = r"\b({})\b".format("|".join(male_terms))
-    match = re.findall(pattern, str(text))
-    return len(match)
-
-
-def count_female_terms(text, female_terms):
-    pattern = r"\b({})\b".format("|".join(female_terms))
-    match = re.findall(pattern, str(text))
-    return len(match)
+def count_gender_terms(text, gender_terms):
+    pattern = r"\b({})\b".format("|".join(gender_terms))
+    matches = re.findall(pattern, str(text))
+    return len(matches)
 
 
 def get_gender_tag(count_m_term, count_f_term):
-    tag = ""
-    if count_m_term == 0 and count_f_term == 0:
-        tag = "No Gender"
-
-    elif count_m_term == count_f_term:
-        tag = "Equal Gender"
+    total_terms = count_m_term + count_f_term
+    if total_terms == 0:
+        return "No Gender"
 
-    elif count_m_term > count_f_term:
-        m_proportion = (count_m_term / (count_m_term + count_f_term)) * 100
-        if m_proportion >= 50 and m_proportion < 75:
-            tag = "Male Positive Gender"
-        elif m_proportion >= 75:
-            tag = "Male Strongly Positive Gender"
+    m_proportion = (count_m_term / total_terms) * 100
+    if m_proportion >= 75:
+        return "Male Strongly Positive Gender"
+    elif m_proportion >= 50:
+        return "Male Positive Gender"
 
-    elif count_m_term < count_f_term:
-        f_proportion = (count_f_term / (count_m_term + count_f_term)) * 100
-        if f_proportion >= 50 and f_proportion < 75:
-            tag = "Female Positive Gender"
-        elif f_proportion >= 75:
-            tag = "Female Strongly Positive Gender"
+    f_proportion = (count_f_term / total_terms) * 100
+    if f_proportion >= 75:
+        return "Female Strongly Positive Gender"
+    elif f_proportion >= 50:
+        return "Female Positive Gender"
 
-    return tag
+    return "Equal Gender"
 
 
 def get_pg_spg(sample_df):
-    count_no_gender_sentences = sample_df[sample_df["gender_cat"] == "No Gender"][
-        "gender_cat"
-    ].count()
-
-    count_gender_sentences = sample_df[sample_df["gender_cat"] != "No Gender"][
-        "gender_cat"
-    ].count()
-    count_equal_gender = sample_df[sample_df["gender_cat"] == "Equal Gender"][
-        "gender_cat"
-    ].count()
-
-    count_male_pg = sample_df[sample_df["gender_cat"] == "Male Positive Gender"][
-        "gender_cat"
-    ].count()
-    count_male_spg = sample_df[
-        sample_df["gender_cat"] == "Male Strongly Positive Gender"
-    ]["gender_cat"].count()
-
-    count_female_pg = sample_df[sample_df["gender_cat"] == "Female Positive Gender"][
-        "gender_cat"
-    ].count()
-    count_female_spg = sample_df[
-        sample_df["gender_cat"] == "Female Stronly Positive Gender"
-    ]["gender_cat"].count()
-
-    return {
-        "gender": str(count_gender_sentences),
-        "no gender": str(count_no_gender_sentences),
-        "equal gender": str(count_equal_gender),
-        "female pg": str(count_female_pg),
-        "male pg": str(count_male_pg),
-        "female spg": str(count_female_spg),
-        "male spg": str(count_male_spg),
-    }
+    gender_labels = [
+        "Gender",
+        "No Gender",
+        "Equal Gender",
+        "Female Positive Gender",
+        "Male Positive Gender",
+        "Female Strongly Positive Gender",
+        "Male Strongly Positive Gender",
+    ]
+
+    gender_counts = sample_df["gender_cat"].value_counts()
+    result = {label: str(gender_counts.get(label, 0)) for label in gender_labels}
+
+    return result
 
 
 def eval_gender_divide(data):
@@ -85,10 +54,10 @@ def eval_gender_divide(data):
     data[data.columns[0]] = data[data.columns[0]].str.lower().str.strip()
 
     data["count_male_term"] = data.apply(
-        lambda x: count_male_terms(x[data.columns[0]], male_terms), axis=1
+        lambda x: count_gender_terms(x[data.columns[0]], male_terms), axis=1
     )
     data["count_female_term"] = data.apply(
-        lambda x: count_female_terms(x[:], female_terms), axis=1
+        lambda x: count_gender_terms(x[:], female_terms), axis=1
     )
 
     data["gender_cat"] = data.apply(
scripts/gender_profession_bias.py CHANGED
@@ -85,20 +85,19 @@ def call_multiprocessing_pool(df_text):
 
 
 def get_statistics(result):
-    conditions = {
-        "both_gender_prof_match": result["Both Match"].eq("Yes"),
-        "count_male_pronoun": result["Male Pronoun"].ne(""),
-        "count_female_pronoun": result["Female Pronoun"].ne(""),
-        "count_male_pronoun_profession": result["Male Pronoun"].ne("")
-        & result["Profession"].ne(""),
-        "count_female_pronoun_profession": result["Female Pronoun"].ne("")
-        & result["Profession"].ne(""),
+    stats = {
+        "both_gender_prof_match": str((result["Both Match"] == "Yes").sum()),
+        "count_male_pronoun": str((result["Male Pronoun"] != "").sum()),
+        "count_female_pronoun": str((result["Female Pronoun"] != "").sum()),
+        "count_male_pronoun_profession": str(
+            ((result["Male Pronoun"] != "") & (result["Profession"] != "")).sum()
+        ),
+        "count_female_pronoun_profession": str(
+            ((result["Female Pronoun"] != "") & (result["Profession"] != "")).sum()
+        ),
+        "total_sentence": str(len(result)),
     }
 
-    stats = {key: str(value.sum()) for key, value in conditions.items()}
-
-    stats["total_sentence"] = str(len(result))
-
     return stats
 
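
The rewrite inlines each statistic as a summed boolean mask instead of building a `conditions` dict first; `==`/`!=` on a Series behave like the `.eq`/`.ne` calls they replace, so the counts are unchanged. A toy demonstration with made-up rows that mirror the script's column names:

import pandas as pd

result = pd.DataFrame(
    {
        "Male Pronoun": ["he", "", "he"],
        "Female Pronoun": ["", "she", ""],
        "Profession": ["doctor", "", "nurse"],
        "Both Match": ["Yes", "No", "Yes"],
    }
)

# Comparisons yield boolean Series; .sum() counts the True rows.
print((result["Both Match"] == "Yes").sum())                                   # 2
print(((result["Male Pronoun"] != "") & (result["Profession"] != "")).sum())   # 2
print(len(result))                                                             # 3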