freyam committed on
Commit 6d2d9db
1 Parent(s): e0db39e

Optimise evaluation logic

.gitignore CHANGED
@@ -5,6 +5,7 @@ flagged/
 check_gender_tagging.py
 *.py[cod]
 *$py.class
+playground.ipynb
 
 # C extensions
 *.so
README.md CHANGED
@@ -11,3 +11,59 @@ license: mit
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+{
+    gender: "307",
+    no gender: "193",
+    equal gender: "2",
+    female pg: "0",
+    male pg: "2",
+    female spg: "0",
+    male spg: "300"
+}
+
+{
+    both_gender_prof_match: "94",
+    count_male_pronoun: "738",
+    count_female_pronoun: "435",
+    count_male_pronoun_profession: "63",
+    count_female_pronoun_profession: "44",
+    total_sentence: "5224"
+}
+
+{
+    "genbit_score": 0.7511277214181936,
+    "percentage_of_female_gender_definition_words": 0.28019425675675674,
+    "percentage_of_male_gender_definition_words": 0.39125844594594594,
+    "percentage_of_non_binary_gender_definition_words": 0.3285472972972973,
+    "percentage_of_trans_gender_definition_words": 1,
+    "percentage_of_cis_gender_definition_words": 0,
+    "additional_metrics": {
+        "avg_bias_ratio": 0.30200560886941735,
+        "avg_bias_conditional": 0.24803272381904817,
+        "avg_bias_ratio_absolute": 0.7634929817138464,
+        "avg_bias_conditional_absolute": 0.7511277214181936,
+        "avg_non_binary_bias_ratio": 0.6588525475408009,
+        "avg_non_binary_bias_conditional": 0.5042607755622402,
+        "avg_non_binary_bias_ratio_absolute": 0.8116124669471863,
+        "avg_non_binary_bias_conditional_absolute": 0.7191749038014791,
+        "avg_trans_cis_bias_ratio": -1.4428077555033436,
+        "avg_trans_cis_bias_conditional": -1.1196624683470209,
+        "avg_trans_cis_bias_ratio_absolute": 1.4428077555033436,
+        "avg_trans_cis_bias_conditional_absolute": 1.1683757280093239,
+        "std_dev_bias_ratio": 0,
+        "std_dev_bias_conditional": 0,
+        "std_dev_non_binary_bias_ratio": 0,
+        "std_dev_non_binary_bias_conditional": 0,
+        "std_dev_trans_cis_bias_ratio": 0,
+        "std_dev_trans_cis_bias_conditional": 0
+    },
+    "statistics": {
+        "frequency_cutoff": 7.433655937499999,
+        "num_words_considered": 1539,
+        "freq_of_female_gender_definition_words": 1327,
+        "freq_of_male_gender_definition_words": 1853,
+        "freq_of_non_binary_gender_definition_words": 1556,
+        "jsd": 0.067655503412491
+    }
+}
app.py CHANGED
@@ -9,7 +9,8 @@ from scripts.gender_divide import *
 
 methodologies = json.load(open("config/methodologies.json", "r"))
 
-MAX_THRESHOLD = 1000
+MAX_THRESHOLD = 5000
+DATASET_CACHE = {}
 
 
 def evaluate(dataset, sampling_method, sampling_size, column, methodology):
@@ -17,7 +18,9 @@ def evaluate(dataset, sampling_method, sampling_size, column, methodology):
         print(
             f"[{dataset.name.split('/')[-1]}::{column}] - {sampling_method} {sampling_size} entries"
         )
-        data = pd.read_csv(dataset.name, usecols=[column])
+        data = DATASET_CACHE.setdefault(dataset.name, pd.read_csv(dataset.name))[
+            [column]
+        ]
 
         if sampling_method == "First":
             data = data.head(sampling_size)
@@ -26,24 +29,25 @@ def evaluate(dataset, sampling_method, sampling_size, column, methodology):
         elif sampling_method == "Random":
             data = data.sample(n=sampling_size, random_state=42)
 
-        result = globals()[methodologies.get(methodology).get("fx")](data)
+        result_json = globals()[methodologies.get(methodology).get("fx")](data)
 
-        return gr.JSON.update(result, visible=True)
+        result_df = pd.DataFrame.from_dict(result_json, orient="index").reset_index()
+        result_df.columns = ["Metric", "Value"]
+
+        return gr.Dataframe.update(result_df, visible=True)
     except Exception as e:
         return gr.JSON.update(
-            {
-                "error": f"An error occurred while processing the dataset. Please check the dataset and try again. Error: {e}"
-            },
+            {"error": f"An error occurred while processing the dataset. {e}"},
             visible=True,
         )
 
 
 def display_dataset_config(dataset):
     try:
-        data = pd.read_csv(dataset.name)
+        data = DATASET_CACHE.setdefault(dataset.name, pd.read_csv(dataset.name))
 
         columns = data.select_dtypes(include=["object"]).columns.tolist()
-        corpus = data[columns[0]].tolist()
+        corpus = data[columns[0]].tolist()[0:5]
 
         return (
             gr.Radio.update(
@@ -59,7 +63,7 @@ def display_dataset_config(dataset):
                 info=f"Determines the number of entries to be analyzed. Due to computational constraints, the maximum number of entries that can be analyzed is {MAX_THRESHOLD}.",
                 minimum=1,
                 maximum=min(data.shape[0], MAX_THRESHOLD),
-                value=min(data.shape[0], MAX_THRESHOLD) // 2,
+                value=min(data.shape[0], MAX_THRESHOLD),
                 visible=True,
                 interactive=True,
             ),
@@ -72,7 +76,7 @@ def display_dataset_config(dataset):
                 interactive=True,
             ),
             gr.DataFrame.update(
-                value=pd.DataFrame({f"Data Corpus: {columns[0]}": corpus}), visible=True
+                value=pd.DataFrame({f"{columns[0]}": corpus}), visible=True
             ),
         )
     except:
@@ -85,12 +89,10 @@ def display_dataset_config(dataset):
 
 
 def update_column_metadata(dataset, column):
-    data = pd.read_csv(dataset.name)
-    corpus = data[column].tolist()
+    data = DATASET_CACHE.setdefault(dataset.name, pd.read_csv(dataset.name))
+    corpus = data[column].tolist()[0:5]
 
-    return gr.Dataframe.update(
-        value=pd.DataFrame({f"Data Corpus: {column}": corpus}), visible=True
-    )
+    return gr.Dataframe.update(value=pd.DataFrame({f"{column}": corpus}), visible=True)
 
 
 def get_methodology_metadata(methodology):
@@ -109,7 +111,11 @@ BiasAware = gr.Blocks(title="BiasAware: Dataset Bias Detection")
 
 with BiasAware:
     gr.Markdown(
-        "# BiasAware: Dataset Bias Detection\n\nBiasAware is a specialized tool for detecting and quantifying biases within datasets used for Natural Language Processing (NLP) tasks. NLP training datasets frequently mirror the inherent biases of their source materials, resulting in AI models that unintentionally perpetuate stereotypes, exhibit underrepresentation, and showcase skewed perspectives."
+        """
+        # BiasAware: Dataset Bias Detection
+
+        BiasAware is a specialized tool for detecting and quantifying biases within datasets used for Natural Language Processing (NLP) tasks. NLP training datasets frequently mirror the inherent biases of their source materials, resulting in AI models that unintentionally perpetuate stereotypes, exhibit underrepresentation, and showcase skewed perspectives.
+        """
    )
 
    with gr.Row():
@@ -119,7 +125,7 @@ with BiasAware:
            dataset_file = gr.File(label="Dataset", file_types=["csv"])
            dataset_examples = gr.Examples(
                [
-                    os.path.join(os.path.dirname(__file__), "data/z_animal.csv"),
+                    os.path.join(os.path.dirname(__file__), "data/imdb_100.csv"),
                    os.path.join(os.path.dirname(__file__), "data/z_employee.csv"),
                    os.path.join(os.path.dirname(__file__), "data/z_sentences.csv"),
                ],
@@ -151,10 +157,7 @@ with BiasAware:
        with gr.Column(scale=4):
            gr.Markdown("## Result")
 
-            result_status = gr.JSON(visible=False)
-            result = gr.DataFrame(
-                row_count=(5, "fixed"), col_count=(3, "fixed"), visible=False
-            )
+            result = gr.DataFrame(visible=False)
 
    dataset_file.change(
        fn=display_dataset_config,
@@ -188,7 +191,7 @@ with BiasAware:
            dataset_column,
            methodology,
        ],
-        outputs=[result_status],
+        outputs=[result],
    )
 
 BiasAware.launch()
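
The heart of the optimisation is visible above: `evaluate`, `display_dataset_config`, and `update_column_metadata` each used to call `pd.read_csv` on every interaction, and now share a module-level `DATASET_CACHE` keyed by file path. Below is a minimal sketch of the same pattern; the `load_dataset` helper is hypothetical, not part of the commit. One caveat worth knowing: `dict.setdefault(key, pd.read_csv(path))` still evaluates `pd.read_csv` on every call, so an explicit membership test like the one below is what actually skips the re-parse on cache hits.

import pandas as pd

# Sketch only: a cache-aware CSV loader. load_dataset is a hypothetical
# name used for illustration; the commit inlines this logic instead.
DATASET_CACHE = {}

def load_dataset(path):
    # Parse the CSV once; later calls reuse the cached DataFrame.
    if path not in DATASET_CACHE:
        DATASET_CACHE[path] = pd.read_csv(path)
    return DATASET_CACHE[path]

df_first = load_dataset("data/imdb_100.csv")  # parses the file
df_again = load_dataset("data/imdb_100.csv")  # served from the cache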
data/amazon_reviews.json DELETED
@@ -1,9 +0,0 @@
-{
-    "gender" : 14500,
-    "no gender" : 195500,
-    "equal gender" : 253,
-    "female pg" : 125,
-    "male pg" : 117,
-    "female spg" : 7196,
-    "male spg" : 6809
-}
data/imdb.json DELETED
@@ -1,9 +0,0 @@
-{
-    "gender" : 36174,
-    "no gender" : 13826,
-    "equal gender" : 2160,
-    "female pg" : 2776,
-    "male pg" : 3440,
-    "female spg" : 6918,
-    "male spg" : 20880
-}
data/imdb_100.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/tweet_eval.json DELETED
@@ -1,9 +0,0 @@
-{
-    "gender" : 10247,
-    "no gender" : 49652,
-    "equal gender" : 141,
-    "female pg" : 37,
-    "male pg" : 42,
-    "female spg" : 2478,
-    "male spg" : 7549
-}
data/z_animal.csv DELETED
@@ -1,11 +0,0 @@
-AnimalID,CommonName,ScientificName,Class,Order,Family,Habitat,ConservationStatus
-1,Lion,Panthera leo,Mammalia,Carnivora,Felidae,Savanna,Vulnerable
-2,Eagle,Aquila chrysaetos,Aves,Accipitriformes,Accipitridae,Mountains,Least Concern
-3,Dolphin,Tursiops truncatus,Mammalia,Cetacea,Delphinidae,Ocean,Least Concern
-4,Elephant,Loxodonta africana,Mammalia,Proboscidea,Elephantidae,Grassland,Vulnerable
-5,Tiger,Panthera tigris,Mammalia,Carnivora,Felidae,Forest,Endangered
-6,Penguin,Spheniscidae,Aves,Sphenisciformes,Spheniscidae,Antarctica,Least Concern
-7,Giraffe,Giraffa camelopardalis,Mammalia,Artiodactyla,Giraffidae,Savanna,Vulnerable
-8,Cheetah,Acinonyx jubatus,Mammalia,Carnivora,Felidae,Grassland,Vulnerable
-9,Panda,Ailuropoda melanoleuca,Mammalia,Carnivora,Ursidae,Forest,Endangered
-10,Kangaroo,Macropus rufus,Mammalia,Diprotodontia,Macropodidae,Grassland,Least Concern
scripts/genbit.py CHANGED
@@ -6,9 +6,9 @@ def eval_genbit(data):
         language_code="en", context_window=5, distance_weight=0.95, percentile_cutoff=80
     )
 
-    data[data.columns[0]] = data[data.columns[0]].to_list()
+    data = data[data.columns[0]].to_list()
 
     genbit_metrics.add_data(data, tokenized=False)
-    genbit_metrics = genbit_metrics.get_metrics(output_word_list=False)
+    genbit_metrics_dict = genbit_metrics.get_metrics(output_word_list=False)
 
-    return genbit_metrics
+    return genbit_metrics_dict
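
For reference, a minimal end-to-end sketch of the refactored flow, assuming Microsoft's genbit package is installed (the `GenBitMetrics` import path is an assumption based on that package, not shown in this commit) and using a made-up two-sentence corpus; real runs would feed far more text.

import pandas as pd
from genbit.genbit_metrics import GenBitMetrics  # assumed import path

genbit_metrics = GenBitMetrics(
    language_code="en", context_window=5, distance_weight=0.95, percentile_cutoff=80
)

# The fix: pass GenBit a plain list of strings instead of writing the list
# back into the DataFrame column and handing over the whole frame.
data = pd.DataFrame({"text": ["She is a doctor.", "He is a nurse."]})
sentences = data[data.columns[0]].to_list()

genbit_metrics.add_data(sentences, tokenized=False)
print(genbit_metrics.get_metrics(output_word_list=False))  # dict of scores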
scripts/gender_divide.py CHANGED
@@ -4,78 +4,47 @@ import json
 gender_lexicons = json.load(open("config/gender_lexicons.json", "r"))
 
 
-def count_male_terms(text, male_terms):
-    pattern = r"\b({})\b".format("|".join(male_terms))
-    match = re.findall(pattern, str(text))
-    return len(match)
-
-
-def count_female_terms(text, female_terms):
-    pattern = r"\b({})\b".format("|".join(female_terms))
-    match = re.findall(pattern, str(text))
-    return len(match)
+def count_gender_terms(text, gender_terms):
+    pattern = r"\b({})\b".format("|".join(gender_terms))
+    matches = re.findall(pattern, str(text))
+    return len(matches)
 
 
 def get_gender_tag(count_m_term, count_f_term):
-    tag = ""
-    if count_m_term == 0 and count_f_term == 0:
-        tag = "No Gender"
-
-    elif count_m_term == count_f_term:
-        tag = "Equal Gender"
+    total_terms = count_m_term + count_f_term
+    if total_terms == 0:
+        return "No Gender"
 
-    elif count_m_term > count_f_term:
-        m_proportion = (count_m_term / (count_m_term + count_f_term)) * 100
-        if m_proportion >= 50 and m_proportion < 75:
-            tag = "Male Positive Gender"
-        elif m_proportion >= 75:
-            tag = "Male Strongly Positive Gender"
+    m_proportion = (count_m_term / total_terms) * 100
+    if m_proportion >= 75:
+        return "Male Strongly Positive Gender"
+    elif m_proportion >= 50:
+        return "Male Positive Gender"
 
-    elif count_m_term < count_f_term:
-        f_proportion = (count_f_term / (count_m_term + count_f_term)) * 100
-        if f_proportion >= 50 and f_proportion < 75:
-            tag = "Female Positive Gender"
-        elif f_proportion >= 75:
-            tag = "Female Strongly Positive Gender"
+    f_proportion = (count_f_term / total_terms) * 100
+    if f_proportion >= 75:
+        return "Female Strongly Positive Gender"
+    elif f_proportion >= 50:
+        return "Female Positive Gender"
 
-    return tag
+    return "Equal Gender"
 
 
 def get_pg_spg(sample_df):
-    count_no_gender_sentences = sample_df[sample_df["gender_cat"] == "No Gender"][
-        "gender_cat"
-    ].count()
-
-    count_gender_sentences = sample_df[sample_df["gender_cat"] != "No Gender"][
-        "gender_cat"
-    ].count()
-    count_equal_gender = sample_df[sample_df["gender_cat"] == "Equal Gender"][
-        "gender_cat"
-    ].count()
-
-    count_male_pg = sample_df[sample_df["gender_cat"] == "Male Positive Gender"][
-        "gender_cat"
-    ].count()
-    count_male_spg = sample_df[
-        sample_df["gender_cat"] == "Male Strongly Positive Gender"
-    ]["gender_cat"].count()
-
-    count_female_pg = sample_df[sample_df["gender_cat"] == "Female Positive Gender"][
-        "gender_cat"
-    ].count()
-    count_female_spg = sample_df[
-        sample_df["gender_cat"] == "Female Stronly Positive Gender"
-    ]["gender_cat"].count()
-
-    return {
-        "gender": str(count_gender_sentences),
-        "no gender": str(count_no_gender_sentences),
-        "equal gender": str(count_equal_gender),
-        "female pg": str(count_female_pg),
-        "male pg": str(count_male_pg),
-        "female spg": str(count_female_spg),
-        "male spg": str(count_male_spg),
-    }
+    gender_labels = [
+        "Gender",
+        "No Gender",
+        "Equal Gender",
+        "Female Positive Gender",
+        "Male Positive Gender",
+        "Female Strongly Positive Gender",
+        "Male Strongly Positive Gender",
+    ]
+
+    gender_counts = sample_df["gender_cat"].value_counts()
+    result = {label: str(gender_counts.get(label, 0)) for label in gender_labels}
+
+    return result
 
 
 def eval_gender_divide(data):
@@ -85,10 +54,10 @@ def eval_gender_divide(data):
     data[data.columns[0]] = data[data.columns[0]].str.lower().str.strip()
 
     data["count_male_term"] = data.apply(
-        lambda x: count_male_terms(x[data.columns[0]], male_terms), axis=1
+        lambda x: count_gender_terms(x[data.columns[0]], male_terms), axis=1
     )
     data["count_female_term"] = data.apply(
-        lambda x: count_female_terms(x[:], female_terms), axis=1
+        lambda x: count_gender_terms(x[:], female_terms), axis=1
     )
 
     data["gender_cat"] = data.apply(
scripts/gender_profession_bias.py CHANGED
@@ -85,20 +85,19 @@ def call_multiprocessing_pool(df_text):
 
 
 def get_statistics(result):
-    conditions = {
-        "both_gender_prof_match": result["Both Match"].eq("Yes"),
-        "count_male_pronoun": result["Male Pronoun"].ne(""),
-        "count_female_pronoun": result["Female Pronoun"].ne(""),
-        "count_male_pronoun_profession": result["Male Pronoun"].ne("")
-        & result["Profession"].ne(""),
-        "count_female_pronoun_profession": result["Female Pronoun"].ne("")
-        & result["Profession"].ne(""),
+    stats = {
+        "both_gender_prof_match": str((result["Both Match"] == "Yes").sum()),
+        "count_male_pronoun": str((result["Male Pronoun"] != "").sum()),
+        "count_female_pronoun": str((result["Female Pronoun"] != "").sum()),
+        "count_male_pronoun_profession": str(
+            ((result["Male Pronoun"] != "") & (result["Profession"] != "")).sum()
+        ),
+        "count_female_pronoun_profession": str(
+            ((result["Female Pronoun"] != "") & (result["Profession"] != "")).sum()
+        ),
+        "total_sentence": str(len(result)),
     }
 
-    stats = {key: str(value.sum()) for key, value in conditions.items()}
-
-    stats["total_sentence"] = str(len(result))
-
     return stats
 
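
The rewrite inlines each statistic as a summed boolean mask instead of building a `conditions` dict first; `==`/`!=` on a Series behave like the `.eq`/`.ne` calls they replace, so the counts are unchanged. A toy demonstration with made-up rows that mirror the script's column names:

import pandas as pd

result = pd.DataFrame(
    {
        "Male Pronoun": ["he", "", "he"],
        "Female Pronoun": ["", "she", ""],
        "Profession": ["doctor", "", "nurse"],
        "Both Match": ["Yes", "No", "Yes"],
    }
)

# Comparisons yield boolean Series; .sum() counts the True rows.
print((result["Both Match"] == "Yes").sum())                                   # 2
print(((result["Male Pronoun"] != "") & (result["Profession"] != "")).sum())   # 2
print(len(result))                                                             # 3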