Mariusz Kossakowski committed on
Commit
10bdf14
·
1 Parent(s): 90966f7

Add class distribution dataframe to aspectemo dataset

Browse files
clarin_datasets/aspectemo_dataset.py CHANGED
@@ -48,6 +48,7 @@ class AspectEmoDataset(DatasetToShow):
48
  header = st.container()
49
  description = st.container()
50
  dataframe_head = st.container()
 
51
 
52
  with header:
53
  st.title(self.dataset_name)
@@ -62,3 +63,25 @@ class AspectEmoDataset(DatasetToShow):
62
  st.header("First 10 observations of the dataset")
63
  st.dataframe(df_to_show)
64
  st.text_area(label="Latex code", value=df_to_show.style.to_latex())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  header = st.container()
49
  description = st.container()
50
  dataframe_head = st.container()
51
+ class_distribution = st.container()
52
 
53
  with header:
54
  st.title(self.dataset_name)
 
63
  st.header("First 10 observations of the dataset")
64
  st.dataframe(df_to_show)
65
  st.text_area(label="Latex code", value=df_to_show.style.to_latex())
66
+
67
+ class_distribution_dict = {}
68
+ for subset in self.subsets:
69
+ all_labels = self.data_dict[subset]["labels"].tolist()
70
+ all_labels = [x for subarray in all_labels for x in subarray if x != 0]
71
+ all_labels = pd.Series(all_labels)
72
+ class_distribution_dict[subset] = (
73
+ all_labels.value_counts(normalize=True)
74
+ .sort_index()
75
+ .reset_index()
76
+ .rename({"index": "class", 0: subset}, axis="columns")
77
+ )
78
+
79
+ class_distribution_df = pd.merge(
80
+ class_distribution_dict["train"],
81
+ class_distribution_dict["test"],
82
+ on="class",
83
+ )
84
+ with class_distribution:
85
+ st.header("Class distribution in each subset (without '0')")
86
+ st.dataframe(class_distribution_df)
87
+ st.text_area(label="LaTeX code", value=class_distribution_df.style.to_latex())