Zekun Wu committed on
Commit
8a73f6f
·
1 Parent(s): a7984de
Files changed (1) hide show
  1. util/evaluation.py +13 -4
util/evaluation.py CHANGED
@@ -7,6 +7,7 @@ from scipy.stats import spearmanr, pearsonr, kendalltau, entropy
7
  from scipy.spatial.distance import jensenshannon
8
  from scipy.stats import ttest_ind, friedmanchisquare, rankdata, ttest_rel
9
  from statsmodels.stats.multicomp import pairwise_tukeyhsd
 
10
 
11
  # def bootstrap_t_test(data1, data2, num_bootstrap=1000):
12
  # """Perform a bootstrapped t-test."""
@@ -24,18 +25,26 @@ from statsmodels.stats.multicomp import pairwise_tukeyhsd
24
  # p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
25
  # return observed_t_stat, p_value
26
 
 
27
  def bootstrap_t_test(data1, data2, num_bootstrap=1000):
28
- """Perform a bootstrapped paired t-test."""
29
- observed_t_stat, _ = ttest_rel(data1, data2)
30
  differences = data1 - data2
 
 
 
31
  t_stats = []
32
 
33
  for _ in range(num_bootstrap):
34
- # Resample with replacement
35
  resampled_diffs = np.random.choice(differences, size=len(differences), replace=True)
36
- t_stat, _ = ttest_rel(resampled_diffs, np.zeros(len(resampled_diffs)))
 
 
37
  t_stats.append(t_stat)
38
 
 
 
39
  p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
40
  return observed_t_stat, p_value
41
 
 
7
  from scipy.spatial.distance import jensenshannon
8
  from scipy.stats import ttest_ind, friedmanchisquare, rankdata, ttest_rel
9
  from statsmodels.stats.multicomp import pairwise_tukeyhsd
10
+ from scipy.stats import ttest_1samp
11
 
12
  # def bootstrap_t_test(data1, data2, num_bootstrap=1000):
13
  # """Perform a bootstrapped t-test."""
 
25
  # p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
26
  # return observed_t_stat, p_value
27
 
28
+
29
  def bootstrap_t_test(data1, data2, num_bootstrap=1000):
30
+ """Perform a bootstrapped paired t-test for mean difference being zero."""
31
+ # Calculate the observed differences between paired samples
32
  differences = data1 - data2
33
+ # Compute the observed t-statistic for the differences
34
+ observed_t_stat, _ = ttest_1samp(differences, 0)
35
+
36
  t_stats = []
37
 
38
  for _ in range(num_bootstrap):
39
+ # Resample the differences with replacement
40
  resampled_diffs = np.random.choice(differences, size=len(differences), replace=True)
41
+ # Perform a one-sample t-test on the resampled differences against zero
42
+ t_stat, _ = ttest_1samp(resampled_diffs, 0)
43
+ # Append the t-statistic to the list
44
  t_stats.append(t_stat)
45
 
46
+ # Calculate the p-value as the proportion of bootstrap t-statistics
47
+ # that are as extreme as or more extreme than the observed t-statistic
48
  p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
49
  return observed_t_stat, p_value
50