File size: 4,923 Bytes
dbdf7e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8657eb5
dbdf7e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8657eb5
dbdf7e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5aaee68
 
 
dbdf7e4
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import gradio as gr
import time
import numpy as np
import matplotlib.pyplot as plt

from scipy.linalg import toeplitz, cholesky
from sklearn.covariance import LedoitWolf, OAS

# Seed NumPy's global RNG so the np.random.normal draws made further down
# in this module produce the same data on every start.
np.random.seed(0)




def plot_mse(min_slider_samples_range, max_slider_samples_range):
    """Plot the squared error of both estimators for the selected sample sizes.

    Reads the module-level arrays ``slider_samples_range``, ``lw_mse`` and
    ``oa_mse``. For every sample size inside the requested range it draws the
    mean along axis 1 of each error array, with the standard deviation along
    axis 1 as error bars.

    Args:
        min_slider_samples_range: Smallest ``n_samples`` value to display.
        max_slider_samples_range: Largest ``n_samples`` value to display.
            If the two bounds arrive in the wrong order they are swapped.

    Returns:
        The ``matplotlib.pyplot`` module; its current figure holds the plot.
    """
    # Tolerate a "min" slider that has been dragged past the "max" slider.
    low, high = sorted((min_slider_samples_range, max_slider_samples_range))

    # Restrict the precomputed results to the sample sizes the user asked for,
    # so that moving a slider actually changes what is drawn.
    shown = (slider_samples_range >= low) & (slider_samples_range <= high)
    x_values = slider_samples_range[shown]

    plt.clf()
    plt.subplot(2, 1, 1)
    for errors, label, color in (
        (lw_mse[shown], "Ledoit-Wolf", "navy"),
        (oa_mse[shown], "OAS", "darkorange"),
    ):
        plt.errorbar(
            x_values,
            errors.mean(1),
            yerr=errors.std(1),
            label=label,
            color=color,
            lw=2,
        )
    plt.ylabel("Squared error")
    plt.legend(loc="upper right")
    plt.title("Comparison of covariance estimators")
    # One unit of padding on each side keeps the end points off the frame.
    plt.xlim(low - 1, high + 1)
    return plt


def plot_shrinkage(min_slider_samples_range, max_slider_samples_range):
    """Plot the shrinkage coefficient of both estimators for the selected sizes.

    Reads the module-level arrays ``slider_samples_range``, ``lw_shrinkage``
    and ``oa_shrinkage``. For every sample size inside the requested range it
    draws the mean along axis 1 of each shrinkage array, with the standard
    deviation along axis 1 as error bars.

    Args:
        min_slider_samples_range: Smallest ``n_samples`` value to display.
        max_slider_samples_range: Largest ``n_samples`` value to display.
            If the two bounds arrive in the wrong order they are swapped.

    Returns:
        The ``matplotlib.pyplot`` module; its current figure holds the plot.
    """
    # Tolerate a "min" slider that has been dragged past the "max" slider.
    low, high = sorted((min_slider_samples_range, max_slider_samples_range))

    # Restrict the precomputed results to the sample sizes the user asked for,
    # so that moving a slider actually changes what is drawn.
    shown = (slider_samples_range >= low) & (slider_samples_range <= high)
    x_values = slider_samples_range[shown]

    plt.clf()
    plt.subplot(2, 1, 2)
    for shrinkage, label, color in (
        (lw_shrinkage[shown], "Ledoit-Wolf", "navy"),
        (oa_shrinkage[shown], "OAS", "darkorange"),
    ):
        plt.errorbar(
            x_values,
            shrinkage.mean(1),
            yerr=shrinkage.std(1),
            label=label,
            color=color,
            lw=2,
        )
    plt.xlabel("n_samples")
    plt.ylabel("Shrinkage")
    plt.legend(loc="lower right")
    # Keep the automatic lower limit; set the upper limit to 1.0 plus a tenth
    # of the automatic axis height.
    bottom, top = plt.ylim()
    plt.ylim(bottom, 1.0 + (top - bottom) / 10.0)
    # One unit of padding on each side keeps the end points off the frame.
    plt.xlim(low - 1, high + 1)
    return plt






title = "Ledoit-Wolf vs OAS estimation"

# Bounds shared by both sliders and by the precomputed simulation grid.
MIN_SAMPLES = 6
MAX_SAMPLES = 31

# Simulation settings.
n_features = 100
repeat = 100
r = 0.1


def _simulate_estimators(samples_range, n_features, repeat, r):
    """Fit LedoitWolf and OAS on random data and record error and shrinkage.

    For every entry of ``samples_range`` the experiment is repeated ``repeat``
    times: normal random data of shape ``(n_samples, n_features)`` is mixed
    with the Cholesky factor of a Toeplitz matrix built from ``r``, both
    estimators are fitted on it, and their ``error_norm`` against that
    Toeplitz matrix and their ``shrinkage_`` attribute are stored.

    Args:
        samples_range: 1-D NumPy array of sample sizes to simulate.
        n_features: Number of columns of each generated data set.
        repeat: Number of repetitions per sample size.
        r: Base of the powers that form the first column of the Toeplitz
            matrix.

    Returns:
        A tuple ``(lw_mse, oa_mse, lw_shrinkage, oa_shrinkage)`` of arrays,
        each of shape ``(samples_range.size, repeat)``.
    """
    real_cov = toeplitz(r ** np.arange(n_features))
    coloring_matrix = cholesky(real_cov)

    result_shape = (samples_range.size, repeat)
    lw_errors = np.zeros(result_shape)
    oa_errors = np.zeros(result_shape)
    lw_coefs = np.zeros(result_shape)
    oa_coefs = np.zeros(result_shape)

    for i, n_samples in enumerate(samples_range):
        for j in range(repeat):
            X = np.dot(
                np.random.normal(size=(n_samples, n_features)),
                coloring_matrix.T,
            )

            lw = LedoitWolf(store_precision=False, assume_centered=True)
            lw.fit(X)
            lw_errors[i, j] = lw.error_norm(real_cov, scaling=False)
            lw_coefs[i, j] = lw.shrinkage_

            oa = OAS(store_precision=False, assume_centered=True)
            oa.fit(X)
            oa_errors[i, j] = oa.error_norm(real_cov, scaling=False)
            oa_coefs[i, j] = oa.shrinkage_

    return lw_errors, oa_errors, lw_coefs, oa_coefs


# Module-level results read by plot_mse / plot_shrinkage. They are computed
# once at start-up, independently of any UI component state.
# NOTE: np.arange excludes its stop value, so the grid ends at MAX_SAMPLES - 1.
slider_samples_range = np.arange(MIN_SAMPLES, MAX_SAMPLES, 1)
lw_mse, oa_mse, lw_shrinkage, oa_shrinkage = _simulate_estimators(
    slider_samples_range, n_features, repeat, r
)


with gr.Blocks(title=title, theme=gr.themes.Default(font=[gr.themes.GoogleFont("Inconsolata"), "Arial", "sans-serif"])) as demo:
    gr.Markdown(f"# {title}")

    gr.Markdown(
    """
    The usual covariance maximum likelihood estimate can be regularized using shrinkage. Ledoit and Wolf proposed a close formula to compute the asymptotically optimal shrinkage parameter (minimizing a MSE criterion), yielding the Ledoit-Wolf covariance estimate.

    Chen et al. proposed an improvement of the Ledoit-Wolf shrinkage parameter, the OAS coefficient, whose convergence is significantly better under the assumption that the data are Gaussian.

    This example, inspired from Chen’s publication [1], shows a comparison of the estimated MSE of the LW and OAS methods, using Gaussian distributed data.

    [1] “Shrinkage Algorithms for MMSE Covariance Estimation” Chen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.
    """)

    min_slider_samples_range = gr.Slider(
        MIN_SAMPLES,
        MAX_SAMPLES,
        value=MIN_SAMPLES,
        step=1,
        label="min_samples_range",
        info=f"Choose between {MIN_SAMPLES} and {MAX_SAMPLES}",
    )
    max_slider_samples_range = gr.Slider(
        MIN_SAMPLES,
        MAX_SAMPLES,
        value=MAX_SAMPLES,
        step=1,
        label="max_samples_range",
        info=f"Choose between {MIN_SAMPLES} and {MAX_SAMPLES}",
    )

    gr.Markdown(" **[Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/covariance/plot_lw_vs_oas.html)**")

    gr.Markdown("Changing the min_samples_range values and the MSE plot changes")
    gr.Markdown("Changing the max_samples_range values and the Shrinkage plot changes")

    gr.Label(value="Comparison of Covariance Estimators")

    # Named output components, created in the order they should appear.
    mse_plot = gr.Plot()
    shrinkage_plot = gr.Plot()

    slider_inputs = [min_slider_samples_range, max_slider_samples_range]
    # The min slider redraws the MSE plot, the max slider the shrinkage plot.
    min_slider_samples_range.change(plot_mse, inputs=slider_inputs, outputs=mse_plot)
    max_slider_samples_range.change(plot_shrinkage, inputs=slider_inputs, outputs=shrinkage_plot)


# Only start the server when run as a script, so importing this module
# (e.g. to reuse `demo`) does not block on launch().
if __name__ == "__main__":
    demo.launch()