Spaces:

sklearn-docs
/

Ledoit-Wolf-OAS

Sleeping

File size: 4,923 Bytes

import gradio as gr
import time
import numpy as np
import matplotlib.pyplot as plt

from scipy.linalg import toeplitz, cholesky
from sklearn.covariance import LedoitWolf, OAS

# Seed NumPy's global random generator so the np.random.normal draws used to
# simulate the data below are the same on every start of the app.
np.random.seed(0)




def plot_mse(min_slider_samples_range, max_slider_samples_range):
    """Plot the squared error of both estimators over the selected sample sizes.

    Reads the module-level arrays ``slider_samples_range``, ``lw_mse`` and
    ``oa_mse`` and draws, for every sample size inside the window chosen with
    the two sliders, the mean error across repetitions (axis 1) with its
    standard deviation as error bars. The statistics are not recomputed; the
    sliders only select which of the precomputed rows are shown.

    Args:
        min_slider_samples_range: Smallest n_samples to display.
        max_slider_samples_range: Largest n_samples to display (inclusive).
            The two bounds are swapped if they arrive in reverse order.

    Returns:
        The ``matplotlib.pyplot`` module, whose current figure holds the plot.
    """
    print("inside plot_mse")

    # Tolerate the "min" slider being dragged past the "max" slider.
    lower, upper = sorted((min_slider_samples_range, max_slider_samples_range))
    window = (slider_samples_range >= lower) & (slider_samples_range <= upper)
    sample_sizes = slider_samples_range[window]

    plt.clf()
    # Upper cell of a 2x1 grid, as in the original layout.
    plt.subplot(2, 1, 1)

    # Skip drawing when no precomputed sample size falls inside the window.
    if sample_sizes.size:
        series = (
            (lw_mse, "Ledoit-Wolf", "navy"),
            (oa_mse, "OAS", "darkorange"),
        )
        for errors, label, color in series:
            selected = errors[window]
            plt.errorbar(
                sample_sizes,
                selected.mean(1),
                yerr=selected.std(1),
                label=label,
                color=color,
                lw=2,
            )
        plt.legend(loc="upper right")

    plt.ylabel("Squared error")
    plt.title("Comparison of covariance estimators")
    plt.xlim(5, 31)
    print("outside plot_mse")
    return plt


def plot_shrinkage(min_slider_samples_range, max_slider_samples_range):
    """Plot the shrinkage coefficient of both estimators over the selected sample sizes.

    Reads the module-level arrays ``slider_samples_range``, ``lw_shrinkage``
    and ``oa_shrinkage`` and draws, for every sample size inside the window
    chosen with the two sliders, the mean coefficient across repetitions
    (axis 1) with its standard deviation as error bars. The statistics are
    not recomputed; the sliders only select which precomputed rows are shown.

    Args:
        min_slider_samples_range: Smallest n_samples to display.
        max_slider_samples_range: Largest n_samples to display (inclusive).
            The two bounds are swapped if they arrive in reverse order.

    Returns:
        The ``matplotlib.pyplot`` module, whose current figure holds the plot.
    """
    print("inside plot_shrink")

    # Tolerate the "min" slider being dragged past the "max" slider.
    lower, upper = sorted((min_slider_samples_range, max_slider_samples_range))
    window = (slider_samples_range >= lower) & (slider_samples_range <= upper)
    sample_sizes = slider_samples_range[window]

    plt.clf()
    # Lower cell of a 2x1 grid, as in the original layout.
    plt.subplot(2, 1, 2)

    # Skip drawing when no precomputed sample size falls inside the window.
    if sample_sizes.size:
        series = (
            (lw_shrinkage, "Ledoit-Wolf", "navy"),
            (oa_shrinkage, "OAS", "darkorange"),
        )
        for coefficients, label, color in series:
            selected = coefficients[window]
            plt.errorbar(
                sample_sizes,
                selected.mean(1),
                yerr=selected.std(1),
                label=label,
                color=color,
                lw=2,
            )
        plt.legend(loc="lower right")

    plt.xlabel("n_samples")
    plt.ylabel("Shrinkage")
    # Keep the automatic lower limit and put the upper limit a tenth of the
    # current span above 1.0.
    bottom, top = plt.ylim()
    plt.ylim(bottom, 1.0 + (top - bottom) / 10.0)
    plt.xlim(5, 31)
    print("outside plot_shrink")
    return plt






title = "Ledoit-Wolf vs OAS estimation"

# Default Gradio theme with a custom font fallback chain.
app_theme = gr.themes.Default(
    font=[gr.themes.GoogleFont("Inconsolata"), "Arial", "sans-serif"]
)

with gr.Blocks(title=title, theme=app_theme) as demo:
    # ---- Header and description -------------------------------------------
    gr.Markdown(f"# {title}")

    gr.Markdown(
        """
    The usual covariance maximum likelihood estimate can be regularized using shrinkage. Ledoit and Wolf proposed a close formula to compute the asymptotically optimal shrinkage parameter (minimizing a MSE criterion), yielding the Ledoit-Wolf covariance estimate.

    Chen et al. proposed an improvement of the Ledoit-Wolf shrinkage parameter, the OAS coefficient, whose convergence is significantly better under the assumption that the data are Gaussian.

    This example, inspired from Chen’s publication [1], shows a comparison of the estimated MSE of the LW and OAS methods, using Gaussian distributed data.

    [1] “Shrinkage Algorithms for MMSE Covariance Estimation” Chen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.
    """
    )

    # ---- Range sliders -----------------------------------------------------
    min_slider_samples_range = gr.Slider(
        minimum=6,
        maximum=31,
        value=6,
        step=1,
        label="min_samples_range",
        info="Choose between 6 and 31",
    )
    max_slider_samples_range = gr.Slider(
        minimum=6,
        maximum=31,
        value=31,
        step=1,
        label="max_samples_range",
        info="Choose between 6 and 31",
    )

    print("min_slider_samples_range=",min_slider_samples_range.value)
    print("max_slider_samples_range=",max_slider_samples_range.value)

    # ---- One-off simulation, run while the UI is being built ---------------
    # The sample sizes come from the sliders' initial values; the arrays
    # filled here are the module-level data the plot callbacks read.
    slider_samples_range = np.arange(
        min_slider_samples_range.value, max_slider_samples_range.value, 1
    )
    n_features = 100
    repeat = 100

    # One row per sample size, one column per repetition.
    result_shape = (slider_samples_range.size, repeat)
    lw_mse = np.zeros(result_shape)
    oa_mse = np.zeros(result_shape)
    lw_shrinkage = np.zeros(result_shape)
    oa_shrinkage = np.zeros(result_shape)

    # Toeplitz covariance whose first column is r**0, r**1, ..., and the
    # Cholesky factor used to colour the white Gaussian draws.
    r = 0.1
    real_cov = toeplitz(r ** np.arange(n_features))
    coloring_matrix = cholesky(real_cov)

    # Estimators are fitted in this order on every draw: Ledoit-Wolf, then OAS.
    estimator_slots = (
        (LedoitWolf, lw_mse, lw_shrinkage),
        (OAS, oa_mse, oa_shrinkage),
    )

    for row, n_samples in enumerate(slider_samples_range):
        for trial in range(repeat):
            noise = np.random.normal(size=(n_samples, n_features))
            X = np.dot(noise, coloring_matrix.T)

            for estimator_cls, mse_store, shrinkage_store in estimator_slots:
                estimator = estimator_cls(store_precision=False, assume_centered=True)
                estimator.fit(X)
                mse_store[row, trial] = estimator.error_norm(real_cov, scaling=False)
                shrinkage_store[row, trial] = estimator.shrinkage_

    # ---- Footer text and outputs -------------------------------------------
    gr.Markdown(" **[Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/covariance/plot_lw_vs_oas.html)**")

    gr.Markdown("Changing the min_samples_range values and the MSE plot changes")
    gr.Markdown("Changing the max_samples_range values and the Shrinkage plot changes")

    gr.Label(value="Comparison of Covariance Estimators")

    # Each slider drives its own plot; both callbacks receive both values.
    slider_inputs = [min_slider_samples_range, max_slider_samples_range]

    mse_plot = gr.Plot()
    min_slider_samples_range.change(fn=plot_mse, inputs=slider_inputs, outputs=mse_plot)

    shrinkage_plot = gr.Plot()
    max_slider_samples_range.change(fn=plot_shrinkage, inputs=slider_inputs, outputs=shrinkage_plot)


demo.launch()