A dataset's covariance matrix is well approximated by the maximum likelihood estimate (MLE), provided the number of observations is large enough compared to the number of features.
Empirical covariance results depend on whether the data is centered. With assume_centered=False (the default), the test set is assumed to have the same mean vector as the training set. If that is not the case, both sets should be centered by the user and assume_centered=True should be used.
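As a minimal sketch of this behaviour (with made-up data and an arbitrary non-zero mean), centering the data by hand and passing assume_centered=True gives the same estimate as letting EmpiricalCovariance center it:

import numpy as np
from sklearn.covariance import EmpiricalCovariance

rng = np.random.RandomState(0)
X_train = rng.normal(loc=5., size=(500, 3))    # data with a non-zero mean

# default: the estimator centers the data itself (assume_centered=False)
ec = EmpiricalCovariance().fit(X_train)

# centering manually, then telling the estimator the data is already centered
X_centered = X_train - X_train.mean(axis=0)
ec_centered = EmpiricalCovariance(assume_centered=True).fit(X_centered)

print(np.allclose(ec.covariance_, ec_centered.covariance_))    # True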
The MLE is not a good estimator of the covariance matrix's eigenvalues, so the precision matrix obtained by inverting it is not accurate. Sometimes the empirical covariance matrix cannot even be inverted, for numerical reasons.
To avoid this problem, Scikit-Learn offers a transformation via a user-defined shrinkage coefficient.
The shrinkage consists in reducing the ratio between the smallest and largest eigenvalues of the empirical covariance matrix, by shifting every eigenvalue by a given offset; this is equivalent to finding an L2-penalized MLE of the covariance matrix.
Mathematically, the shrinkage is $\Sigma_{\rm shrunk} = (1-\alpha)\hat{\Sigma} + \alpha\frac{{\rm Tr}\hat{\Sigma}}{p}\rm Id$ where $\alpha$ defines a bias/variance tradeoff.
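To make the formula concrete, here is a small sketch (the random data and the value alpha = 0.3 are arbitrary) checking that ShrunkCovariance applies exactly this convex combination of the MLE and a scaled identity matrix:

import numpy as np
from sklearn.covariance import ShrunkCovariance, empirical_covariance

rng = np.random.RandomState(0)
X = rng.normal(size=(50, 5))
alpha = 0.3                                  # user-defined shrinkage coefficient

emp = empirical_covariance(X)                # the MLE, Sigma_hat
mu = np.trace(emp) / emp.shape[0]            # Tr(Sigma_hat) / p
manual = (1 - alpha) * emp + alpha * mu * np.eye(emp.shape[0])

sc = ShrunkCovariance(shrinkage=alpha).fit(X)
print(np.allclose(sc.covariance_, manual))   # True

In practice the coefficient is rarely set by hand: the Ledoit-Wolf and OAS estimators used below compute a data-driven choice of the shrinkage.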
import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import toeplitz, cholesky
from sklearn.covariance import LedoitWolf as LW, OAS
np.random.seed(0)
n_features = 100
r = 0.1
real_cov = toeplitz(r ** np.arange(n_features))
coloring_matrix = cholesky(real_cov)
n_samples_range = np.arange(6, 31, 1)
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(np.random.normal(size=(n_samples, n_features)),
                   coloring_matrix.T)

        lw = LW(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False, assume_centered=True)
        oa.fit(X)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_
plt.subplot(2, 1, 1)
plt.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
             label='Ledoit-Wolf', color='navy', lw=2)
plt.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
             label='OAS', color='darkorange', lw=2)
plt.ylabel("Squared error")
plt.legend(loc="upper right")
plt.title("Comparison of covariance estimators")
plt.xlim(5, 31)
# plot shrinkage coefficient
plt.subplot(2, 1, 2)
plt.errorbar(n_samples_range, lw_shrinkage.mean(1), yerr=lw_shrinkage.std(1),
             label='Ledoit-Wolf', color='navy', lw=2)
plt.errorbar(n_samples_range, oa_shrinkage.mean(1), yerr=oa_shrinkage.std(1),
             label='OAS', color='darkorange', lw=2)
plt.xlabel("n_samples")
plt.ylabel("Shrinkage")
plt.legend(loc="lower right")
plt.ylim(plt.ylim()[0], 1. + (plt.ylim()[1] - plt.ylim()[0]) / 10.)
plt.xlim(5, 31)
The next example illustrates three methods for setting the bias/variance tradeoff of shrunk covariance estimators: cross-validation, Ledoit-Wolf (LW), and Oracle Approximating Shrinkage (OAS).
It plots the likelihood of unseen test data as a function of the shrinkage coefficient.
Note that the MLE corresponds to no shrinkage and performs poorly. The Ledoit-Wolf estimate performs well; the OAS estimate lies slightly further from the likelihood optimum.
import numpy as np
import matplotlib.pyplot as plt
from scipy import linalg
from sklearn.covariance import (LedoitWolf as LW, OAS, ShrunkCovariance as SC,
                                log_likelihood as LL, empirical_covariance as EC)
from sklearn.model_selection import GridSearchCV
np.random.seed(42)
n_features, n_samples = 40, 20
base_X_train = np.random.normal(size=(n_samples, n_features))
base_X_test = np.random.normal(size=(n_samples, n_features))
# Color samples
coloring_matrix = np.random.normal(size=(n_features, n_features))
X_train = np.dot(base_X_train, coloring_matrix)
X_test = np.dot(base_X_test, coloring_matrix)
# Compute the likelihood on test data
shrinkages = np.logspace(-2, 0, 30)
negative_logliks = [-SC(shrinkage=s).fit(X_train).score(X_test)
                    for s in shrinkages]
# Compare to the likelihood under the ground-truth model
# (which we would not have access to in real life)
real_cov = np.dot(coloring_matrix.T, coloring_matrix)
emp_cov = EC(X_train)
loglik_real = -LL(emp_cov, linalg.inv(real_cov))
tuned_parameters = [{'shrinkage': shrinkages}]
cv = GridSearchCV(SC(), tuned_parameters).fit(X_train)
lw = LW(); oa = OAS()
loglik_lw = lw.fit(X_train).score(X_test)
loglik_oa = oa.fit(X_train).score(X_test)
plt.title("Regularized CV: likelihood and shrinkage coefficient")
plt.xlabel('Regularization: shrinkage coefficient')
plt.ylabel('Error: negative log-likelihood on test data')
plt.loglog(shrinkages, negative_logliks, label="Negative log-likelihood")
plt.plot(plt.xlim(), 2 * [loglik_real], '--r',
         label="Real covariance likelihood")
lik_max = np.amax(negative_logliks)
lik_min = np.amin(negative_logliks)
ymin = lik_min - 6.*np.log((plt.ylim()[1] - plt.ylim()[0]))
ymax = lik_max + 10.*np.log(lik_max - lik_min)
xmin = shrinkages[0]
xmax = shrinkages[-1]
plt.vlines(lw.shrinkage_, ymin, -loglik_lw, color='magenta',
           linewidth=3, label='LW estimate')
plt.vlines(oa.shrinkage_, ymin, -loglik_oa, color='purple',
           linewidth=3, label='OAS estimate')
plt.vlines(cv.best_estimator_.shrinkage, ymin,
           -cv.best_estimator_.score(X_test), color='cyan',
           linewidth=3, label='CV best estimate')
plt.ylim(ymin, ymax)
plt.xlim(xmin, xmax)
plt.legend()
The precision matrix (the inverse of the covariance matrix) encodes a partial independence relation: if two features are independent conditionally on the others, the corresponding coefficient in the precision matrix is zero.
This is why estimating a sparse precision matrix makes sense: the estimation is better conditioned by learning independence relations from the data. This is known as covariance selection.
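A minimal sketch of this claim on a hand-built three-variable chain model (x2 depends on x1, x3 on x2, so x1 and x3 are independent given x2); the covariance below is the population covariance of that chain, and the precision coefficient between x1 and x3 comes out as zero:

import numpy as np

# chain model: x1 ~ N(0, 1), x2 = x1 + noise, x3 = x2 + noise (unit noise)
chain_cov = np.array([[1., 1., 1.],
                      [1., 2., 2.],
                      [1., 2., 3.]])
chain_prec = np.linalg.inv(chain_cov)
print(np.round(chain_prec, 6))   # ~ [[2, -1, 0], [-1, 2, -1], [0, -1, 1]]
# the (x1, x3) entry is (numerically) zero: x1 and x3 are conditionally
# independent given x2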
The next example uses the GraphicalLasso estimator to learn a covariance and a sparse precision matrix from a small number of samples.
If we use L2 shrinkage (as with Ledoit-Wolf), the number of samples is small and we need to shrink a lot. As a result, the Ledoit-Wolf precision is fairly close to the ground-truth precision, but the off-diagonal structure is lost.
The L1-penalized estimator recovers some of that off-diagonal structure. It cannot recover the exact sparsity pattern (it detects too many non-zero coefficients), but the largest non-zero coefficients do correspond to the ground truth.
The coefficients of the L1 precision estimate are biased towards zero: because of the penalty, they are all smaller than the corresponding ground-truth values.
The color range of the precision matrices is tweaked for readability (the full range of the empirical precision values is not shown).
The alpha parameter of GraphicalLasso, which controls the sparsity, is set by cross-validation (GraphicalLassoCV).
import numpy as np
from scipy import linalg
from sklearn.datasets import make_sparse_spd_matrix
from sklearn.covariance import GraphicalLassoCV as GLCV, ledoit_wolf as LW
import matplotlib.pyplot as plt
n_samples = 60
n_features = 20
prng = np.random.RandomState(1)
prec = make_sparse_spd_matrix(n_features, alpha=.98,
                              smallest_coef=.4,
                              largest_coef=.7,
                              random_state=prng)
cov = linalg.inv(prec)
d = np.sqrt(np.diag(cov))
cov /= d
cov /= d[:, np.newaxis]
prec *= d
prec *= d[:, np.newaxis]
X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)
X -= X.mean(axis=0)
X /= X.std(axis=0)
# Estimate the covariance
emp_cov = np.dot(X.T, X) / n_samples
model = GLCV().fit(X)
cov_ = model.covariance_
prec_ = model.precision_
lw_cov_, _ = LW(X)
lw_prec_ = linalg.inv(lw_cov_)
plt.figure(figsize=(10, 6))
plt.subplots_adjust(left=0.02, right=0.98)
# plot the covariances
covs = [('Empirical', emp_cov),
        ('Ledoit-Wolf', lw_cov_),
        ('GraphicalLassoCV', cov_),
        ('True', cov)]
vmax = cov_.max()
for i, (name, this_cov) in enumerate(covs):
    plt.subplot(2, 4, i + 1)
    plt.imshow(this_cov, interpolation='nearest',
               vmin=-vmax, vmax=vmax, cmap=plt.cm.RdBu_r)
    plt.xticks(())
    plt.yticks(())
    plt.title('%s covariance' % name)
# plot the precisions
precs = [('Empirical', linalg.inv(emp_cov)),
         ('Ledoit-Wolf', lw_prec_),
         ('GraphicalLasso', prec_),
         ('True', prec)]
vmax = .9 * prec_.max()
for i, (name, this_prec) in enumerate(precs):
    ax = plt.subplot(2, 4, i + 5)
    plt.imshow(np.ma.masked_equal(this_prec, 0),
               interpolation='nearest',
               vmin=-vmax, vmax=vmax, cmap=plt.cm.RdBu_r)
    plt.xticks(())
    plt.yticks(())
    plt.title('%s precision' % name)
    if hasattr(ax, 'set_facecolor'):
        ax.set_facecolor('.7')
    else:
        ax.set_axis_bgcolor('.7')
# plot the model selection metric
plt.figure(figsize=(4, 3))
plt.axes([.2, .15, .75, .7])
plt.plot(model.cv_results_["alphas"],
         model.cv_results_["mean_score"], 'o-')
plt.axvline(model.alpha_, color='.5')
plt.title('Model selection')
plt.ylabel('Cross-validation score')
plt.xlabel('alpha')
Real-world datasets are often subject to measurement errors and uncommon observations, i.e. outliers.
Empirical and shrunk covariance estimators are very sensitive to outliers. Robust covariance estimators can perform outlier detection and discard or downweight the outlying observations.
The Minimum Covariance Determinant (MCD) estimator finds a proportion ($h$) of non-outlying observations and builds their empirical covariance matrix. This covariance matrix is then rescaled to compensate for the selection of observations (the "consistency step").
The estimator can then assign weights to observations according to their Mahalanobis distance, leading to a reweighted estimate of the covariance matrix.
The FastMCD algorithm is used to build the MCD object.
For Gaussian data: the distance of an observation to the mode of the distribution can be found using its Mahalanobis distance: $d_{(\mu,\Sigma)}(x_i)^2 = (x_i - \mu)^T\Sigma^{-1}(x_i - \mu)$ where $\mu$ and $\Sigma$ are the location & covariance of the underlying Gaussian.
In practice, $\mu$ and $\Sigma$ are replaced with estimates. The MLE is very sensitive to outliers, and therefore so are the Mahalanobis distances derived from it. It is better to use the MCD estimator to guarantee some resistance to outliers.
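As a quick sanity check of the formula above (on made-up two-dimensional Gaussian data), the squared distances returned by a fitted estimator's mahalanobis method match the explicit computation from its location_ and covariance_ attributes:

import numpy as np
from sklearn.covariance import MinCovDet

rng = np.random.RandomState(0)
X = rng.multivariate_normal([0., 0.], [[2., 0.3], [0.3, 1.]], size=200)

mcd = MinCovDet(random_state=0).fit(X)
d2 = mcd.mahalanobis(X)    # squared Mahalanobis distances, robust estimates

# same quantity computed directly from the formula
diff = X - mcd.location_
d2_manual = np.einsum('ij,jk,ik->i', diff,
                      np.linalg.inv(mcd.covariance_), diff)
print(np.allclose(d2, d2_manual))    # True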
This example shows how Mahalanobis distances are affected by outliers: with distances based on the standard MLE covariance, contaminated observations cannot be distinguished from genuinely Gaussian ones, whereas the difference becomes clear with MCD-based Mahalanobis distances.
import numpy as np
np.random.seed(7)
# gaussian dataset; 125 samples, 2 features (both Gaussian)
# feature 1: mean = 0, stdev = 2
# feature 2: mean = 0, stdev = 1
# 25 samples replaced with Gaussian outliers:
# feature 1: mean = 0, stdev = 1
# feature 2: mean = 0, stdev = 7
n_samples = 125; n_outliers = 25; n_features = 2
# generate Gaussian data of shape (125, 2)
gen_cov = np.eye(n_features)
gen_cov[0, 0] = 2.
X = np.dot(np.random.randn(n_samples, n_features), gen_cov)
# add some outliers
outliers_cov = np.eye(n_features)
outliers_cov[np.arange(1, n_features),
             np.arange(1, n_features)] = 7.
X[-n_outliers:] = np.dot(np.random.randn(n_outliers, n_features),
                         outliers_cov)
import matplotlib.pyplot as plt
from sklearn.covariance import EmpiricalCovariance, MinCovDet
robust_cov = MinCovDet().fit(X)
emp_cov = EmpiricalCovariance().fit(X)
print('Estimated covariance matrix:\n'
      'MCD (Robust):\n{}\n'
      'MLE:\n{}'.format(robust_cov.covariance_, emp_cov.covariance_))
Estimated covariance matrix:
MCD (Robust):
[[ 3.26253567e+00 -3.06695631e-03]
 [-3.06695631e-03  1.22747343e+00]]
MLE:
[[ 3.23773583 -0.24640578]
 [-0.24640578  7.51963999]]
# plot Mahalanobis distance contours for both methods.
fig, ax = plt.subplots(figsize=(10, 5))
# Plot data set
inlier_plot = ax.scatter(X[:, 0], X[:, 1],
                         color='black', label='inliers')
outlier_plot = ax.scatter(X[:, 0][-n_outliers:], X[:, 1][-n_outliers:],
                          color='red', label='outliers')
ax.set_xlim(ax.get_xlim()[0], 10.)
ax.set_title("Mahalanobis distances - contaminated data")
# Create meshgrid of feature 1 and feature 2 values
xx, yy = np.meshgrid(np.linspace(plt.xlim()[0], plt.xlim()[1], 100),
                     np.linspace(plt.ylim()[0], plt.ylim()[1], 100))
zz = np.c_[xx.ravel(), yy.ravel()]
# Calculate the MLE based Mahalanobis distances of the meshgrid
mahal_emp_cov = emp_cov.mahalanobis(zz)
mahal_emp_cov = mahal_emp_cov.reshape(xx.shape)
emp_cov_contour = plt.contour(xx, yy, np.sqrt(mahal_emp_cov),
                              cmap=plt.cm.PuBu_r, linestyles='dashed')
# Calculate the MCD based Mahalanobis distances
mahal_robust_cov = robust_cov.mahalanobis(zz)
mahal_robust_cov = mahal_robust_cov.reshape(xx.shape)
robust_contour = ax.contour(xx, yy, np.sqrt(mahal_robust_cov),
                            cmap=plt.cm.YlOrBr_r, linestyles='dotted')
# Add legend
ax.legend([emp_cov_contour.collections[1], robust_contour.collections[1],
           inlier_plot, outlier_plot],
          ['MLE dist', 'MCD dist', 'inliers', 'outliers'],
          loc="upper right", borderaxespad=0)
fig, (ax1, ax2) = plt.subplots(1, 2)
plt.subplots_adjust(wspace=.6)
# Calculate cubic root of MLE Mahalanobis distances for samples
# Calculate cubic root of MCD Mahalanobis distances for samples
emp_mahal = emp_cov.mahalanobis(X - np.mean(X, 0)) ** (0.33)
robust_mahal = robust_cov.mahalanobis(X - robust_cov.location_) ** (0.33)
# Plot boxplots
ax1.boxplot([emp_mahal[:-n_outliers], emp_mahal[-n_outliers:]], widths=.25)
ax2.boxplot([robust_mahal[:-n_outliers], robust_mahal[-n_outliers:]], widths=.25)
# Plot individual samples
ax1.plot(np.full(n_samples - n_outliers, 1.26),
         emp_mahal[:-n_outliers], '+k', markeredgewidth=1)
ax1.plot(np.full(n_outliers, 2.26),
         emp_mahal[-n_outliers:], '+k', markeredgewidth=1)
ax2.plot(np.full(n_samples - n_outliers, 1.26),
         robust_mahal[:-n_outliers], '+k', markeredgewidth=1)
ax2.plot(np.full(n_outliers, 2.26),
         robust_mahal[-n_outliers:], '+k', markeredgewidth=1)
ax1.axes.set_xticklabels(('inliers', 'outliers'), size=15)
ax2.axes.set_xticklabels(('inliers', 'outliers'), size=15)
ax1.set_ylabel(r"$\sqrt[3]{\rm{(Mahal. dist.)}}$", size=16)
ax2.set_ylabel(r"$\sqrt[3]{\rm{(Mahal. dist.)}}$", size=16)
ax1.set_title("Non-robust estimates\n(MLE)")
ax2.set_title("Robust estimates\n(MCD)")