from sklearn import linear_model as LM

reg = LM.LinearRegression()


reg.fit([[0,0],[1,1],[2,2]],
        [0,1,2])

LinearRegression()


reg.coef_

array([0.5, 0.5])


import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model as LM
from sklearn.metrics import mean_squared_error, r2_score


# diabetes dataset
X,y = datasets.load_diabetes(return_X_y=True)

# use only one feature
X = X[:, np.newaxis, 2]


# split into training & test sets
Xtrain,Xtest = X[:-20], X[-20:]


# split target data
ytrain,ytest = y[:-20], y[-20:]


# create & train regression model
regr = LM.LinearRegression()
regr.fit(Xtrain,ytrain)

LinearRegression()


# predict from test data & return results
ypred = regr.predict(Xtest)

print('Coefficients:\n',   regr.coef_)
print('Mean sqd error:\n', mean_squared_error(ytest,ypred))
print('r2 score:\n',       r2_score(          ytest,ypred))

Coefficients:
 [938.23786125]
Mean sqd error:
 2548.0723987259694
r2 score:
 0.47257544798227147


# plot
plt.scatter(Xtest,ytest,color='black')
plt.plot(   Xtest,ypred,color='blue')
plt.xticks(()), plt.yticks(()), plt.show()

(([], []), ([], []), None)


# generate random data
np.random.seed(42)
nsamps,nfeats = 200,50
X = np.random.randn(nsamps,nfeats)


# coefficients
true_coefs = 3*np.random.randn(nfeats)
true_coefs[true_coefs<0] = 0


# set y to dot-product; add normally distributed noise
y =  np.dot(X,true_coefs)
y += np.random.normal(size=(nsamps,))


# split data. split_size = % of data to include in test split.
from sklearn.model_selection import train_test_split as TTS

Xtrain,Xtest,ytrain,ytest = TTS(X,y,test_size=0.5)


# fit model and return results
from sklearn.linear_model import LinearRegression as LR

regr_nn = LR(positive=True)
ypred_nn = regr_nn.fit(Xtrain,ytrain).predict(Xtest)

print('r2 score:\n', r2_score(ytest,ypred_nn))

r2 score:
 0.9898521625129549


# fit using OLS for comparison
regr_ols = LR()
ypred_ols = regr_ols.fit(Xtrain,ytrain).predict(Xtest)

print('r2 score:\n', r2_score(ytest,ypred_ols))

r2 score:
 0.9876061239769228


# plot regression coeffs between OLS & NNLS
# note how NNLS constraint shrinks some coeffs to zero
# NNLS inherently yield sparse results.

fix,ax = plt.subplots()
ax.plot(regr_ols.coef_, regr_nn.coef_, marker='.', linewidth=0)

minx,maxx = ax.get_xlim()
miny,maxy = ax.get_ylim()
low, high = max(minx,miny), min(maxx,maxy)

ax.plot([low,high],[low,high], ls='--', c='0.3', alpha=0.5)

ax.set_xlabel('OLS coeffs'); ax.set_ylabel('NNLS coeffs')

Text(0, 0.5, 'NNLS coeffs')


from sklearn import linear_model as LM
regr = LM.Ridge(alpha=0.5)

regr.fit([[0,0],[0,0],[1,1]],  [0.0, 0.1, 1.0])

print("Coefficients:\n", regr.coef_)
print('Intercept:\n',    regr.intercept_)

Coefficients:
 [0.34545455 0.34545455]
Intercept:
 0.13636363636363638


# example: plot coefficients vs regularization
# when alpha is very large, regularization dominates - coeffs trend to zero.
# when alpha ~0, squared loss function dominates - coeffs start oscillating.


# build 10x10 Hilbert matrix (sq.matrix with entries as unit fractions)
X = 1.0/(np.arange(1,11) + np.arange(0,10)[:, np.newaxis])
y = np.ones(10)


# paths
alphas = np.logspace(-10,2,200)
coefs  = []
for a in alphas:
    ridge = LM.Ridge(alpha=a, fit_intercept=False)
    ridge.fit(X,y)
    coefs.append(ridge.coef_)


# plot
ax = plt.gca()
ax.plot(alphas,coefs)
ax.set_xscale('log')
ax.set_xlim(ax.get_xlim()[::-1]) # reverse axis
plt.xlabel('alpha')
plt.ylabel('weights')
plt.title('coefficients vs regularization')
plt.axis('tight')
plt.show()


import numpy as np
from sklearn import linear_model as LM
reg = LM.RidgeCV(alphas=np.logspace(-6, 6, 13))
reg.fit([[0, 0], [0, 0], [1, 1]], [0, .1, 1])
reg.alpha_

0.01


regr = LM.Lasso(alpha=0.1)
regr.fit([[0,0], [1,1]], 
         [0,1])
regr.predict([[1,1]])

array([0.8])


# Lasso vs Elastic Net - sparse signal fit comparison
np.random.seed(42)
nsamps, nfeats = 50,100
X = np.random.randn(nsamps,nfeats)


# create decreasing coeffs, with alternating signs, for viz
idx  = np.arange(nfeats)
coef = (-1)**idx*np.exp(-idx/10)

# sparsify it
coef[10:]=0
y = np.dot(X,coef)

# add normally-distributed noise
y += 0.01*np.random.normal(size=nsamps)


# split into training, test data
nsamps = X.shape[0]
Xtrain,ytrain = X[:nsamps//2 ], y[:nsamps//2 ]
Xtest, ytest  = X[:nsamps//2:], y[ nsamps//2:]


# fit & predict
from sklearn.linear_model import Lasso as LA
from sklearn.linear_model import ElasticNet as EN
alpha       = 0.1
lasso       = LA(alpha=alpha)
enet        = EN(alpha=alpha, l1_ratio=0.7)

ypred_lasso = lasso.fit(Xtrain,ytrain).predict(Xtest)
ypred_enet  =  enet.fit(Xtrain,ytrain).predict(Xtest)

r2_lasso    = r2_score(ytest,ypred_lasso)
r2_enet     = r2_score(ytest,ypred_enet)

print('r2 score (lasso):\t', r2_lasso)
print('r2 score (enet): \t', r2_enet)

r2 score (lasso):	 -0.2977169214889326
r2 score (enet): 	 -0.321518995138536


# display using a stem plot
m,s,_ = plt.stem(
    np.where(enet.coef_)[0],
             enet.coef_[enet.coef_ != 0],
             markerfmt='x',
             label='enet coefficients',
             use_line_collection=True)

plt.setp([m,s], color='#2ca02c')

m, s, _ = plt.stem(
    np.where(lasso.coef_)[0], 
             lasso.coef_[lasso.coef_ != 0],
             markerfmt='x', 
             label='Lasso coefficients',
             use_line_collection=True)

plt.setp([m,s], color='#ff7f0e')

plt.stem(
    np.where(coef)[0], 
             coef[coef != 0], 
             label='true coefficients',
             markerfmt='bx', 
             use_line_collection=True)

plt.legend(loc='best')
plt.title("Lasso $R^2$: %.3f, Elastic Net $R^2$: %.3f" %
          (r2_lasso, r2_enet))
plt.show()


import time
from sklearn.linear_model import LassoCV, LassoLarsCV, LassoLarsIC
from sklearn import datasets


# example: numpy c_ (translate slice objs to concatenate on 2nd axis)
np.c_[np.array([1,2,3]), np.array([4,5,6])]

array([[1, 4],
       [2, 5],
       [3, 6]])


# to avoid division by zero while doing np.log10
EPSILON=1e-4

X,y = datasets.load_diabetes(return_X_y=True)
print(X.shape)
rng = np.random.RandomState(42)
X   = np.c_[X,rng.randn(X.shape[0],14)]

(442, 10)


# normalize data, as done by Lars, to allow comparisons
X /= np.sqrt(np.sum(X**2,axis=0))
print(X.shape)

(442, 24)


# LassoLarsIC: least angle regression with BIC/AIC criterion
bic = LassoLarsIC(criterion='bic')
aic = LassoLarsIC(criterion='aic')

bic.fit(X,y); bic_alpha = bic.alpha_
aic.fit(X,y); aic_alpha = aic.alpha_


def plot_criterion(model,name,color):
    criterion_ = model.criterion_
    
    plt.semilogx(
        model.alphas_ + EPSILON,
        model.criterion_,
        '--', color=color, linewidth=3, label='%s criterion' % name
    )
#    plt.axvline(
#        model.alphas_ + EPSILON,
#        color=color, linewidth=3, label='alpha: %s estimate' % name
#    )
    plt.xlabel(r'$\alpha$')
    plt.ylabel('criterion')


plt.figure()
plot_criterion(aic,'AIC','b')
plot_criterion(bic,'BIC','r')
plt.legend()
plt.title('info criteria for model selection')

Text(0.5, 1.0, 'info criteria for model selection')


# LassoCV: coordinate descent
t1 = time.time()
model = LassoCV(cv=20).fit(X, y)
t_lasso_cv = time.time() - t1

plt.figure()
ymin, ymax = 2300, 3800
plt.semilogx(model.alphas_ + EPSILON, model.mse_path_, ':')
plt.plot(    model.alphas_ + EPSILON, model.mse_path_.mean(axis=-1), 'k', label='avg across folds', linewidth=2)
plt.axvline( model.alpha_ + EPSILON, linestyle='--',           color='k', label='alpha: CV estimate')
plt.legend()

plt.xlabel(r'$\alpha$')
plt.ylabel('Mean square error')
plt.title('MSE on each fold: coordinate descent '
          '(train time: %.2fs)' % t_lasso_cv)
plt.axis('tight')
plt.ylim(ymin, ymax)

(2300.0, 3800.0)


# LassoLarsCV: least angle regression
t1 = time.time()
model = LassoLarsCV(cv=20).fit(X, y)
t_lasso_lars_cv = time.time() - t1

plt.figure()
plt.semilogx(model.cv_alphas_ + EPSILON, model.mse_path_, ':')
plt.semilogx(model.cv_alphas_ + EPSILON, model.mse_path_.mean(axis=-1), 'k', label='avg across folds', linewidth=2)
plt.axvline(model.alpha_, linestyle='--',                         color='k', label='alpha CV')
plt.legend()

plt.xlabel(r'$\alpha$')
plt.ylabel('Mean square error')
plt.title('MSE on each fold: Lars (train time: %.2fs)'
          % t_lasso_lars_cv)
plt.axis('tight')
plt.ylim(ymin, ymax)
plt.show()


from sklearn.linear_model import MultiTaskLasso as MTL
from sklearn.linear_model import Lasso as L


# generate sine-based coefficients - random frequencies & phases
nsamps, nfeats, ntasks = 100,30,40
nrelevant_feats = 5
coefs           = np.zeros((ntasks,nfeats))
times           = np.linspace(0,2*np.pi,ntasks)


rng = np.random.RandomState(42)

for k in range(nrelevant_feats):
    coefs[:,k] = np.sin((1.0+rng.randn(1))*times+3*rng.randn(1))


X = rng.randn(nsamps,nfeats)
Y = np.dot(X,coefs.T) + rng.randn(nsamps,ntasks)


Lcoefs   = np.array([L(alpha=0.5).fit(X,y).coef_ for y in Y.T])
MTLcoefs =         MTL(alpha=1.0).fit(X,Y).coef_


fig = plt.figure()

plt.subplot(1,2,1);    plt.spy(Lcoefs)
plt.xlabel('feature'); plt.ylabel('time or task')
plt.text(10,5,'lasso')

plt.subplot(1,2,2);    plt.spy(MTLcoefs)
plt.xlabel('feature'); plt.ylabel('time or task')
plt.text(10,5,'MT lasso')

Text(10, 5, 'MT lasso')


featurenum=0
plt.figure()
plt.plot(coefs[:, featurenum],
         linewidth=2, color='green', label='ground truth')
plt.plot(Lcoefs[:, featurenum],
         linewidth=2, color='blue', label='lasso')
plt.plot(MTLcoefs[:, featurenum],
         linewidth=2, color='gold', label='MT lasso')
plt.show()


from sklearn.linear_model import lasso_path as LP
from sklearn.linear_model import enet_path  as EP
from sklearn import datasets


X,y = datasets.load_diabetes(return_X_y=True)
X /= X.std(axis=0) #standardize data for easier l1_ratio


# compute Lasso & Enet paths via coordinate descent
alphas_l,    coefs_l,    _ = LP(X,y,eps=5e-3,fit_intercept=False)
alphas_posl, coefs_posl, _ = LP(X,y,eps=5e-3,fit_intercept=False,positive=True)
alphas_e,    coefs_e,    _ = EP(X,y,eps=5e-3,fit_intercept=False,l1_ratio=0.8)
alphas_pose, coefs_pose, _ = EP(X,y,eps=5e-3,fit_intercept=False,l1_ratio=0.8, positive=True)


# plot
import matplotlib.pyplot as plt
import numpy as np
from itertools import cycle

plt.figure()
colors = cycle(['b', 'r', 'g', 'c', 'k'])

neg_log_alphas_lasso = -np.log10(alphas_l)
neg_log_alphas_enet  = -np.log10(alphas_e)

for coef_l, coef_e, c in zip(coefs_l, coefs_e, colors):
    l1 = plt.plot(neg_log_alphas_lasso, coef_l,                 c=c)
    l2 = plt.plot(neg_log_alphas_enet,  coef_e, linestyle='--', c=c)
    
plt.xlabel('-Log(alpha)')
plt.ylabel('coefficients')
plt.title('Lasso and Elastic-Net Paths')
plt.legend((l1[-1], l2[-1]), ('Lasso', 'Elastic-Net'), loc='lower left')

<matplotlib.legend.Legend at 0x7f6b54925f10>


plt.figure()
neg_log_alphas_positive_lasso = -np.log10(alphas_positive_lasso)
for coef_l, coef_pl, c in zip(coefs_lasso, coefs_positive_lasso, colors):
    l1 = plt.plot(neg_log_alphas_lasso, coef_l, c=c)
    l2 = plt.plot(neg_log_alphas_positive_lasso, coef_pl, linestyle='--', c=c)

plt.xlabel('-Log(alpha)')
plt.ylabel('coefficients')
plt.title('Lasso and positive Lasso')
plt.legend((l1[-1], l2[-1]), ('Lasso', 'positive Lasso'), loc='lower left')


# best practice: pass X & y as Fortran-contiguous numpy arrays
# to avoid unnecessary memory duplication

from sklearn import linear_model as LM

clf     = LM.MultiTaskElasticNet(alpha=0.1)
clfcv   = LM.MultiTaskElasticNetCV(cv=3)

X,y = [[0, 0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]] 

clf.fit(X,y); clfcv.fit(X,y)

print(clf.coef_, clf.intercept_)
print(clfcv.coef_, clfcv.intercept_)

[[0.45663524 0.45612256]
 [0.45663524 0.45612256]] [0.0872422 0.0872422]
[[0.52875032 0.46958558]
 [0.52875032 0.46958558]] [0.00166409 0.00166409]


reg = LM.Lars(n_nonzero_coefs=1)

reg.fit([[-1,1],  [0, 0], [1, 1]], 
         [-1.1111, 0,     -1.1111])

print(reg.coef_)

[ 0.     -1.1111]


import numpy as np; import matplotlib.pyplot as plt
from sklearn import linear_model as LM
from sklearn import datasets as DATA


X,y = DATA.load_diabetes(return_X_y=True)

_,_,coefs = LM.lars_path(X,y,method='lasso',verbose=True)
xx        = np.sum(np.abs(coefs.T),axis=1); xx /= xx[-1]

print(coefs.shape); print(xx.shape)

.(10, 13)
(13,)


plt.plot(xx,coefs.T)
ymin,ymax = plt.ylim()
plt.vlines(xx,ymin,ymax,linestyle="dotted")
plt.xlabel('|coef\/max|coef|')
plt.ylabel('coeffs')
plt.title('LASSO path')
plt.show()


from sklearn.linear_model import OrthogonalMatchingPursuit as OMP
from sklearn.linear_model import OrthogonalMatchingPursuitCV as OMPCV
from sklearn.datasets     import make_sparse_coded_signal as SCS


n_components, n_features, n_nonzero_coefs = 512,100,17

y,X,w = SCS(n_samples      =1,
            n_components   =n_components,
            n_features     =n_features,
            n_nonzero_coefs=n_nonzero_coefs,
            random_state   =0)

y_noisy = y+0.05*np.random.randn(len(y))


# indexes
idx0, = w.nonzero()

# noise-free reconstruction
omp   = OMP(n_nonzero_coefs=n_nonzero_coefs); omp.fit(X,y)
coef_ = omp.coef_
idxr, = coef.nonzero()

# noisy reconstruction
omp.fit(X,y_noisy)
coef  = omp.coef_
idxn, = coef.nonzero()

# noisy reconstruction, #non-zeros set by CV
ompcv = OMPCV()
ompcv.fit(X,y_noisy)
coef  = ompcv.coef_
idxcv, = coef.nonzero()


# plot comparisons
plt.figure()

plt.subplot(4,1,1); plt.xlim(0,512); plt.title('sparse signal')
plt.stem(idx0,w[idx0],use_line_collection=True)

plt.subplot(4,1,2); plt.xlim(0,512); plt.title('recovered/noise-free')
plt.stem(idxr,coef[idxr],use_line_collection=True)

plt.subplot(4,1,3); plt.xlim(0,512); plt.title('recovered/noisy')
plt.stem(idxn,coef[idxn],use_line_collection=True)

plt.subplot(4,1,4); plt.xlim(0,512); plt.title('recovered/noisy/CV')
plt.stem(idxcv,coef[idxcv],use_line_collection=True)

plt.subplots_adjust(0.06,0.04,0.94,0.90,0.20,0.38)
plt.suptitle('sparse signal recovery w/ OMP'); plt.show()


from sklearn import linear_model as LM
X = [[0., 0.], [1., 1.], [2., 2.], [3., 3.]]
Y = [ 0.,       1.,       2.,       3.]

reg = LM.BayesianRidge(); reg.fit(X, Y)

print(reg.predict([[1,0]]))
print(reg.coef_)

[0.50000013]
[0.49999993 0.49999993]


from scipy import stats
from sklearn.linear_model import BayesianRidge as BR
from sklearn.linear_model import LinearRegression as LR

np.random.seed(0); n_samples, n_features = 100,100

X = np.random.randn(n_samples,n_features) # gaussian
w = np.zeros(n_features)

# keep 10 weights of interest
relevant_features = np.random.randint(0,n_features,10)
lambda_ = 4.0
for i in relevant_features:
    w[i] = stats.norm.rvs(loc=0,scale=1.0/np.sqrt(lambda_))

# create noise - precision alpha of 50
alpha_ = 50.0
noise = stats.norm.rvs(loc=0, scale=1.0/np.sqrt(alpha_), size=n_samples)

# create target
y = np.dot(X,w)+noise


clf = BR(compute_score=True); clf.fit(X,y)
ols = LR();                   ols.fit(X,y)

LinearRegression()


plt.title('model weights')
plt.plot(clf.coef_, color='lightgreen', linewidth=2, label='bayes ridge')
plt.plot(w,         color='gold',       linewidth=2, label='ground truth')
plt.plot(ols.coef_, color='navy',       linewidth=1, label='OLS estimate')

plt.xlabel('features'); plt.ylabel('weights'); plt.legend(loc='best')

<matplotlib.legend.Legend at 0x7f76708423a0>


plt.title('model weights - histrogram')
plt.hist(clf.coef_, 
         bins=n_features, 
         color='gold', log=True, edgecolor='black')

plt.scatter(clf.coef_[relevant_features], 
            np.full(len(relevant_features), 5.),
            color='navy', label="Relevant features")

plt.ylabel("features"); plt.xlabel("weights"); plt.legend(loc="upper left")

<matplotlib.legend.Legend at 0x7f76707aefd0>


plt.title("Marginal log-likelihood")
plt.plot(clf.scores_, color='navy', linewidth=2)
plt.ylabel("Score"); plt.xlabel("Iterations")

Text(0.5, 0, 'Iterations')


# Plotting some predictions for polynomial regression
def f(x, noise_amount):
    y     = np.sqrt(x) * np.sin(x)
    noise = np.random.normal(0, 1, len(x))
    return y + noise_amount * noise

degree   = 10
X        = np.linspace(0, 10, 100)
y        = f(X, noise_amount=0.1)
clf_poly = BR()

clf_poly.fit(np.vander(X, degree), y)

X_plot        = np.linspace(0, 11, 25)
y_plot        = f(X_plot, noise_amount=0)
y_mean, y_std = clf_poly.predict(np.vander(X_plot, degree), return_std=True)

plt.errorbar(X_plot, y_mean, y_std, 
             color='navy',
             label="polynomial bayes ridge regression", 
             linewidth=2)

plt.plot(X_plot, y_plot, color='gold', 
         linewidth=2, label='ground truth')

plt.ylabel('output y'); plt.xlabel("feature X"); plt.legend(loc="lower left")

<matplotlib.legend.Legend at 0x7f7670680910>


print(np.vander([1,2,3,4],None,increasing=False))

[[ 1  1  1  1]
 [ 8  4  2  1]
 [27  9  3  1]
 [64 16  4  1]]


# generate sinusoidal signal with noise
def fun(x): return np.sin(2*np.pi*x)
size    = 25; 
rng     = np.random.RandomState(1234)
x_train = rng.uniform(0.0, 1.0, size)
y_train = fun(x_train) + rng.normal(scale=0.1, size=size)
x_test  = np.linspace(0.0, 1.0, 100)

n_order = 3
X_train = np.vander(x_train, n_order+1, increasing=True)
X_test  = np.vander(x_test,  n_order+1, increasing=True)


reg = BR(tol          =1e-6, 
         fit_intercept=False, 
         compute_score=True)

fig, axes = plt.subplots(1, 2, figsize=(8, 4))
for i, ax in enumerate(axes):
    # Bayesian ridge regression with different initial value pairs
    if i == 0:
        init = [1 / np.var(y_train), 1.]  # Default values
    elif i == 1:
        init = [1., 1e-3]
        reg.set_params(alpha_init=init[0], lambda_init=init[1])

    reg.fit(X_train, y_train)
    ymean, ystd = reg.predict(X_test, return_std=True)

    ax.plot(        x_test,  fun(x_test), 
                    color="blue", label="sin($2\\pi x$)")
    ax.scatter(     x_train, y_train, 
                    s=50, alpha=0.5, label="observation")
    ax.plot(        x_test,  ymean, 
                    color="red", 
                    label="predict mean")
    ax.fill_between(x_test,  ymean-ystd, ymean+ystd, 
                    color="pink", alpha=0.5, 
                    label="predict std")
    
    ax.set_ylim(-1.3, 1.3)
    ax.legend()
    title = "$\\alpha$_init$={:.2f},\\ \\lambda$_init$={}$".format(
            init[0], init[1])
    if i == 0:
        title += " (Default)"
    ax.set_title(title, fontsize=12)
    text = "$\\alpha={:.1f}$\n$\\lambda={:.3f}$\n$L={:.1f}$".format(
           reg.alpha_, reg.lambda_, reg.scores_[-1])
    ax.text(0.05, -1.0, text, fontsize=12)

plt.tight_layout()
plt.show()


from sklearn.linear_model import TweedieRegressor as Tweedie
reg = Tweedie(power=1, alpha=0.5, link='log')
reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2])

print(reg.coef_, reg.intercept_)

[0.24631611 0.43370317] -0.7638091359123445


from sklearn.datasets import fetch_openml
#df = fetch_openml(data_id=41214, as_frame=True).frame; df


import numpy as np
from sklearn.linear_model import SGDClassifier as SGDC
from sklearn.preprocessing import StandardScaler as SS
from sklearn.pipeline import make_pipeline as MP

X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
Y = np.array([1, 1, 2, 2])

# Always scale inputs. The most convenient way is to use a pipeline.
clf = MP(SS(), SGDC(max_iter=1000, tol=1e-3)); clf.fit(X, Y)

clf.predict([[-0.8, -1]])

array([1])


import numpy as np
from sklearn.linear_model import SGDRegressor as SGDR
from sklearn.pipeline import make_pipeline as MP
from sklearn.preprocessing import StandardScaler as SS

n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y   = rng.randn(n_samples)
X   = rng.randn(n_samples, n_features)

reg = MP(SS(), SGDR(max_iter=1000, tol=1e-3)); reg.fit(X, y)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('sgdregressor', SGDRegressor())])


from sklearn.datasets import load_digits
from sklearn.linear_model import Perceptron
X,y = load_digits(return_X_y=True)
clf = Perceptron(tol=1e-3, random_state=0); clf.fit(X,y)
clf.score(X,y)

0.9393433500278241


from sklearn.linear_model import PassiveAggressiveClassifier as PAC
from sklearn.datasets import make_classification as MC

X,y = MC(n_features=4, random_state=0)
clf = PAC(max_iter=1000, random_state=0, tol=1e-3); clf.fit(X,y)

print(clf.coef_); print(clf.intercept_); print(clf.predict([[0,0,0,0]]))

[[0.26642044 0.45070924 0.67251877 0.64185414]]
[1.84127814]
[1]


from sklearn.linear_model import PassiveAggressiveRegressor as PAR
from sklearn.datasets import make_regression as MR

X,y = MR(n_features=4, random_state=0)
reg = PAR(max_iter=1000, random_state=0, tol=1e-3); reg.fit(X,y)

print(reg.coef_); print(reg.intercept_); print(reg.predict([[0,0,0,0]]))

[20.48736655 34.18818427 67.59122734 87.94731329]
[-0.02306214]
[-0.02306214]


import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model as LM
from sklearn.datasets import make_regression as MR


n_samples, n_outliers = 1000,50
X,y,coef = MR(n_samples=n_samples,
              n_features=1,
              n_informative=1,
              noise=10,
              coef=True,
              random_state=0
             )
# add outliers
np.random.seed(0)
X[:n_outliers] = 3+0.5*(np.random.normal(size=(n_outliers,1)))
y[:n_outliers] = -3+10*(np.random.normal(size=n_outliers))

# fit using all data
lr = LR(); lr.fit(X,y)

# fit using RANSAC
ransac      = LM.RANSACRegressor(); ransac.fit(X,y)
inlier_mask = ransac.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)


#predict
line_X        = np.arange(X.min(), X.max())[: np.newaxis]
print(line_X)

#line_y        = lr.predict(line_X) <------------- BUG. Need reshape
#line_y_ransac = ransac.predict(line_X)

[-3.04614305 -2.04614305 -1.04614305 -0.04614305  0.95385695  1.95385695
  2.95385695  3.95385695]


import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression as LR
from sklearn.linear_model import TheilSenRegressor as TSR
from sklearn.linear_model import RANSACRegressor as RR


# example: only y outliers
# model: y = 3*x + N(2,0.1**2)
np.random.seed(0)
n_samples = 200; 
w = 3.
c = 2.
x         =     np.random.randn(n_samples)
noise     = 0.1*np.random.randn(n_samples)
y         = w*x+c+noise
# 10% outliers
y[-20:]  += -20*x[-20:]
X         = x[:, np.newaxis]


plt.scatter(x,y,color='indigo',marker='x',s=40)
line_x = np.array([-3,+3])

for name,model in [('OLS',LR()), 
                   ('Theil-Sen', TSR(random_state=42)), 
                   ('RANSAC', RR(random_state=42))]:
    model.fit(X,y)
    y_pred = model.predict(line_x.reshape(2,1))
    
    plt.plot(line_x, y_pred, 
             color={'OLS': 'turquoise', 
                    'Theil-Sen': 'gold', 
                    'RANSAC': 'lightgreen'}[name],
             linewidth=2,
             label=name
            )
plt.legend(loc='upper left'); plt.title('corrupt y values')

Text(0.5, 1.0, 'corrupt y values')


# only X outliers (10%)
# model: y = 3*x+N(2,0.1**2)
x        =     np.random.randn(n_samples)
noise    = 0.1*np.random.randn(n_samples)
y        = 3*x+2+noise
x[-20:]  = 9.9
y[-20:] += 22
X        = x[:, np.newaxis]


plt.figure()
plt.scatter(x,y,color='blue',marker='x',s=40)

line_x = np.array([-3,10])
for name,model in [('OLS',LR()), 
                   ('Theil-Sen', TSR(random_state=42)), 
                   ('RANSAC', RR(random_state=42))]:
    model.fit(X,y)
    y_pred = model.predict(line_x.reshape(2,1))
    
    plt.plot(line_x, y_pred, 
             color={'OLS': 'turquoise', 
                    'Theil-Sen': 'gold', 
                    'RANSAC': 'lightgreen'}[name],
             linewidth=2,
             label=name
            )
plt.legend(loc='upper left'); plt.title("corrupt X values")

Text(0.5, 1.0, 'corrupt X values')


import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression as MR
from sklearn.linear_model import HuberRegressor as HR 
from sklearn.linear_model import Ridge as R


rng   = np.random.RandomState(0)
X,y   = MR(n_samples=20, n_features=1, random_state=0, noise=4.0, bias=100.0)

# Add four strong outliers to the dataset.
X_outliers         = rng.normal(0, 0.5, size=(4, 1))
y_outliers         = rng.normal(0, 2.0, size=4)
X_outliers[:2, :] += X.max() + X.mean() / 4.
X_outliers[2:, :] += X.min() - X.mean() / 4.
y_outliers[:2]    += y.min() - y.mean() / 4.
y_outliers[2:]    += y.max() + y.mean() / 4.
X                  = np.vstack(     (X, X_outliers))
y                  = np.concatenate((y, y_outliers))


plt.plot(X, y, 'b.')

# plot huber regressor over a range of values
colors = ['r-', 'b-', 'y-', 'm-']
x      = np.linspace(X.min(), X.max(), 7)

for k, epsilon in enumerate([1.35, 1.5, 1.75, 1.9]):
    huber = HR(alpha=0.0, epsilon=epsilon)
    huber.fit(X, y)
    coef_ = huber.coef_ * x + huber.intercept_
    plt.plot(x, coef_, colors[k], label="huber loss, %s" % epsilon)

# Fit a ridge regressor for comparison
ridge = R(alpha=0.0, random_state=0, normalize=True)
ridge.fit(X, y)

#coef_ridge = ridge.coef_
coef_ = ridge.coef_ * x + ridge.intercept_

plt.plot(x, coef_, 'g-', label="ridge")
plt.title("Huber vs Ridge")
plt.xlabel("X"); plt.ylabel("y"); plt.legend(loc=0); plt.show()


from sklearn.preprocessing import PolynomialFeatures as PF
import numpy as np
X = np.arange(6).reshape(3, 2)
print(X)
poly = PF(degree=2)
poly.fit_transform(X)

[[0 1]
 [2 3]
 [4 5]]

array([[ 1.,  0.,  1.,  0.,  0.,  1.],
       [ 1.,  2.,  3.,  4.,  6.,  9.],
       [ 1.,  4.,  5., 16., 20., 25.]])


from sklearn.linear_model import Perceptron as P
from sklearn.preprocessing import PolynomialFeatures as PF
import numpy as np

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = X[:, 0] ^ X[:, 1]
X = PF(interaction_only=True).fit_transform(X).astype(int)

print(y,X)

clf = P(fit_intercept=False, 
        max_iter=10, tol=None, shuffle=False).fit(X, y)

# classifier predictions:
print('prediction:\t',clf.predict(X),'\n', 
      'score:\t',clf.score(X,y))

[0 1 1 0] [[1 0 0 0]
 [1 0 1 0]
 [1 1 0 0]
 [1 1 1 1]]
prediction:	 [0 1 1 0] 
 score:	 1.0


import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge as R
from sklearn.preprocessing import PolynomialFeatures as PF
from sklearn.pipeline import make_pipeline as MP


def f(x):
    return x*np.sin(x)

x_plot = np.linspace(0,10,100)
x      = np.linspace(0,10,100)
rng    = np.random.RandomState(0); rng.shuffle(x)
x      = np.sort(x[:20])
y      = f(x)


# create matrix versions of the arrays
X      =      x[:,np.newaxis]
X_plot = x_plot[:,np.newaxis]


colors = ['teal','yellowgreen','gold']; lw=2

plt.plot(
    x_plot, f(x_plot), color='cornflowerblue', linewidth=lw, label='ground truth')
plt.scatter(
    x,y,color='navy',s=30,marker='o',label='training data')

for count,degree in enumerate([3,4,5]):
    model=MP(PF(degree),R())
    model.fit(X,y)
    y_plot = model.predict(X_plot)
    plt.plot(
        x_plot,y_plot,color=colors[count],linewidth=lw,
        label="degree %d" % degree
    )

Linear Models¶

Ordinary Least Squares (OLS)¶

Least Squares (Non-Negative)¶

Ridge regression¶

Ridge Classification¶

Ridge Regression with built-in Alpha Cross Validation¶

Lasso Regression¶

lasso_path (API call)¶

Model selection using Info criteria techniques¶

Multitask Lasso Regression ¶

Elastic-Net ¶

Elastic-Net (Multitask)¶

Elastic-Net (Multitask w/ Cross Validation)¶

Least Angle Regression (LARS)¶

LARS Lasso ¶

example: compute Lasso path vs regularization using the LARS algorithm¶

Orthogonal Matching Pursuit (OMP)¶

Example: Sparse signal recovery from a noisy measurement - encoded with a dictionary¶

Bayesian Regression ¶

Example: Bayesian Ridge Regression - synthetic dataset¶

Example: Use polynomial feature expansion to plot BRR prediction & error¶

Example: Curve fit using BRR¶

Generalized Linear Regression (GLM)¶

GLM / Tweedie Model Regression ¶

Example: log-linear Poisson regression vs LSE-based regression¶

Stochastic Gradient Descent (SGD)¶

Perceptron ¶

Passive-Aggressive Algos (ref: JMLR paper)¶

Robustness: Outliers & Modeling Errors¶

Example: RANSAC¶

Example: Theil Sen regression¶

Example: Huber vs Ridge Regression - artificial dataset with strong outliers¶

Polynomial Regression & Basis Functions¶

example: solve XOR with a linear classifier.¶

example - approximate n-degree polynomial with ridge regression¶

Linear Models¶

Ordinary Least Squares (OLS)¶

Least Squares (Non-Negative)¶

Ridge regression¶

Ridge Classification¶

Ridge Regression with built-in Alpha Cross Validation¶

Lasso Regression¶

lasso_path (API call)¶

Model selection using Info criteria techniques¶

example: compute Lasso path vs regularization using the LARS algorithm¶

Example: Sparse signal recovery from a noisy measurement - encoded with a dictionary¶

Example: Bayesian Ridge Regression - synthetic dataset¶

Example: Use polynomial feature expansion to plot BRR prediction & error¶

Example: Curve fit using BRR¶

Generalized Linear Regression (GLM)¶

GLM / Tweedie Model Regression¶

Example: log-linear Poisson regression vs LSE-based regression¶

Stochastic Gradient Descent (SGD)¶

Robustness: Outliers & Modeling Errors¶

Example: RANSAC¶

Example: Theil Sen regression¶

Example: Huber vs Ridge Regression - artificial dataset with strong outliers¶

Polynomial Regression & Basis Functions¶

example: solve XOR with a linear classifier.¶

example - approximate n-degree polynomial with ridge regression¶

GLM / Tweedie Model Regression ¶