For a design matrix of shape (n_samples, n_features), OLS has a cost of $O(n_{\text{samples}} n_{\text{features}}^2)$.
from sklearn import linear_model as LM
reg = LM.LinearRegression()
reg.fit([[0,0],[1,1],[2,2]],
[0,1,2])
LinearRegression()
reg.coef_
array([0.5, 0.5])
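As a rough, hedged check of the quadratic n_features dependence noted above (sizes are arbitrary, timings machine-dependent):
import time
import numpy as np
from sklearn.linear_model import LinearRegression
rng = np.random.RandomState(0)
for nf in (100, 200, 400):                      # doubling n_features each step
    Xb, yb = rng.randn(2000, nf), rng.randn(2000)
    t0 = time.perf_counter()
    LinearRegression().fit(Xb, yb)
    print(nf, round(time.perf_counter() - t0, 3))
# fit time should grow roughly 4x per doubling once the n_features term dominates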
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model as LM
from sklearn.metrics import mean_squared_error, r2_score
# diabetes dataset
X,y = datasets.load_diabetes(return_X_y=True)
# use only one feature
X = X[:, np.newaxis, 2]
# split into training & test sets
Xtrain,Xtest = X[:-20], X[-20:]
# split target data
ytrain,ytest = y[:-20], y[-20:]
# create & train regression model
regr = LM.LinearRegression()
regr.fit(Xtrain,ytrain)
LinearRegression()
# predict from test data & return results
ypred = regr.predict(Xtest)
print('Coefficients:\n', regr.coef_)
print('Mean sqd error:\n', mean_squared_error(ytest,ypred))
print('r2 score:\n', r2_score( ytest,ypred))
Coefficients:
 [938.23786125]
Mean sqd error:
 2548.0723987259694
r2 score:
 0.47257544798227147
# plot
plt.scatter(Xtest,ytest,color='black')
plt.plot( Xtest,ypred,color='blue')
plt.xticks(()), plt.yticks(()), plt.show()
# generate random data
np.random.seed(42)
nsamps,nfeats = 200,50
X = np.random.randn(nsamps,nfeats)
# coefficients
true_coefs = 3*np.random.randn(nfeats)
true_coefs[true_coefs<0] = 0
# set y to dot-product; add normally distributed noise
y = np.dot(X,true_coefs)
y += np.random.normal(size=(nsamps,))
# split data. test_size = fraction of data to include in the test split.
from sklearn.model_selection import train_test_split as TTS
Xtrain,Xtest,ytrain,ytest = TTS(X,y,test_size=0.5)
# fit model and return results
from sklearn.linear_model import LinearRegression as LR
regr_nn = LR(positive=True)
ypred_nn = regr_nn.fit(Xtrain,ytrain).predict(Xtest)
print('r2 score:\n', r2_score(ytest,ypred_nn))
r2 score: 0.9898521625129549
# fit using OLS for comparison
regr_ols = LR()
ypred_ols = regr_ols.fit(Xtrain,ytrain).predict(Xtest)
print('r2 score:\n', r2_score(ytest,ypred_ols))
r2 score: 0.9876061239769228
# plot regression coeffs between OLS & NNLS
# note how NNLS constraint shrinks some coeffs to zero
# NNLS inherently yields sparse results.
fig,ax = plt.subplots()
ax.plot(regr_ols.coef_, regr_nn.coef_, marker='.', linewidth=0)
minx,maxx = ax.get_xlim()
miny,maxy = ax.get_ylim()
low, high = max(minx,miny), min(maxx,maxy)
ax.plot([low,high],[low,high], ls='--', c='0.3', alpha=0.5)
ax.set_xlabel('OLS coeffs'); ax.set_ylabel('NNLS coeffs')
from sklearn import linear_model as LM
regr = LM.Ridge(alpha=0.5)
regr.fit([[0,0],[0,0],[1,1]], [0.0, 0.1, 1.0])
print("Coefficients:\n", regr.coef_)
print('Intercept:\n', regr.intercept_)
Coefficients:
 [0.34545455 0.34545455]
Intercept:
 0.13636363636363638
# example: plot coefficients vs regularization
# when alpha is very large, regularization dominates - coeffs trend to zero.
# when alpha ~0, squared loss function dominates - coeffs start oscillating.
# build 10x10 Hilbert matrix (sq.matrix with entries as unit fractions)
X = 1.0/(np.arange(1,11) + np.arange(0,10)[:, np.newaxis])
y = np.ones(10)
# paths
alphas = np.logspace(-10,2,200)
coefs = []
for a in alphas:
    ridge = LM.Ridge(alpha=a, fit_intercept=False)
    ridge.fit(X,y)
    coefs.append(ridge.coef_)
# plot
ax = plt.gca()
ax.plot(alphas,coefs)
ax.set_xscale('log')
ax.set_xlim(ax.get_xlim()[::-1]) # reverse axis
plt.xlabel('alpha')
plt.ylabel('weights')
plt.title('coefficients vs regularization')
plt.axis('tight')
plt.show()
The ridge classifier converts binary targets to {-1, +1}, then treats the problem as a regression task.
import numpy as np
from sklearn import linear_model as LM
reg = LM.RidgeCV(alphas=np.logspace(-6, 6, 13))
reg.fit([[0, 0], [0, 0], [1, 1]], [0, .1, 1])
reg.alpha_
0.01
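The ridge classifier itself was only described above; a minimal hedged sketch on made-up binary data (labels are encoded internally as {-1, +1}):
from sklearn.linear_model import RidgeClassifier
Xc = [[0, 0], [0, 1], [1, 0], [1, 1]]
yc = [0, 0, 1, 1]                       # class depends only on the first feature
clf = RidgeClassifier(alpha=1.0).fit(Xc, yc)
print(clf.predict([[0.9, 0.9]]))        # should predict class 1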
regr = LM.Lasso(alpha=0.1)
regr.fit([[0,0], [1,1]],
[0,1])
regr.predict([[1,1]])
array([0.8])
# Lasso vs Elastic Net - sparse signal fit comparison
np.random.seed(42)
nsamps, nfeats = 50,100
X = np.random.randn(nsamps,nfeats)
# create decreasing coeffs, with alternating signs, for viz
idx = np.arange(nfeats)
coef = (-1)**idx*np.exp(-idx/10)
# sparsify it
coef[10:]=0
y = np.dot(X,coef)
# add normally-distributed noise
y += 0.01*np.random.normal(size=nsamps)
# split into training, test data
nsamps = X.shape[0]
Xtrain,ytrain = X[:nsamps//2], y[:nsamps//2]
Xtest, ytest  = X[nsamps//2:], y[nsamps//2:]
# fit & predict
from sklearn.linear_model import Lasso as LA
from sklearn.linear_model import ElasticNet as EN
alpha = 0.1
lasso = LA(alpha=alpha)
enet = EN(alpha=alpha, l1_ratio=0.7)
ypred_lasso = lasso.fit(Xtrain,ytrain).predict(Xtest)
ypred_enet = enet.fit(Xtrain,ytrain).predict(Xtest)
r2_lasso = r2_score(ytest,ypred_lasso)
r2_enet = r2_score(ytest,ypred_enet)
print('r2 score (lasso):\t', r2_lasso)
print('r2 score (enet): \t', r2_enet)
r2 score (lasso):	 -0.2977169214889326
r2 score (enet): 	 -0.321518995138536
# display using a stem plot
m,s,_ = plt.stem(
np.where(enet.coef_)[0],
enet.coef_[enet.coef_ != 0],
markerfmt='x',
label='enet coefficients',
use_line_collection=True)
plt.setp([m,s], color='#2ca02c')
m, s, _ = plt.stem(
np.where(lasso.coef_)[0],
lasso.coef_[lasso.coef_ != 0],
markerfmt='x',
label='Lasso coefficients',
use_line_collection=True)
plt.setp([m,s], color='#ff7f0e')
plt.stem(
np.where(coef)[0],
coef[coef != 0],
label='true coefficients',
markerfmt='bx',
use_line_collection=True)
plt.legend(loc='best')
plt.title("Lasso $R^2$: %.3f, Elastic Net $R^2$: %.3f" %
(r2_lasso, r2_enet))
plt.show()
import time
from sklearn.linear_model import LassoCV, LassoLarsCV, LassoLarsIC
from sklearn import datasets
# example: numpy c_ (translate slice objs to concatenate on 2nd axis)
np.c_[np.array([1,2,3]), np.array([4,5,6])]
array([[1, 4],
       [2, 5],
       [3, 6]])
# to avoid division by zero while doing np.log10
EPSILON=1e-4
X,y = datasets.load_diabetes(return_X_y=True)
print(X.shape)
rng = np.random.RandomState(42)
X = np.c_[X,rng.randn(X.shape[0],14)]
(442, 10)
# normalize data, as done by Lars, to allow comparisons
X /= np.sqrt(np.sum(X**2,axis=0))
print(X.shape)
(442, 24)
# LassoLarsIC: least angle regression with BIC/AIC criterion
bic = LassoLarsIC(criterion='bic')
aic = LassoLarsIC(criterion='aic')
bic.fit(X,y); bic_alpha = bic.alpha_
aic.fit(X,y); aic_alpha = aic.alpha_
def plot_criterion(model,name,color):
    criterion_ = model.criterion_
    plt.semilogx(
        model.alphas_ + EPSILON,
        model.criterion_,
        '--', color=color, linewidth=3, label='%s criterion' % name
    )
    # plt.axvline(
    #     model.alphas_ + EPSILON,
    #     color=color, linewidth=3, label='alpha: %s estimate' % name
    # )
    plt.xlabel(r'$\alpha$')
    plt.ylabel('criterion')
plt.figure()
plot_criterion(aic,'AIC','b')
plot_criterion(bic,'BIC','r')
plt.legend()
plt.title('info criteria for model selection')
# LassoCV: coordinate descent
t1 = time.time()
model = LassoCV(cv=20).fit(X, y)
t_lasso_cv = time.time() - t1
plt.figure()
ymin, ymax = 2300, 3800
plt.semilogx(model.alphas_ + EPSILON, model.mse_path_, ':')
plt.plot( model.alphas_ + EPSILON, model.mse_path_.mean(axis=-1), 'k', label='avg across folds', linewidth=2)
plt.axvline( model.alpha_ + EPSILON, linestyle='--', color='k', label='alpha: CV estimate')
plt.legend()
plt.xlabel(r'$\alpha$')
plt.ylabel('Mean square error')
plt.title('MSE on each fold: coordinate descent '
'(train time: %.2fs)' % t_lasso_cv)
plt.axis('tight')
plt.ylim(ymin, ymax)
# LassoLarsCV: least angle regression
t1 = time.time()
model = LassoLarsCV(cv=20).fit(X, y)
t_lasso_lars_cv = time.time() - t1
plt.figure()
plt.semilogx(model.cv_alphas_ + EPSILON, model.mse_path_, ':')
plt.semilogx(model.cv_alphas_ + EPSILON, model.mse_path_.mean(axis=-1), 'k', label='avg across folds', linewidth=2)
plt.axvline(model.alpha_, linestyle='--', color='k', label='alpha CV')
plt.legend()
plt.xlabel(r'$\alpha$')
plt.ylabel('Mean square error')
plt.title('MSE on each fold: Lars (train time: %.2fs)'
% t_lasso_lars_cv)
plt.axis('tight')
plt.ylim(ymin, ymax)
plt.show()
from sklearn.linear_model import MultiTaskLasso as MTL
from sklearn.linear_model import Lasso as L
# generate sine-based coefficients - random frequencies & phases
nsamps, nfeats, ntasks = 100,30,40
nrelevant_feats = 5
coefs = np.zeros((ntasks,nfeats))
times = np.linspace(0,2*np.pi,ntasks)
rng = np.random.RandomState(42)
for k in range(nrelevant_feats):
    coefs[:,k] = np.sin((1.0+rng.randn(1))*times+3*rng.randn(1))
X = rng.randn(nsamps,nfeats)
Y = np.dot(X,coefs.T) + rng.randn(nsamps,ntasks)
Lcoefs = np.array([L(alpha=0.5).fit(X,y).coef_ for y in Y.T])
MTLcoefs = MTL(alpha=1.0).fit(X,Y).coef_
fig = plt.figure()
plt.subplot(1,2,1); plt.spy(Lcoefs)
plt.xlabel('feature'); plt.ylabel('time or task')
plt.text(10,5,'lasso')
plt.subplot(1,2,2); plt.spy(MTLcoefs)
plt.xlabel('feature'); plt.ylabel('time or task')
plt.text(10,5,'MT lasso')
featurenum=0
plt.figure()
plt.plot(coefs[:, featurenum],
linewidth=2, color='green', label='ground truth')
plt.plot(Lcoefs[:, featurenum],
linewidth=2, color='blue', label='lasso')
plt.plot(MTLcoefs[:, featurenum],
linewidth=2, color='gold', label='MT lasso')
plt.legend(); plt.show()
The mix of L1 and L2 penalties in the elastic net is controlled with the l1_ratio parameter.
from sklearn.linear_model import lasso_path as LP
from sklearn.linear_model import enet_path as EP
from sklearn import datasets
X,y = datasets.load_diabetes(return_X_y=True)
X /= X.std(axis=0) # standardize data (easier to set the l1_ratio parameter)
# compute Lasso & Enet paths via coordinate descent
alphas_l, coefs_l, _ = LP(X,y,eps=5e-3,fit_intercept=False)
alphas_posl, coefs_posl, _ = LP(X,y,eps=5e-3,fit_intercept=False,positive=True)
alphas_e, coefs_e, _ = EP(X,y,eps=5e-3,fit_intercept=False,l1_ratio=0.8)
alphas_pose, coefs_pose, _ = EP(X,y,eps=5e-3,fit_intercept=False,l1_ratio=0.8, positive=True)
# plot
import matplotlib.pyplot as plt
import numpy as np
from itertools import cycle
plt.figure()
colors = cycle(['b', 'r', 'g', 'c', 'k'])
neg_log_alphas_lasso = -np.log10(alphas_l)
neg_log_alphas_enet = -np.log10(alphas_e)
for coef_l, coef_e, c in zip(coefs_l, coefs_e, colors):
    l1 = plt.plot(neg_log_alphas_lasso, coef_l, c=c)
    l2 = plt.plot(neg_log_alphas_enet, coef_e, linestyle='--', c=c)
plt.xlabel('-Log(alpha)')
plt.ylabel('coefficients')
plt.title('Lasso and Elastic-Net Paths')
plt.legend((l1[-1], l2[-1]), ('Lasso', 'Elastic-Net'), loc='lower left')
plt.figure()
neg_log_alphas_positive_lasso = -np.log10(alphas_posl)
for coef_l, coef_pl, c in zip(coefs_l, coefs_posl, colors):
    l1 = plt.plot(neg_log_alphas_lasso, coef_l, c=c)
    l2 = plt.plot(neg_log_alphas_positive_lasso, coef_pl, linestyle='--', c=c)
plt.xlabel('-Log(alpha)')
plt.ylabel('coefficients')
plt.title('Lasso and positive Lasso')
plt.legend((l1[-1], l2[-1]), ('Lasso', 'positive Lasso'), loc='lower left')
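The l1_ratio mentioned above can also be selected by cross-validation together with alpha; a hedged sketch on the diabetes data (the l1_ratio grid is arbitrary):
from sklearn.datasets import load_diabetes
from sklearn.linear_model import ElasticNetCV
Xd, yd = load_diabetes(return_X_y=True)
encv = ElasticNetCV(l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 1.0], cv=5)
encv.fit(Xd, yd)
print(encv.alpha_, encv.l1_ratio_)      # selected values depend on the data and grid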
MultiTaskElasticNetCV can set the alpha and l1_ratio parameters by cross-validation.
# best practice: pass X & y as Fortran-contiguous numpy arrays
# to avoid unnecessary memory duplication
from sklearn import linear_model as LM
clf = LM.MultiTaskElasticNet(alpha=0.1)
clfcv = LM.MultiTaskElasticNetCV(cv=3)
X,y = [[0, 0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]]
clf.fit(X,y); clfcv.fit(X,y)
print(clf.coef_, clf.intercept_)
print(clfcv.coef_, clfcv.intercept_)
[[0.45663524 0.45612256]
 [0.45663524 0.45612256]] [0.0872422 0.0872422]
[[0.52875032 0.46958558]
 [0.52875032 0.46958558]] [0.00166409 0.00166409]
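A hedged sketch of the Fortran-ordering tip from the comment above; np.asfortranarray makes the copy explicit, so the solver shouldn't need to re-copy the data internally (array sizes are arbitrary):
import numpy as np
from sklearn import linear_model as LM
rng = np.random.RandomState(0)
Xf = np.asfortranarray(rng.randn(100, 10))      # column-major (Fortran) layout
Yf = np.asfortranarray(rng.randn(100, 3))
print(Xf.flags['F_CONTIGUOUS'], Yf.flags['F_CONTIGUOUS'])   # True True
LM.MultiTaskElasticNet(alpha=0.1).fit(Xf, Yf)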
reg = LM.Lars(n_nonzero_coefs=1)
reg.fit([[-1,1], [0, 0], [1, 1]],
[-1.1111, 0, -1.1111])
print(reg.coef_)
[ 0. -1.1111]
The coef_path_ attribute, of shape (n_features, max_features+1), contains the coefficient path; the first column is always zero.
import numpy as np; import matplotlib.pyplot as plt
from sklearn import linear_model as LM
from sklearn import datasets as DATA
X,y = DATA.load_diabetes(return_X_y=True)
_,_,coefs = LM.lars_path(X,y,method='lasso',verbose=True)
xx = np.sum(np.abs(coefs.T),axis=1); xx /= xx[-1]
print(coefs.shape); print(xx.shape)
(10, 13)
(13,)
plt.plot(xx,coefs.T)
ymin,ymax = plt.ylim()
plt.vlines(xx,ymin,ymax,linestyle="dotted")
plt.xlabel('|coef| / max|coef|')
plt.ylabel('coeffs')
plt.title('LASSO path')
plt.show()
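The coefficient path is also exposed on the estimator itself via the coef_path_ attribute described above; a minimal hedged sketch on the same diabetes data (alpha value is arbitrary):
reg_ll = LM.LassoLars(alpha=0.01).fit(X, y)
print(reg_ll.coef_path_.shape)   # one column per step along the path; the first is all zeros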
from sklearn.linear_model import OrthogonalMatchingPursuit as OMP
from sklearn.linear_model import OrthogonalMatchingPursuitCV as OMPCV
from sklearn.datasets import make_sparse_coded_signal as SCS
n_components, n_features, n_nonzero_coefs = 512,100,17
y,X,w = SCS(n_samples =1,
n_components =n_components,
n_features =n_features,
n_nonzero_coefs=n_nonzero_coefs,
random_state =0)
y_noisy = y+0.05*np.random.randn(len(y))
# indexes
idx0, = w.nonzero()
# noise-free reconstruction
omp = OMP(n_nonzero_coefs=n_nonzero_coefs); omp.fit(X,y)
coef_clean = omp.coef_
idxr, = coef_clean.nonzero()
# noisy reconstruction
omp.fit(X,y_noisy)
coef_noisy = omp.coef_
idxn, = coef_noisy.nonzero()
# noisy reconstruction, number of non-zeros set by CV
ompcv = OMPCV()
ompcv.fit(X,y_noisy)
coef_cv = ompcv.coef_
idxcv, = coef_cv.nonzero()
# plot comparisons
plt.figure()
plt.subplot(4,1,1); plt.xlim(0,512); plt.title('sparse signal')
plt.stem(idx0,w[idx0],use_line_collection=True)
plt.subplot(4,1,2); plt.xlim(0,512); plt.title('recovered/noise-free')
plt.stem(idxr,coef_clean[idxr],use_line_collection=True)
plt.subplot(4,1,3); plt.xlim(0,512); plt.title('recovered/noisy')
plt.stem(idxn,coef_noisy[idxn],use_line_collection=True)
plt.subplot(4,1,4); plt.xlim(0,512); plt.title('recovered/noisy/CV')
plt.stem(idxcv,coef_cv[idxcv],use_line_collection=True)
plt.subplots_adjust(0.06,0.04,0.94,0.90,0.20,0.38)
plt.suptitle('sparse signal recovery w/ OMP'); plt.show()
Initial values of the regularization parameters can be set with alpha_init and lambda_init.
from sklearn import linear_model as LM
X = [[0., 0.], [1., 1.], [2., 2.], [3., 3.]]
Y = [ 0., 1., 2., 3.]
reg = LM.BayesianRidge(); reg.fit(X, Y)
print(reg.predict([[1,0]]))
print(reg.coef_)
[0.50000013] [0.49999993 0.49999993]
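The alpha_init / lambda_init parameters mentioned above can also be passed to the constructor directly (a hedged sketch reusing the toy X, Y above; the initial values are arbitrary):
reg2 = LM.BayesianRidge(alpha_init=1.0, lambda_init=1e-3)
reg2.fit(X, Y)
print(reg2.predict([[1, 0]]))           # should be close to the fit above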
from scipy import stats
from sklearn.linear_model import BayesianRidge as BR
from sklearn.linear_model import LinearRegression as LR
np.random.seed(0); n_samples, n_features = 100,100
X = np.random.randn(n_samples,n_features) # gaussian
w = np.zeros(n_features)
# keep 10 weights of interest
relevant_features = np.random.randint(0,n_features,10)
lambda_ = 4.0
for i in relevant_features:
    w[i] = stats.norm.rvs(loc=0,scale=1.0/np.sqrt(lambda_))
# create noise - precision alpha of 50
alpha_ = 50.0
noise = stats.norm.rvs(loc=0, scale=1.0/np.sqrt(alpha_), size=n_samples)
# create target
y = np.dot(X,w)+noise
clf = BR(compute_score=True); clf.fit(X,y)
ols = LR(); ols.fit(X,y)
LinearRegression()
plt.title('model weights')
plt.plot(clf.coef_, color='lightgreen', linewidth=2, label='bayes ridge')
plt.plot(w, color='gold', linewidth=2, label='ground truth')
plt.plot(ols.coef_, color='navy', linewidth=1, label='OLS estimate')
plt.xlabel('features'); plt.ylabel('weights'); plt.legend(loc='best')
plt.title('model weights - histogram')
plt.hist(clf.coef_,
bins=n_features,
color='gold', log=True, edgecolor='black')
plt.scatter(clf.coef_[relevant_features],
np.full(len(relevant_features), 5.),
color='navy', label="Relevant features")
plt.ylabel("features"); plt.xlabel("weights"); plt.legend(loc="upper left")
plt.title("Marginal log-likelihood")
plt.plot(clf.scores_, color='navy', linewidth=2)
plt.ylabel("Score"); plt.xlabel("Iterations")
# Plotting some predictions for polynomial regression
def f(x, noise_amount):
    y = np.sqrt(x) * np.sin(x)
    noise = np.random.normal(0, 1, len(x))
    return y + noise_amount * noise
degree = 10
X = np.linspace(0, 10, 100)
y = f(X, noise_amount=0.1)
clf_poly = BR()
clf_poly.fit(np.vander(X, degree), y)
X_plot = np.linspace(0, 11, 25)
y_plot = f(X_plot, noise_amount=0)
y_mean, y_std = clf_poly.predict(np.vander(X_plot, degree), return_std=True)
plt.errorbar(X_plot, y_mean, y_std,
color='navy',
label="polynomial bayes ridge regression",
linewidth=2)
plt.plot(X_plot, y_plot, color='gold',
linewidth=2, label='ground truth')
plt.ylabel('output y'); plt.xlabel("feature X"); plt.legend(loc="lower left")
print(np.vander([1,2,3,4],None,increasing=False))
[[ 1  1  1  1]
 [ 8  4  2  1]
 [27  9  3  1]
 [64 16  4  1]]
# generate sinusoidal signal with noise
def fun(x): return np.sin(2*np.pi*x)
size = 25;
rng = np.random.RandomState(1234)
x_train = rng.uniform(0.0, 1.0, size)
y_train = fun(x_train) + rng.normal(scale=0.1, size=size)
x_test = np.linspace(0.0, 1.0, 100)
n_order = 3
X_train = np.vander(x_train, n_order+1, increasing=True)
X_test = np.vander(x_test, n_order+1, increasing=True)
reg = BR(tol =1e-6,
fit_intercept=False,
compute_score=True)
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
for i, ax in enumerate(axes):
    # Bayesian ridge regression with different initial value pairs
    if i == 0:
        init = [1 / np.var(y_train), 1.]  # Default values
    elif i == 1:
        init = [1., 1e-3]
    reg.set_params(alpha_init=init[0], lambda_init=init[1])
    reg.fit(X_train, y_train)
    ymean, ystd = reg.predict(X_test, return_std=True)
    ax.plot(x_test, fun(x_test),
            color="blue", label="sin($2\\pi x$)")
    ax.scatter(x_train, y_train,
               s=50, alpha=0.5, label="observation")
    ax.plot(x_test, ymean,
            color="red",
            label="predict mean")
    ax.fill_between(x_test, ymean-ystd, ymean+ystd,
                    color="pink", alpha=0.5,
                    label="predict std")
    ax.set_ylim(-1.3, 1.3)
    ax.legend()
    title = "$\\alpha$_init$={:.2f},\\ \\lambda$_init$={}$".format(
        init[0], init[1])
    if i == 0:
        title += " (Default)"
    ax.set_title(title, fontsize=12)
    text = "$\\alpha={:.1f}$\n$\\lambda={:.3f}$\n$L={:.1f}$".format(
        reg.alpha_, reg.lambda_, reg.scores_[-1])
    ax.text(0.05, -1.0, text, fontsize=12)
plt.tight_layout()
plt.show()
The Tweedie distribution is chosen with the power parameter: power=0 gives a Normal distribution, power=1 Poisson, power=2 Gamma, and power=3 Inverse Gaussian. The link function is set with the link parameter. X should be standardized before fitting to ensure equal penalty treatment across all features.
from sklearn.linear_model import TweedieRegressor as Tweedie
reg = Tweedie(power=1, alpha=0.5, link='log')
reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2])
print(reg.coef_, reg.intercept_)
[0.24631611 0.43370317] -0.7638091359123445
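With power=1 and a log link, the Tweedie fit above is Poisson regression, which sklearn also exposes directly as PoissonRegressor; a hedged sketch on the same toy data:
from sklearn.linear_model import PoissonRegressor
preg = PoissonRegressor(alpha=0.5)      # same penalty strength as above
preg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2])
print(preg.coef_, preg.intercept_)      # should closely match the TweedieRegressor fit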
from sklearn.datasets import fetch_openml
#df = fetch_openml(data_id=41214, as_frame=True).frame; df
The partial_fit method allows out-of-core learning. loss="log" fits a logistic regression model; loss="hinge" fits a linear support vector machine.
import numpy as np
from sklearn.linear_model import SGDClassifier as SGDC
from sklearn.preprocessing import StandardScaler as SS
from sklearn.pipeline import make_pipeline as MP
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
Y = np.array([1, 1, 2, 2])
# Always scale inputs. The most convenient way is to use a pipeline.
clf = MP(SS(), SGDC(max_iter=1000, tol=1e-3)); clf.fit(X, Y)
clf.predict([[-0.8, -1]])
array([1])
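A hedged sketch of the out-of-core pattern mentioned above: stream mini-batches through partial_fit, passing the full set of classes on the first call (the random batches here are placeholders for data read from disk):
import numpy as np
from sklearn.linear_model import SGDClassifier
rng = np.random.RandomState(0)
classes = np.array([0, 1])
clf_oc = SGDClassifier()
for _ in range(5):                      # pretend each batch is streamed from disk
    Xb = rng.randn(20, 4)
    yb = rng.randint(0, 2, size=20)
    clf_oc.partial_fit(Xb, yb, classes=classes)   # classes required on the first call
print(clf_oc.predict(rng.randn(3, 4)))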
import numpy as np
from sklearn.linear_model import SGDRegressor as SGDR
from sklearn.pipeline import make_pipeline as MP
from sklearn.preprocessing import StandardScaler as SS
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)
reg = MP(SS(), SGDR(max_iter=1000, tol=1e-3)); reg.fit(X, y)
Pipeline(steps=[('standardscaler', StandardScaler()), ('sgdregressor', SGDRegressor())])
from sklearn.datasets import load_digits
from sklearn.linear_model import Perceptron
X,y = load_digits(return_X_y=True)
clf = Perceptron(tol=1e-3, random_state=0); clf.fit(X,y)
clf.score(X,y)
0.9393433500278241
Passive-aggressive algorithms use a regularization parameter C. The classifier supports the hinge or squared_hinge loss; the regressor supports the epsilon_insensitive or squared_epsilon_insensitive loss.
from sklearn.linear_model import PassiveAggressiveClassifier as PAC
from sklearn.datasets import make_classification as MC
X,y = MC(n_features=4, random_state=0)
clf = PAC(max_iter=1000, random_state=0, tol=1e-3); clf.fit(X,y)
print(clf.coef_); print(clf.intercept_); print(clf.predict([[0,0,0,0]]))
[[0.26642044 0.45070924 0.67251877 0.64185414]] [1.84127814] [1]
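To make the regularization parameter C and the loss choice from the note above explicit, a hedged variant of the same classifier, reusing X and y from above (the C value is arbitrary):
clf2 = PAC(C=0.5, loss='squared_hinge', max_iter=1000, random_state=0, tol=1e-3)
clf2.fit(X, y)
print(clf2.predict([[0, 0, 0, 0]]))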
from sklearn.linear_model import PassiveAggressiveRegressor as PAR
from sklearn.datasets import make_regression as MR
X,y = MR(n_features=4, random_state=0)
reg = PAR(max_iter=1000, random_state=0, tol=1e-3); reg.fit(X,y)
print(reg.coef_); print(reg.intercept_); print(reg.predict([[0,0,0,0]]))
[20.48736655 34.18818427 67.59122734 87.94731329] [-0.02306214] [-0.02306214]
The Huber loss uses epsilon=1.35 for 95% statistical efficiency. RANSAC runs for at most a fixed number of iterations (max_trials).
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model as LM
from sklearn.datasets import make_regression as MR
n_samples, n_outliers = 1000,50
X,y,coef = MR(n_samples=n_samples,
n_features=1,
n_informative=1,
noise=10,
coef=True,
random_state=0
)
# add outliers
np.random.seed(0)
X[:n_outliers] = 3+0.5*(np.random.normal(size=(n_outliers,1)))
y[:n_outliers] = -3+10*(np.random.normal(size=n_outliers))
# fit using all data
lr = LM.LinearRegression(); lr.fit(X,y)
# fit using RANSAC
ransac = LM.RANSACRegressor(); ransac.fit(X,y)
inlier_mask = ransac.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)
#predict
line_X = np.arange(X.min(), X.max())[:, np.newaxis]  # column vector needed by predict()
print(line_X)
line_y = lr.predict(line_X)
line_y_ransac = ransac.predict(line_X)
[[-3.04614305]
 [-2.04614305]
 [-1.04614305]
 [-0.04614305]
 [ 0.95385695]
 [ 1.95385695]
 [ 2.95385695]
 [ 3.95385695]]
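A hedged sketch of the max_trials knob mentioned earlier, reusing X and y from this example (the residual threshold is arbitrary):
ransac2 = LM.RANSACRegressor(max_trials=200, residual_threshold=20.0, random_state=0)
ransac2.fit(X, y)
print(ransac2.inlier_mask_.sum(), 'inliers out of', len(y))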
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression as LR
from sklearn.linear_model import TheilSenRegressor as TSR
from sklearn.linear_model import RANSACRegressor as RR
# example: only y outliers
# model: y = 3*x + N(2,0.1**2)
np.random.seed(0)
n_samples = 200;
w = 3.
c = 2.
x = np.random.randn(n_samples)
noise = 0.1*np.random.randn(n_samples)
y = w*x+c+noise
# 10% outliers
y[-20:] += -20*x[-20:]
X = x[:, np.newaxis]
plt.scatter(x,y,color='indigo',marker='x',s=40)
line_x = np.array([-3,+3])
for name,model in [('OLS',LR()),
                   ('Theil-Sen', TSR(random_state=42)),
                   ('RANSAC', RR(random_state=42))]:
    model.fit(X,y)
    y_pred = model.predict(line_x.reshape(2,1))
    plt.plot(line_x, y_pred,
             color={'OLS': 'turquoise',
                    'Theil-Sen': 'gold',
                    'RANSAC': 'lightgreen'}[name],
             linewidth=2,
             label=name
             )
plt.legend(loc='upper left'); plt.title('corrupt y values')
# only X outliers (10%)
# model: y = 3*x+N(2,0.1**2)
x = np.random.randn(n_samples)
noise = 0.1*np.random.randn(n_samples)
y = 3*x+2+noise
x[-20:] = 9.9
y[-20:] += 22
X = x[:, np.newaxis]
plt.figure()
plt.scatter(x,y,color='blue',marker='x',s=40)
line_x = np.array([-3,10])
for name,model in [('OLS',LR()),
                   ('Theil-Sen', TSR(random_state=42)),
                   ('RANSAC', RR(random_state=42))]:
    model.fit(X,y)
    y_pred = model.predict(line_x.reshape(2,1))
    plt.plot(line_x, y_pred,
             color={'OLS': 'turquoise',
                    'Theil-Sen': 'gold',
                    'RANSAC': 'lightgreen'}[name],
             linewidth=2,
             label=name
             )
plt.legend(loc='upper left'); plt.title("corrupt X values")
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression as MR
from sklearn.linear_model import HuberRegressor as HR
from sklearn.linear_model import Ridge as R
rng = np.random.RandomState(0)
X,y = MR(n_samples=20, n_features=1, random_state=0, noise=4.0, bias=100.0)
# Add four strong outliers to the dataset.
X_outliers = rng.normal(0, 0.5, size=(4, 1))
y_outliers = rng.normal(0, 2.0, size=4)
X_outliers[:2, :] += X.max() + X.mean() / 4.
X_outliers[2:, :] += X.min() - X.mean() / 4.
y_outliers[:2] += y.min() - y.mean() / 4.
y_outliers[2:] += y.max() + y.mean() / 4.
X = np.vstack( (X, X_outliers))
y = np.concatenate((y, y_outliers))
plt.plot(X, y, 'b.')
# plot huber regressor over a range of values
colors = ['r-', 'b-', 'y-', 'm-']
x = np.linspace(X.min(), X.max(), 7)
for k, epsilon in enumerate([1.35, 1.5, 1.75, 1.9]):
    huber = HR(alpha=0.0, epsilon=epsilon)
    huber.fit(X, y)
    coef_ = huber.coef_ * x + huber.intercept_
    plt.plot(x, coef_, colors[k], label="huber loss, %s" % epsilon)
# Fit a ridge regressor for comparison
ridge = R(alpha=0.0, random_state=0, normalize=True)
ridge.fit(X, y)
#coef_ridge = ridge.coef_
coef_ = ridge.coef_ * x + ridge.intercept_
plt.plot(x, coef_, 'g-', label="ridge")
plt.title("Huber vs Ridge")
plt.xlabel("X"); plt.ylabel("y"); plt.legend(loc=0); plt.show()
from sklearn.preprocessing import PolynomialFeatures as PF
import numpy as np
X = np.arange(6).reshape(3, 2)
print(X)
poly = PF(degree=2)
poly.fit_transform(X)
[[0 1]
 [2 3]
 [4 5]]
array([[ 1.,  0.,  1.,  0.,  0.,  1.],
       [ 1.,  2.,  3.,  4.,  6.,  9.],
       [ 1.,  4.,  5., 16., 20., 25.]])
Interaction-only features (no powers of a single feature) can be obtained with interaction_only=True.
from sklearn.linear_model import Perceptron as P
from sklearn.preprocessing import PolynomialFeatures as PF
import numpy as np
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = X[:, 0] ^ X[:, 1]
X = PF(interaction_only=True).fit_transform(X).astype(int)
print(y,X)
clf = P(fit_intercept=False,
max_iter=10, tol=None, shuffle=False).fit(X, y)
# classifier predictions:
print('prediction:\t',clf.predict(X),'\n',
'score:\t',clf.score(X,y))
[0 1 1 0] [[1 0 0 0]
 [1 0 1 0]
 [1 1 0 0]
 [1 1 1 1]]
prediction:	 [0 1 1 0]
 score:	 1.0
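The XOR fit above relies on interaction_only=True; applied to the small X from the earlier PolynomialFeatures demo, the interaction-only expansion keeps the bias, the raw features, and the cross term, and drops the squared terms (a hedged sketch):
X_small = np.arange(6).reshape(3, 2)
print(PF(degree=2, interaction_only=True).fit_transform(X_small))
# [[ 1.  0.  1.  0.]
#  [ 1.  2.  3.  6.]
#  [ 1.  4.  5. 20.]]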
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge as R
from sklearn.preprocessing import PolynomialFeatures as PF
from sklearn.pipeline import make_pipeline as MP
def f(x):
    return x*np.sin(x)
x_plot = np.linspace(0,10,100)
x = np.linspace(0,10,100)
rng = np.random.RandomState(0); rng.shuffle(x)
x = np.sort(x[:20])
y = f(x)
# create matrix versions of the arrays
X = x[:,np.newaxis]
X_plot = x_plot[:,np.newaxis]
colors = ['teal','yellowgreen','gold']; lw=2
plt.plot(
x_plot, f(x_plot), color='cornflowerblue', linewidth=lw, label='ground truth')
plt.scatter(
x,y,color='navy',s=30,marker='o',label='training data')
for count,degree in enumerate([3,4,5]):
    model = MP(PF(degree),R())
    model.fit(X,y)
    y_plot = model.predict(X_plot)
    plt.plot(
        x_plot,y_plot,color=colors[count],linewidth=lw,
        label="degree %d" % degree
    )