We compare the sparsity (percentage of zero coefficients) of the solutions obtained with the L1, L2 and Elastic-Net penalties for different values of the C and l1_ratio params.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression as LR
from sklearn import datasets
from sklearn.preprocessing import StandardScaler as SS
X, y = datasets.load_digits(return_X_y=True)
X = SS().fit_transform(X)
y = (y > 4).astype(int) # classify smaller vs larger digits
l1_ratio = 0.5 # L1 weight in Elastic-Net regularization
fig, axes = plt.subplots(3, 3)
# Sweep the regularization parameter C (one row of subplots per value)
for i, (C, axes_row) in enumerate(zip((1, 0.1, 0.01), axes)):
    # loosen the tolerance (tol=0.01) to keep the training time short
clf_l1_LR = LR(C=C, penalty='l1', tol=0.01, solver='saga')
clf_l2_LR = LR(C=C, penalty='l2', tol=0.01, solver='saga')
clf_en_LR = LR(C=C, penalty='elasticnet', tol=0.01, solver='saga', l1_ratio=l1_ratio)
clf_l1_LR.fit(X, y)
clf_l2_LR.fit(X, y)
clf_en_LR.fit(X, y)
coef_l1_LR = clf_l1_LR.coef_.ravel()
coef_l2_LR = clf_l2_LR.coef_.ravel()
coef_en_LR = clf_en_LR.coef_.ravel()
# coef_l1_LR contains zeros due to L1 sparsity inducing norm
sparsity_l1_LR = np.mean(coef_l1_LR == 0) * 100
sparsity_l2_LR = np.mean(coef_l2_LR == 0) * 100
sparsity_en_LR = np.mean(coef_en_LR == 0) * 100
print("C=%.2f" % C)
print("{:<40} {:.2f}%".format("Sparsity with L1 penalty:", sparsity_l1_LR))
print("{:<40} {:.2f}%".format("Sparsity with Elastic-Net penalty:",
sparsity_en_LR))
print("{:<40} {:.2f}%".format("Sparsity with L2 penalty:", sparsity_l2_LR))
print("{:<40} {:.2f}".format("Score with L1 penalty:",
clf_l1_LR.score(X, y)))
print("{:<40} {:.2f}".format("Score with Elastic-Net penalty:",
clf_en_LR.score(X, y)))
print("{:<40} {:.2f}".format("Score with L2 penalty:",
clf_l2_LR.score(X, y)))
if i == 0:
axes_row[0].set_title("L1 penalty")
axes_row[1].set_title("Elastic-Net\nl1_ratio = %s" % l1_ratio)
axes_row[2].set_title("L2 penalty")
for ax, coefs in zip(axes_row, [coef_l1_LR, coef_en_LR, coef_l2_LR]):
ax.imshow(np.abs(coefs.reshape(8, 8)), interpolation='nearest',
cmap='binary', vmax=1, vmin=0)
ax.set_xticks(())
ax.set_yticks(())
axes_row[0].set_ylabel('C = %s' % C)
plt.show()
C=1.00
Sparsity with L1 penalty:                4.69%
Sparsity with Elastic-Net penalty:       4.69%
Sparsity with L2 penalty:                4.69%
Score with L1 penalty:                   0.90
Score with Elastic-Net penalty:          0.90
Score with L2 penalty:                   0.90
C=0.10
Sparsity with L1 penalty:                29.69%
Sparsity with Elastic-Net penalty:       10.94%
Sparsity with L2 penalty:                4.69%
Score with L1 penalty:                   0.90
Score with Elastic-Net penalty:          0.90
Score with L2 penalty:                   0.90
C=0.01
Sparsity with L1 penalty:                82.81%
Sparsity with Elastic-Net penalty:       67.19%
Sparsity with L2 penalty:                4.69%
Score with L1 penalty:                   0.86
Score with Elastic-Net penalty:          0.88
Score with L2 penalty:                   0.89
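For reference, the Elastic-Net term that l1_ratio interpolates between the L1 and L2 penalties is r(w) = l1_ratio * ||w||_1 + (1 - l1_ratio) / 2 * ||w||_2^2, which scikit-learn adds to C times the log-loss. Below is a minimal sketch of that term, reusing the Elastic-Net coefficients fitted above; the helper name elastic_net_penalty is introduced here for illustration only.

def elastic_net_penalty(w, l1_ratio):
    # r(w) = l1_ratio * ||w||_1 + (1 - l1_ratio) / 2 * ||w||_2^2
    return l1_ratio * np.abs(w).sum() + (1 - l1_ratio) / 2 * (w ** 2).sum()

print("Elastic-Net penalty of the last fit: %.3f"
      % elastic_net_penalty(coef_en_LR, l1_ratio))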
iris = datasets.load_iris()
X, y = iris.data, iris.target
X = X[y != 2]
y = y[y != 2]
X /= X.max() # Normalize X to speed-up convergence
from sklearn.svm import l1_min_c
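# l1_min_c returns the smallest C for which an L1-penalized model can have at
# least one non-zero coefficient; the grid below spans seven decades above it.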
cs = l1_min_c(X, y, loss='log') * np.logspace(0, 7, 16)
from time import time
start = time()
clf = LR(penalty='l1',
solver='liblinear',
tol=1e-6,
max_iter=int(1e6),
warm_start=True,
intercept_scaling=10000.)
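# warm_start=True lets each fit start from the previous solution, so sweeping C
# in increasing order stays cheap; the large intercept_scaling keeps liblinear's
# penalization of the intercept negligible.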
coefs_ = []
for c in cs:
clf.set_params(C=c)
clf.fit(X, y)
coefs_.append(clf.coef_.ravel().copy())
print("This took %0.3fs" % (time() - start))
This took 0.041s
coefs_ = np.array(coefs_)
plt.plot(np.log10(cs), coefs_, marker='o')
ymin, ymax = plt.ylim()
plt.xlabel('log(C)')
plt.ylabel('Coefficients')
plt.title('Logistic Regression Path')
plt.axis('tight')
plt.show()
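A quick numerical read of the path (not part of the original output) is to count how many coefficients are active at each C; a small sketch using the coefs_ array computed above:

n_nonzero = (coefs_ != 0).sum(axis=1)
for c, k in zip(cs, n_nonzero):
    print("C=%.4f -> %d non-zero coefficients" % (c, k))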
from sklearn.datasets import make_blobs
centers = [[-5, 0], [0, 1.5], [5, -1]]
X, y = make_blobs(n_samples=1000, centers=centers, random_state=40)
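# Apply a linear transformation so the blobs become anisotropic
# (correlated features) rather than axis-aligned.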
transformation = [[0.4, 0.2], [-0.4, 1.2]]
X = np.dot(X, transformation)
for multi_class in ('multinomial', 'ovr'):
clf = LR(solver='sag', max_iter=100, random_state=42,
multi_class=multi_class).fit(X, y)
print("training score : %.3f (%s)" % (clf.score(X, y), multi_class))
# create a mesh to plot in
h = .02 # step size in the mesh
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
# Plot decision boundary. Assign a color to each point in the mesh
# [x_min, x_max]x[y_min, y_max].
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.figure()
plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
plt.title("Decision surface (%s)" % multi_class)
plt.axis('tight')
# Plot also the training points
colors = "bry"
for i, color in zip(clf.classes_, colors):
idx = np.where(y == i)
plt.scatter(X[idx, 0], X[idx, 1], c=color, cmap=plt.cm.Paired,
edgecolor='black', s=20)
# Plot the three one-against-all classifiers
xmin, xmax = plt.xlim()
ymin, ymax = plt.ylim()
coef = clf.coef_
intercept = clf.intercept_
def plot_hyperplane(c, color):
def line(x0):
return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]
plt.plot([xmin, xmax], [line(xmin), line(xmax)],
ls="--", color=color)
for i, color in zip(clf.classes_, colors):
plot_hyperplane(i, color)
plt.show()
training score : 0.995 (multinomial)
training score : 0.976 (ovr)
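As a sanity check (not in the original example), the multinomial decision function is just X @ coef_.T + intercept_, and predict() takes the row-wise arg-max. A hedged sketch, refitting a multinomial model on the same blobs; clf_mn and manual_pred are names introduced here for illustration:

clf_mn = LR(solver='sag', max_iter=100, random_state=42,
            multi_class='multinomial').fit(X, y)
scores = X @ clf_mn.coef_.T + clf_mn.intercept_   # shape (n_samples, 3)
manual_pred = clf_mn.classes_[scores.argmax(axis=1)]
print("agreement with predict():", np.mean(manual_pred == clf_mn.predict(X)))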
import warnings, timeit
from sklearn.datasets import fetch_20newsgroups_vectorized as Fetch20
from sklearn.linear_model import LogisticRegression as LR
from sklearn.model_selection import train_test_split as TTS
from sklearn.exceptions import ConvergenceWarning as CW
warnings.filterwarnings("ignore", category=CW, module="sklearn")
t0 = timeit.default_timer()
solver = 'saga'
n_samples = 10000
X, y = Fetch20(subset='all', return_X_y=True)
X = X[:n_samples]
y = y[:n_samples]
X_train, X_test, y_train, y_test = TTS(X, y, random_state=42, stratify=y, test_size=0.1)
train_samples, n_features = X_train.shape
n_classes = np.unique(y).shape[0]
models = {'ovr': {'name': 'One versus Rest', 'iters': [1, 2, 4]},
'multinomial': {'name': 'Multinomial', 'iters': [1, 3, 7]}}
for model in models:
accuracies = [1 / n_classes]
times = [0]
densities = [1]
model_params = models[model]
# Small #epochs = faster runtime
for this_max_iter in model_params['iters']:
print('[model=%s, solver=%s] Number of epochs: %s' %
(model_params['name'], solver, this_max_iter))
lr = LR(solver=solver,
multi_class=model,
penalty='l1',
max_iter=this_max_iter,
random_state=42)
t1 = timeit.default_timer()
lr.fit(X_train, y_train)
train_time = timeit.default_timer() - t1
y_pred = lr.predict(X_test)
accuracy = np.sum(y_pred == y_test) / y_test.shape[0]
density = np.mean(lr.coef_ != 0, axis=1) * 100
accuracies.append(accuracy)
densities.append(density)
times.append(train_time)
models[model]['times'] = times
models[model]['densities'] = densities
models[model]['accuracies'] = accuracies
print('Test accuracy for %s: %.4f' % (model, accuracies[-1]))
print('%% non-zero coeffs for %s, '
'per class:\n %s' % (model, densities[-1]))
    print('Run time (%i epochs) for %s: %.2f' %
          (model_params['iters'][-1], model, times[-1]))
fig = plt.figure()
ax = fig.add_subplot(111)
for model in models:
name = models[model]['name']
times = models[model]['times']
accuracies = models[model]['accuracies']
ax.plot(times, accuracies, marker='o',
label='Model: %s' % name)
ax.set_xlabel('Train time (s)')
ax.set_ylabel('Test accuracy')
ax.legend()
fig.suptitle('Multinomial vs One-vs-Rest Logistic L1\n'
'Dataset %s' % '20newsgroups')
fig.tight_layout()
fig.subplots_adjust(top=0.85)
run_time = timeit.default_timer() - t0
print('Example run in %.3f s' % run_time)
plt.show()
[model=One versus Rest, solver=saga] Number of epochs: 1
[model=One versus Rest, solver=saga] Number of epochs: 2
[model=One versus Rest, solver=saga] Number of epochs: 4
Test accuracy for ovr: 0.7490
% non-zero coeffs for ovr, per class:
 [0.31743104 0.36815852 0.4181174  0.46115889 0.24595141 0.41350581
  0.31281945 0.27054655 0.58720899 0.32972861 0.4158116  0.3312658
  0.41888599 0.41120001 0.59643217 0.31666244 0.34279478 0.28130692
  0.35278655 0.24748861]
Run time (4 epochs) for ovr: 2.37
[model=Multinomial, solver=saga] Number of epochs: 1
[model=Multinomial, solver=saga] Number of epochs: 3
[model=Multinomial, solver=saga] Number of epochs: 7
Test accuracy for multinomial: 0.7450
% non-zero coeffs for multinomial, per class:
 [0.13219888 0.11452112 0.13066169 0.13681047 0.12066991 0.15909982
  0.13450468 0.09146318 0.07916561 0.12143851 0.13911627 0.10760374
  0.18984374 0.12143851 0.17524038 0.22289346 0.11605832 0.07916561
  0.07301682 0.15141384]
Run time (7 epochs) for multinomial: 2.15
Example run in 4132.328 s
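The per-class densities stored above are printed but not plotted. A small optional follow-up (fig2 and ax2 are names introduced here) that visualizes the density reached after the largest epoch budget for both strategies:

fig2, ax2 = plt.subplots()
for model in models:
    ax2.plot(models[model]['densities'][-1], marker='o',
             label=models[model]['name'])
ax2.set_xlabel('Class index')
ax2.set_ylabel('% non-zero coefficients')
ax2.legend()
plt.show()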
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression as LR
from sklearn.model_selection import train_test_split as TTS
from sklearn.preprocessing import StandardScaler as SS
from sklearn.utils import check_random_state as CRS
train_samples = 5000
# Load data from https://www.openml.org/d/554
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
# Turn seed into a np.random.RandomState instance
random_state = CRS(0)
permutation = random_state.permutation(X.shape[0])
print('permutation: ', permutation)
X = X[permutation]
y = y[permutation]
X = X.reshape((X.shape[0], -1))
X_train, X_test, y_train, y_test = TTS(
X, y, train_size=train_samples, test_size=10000)
scaler = SS()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
permutation: [10840 56267 14849 ... 42613 43567 68268]
# Turn up tolerance for faster convergence
clf = LR(C=50.0 / train_samples,
         penalty='l1',
         solver='saga',
         tol=0.1)
clf.fit(X_train, y_train)
sparsity = np.mean(clf.coef_ == 0) * 100
score = clf.score(X_test, y_test)
print("Sparsity with L1 penalty: %.2f%%" % sparsity)
print("Test score with L1 penalty: %.4f" % score)
coef = clf.coef_.copy()
plt.figure(figsize=(10, 5))
scale = np.abs(coef).max()
for i in range(10):
l1_plot = plt.subplot(2, 5, i + 1)
l1_plot.imshow(coef[i].reshape(28, 28), interpolation='nearest',
cmap=plt.cm.RdBu, vmin=-scale, vmax=scale)
l1_plot.set_xticks(())
l1_plot.set_yticks(())
l1_plot.set_xlabel('Class %i' % i)
plt.suptitle('Classification vector for...')
plt.show()
Sparsity with L1 penalty: 79.49%
Test score with L1 penalty: 0.8362
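If hand-picking C = 50 / train_samples feels arbitrary, cross-validation can choose it instead. A minimal sketch with LogisticRegressionCV on the same preprocessed split; note this is slow on MNIST, and Cs=5 with cv=3 are illustrative values, not tuned:

from sklearn.linear_model import LogisticRegressionCV
clf_cv = LogisticRegressionCV(Cs=5, cv=3, penalty='l1', solver='saga', tol=0.1)
clf_cv.fit(X_train, y_train)
print("C chosen per class:", clf_cv.C_)
print("Test score: %.4f" % clf_cv.score(X_test, y_test))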