For best performance, pass data as a dense numpy.ndarray or a sparse scipy.sparse.csr_matrix, with dtype=float64.
from sklearn import svm
X,y = [[0,0],[1,1]], [0,1]
clf = svm.SVC(); clf.fit(X,y)
print(clf.predict([[2.0,2.0]]))
print(clf.support_)    # indices of the support vectors (subset of the training data)
print(clf.n_support_)  # number of support vectors per class
[1]
[0 1]
[1 1]
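Sparse input works the same way; a minimal sketch (the names X_sparse and clf_sparse exist only for this sketch), whose prediction should match the dense case:
from scipy import sparse
X_sparse = sparse.csr_matrix([[0.0, 0.0], [1.0, 1.0]])
clf_sparse = svm.SVC(); clf_sparse.fit(X_sparse, [0, 1])
print(clf_sparse.predict(sparse.csr_matrix([[2.0, 2.0]])))  # expected: [1]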
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.datasets import make_blobs
# 40 separable points
X, y = make_blobs(n_samples=40,
centers=2,
random_state=6)
clf = svm.SVC(kernel='linear', C=1000)
clf.fit(X, y)
plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired)
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# create grid to evaluate the model
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
YY, XX = np.meshgrid(yy, xx)
# ravel: return contiguous flattened array
# T: return transposed array
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = clf.decision_function(xy).reshape(XX.shape)
print(xx.shape, yy.shape)
print(XX.shape, YY.shape)
print(xy.shape)
print(Z.shape)
(30,) (30,)
(30, 30) (30, 30)
(900, 2)
(30, 30)
# plot decision boundary
ax.contour(XX, YY, Z,
colors='k', levels=[-1, 0, 1], alpha=0.5,
linestyles=['--', '-', '--'])
# plot support vectors
ax.scatter(clf.support_vectors_[:, 0],
clf.support_vectors_[:, 1],
s=100, linewidth=1, facecolors='none', edgecolors='k')
plt.show()
xx, yy = np.meshgrid(np.linspace(-3, 3, 500),
np.linspace(-3, 3, 500))
np.random.seed(0)
X = np.random.randn(300, 2)
Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)
print(xx.shape,yy.shape)
print(X.shape,Y.shape)
(500, 500) (500, 500)
(300, 2) (300,)
clf = svm.NuSVC(gamma='auto'); clf.fit(X, Y)
# numpy c_: translate slice objects to concat along 2nd axis
print(np.c_[np.array([1,2,3]), np.array([4,5,6])])
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
[[1 4]
 [2 5]
 [3 6]]
plt.imshow(Z,
interpolation='nearest',
extent=(xx.min(), xx.max(),
yy.min(), yy.max()),
aspect='auto',
origin='lower',
cmap=plt.cm.PuOr_r)
contours = plt.contour(xx, yy, Z,
levels=[0],
linewidths=2,
linestyles='dotted')
plt.scatter(X[:, 0], X[:, 1], s=30, c=Y, cmap=plt.cm.Paired,
edgecolors='k')
plt.xticks(())
plt.yticks(())
plt.axis([-3, 3, -3, 3])
plt.show()
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import chi2
from sklearn.model_selection import cross_val_score as CVS
from sklearn.pipeline import Pipeline as Pipe
from sklearn.preprocessing import StandardScaler as SS
from sklearn.svm import SVC
X, y = load_iris(return_X_y=True)
np.random.seed(0)
# add non-informative features
X = np.hstack((X, 2*np.random.random((X.shape[0], 36))))
# Create a pipeline:
# - feature-selection transform
# - a scaler
# - an instance of SVM
clf = Pipe([('anova', SelectPercentile(chi2)),
('scaler', SS()),
('svc', SVC(gamma="auto"))])
# plot CV score as a function of the percentile of features selected
score_means, score_stds = list(), list()
percentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)
for percentile in percentiles:
clf.set_params(anova__percentile=percentile)
this_scores = CVS(clf, X, y)
score_means.append(this_scores.mean())
score_stds.append(this_scores.std())
plt.errorbar(percentiles, score_means, np.array(score_stds))
plt.title('SVM-Anova vs # of features selected')
plt.xticks(np.linspace(0, 100, 11, endpoint=True))
plt.xlabel('Percentile')
plt.ylabel('Accuracy Score')
plt.axis('tight')
plt.show()
SVC and NuSVC use a one-vs-one (OvO) scheme for multiclass problems: #classes*(#classes-1)/2 classifiers are built; each trains itself with data from two classes. Use decision_function_shape to transform the results of the OvO classifiers into a OvR decision function of shape (#samples, #classes). LinearSVC implements one-vs-rest (OvR) directly and exposes coef_ of shape (#classes, #features) and intercept_ of shape (#classes,) attributes.
X = [[0], [1], [2], [3]]
Y = [0, 1, 2, 3]
clf = svm.SVC(decision_function_shape='ovo')
clf.fit(X, Y)
dec = clf.decision_function([[1]])
print(dec.shape[1]) # 4 classes: 4*3/2 = 6
clf.decision_function_shape = "ovr"
dec = clf.decision_function([[1]])
print(dec.shape[1]) # 4 classes
lin_clf = svm.LinearSVC()
lin_clf.fit(X, Y)
dec = lin_clf.decision_function([[1]])
print(dec.shape[1])
6
4
4
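As a quick sketch checking the LinearSVC attributes mentioned above, on the four-class, one-feature toy data:
print(lin_clf.coef_.shape)       # (4, 1) = (#classes, #features)
print(lin_clf.intercept_.shape)  # (4,)   = (#classes,)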
The decision_function method returns per-class scores for each sample. When probability is True, class-membership probability estimates are enabled. If you don't need probabilities, keep probability=False and use decision_function instead of predict_proba.
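A minimal sketch of the probability option, on its own toy blobs (probability=True adds an internal cross-validated calibration step, so fitting is slower):
from sklearn.datasets import make_blobs
Xp, yp = make_blobs(n_samples=60, centers=3, random_state=0)
prob_clf = svm.SVC(probability=True).fit(Xp, yp)
print(prob_clf.predict_proba(Xp[:1]))       # one row of 3 class probabilities
print(prob_clf.decision_function(Xp[:1]))   # per-class scores (ovr shape by default)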
Use the class_weight and sample_weight params when you need to assign more importance to selected classes or samples. class_weight is set when fitting. It's a dictionary of {class_label: value}, where value is a floating point number > 0; it sets the C of class_label to C*value.
# we create two clusters of random points
n_samples_1, n_samples_2 = 1000,100
centers = [[0.0, 0.0], [2.0, 2.0]]
clusters_std = [1.5, 0.5]
X, y = make_blobs(n_samples=[n_samples_1, n_samples_2],
centers=centers,
cluster_std=clusters_std,
random_state=0, shuffle=False)
clf = svm.SVC(kernel='linear', C=1.0)
wclf = svm.SVC(kernel='linear', class_weight={1: 10})
clf.fit(X,y); wclf.fit(X,y)
SVC(class_weight={1: 10}, kernel='linear')
plt.scatter(X[:, 0],
X[:, 1],
c=y, cmap=plt.cm.Paired, edgecolors='k')
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# create grid to evaluate model
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
# separating hyperplane
Z = clf.decision_function(xy).reshape(XX.shape)
# plot decision boundary and margins
a = ax.contour(XX, YY, Z,
colors='k', levels=[0],
alpha=0.5, linestyles=['-'])
# separating hyperplane - weighted classes
Z = wclf.decision_function(xy).reshape(XX.shape)
# plot decision boundary and margins (weighted)
b = ax.contour(XX, YY, Z,
colors='r', levels=[0],
alpha=0.5, linestyles=['-'])
plt.legend([a.collections[0],
b.collections[0]],
["non weighted", "weighted"],
loc="lower left")
plt.show()
Per-sample weights can be passed to fit via the sample_weight param. It rescales C for the ith sample to C*sample_weight[i].
def plot_decision_function(classifier, sample_weight, axis, title):
xx, yy = np.meshgrid(np.linspace(-4, 5, 500),
np.linspace(-4, 5, 500))
Z = classifier.decision_function(np.c_[xx.ravel(),
yy.ravel()])
Z = Z.reshape(xx.shape)
# plot line, points & nearest vectors to the plane
axis.contourf(xx, yy, Z,
alpha=0.75, cmap=plt.cm.bone)
axis.scatter(X[:, 0],
X[:, 1],
c=y, s=100*sample_weight,
alpha=0.9,
cmap=plt.cm.bone,
edgecolors='black')
axis.axis('off')
axis.set_title(title)
# numpy.r_: translate slice objects to concat along 1st axis
print(np.r_[np.array([1,2,3]), 0, 0, np.array([4,5,6])])
print(np.random.randn(10,2))
[1 2 3 0 0 4 5 6]
[[-0.35362786 -0.74074747]
 [-0.67502183 -0.13278426]
 [ 0.61980106  1.79116846]
 [ 0.17100044 -1.72567135]
 [ 0.16065854 -0.85898532]
 [-0.20642094  0.48842647]
 [-0.83833097  0.38116374]
 [-0.99090328  1.01788005]
 [ 0.3415874  -1.25088622]
 [ 0.92525075 -0.90478616]]
# Create 20 points
np.random.seed(0)
X = np.r_[np.random.randn(10, 2) + [1, 1],
np.random.randn(10, 2)]
y = [1]*10 + [-1]*10
sample_weight_last_ten = abs(np.random.randn(len(X)))
sample_weight_constant = np.ones(len(X))
# and bigger weights to some outliers
sample_weight_last_ten[15:] *= 5
sample_weight_last_ten[9] *= 15
# fit the model
clf_weights = svm.SVC(gamma=1)
clf_weights.fit(X, y,
sample_weight=sample_weight_last_ten)
clf_no_weights = svm.SVC(gamma=1)
clf_no_weights.fit(X, y)
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
plot_decision_function(clf_no_weights,
sample_weight_constant,
axes[0],
"Constant weights")
plot_decision_function(clf_weights,
sample_weight_last_ten,
axes[1],
"Modified weights")
plt.show()
from sklearn import svm
X = [[0, 0], [2, 2]]
y = [0.5, 2.5]
regr = svm.SVR(); regr.fit(X, y)
print(regr.predict([[1, 1]]))
[1.5]
from sklearn.svm import SVR
X = np.sort(5 * np.random.rand(40, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - np.random.rand(8)) #noise
svr_rbf = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)
svr_lin = SVR(kernel='linear', C=100, gamma='auto')
svr_poly = SVR(kernel='poly', C=100, gamma='auto',
degree=3, epsilon=.1, coef0=1)
svrs = [svr_rbf, svr_lin, svr_poly]
kernel_label = ['RBF', 'Linear', 'Polynomial']
model_color = ['m', 'c', 'g']
fig, axes = plt.subplots(nrows=1, ncols=3,
figsize=(15, 10), sharey=True)
for ix, svr in enumerate(svrs):
axes[ix].plot(X,
svr.fit(X, y).predict(X),
color=model_color[ix], lw=2,
label='{} model'.format(kernel_label[ix]))
axes[ix].scatter(X[svr.support_],
y[svr.support_],
facecolor="none",
edgecolor=model_color[ix], s=50,
label='{} support vectors'.format(kernel_label[ix]))
axes[ix].scatter(X[np.setdiff1d(np.arange(len(X)), svr.support_)],
y[np.setdiff1d(np.arange(len(X)), svr.support_)],
facecolor="none",
edgecolor="k", s=50,
label='other training data')
axes[ix].legend(loc='upper center',
bbox_to_anchor=(0.5, 1.1),
ncol=1, fancybox=True, shadow=True)
fig.text(0.5, 0.04, 'data', ha='center', va='center')
fig.text(0.06, 0.5, 'target', ha='center', va='center', rotation='vertical')
fig.suptitle("SVR"); plt.show()
(You can check whether an array is C-contiguous by inspecting its flags attribute.) Set cache_size higher than the default (200MB) if memory allows. C is set to 1.0 by default. If you have a noisy dataset, smaller values of C lead to more regularization; larger values of C consume more training time, sometimes 10x. Also, LinearSVR & LinearSVC become less sensitive to C as it increases. SVMs are not scale invariant, so scale your data, e.g. with a pipeline:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
clf = make_pipeline(StandardScaler(), SVC())
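Purely as a sketch (the values and the name clf_tuned are placeholders, not recommendations), cache_size and C are ordinary SVC constructor params:
clf_tuned = make_pipeline(StandardScaler(), SVC(cache_size=500, C=0.5))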
For the polynomial kernel, $d$ is specified by degree and $r$ by coef0. For the RBF kernel, $\gamma$ is specified by gamma, which must be > 0. For the sigmoid kernel, $r$ is specified by coef0.
.linear_svc = svm.SVC(kernel='linear'); print(linear_svc.kernel)
rbf_svc = svm.SVC(kernel='rbf'); print(rbf_svc.kernel)
linear
rbf
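A quick sketch instantiating the other built-in kernels with the params above (placeholder values):
poly_svc = svm.SVC(kernel='poly', degree=3, coef0=1.0, gamma='scale')
sigmoid_svc = svm.SVC(kernel='sigmoid', coef0=0.5, gamma='scale')
print(poly_svc.kernel, sigmoid_svc.kernel)  # poly sigmoid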
Choosing C and gamma: C trades off misclassification against decision-surface simplicity. A low C smooths the decision surface; a high C strives to correctly classify all training samples. gamma sets an influence weight for each training sample; a higher gamma requires other samples to be closer in order to be affected. Search grids of C and gamma spaced exponentially to find good values of each.
from matplotlib.colors import Normalize
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV
# Utility function to move the midpoint of a colormap to be around
# the values of interest.
class MidpointNormalize(Normalize):
def __init__(self, vmin=None, vmax=None, midpoint=None,
clip=False):
self.midpoint = midpoint
Normalize.__init__(self, vmin, vmax, clip)
def __call__(self, value, clip=None):
x, y = [self.vmin, self.midpoint, self.vmax], [0, 0.5, 1]
return np.ma.masked_array(np.interp(value, x, y))
iris = load_iris()
X,y = iris.data, iris.target
# Dataset for decision function viz - only keep the first two
# features in X, sub-sample the dataset to keep only 2 classes and
# make it a binary classification problem.
X_2d = X[:, :2]
X_2d = X_2d[y > 0]
y_2d = y[y > 0]
y_2d -= 1
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_2d = scaler.fit_transform(X_2d)
# Train classifiers
C_range = np.logspace(-2, 10, 13)
gamma_range = np.logspace(-9, 3, 13)
param_grid = dict(gamma=gamma_range, C=C_range)
cv = StratifiedShuffleSplit(n_splits=5,
test_size=0.2,
random_state=42)
grid = GridSearchCV(SVC(),
param_grid=param_grid,
cv=cv)
grid.fit(X, y)
GridSearchCV(cv=StratifiedShuffleSplit(n_splits=5, random_state=42, test_size=0.2, train_size=None), estimator=SVC(), param_grid={'C': array([1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02, 1.e+03, 1.e+04, 1.e+05, 1.e+06, 1.e+07, 1.e+08, 1.e+09, 1.e+10]), 'gamma': array([1.e-09, 1.e-08, 1.e-07, 1.e-06, 1.e-05, 1.e-04, 1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02, 1.e+03])})
# Fit a classifier for all params in the 2d version
# (use a smaller set of params to keep training time down)
C_2d_range = [1e-2, 1, 1e2]
gamma_2d_range = [1e-1, 1, 1e1]
classifiers = []
for C in C_2d_range:
for gamma in gamma_2d_range:
clf = SVC(C=C, gamma=gamma)
clf.fit(X_2d, y_2d)
classifiers.append((C, gamma, clf))
# Viz of param effect
plt.figure(figsize=(8, 6))
xx, yy = np.meshgrid(np.linspace(-3, 3, 200),
np.linspace(-3, 3, 200))
for (k, (C, gamma, clf)) in enumerate(classifiers):
# evaluate decision function in a grid
Z = clf.decision_function(np.c_[xx.ravel(),
yy.ravel()])
Z = Z.reshape(xx.shape)
# visualize decision function
plt.subplot(len(C_2d_range),
len(gamma_2d_range),
k + 1)
plt.title("gamma=10^%d, C=10^%d" % (np.log10(gamma),
np.log10(C)),
size='medium')
# visualize parameter's effect on decision function
plt.pcolormesh(xx, yy, -Z,
cmap=plt.cm.RdBu)
plt.scatter(X_2d[:, 0],
X_2d[:, 1],
c=y_2d, cmap=plt.cm.RdBu_r,
edgecolors='k')
plt.xticks(()); plt.yticks(()); plt.axis('tight')
scores = grid.cv_results_['mean_test_score'].reshape(len(C_range),
len(gamma_range))
# Validation accuracy heatmap vs gamma & C;
# color varies from dark to bright yellow.
# The most interesting scores lie between 0.92 and 0.97, so normalize
# the colormap so its midpoint is at 0.92.
plt.figure(figsize=(8, 6))
plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95)
plt.imshow(scores,
interpolation='nearest',
cmap=plt.cm.hot,
norm=MidpointNormalize(vmin=0.2, midpoint=0.92))
plt.xlabel('gamma'); plt.ylabel('C'); plt.colorbar()
plt.xticks(np.arange(len(gamma_range)), gamma_range, rotation=45)
plt.yticks(np.arange(len(C_range)), C_range)
plt.title('Validation accuracy')
plt.show()
You can use your own kernel by passing a Python function to the kernel parameter. The kernel must take two matrices of shapes (#samples1, #features) and (#samples2, #features), and return a kernel matrix of shape (#samples1, #samples2).
from sklearn import svm, datasets
iris = datasets.load_iris()
X,Y = iris.data[:,:2], iris.target # only use 1st two features
def mykernel(X,Y):
M = np.array([[2.0,0.0],[0.0,1.0]])
return np.dot(np.dot(X,M),Y.T)
h = 0.02 # mesh step size
clf = svm.SVC(kernel=mykernel); clf.fit(X,Y)
SVC(kernel=<function mykernel at 0x7f55d1d2c280>)
# plot decision boundary. assign color to each point in mesh
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
Z = clf.predict(np.c_[xx.ravel(),
yy.ravel()])
Z = Z.reshape(xx.shape)
plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)
plt.scatter(X[:, 0],
X[:, 1],
c=Y, cmap=plt.cm.Paired, edgecolors='k')
plt.title('3-class classification with SVM - custom kernel')
plt.axis('tight'); plt.show()
Alternatively, you can pass a precomputed Gram matrix by using the kernel='precomputed' option.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn import svm
X, y = make_classification(n_samples=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y,
random_state=0)
clf = svm.SVC(kernel='precomputed')
# linear kernel computation
gram_train = np.dot(X_train, X_train.T)
clf.fit(gram_train, y_train)
# predict on test examples: Gram matrix between test and training data
gram_test = np.dot(X_test, X_train.T)
clf.predict(gram_test)
array([0, 1, 0])
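Since the Gram matrices here are plain dot products, this should behave like a linear-kernel SVC fit on the raw features; a quick sanity check (a sketch, not part of the original example):
lin = svm.SVC(kernel='linear').fit(X_train, y_train)
print(lin.predict(X_test))  # should match array([0, 1, 0]) above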