SGDClassifier(loss='log') is equivalent to Logistic Regression fitted via SGD. The classifier is fitted on training data X of shape (#samples, #features) and targets (labels) y of shape (#samples,).
from sklearn.linear_model import SGDClassifier
X,y = [[0.0, 0.0], [1.0, 1.0]], [0.0, 1.0]
clf = SGDClassifier(loss="hinge", penalty="l2", max_iter=50)
clf.fit(X, y)
print(clf.coef_) # model params
print(clf.intercept_) # offset/bias
print(clf.predict([[2.0,2.0]])) # predicting new values
[[9.85221675 9.85221675]]
[-9.97004991]
[1.]
fit_intercept tells the model whether to use an intercept (a biased hyperplane).
decision_function (a method) returns the signed distance to the hyperplane (the dot product between the coefficients and the input sample, plus the intercept):
clf.decision_function([[2.0, 2.0]])
array([29.43881708])
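The same value can be recomputed by hand from coef_ and intercept_, which makes the definition above concrete (a minimal sketch reusing the clf fitted earlier):
import numpy as np
x_new = np.array([2.0, 2.0])
print(np.dot(clf.coef_[0], x_new) + clf.intercept_[0])  # dot product of coefficients and sample, plus intercept
# matches the decision_function output above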
loss sets the loss function for the model. Options:
loss="hinge" - linear support vector machine
loss="modified_huber" - smoothed hinge loss
loss="log" - logistic regression
Using the log and modified_huber loss functions enables the predict_proba method, which returns a vector of probability estimates $P(y|x)$ per sample x:
clf = SGDClassifier(loss="log", max_iter=25).fit(X, y)
clf.predict_proba([[1., 1.]])
array([[6.54276816e-07, 9.99999346e-01]])
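The modified_huber loss exposes the same probability interface; a minimal sketch (the fitted values, and hence the probabilities, will differ):
clf_mh = SGDClassifier(loss="modified_huber", max_iter=25).fit(X, y)
clf_mh.predict_proba([[1., 1.]])  # one probability column per class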
L1 and L2 norm penalties are set using penalty:
penalty="l2" - L2 norm penalty on coef_
penalty="l1" - L1 norm penalty on coef_
penalty="elasticnet" - convex combination of the L1 and L2 norm penalties; l1_ratio controls the convex combination
The default is penalty="l2".
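A minimal sketch of the Elastic-Net setting, combining both norms on the toy data above (the l1_ratio value is illustrative):
clf_en = SGDClassifier(loss="hinge", penalty="elasticnet", l1_ratio=0.15, max_iter=50)
clf_en.fit(X, y)  # coef_ is regularized with 15% L1 and 85% L2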
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.linear_model import SGDClassifier
iris = datasets.load_iris()
X,y,colors = iris.data[:,:2], iris.target, "bry"
# shuffle
idx = np.arange(X.shape[0])
np.random.seed(13); np.random.shuffle(idx)
X,y = X[idx],y[idx]
# standardize
mean = X.mean(axis=0)
std = X.std(axis=0)
X = (X-mean)/std
h = .02 # step size in the mesh
clf = SGDClassifier(alpha=0.001, max_iter=100).fit(X, y)
# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
Z = clf.predict(np.c_[xx.ravel(),
yy.ravel()])
Z = Z.reshape(xx.shape)
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
for i, color in zip(clf.classes_, colors):
    idx = np.where(y == i)
    plt.scatter(X[idx, 0], X[idx, 1],
                c=color, label=iris.target_names[i],
                cmap=plt.cm.Paired, edgecolor='black', s=20)
plt.title("Decision surface of multi-class SGD")
# Plot the three one-against-all classifiers
xmin, xmax = plt.xlim()
ymin, ymax = plt.ylim()
coef = clf.coef_
intercept = clf.intercept_
def plot_hyperplane(c, color):
    def line(x0):
        return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]
    plt.plot([xmin, xmax], [line(xmin), line(xmax)],
             ls="--", color=color)

for i, color in zip(clf.classes_, colors):
    plot_hyperplane(i, color)
plt.legend(); plt.show()
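Because the multi-class problem is handled as several binary one-versus-all problems, the fitted attributes carry one entry per class; a quick check on the clf fitted above:
print(clf.coef_.shape)       # (3, 2): one weight vector per class
print(clf.intercept_.shape)  # (3,): one offset per class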
Weights can be assigned to classes (via class_weight) and to individual instances (via sample_weight).
from sklearn import linear_model
np.random.seed(0)
X = np.r_[np.random.randn(10, 2) + [1, 1],
np.random.randn(10, 2)]
y = [1] * 10 + [-1] * 10
sample_weight = 100 * np.abs(np.random.randn(20))
sample_weight[:10] *= 10 # assign a bigger weight to the first 10 samples
xx, yy = np.meshgrid(np.linspace(-4, 5, 500),
np.linspace(-4, 5, 500))
plt.figure()
plt.scatter(X[:, 0],
X[:, 1],
c=y, s=sample_weight, alpha=0.9,
cmap=plt.cm.bone, edgecolor='black')
# fit the unweighted model
clf = linear_model.SGDClassifier(alpha=0.01, max_iter=100)
clf.fit(X, y)
Z = clf.decision_function(np.c_[xx.ravel(),
yy.ravel()])
Z = Z.reshape(xx.shape)
no_weights = plt.contour(xx, yy, Z,
levels=[0], linestyles=['solid'])
# fit the weighted model
clf = linear_model.SGDClassifier(alpha=0.01, max_iter=100)
clf.fit(X, y, sample_weight=sample_weight)
Z = clf.decision_function(np.c_[xx.ravel(),
yy.ravel()])
Z = Z.reshape(xx.shape)
samples_weights = plt.contour(xx, yy, Z,
levels=[0], linestyles=['dashed'])
plt.legend([no_weights.collections[0],
samples_weights.collections[0]],
["no weights", "with weights"], loc="lower left")
plt.xticks(()); plt.yticks(()); plt.show()
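Class-level weighting works analogously via class_weight; a minimal sketch on the same data (the weight values are illustrative):
# give class 1 ten times the weight of class -1; class_weight="balanced" would
# instead weight classes inversely proportional to their frequencies
clf_cw = linear_model.SGDClassifier(alpha=0.01, max_iter=100,
                                    class_weight={1: 10, -1: 1})
clf_cw.fit(X, y)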
Averaged SGD is enabled by setting average=True. coef_ is then set to the average of the coefficient values across all updates (the same applies to intercept_).
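average can also be an integer, in which case averaging only starts once that many samples have been seen; a minimal sketch (the value is illustrative):
clf_avg = SGDClassifier(average=10, max_iter=100).fit(X, y)  # averaging begins after 10 samples
print(clf_avg.coef_)  # averaged coefficients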
from sklearn.model_selection import train_test_split as TTS
from sklearn.linear_model import SGDClassifier as SGC
from sklearn.linear_model import Perceptron as PERCEPT
from sklearn.linear_model import PassiveAggressiveClassifier as PAC
from sklearn.linear_model import LogisticRegression as LR
heldout = [0.95, 0.90, 0.75, 0.50, 0.01]
rounds = 20
X, y = datasets.load_digits(return_X_y=True)
classifiers = [
("SGD", SGC(max_iter=100)),
("ASGD", SGC(average=True)),
("Perceptron", PERCEPT()),
("Pasv-Agrsv I", PAC(loss='hinge', C=1.0, tol=1e-4)),
("Pasv-Agrsv II", PAC(loss='squared_hinge', C=1.0, tol=1e-4)),
("SAG", LR(solver='sag', tol=1e-1, C=1.e4 / X.shape[0]))
]
xx = 1. - np.array(heldout)
for name, clf in classifiers:
print("training %s" % name)
rng = np.random.RandomState(42)
yy = []
for i in heldout:
yy_ = []
for r in range(rounds):
X_train, X_test, y_train, y_test = \
TTS(X, y, test_size=i, random_state=rng)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
yy_.append(1 - np.mean(y_pred == y_test))
yy.append(np.mean(yy_))
plt.plot(xx, yy, label=name)
plt.legend(loc="upper right")
plt.xlabel("Proportion train")
plt.ylabel("Test Error Rate")
plt.show()
training SGD
training ASGD
training Perceptron
training Pasv-Agrsv I
training Pasv-Agrsv II
training SAG
loss="squared_loss"
: ordinary least squares regressionloss="huber"
: huber loss for robust regressionloss="epsilon_insensitive
: linear support vector regressionpenalty
controls regularization (same options as in classification)from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)
# Always scale inputs. The most convenient way is to use a pipeline.
reg = make_pipeline(StandardScaler(),
SGDRegressor(max_iter=1000, tol=1e-3))
reg.fit(X, y)
Pipeline(steps=[('standardscaler', StandardScaler()), ('sgdregressor', SGDRegressor())])
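The other regression losses are selected the same way; a minimal sketch of an SVR-style fit on the same data (the epsilon value is illustrative):
reg_svr = make_pipeline(StandardScaler(),
                        SGDRegressor(loss="epsilon_insensitive", epsilon=0.1,
                                     max_iter=1000, tol=1e-3))
reg_svr.fit(X, y)  # linear support vector regression fitted with SGD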
Early stopping is controlled by early_stopping:
early_stopping=True - part of the training data is held out as a validation set; the stopping criterion is based on the prediction score (score) computed on that validation set.
early_stopping=False - the model is fitted on the entire input dataset; the stopping criterion is based on the objective function computed on the training data.
In both cases, training stops once the criterion fails to improve n_iter_no_change consecutive times, and the number of iterations is always bounded by max_iter.
SGD is sensitive to feature scaling, so scale the input data, e.g. with StandardScaler. For features extracted with something like PCA, it is often wise to rescale them by a constant c such that the average L2 norm of the training data equals one.
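A minimal sketch of validation-based early stopping on the regression data above (the parameter values are illustrative):
es_reg = make_pipeline(StandardScaler(),
                       SGDRegressor(early_stopping=True,      # hold out part of the training data
                                    validation_fraction=0.2,  # 20% used as the validation set
                                    n_iter_no_change=5,       # stop after 5 epochs without improvement
                                    max_iter=1000, tol=1e-3))
es_reg.fit(X, y)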