from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.ensemble import AdaBoostClassifier
X, y = load_iris(return_X_y=True)
clf = AdaBoostClassifier(n_estimators=100) # AdaBoost on 100 weak classifiers
scores = cross_val_score(clf, X, y, cv=5)
scores.mean()
0.9466666666666665
n_estimators controls the number of weak learners. learning_rate controls the contribution of each weak learner in the final combination. base_estimator controls the type of weak learner (a decision tree stump by default).
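A minimal sketch of setting all three parameters together (values are illustrative, not tuned; assumes the iris X, y loaded above):
from sklearn.tree import DecisionTreeClassifier as DTC
clf2 = AdaBoostClassifier(base_estimator=DTC(max_depth=2), n_estimators=200, learning_rate=0.5)
cross_val_score(clf2, X, y, cv=5).mean()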
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.metrics import zero_one_loss
from sklearn.ensemble import AdaBoostClassifier as ABC
n, lr = 400, 1.0
X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
X_test, y_test = X[2000:], y[2000:]
X_train, y_train = X[:2000], y[:2000]
dt_stump = DTC(max_depth=1, min_samples_leaf=1)
dt = DTC(max_depth=9, min_samples_leaf=1)
dt.fit( X_train, y_train)
dt_stump.fit(X_train, y_train)
dt_err = 1.0 - dt.score( X_test, y_test)
dt_stump_err = 1.0 - dt_stump.score(X_test, y_test)
ada_discrete = ABC(base_estimator=dt_stump, learning_rate=lr, n_estimators=n, algorithm="SAMME")
ada_real = ABC(base_estimator=dt_stump, learning_rate=lr, n_estimators=n, algorithm="SAMME.R")
ada_discrete.fit(X_train, y_train)
ada_real.fit(X_train, y_train)
AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1), n_estimators=400)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot([1, n], [dt_stump_err] * 2, 'k-',
label='Decision Stump Error')
ax.plot([1, n], [dt_err] * 2, 'k--',
label='Decision Tree Error')
ada_discrete_err = np.zeros( (n,))
ada_discrete_err_train = np.zeros((n,))
ada_real_err = np.zeros( (n,))
ada_real_err_train = np.zeros( (n,))
for i, y_pred in enumerate(ada_discrete.staged_predict(X_test)):
ada_discrete_err[i] = zero_one_loss(y_pred, y_test)
for i, y_pred in enumerate(ada_discrete.staged_predict(X_train)):
ada_discrete_err_train[i] = zero_one_loss(y_pred, y_train)
for i, y_pred in enumerate(ada_real.staged_predict(X_test)):
ada_real_err[i] = zero_one_loss(y_pred, y_test)
for i, y_pred in enumerate(ada_real.staged_predict(X_train)):
ada_real_err_train[i] = zero_one_loss(y_pred, y_train)
ax.plot(np.arange(n) + 1, ada_discrete_err,
label='Discrete AdaBoost Test Error',
color='red')
ax.plot(np.arange(n) + 1, ada_discrete_err_train,
label='Discrete AdaBoost Train Error',
color='blue')
ax.plot(np.arange(n) + 1, ada_real_err,
label='Real AdaBoost Test Error',
color='orange')
ax.plot(np.arange(n) + 1, ada_real_err_train,
label='Real AdaBoost Train Error',
color='green')
ax.set_ylim((0.0, 0.5))
ax.set_xlabel('n_estimators')
ax.set_ylabel('error rate')
leg = ax.legend(loc='upper right', fancybox=True)
leg.get_frame().set_alpha(0.7)
plt.show()
import matplotlib.pyplot as plt
from sklearn.datasets import make_gaussian_quantiles
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier as ABC
from sklearn.tree import DecisionTreeClassifier as DTC
X, y = make_gaussian_quantiles(n_samples=13000, n_features=10, n_classes=3, random_state=1)
n_split = 3000
X_train, X_test = X[:n_split], X[n_split:]
y_train, y_test = y[:n_split], y[n_split:]
bdt_real = ABC(DTC(max_depth=2), n_estimators=600, learning_rate=1)
bdt_discrete = ABC(DTC(max_depth=2), n_estimators=600, learning_rate=1.5, algorithm="SAMME")
bdt_real.fit( X_train, y_train)
bdt_discrete.fit(X_train, y_train)
real_test_errors, discrete_test_errors = [],[]
for real_test_predict, discrete_test_predict in zip(
bdt_real.staged_predict(X_test),
bdt_discrete.staged_predict(X_test)):
real_test_errors.append( 1. - accuracy_score( real_test_predict, y_test))
discrete_test_errors.append(1. - accuracy_score(discrete_test_predict, y_test))
n_trees_discrete = len(bdt_discrete)
n_trees_real = len(bdt_real)
# Boosting might terminate early, but the arrays are always n_estimators long.
# Crop them to the actual number of trees here:
discrete_estimator_errors = bdt_discrete.estimator_errors_[:n_trees_discrete]
real_estimator_errors = bdt_real.estimator_errors_[:n_trees_real]
discrete_estimator_weights = bdt_discrete.estimator_weights_[:n_trees_discrete]
plt.figure(figsize=(15, 5))
plt.subplot(131)
plt.plot(range(1, n_trees_discrete + 1), discrete_test_errors, c='black', label='SAMME')
plt.plot(range(1, n_trees_real + 1), real_test_errors, c='black', linestyle='dashed', label='SAMME.R')
plt.legend()
plt.ylim(0.18, 0.62)
plt.ylabel('Test Error'); plt.xlabel('Number of Trees')
plt.subplot(132)
plt.plot(range(1, n_trees_discrete + 1), discrete_estimator_errors, "b", label='SAMME', alpha=.5)
plt.plot(range(1, n_trees_real + 1), real_estimator_errors, "r", label='SAMME.R', alpha=.5)
plt.legend()
plt.ylabel('Error'); plt.xlabel('Number of Trees')
plt.ylim((.2,
max(real_estimator_errors.max(),
discrete_estimator_errors.max()) * 1.2))
plt.xlim((-20, len(bdt_discrete) + 20))
plt.subplot(133)
plt.plot(range(1, n_trees_discrete + 1), discrete_estimator_weights, "b", label='SAMME')
plt.legend()
plt.ylabel('Weight')
plt.xlabel('Number of Trees')
plt.ylim((0, discrete_estimator_weights.max() * 1.2))
plt.xlim((-20, n_trees_discrete + 20))
# prevent overlapping y-axis labels
plt.subplots_adjust(wspace=0.25)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier as ABC
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.datasets import make_gaussian_quantiles
# Construct dataset
X1, y1 = make_gaussian_quantiles(cov=2.,
n_samples=200, n_features=2,
n_classes=2, random_state=1)
X2, y2 = make_gaussian_quantiles(mean=(3, 3), cov=1.5,
n_samples=300, n_features=2,
n_classes=2, random_state=1)
X = np.concatenate((X1, X2))
y = np.concatenate((y1, - y2 + 1))
# Create and fit an AdaBoosted decision tree
bdt = ABC(DTC(max_depth=1), algorithm="SAMME", n_estimators=200)
bdt.fit(X, y)
AdaBoostClassifier(algorithm='SAMME', base_estimator=DecisionTreeClassifier(max_depth=1), n_estimators=200)
plot_colors, plot_step, class_names = "br", 0.02, "AB"
plt.figure(figsize=(10, 5))
# decision boundaries
plt.subplot(121)
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
np.arange(y_min, y_max, plot_step))
Z = bdt.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
plt.axis("tight")
# training points
for i, n, c in zip(range(2), class_names, plot_colors):
idx = np.where(y == i)
plt.scatter(X[idx, 0], X[idx, 1],
c=c, cmap=plt.cm.Paired,
s=20, edgecolor='k',
label="Class %s" % n)
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.legend(loc='upper right')
plt.xlabel('x'); plt.ylabel('y'); plt.title('Decision Boundary')
# two-class decision scores
twoclass_output = bdt.decision_function(X)
plot_range = (twoclass_output.min(), twoclass_output.max())
plt.subplot(122)
for i, n, c in zip(range(2), class_names, plot_colors):
plt.hist(twoclass_output[y == i],
bins=10,
range=plot_range,
facecolor=c,
label='Class %s' % n,
alpha=.5,
edgecolor='k')
x1, x2, y1, y2 = plt.axis()
plt.axis((x1, x2, y1, y2 * 1.2))
plt.legend(loc='upper right')
plt.ylabel('Samples'); plt.xlabel('Score'); plt.title('Decision Scores')
plt.tight_layout()
plt.subplots_adjust(wspace=0.35)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor as DTR
from sklearn.ensemble import AdaBoostRegressor as ABR
rng = np.random.RandomState(1)
X = np.linspace(0, 6, 100)[:, np.newaxis]
y = np.sin(X).ravel() + np.sin(6 * X).ravel() + rng.normal(0, 0.1, X.shape[0])
# Fit regression model
regr_1 = DTR(max_depth=4)
regr_2 = ABR(DTR(max_depth=4), n_estimators=300, random_state=rng)
regr_1.fit(X,y); regr_2.fit(X,y)
# Predict
y_1 = regr_1.predict(X)
y_2 = regr_2.predict(X)
# Plot the results
plt.figure()
plt.scatter(X, y, c="k", label="training samples")
plt.plot(X, y_1, c="g", label="n_estimators=1", linewidth=2)
plt.plot(X, y_2, c="r", label="n_estimators=300", linewidth=2)
plt.xlabel("data")
plt.ylabel("target")
plt.title("Boosted Decision Tree Regression")
plt.legend()
plt.show()
The number of weak learners is controlled with n_estimators. Tree size is controlled with max_depth and max_leaf_nodes. learning_rate controls overfitting via shrinkage; it takes values in [0.0, 1.0]. Multiclass classification builds n_classes regression trees at each iteration, so you should use HistGradientBoostingClassifier instead for datasets with large numbers of classes.
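A minimal multiclass sketch with HistGradientBoostingClassifier (iris, 3 classes; settings are illustrative):
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.datasets import load_iris
X_iris, y_iris = load_iris(return_X_y=True)
HistGradientBoostingClassifier(max_iter=100).fit(X_iris, y_iris).score(X_iris, y_iris)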
from sklearn.datasets import make_hastie_10_2
from sklearn.ensemble import GradientBoostingClassifier
X, y = make_hastie_10_2(random_state=0)
X_train, X_test = X[:2000], X[2000:]
y_train, y_test = y[:2000], y[2000:]
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
max_depth=1, random_state=0).fit(X_train, y_train)
clf.score(X_test, y_test)
0.913
The loss function is set with loss. The default for regression is least squares (ls).
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_friedman1
from sklearn.ensemble import GradientBoostingRegressor as GBR
X, y = make_friedman1(n_samples=1200,
random_state=0,
noise=1.0)
X_train, X_test = X[:200], X[200:]
y_train, y_test = y[:200], y[200:]
est = GBR(n_estimators=100,
learning_rate=0.1,
max_depth=1,
random_state=0,
loss='ls').fit(X_train, y_train)
mean_squared_error(y_test, est.predict(X_test))
5.009154859960321
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, ensemble
from sklearn.inspection import permutation_importance
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.1, random_state=13)
params = {'n_estimators': 500,
'max_depth': 4,
'min_samples_split': 5,
'learning_rate': 0.01,
'loss': 'ls'}
reg = ensemble.GradientBoostingRegressor(**params)
reg.fit(X_train, y_train)
mse = mean_squared_error(y_test, reg.predict(X_test))
print("The mean squared error (MSE) on test set: {:.4f}".format(mse))
The mean squared error (MSE) on test set: 3030.9181
test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
for i, y_pred in enumerate(reg.staged_predict(X_test)):
test_score[i] = reg.loss_(y_test, y_pred)
fig = plt.figure(figsize=(6, 6))
plt.subplot(1, 1, 1)
plt.title('Deviance')
plt.plot(np.arange(params['n_estimators']) + 1, reg.train_score_, 'b-',
label='Training Set Deviance')
plt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',
label='Test Set Deviance')
plt.legend(loc='upper right')
plt.xlabel('Boosting Iterations')
plt.ylabel('Deviance')
fig.tight_layout()
The size of the regression tree base learners defines the level of variable interactions that can be captured by gradient boosting. In general, a tree of depth h captures interactions of order h.
max_depth=h allows binary trees of depth h. They will have at most 2^h leaf nodes and 2^h - 1 split nodes.
max_leaf_nodes controls the number of leaf nodes. In this case trees grow in a best-first fashion: the nodes with the highest improvement in impurity are expanded first.
max_leaf_nodes=k gives results comparable to max_depth=k-1, but trains significantly faster at the cost of a slightly higher training error.
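A quick sketch contrasting the two ways of limiting tree size (Friedman #1 data as above; settings are illustrative):
from sklearn.datasets import make_friedman1
from sklearn.ensemble import GradientBoostingRegressor as GBR
X_f, y_f = make_friedman1(n_samples=1200, random_state=0, noise=1.0)
est_depth = GBR(max_depth=3, random_state=0).fit(X_f[:200], y_f[:200]) # depth-limited trees
est_leaves = GBR(max_leaf_nodes=4, random_state=0).fit(X_f[:200], y_f[:200]) # best-first trees; rule of thumb: max_leaf_nodes=k ~ max_depth=k-1
print(est_depth.score(X_f[200:], y_f[200:]), est_leaves.score(X_f[200:], y_f[200:]))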
Least squares (ls): the default choice for regression. Initial model given by the mean of the target values.
Least absolute deviation (lad): Initial model given by the median of the target values.
Huber (huber): Combines LS & LAD; uses alpha to control outlier sensitivity.
Quantile (quantile): For quantile regression, aka prediction intervals. Uses 0 < alpha < 1 to specify the quantile.
Binomial deviance (deviance): Uses a negative binomial log-likelihood loss function for binary classification.
Multinomial deviance (deviance): Uses a negative multinomial log-likelihood loss function for multiclass classification. Builds n_classes regression trees per iteration, which makes GBRT inefficient for large numbers of classes.
Exponential (exponential): The loss also used by AdaBoost. Less robust to mislabeled samples than deviance.
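A sketch of the quantile loss for rough prediction intervals (Friedman #1 data again; the alpha values are illustrative):
from sklearn.datasets import make_friedman1
from sklearn.ensemble import GradientBoostingRegressor as GBR
X_q, y_q = make_friedman1(n_samples=1200, random_state=0, noise=1.0)
lower = GBR(loss='quantile', alpha=0.1, n_estimators=100, random_state=0).fit(X_q[:200], y_q[:200]) # ~10th percentile
upper = GBR(loss='quantile', alpha=0.9, n_estimators=100, random_state=0).fit(X_q[:200], y_q[:200]) # ~90th percentile
print(lower.predict(X_q[200:205]))
print(upper.predict(X_q[200:205]))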
Regularization via shrinkage scales the contribution of each weak learner by the learning rate $\nu$: $F_m(x) = F_{m-1}(x) + \nu h_m(x)$.
The learning rate (learning_rate) scales the step length of the gradient descent procedure. It interacts strongly with n_estimators (the number of weak learners): smaller learning rates require more weak learners to maintain a constant training error.
Empirical evidence suggests that smaller learning rates yield better test error. It is recommended to start with a learning rate <= 0.1.
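A sketch of the learning_rate / n_estimators trade-off on the Hastie data (values are illustrative, not a benchmark):
from sklearn.datasets import make_hastie_10_2
from sklearn.ensemble import GradientBoostingClassifier as GBC
X_h, y_h = make_hastie_10_2(random_state=0)
fast = GBC(learning_rate=1.0, n_estimators=100, max_depth=1, random_state=0).fit(X_h[:2000], y_h[:2000]) # big steps, few trees
slow = GBC(learning_rate=0.1, n_estimators=1000, max_depth=1, random_state=0).fit(X_h[:2000], y_h[:2000]) # small steps, many trees
print(fast.score(X_h[2000:], y_h[2000:]), slow.score(X_h[2000:], y_h[2000:]))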
Stochastic gradient boosting (SGB) combines gradient boosting with bootstrap averaging (bagging). At each iteration, the base learner is trained on a fraction subsample of the training data, drawn without replacement (a typical value is 0.5).
The regularization example further below illustrates the effect of shrinkage and subsampling.
Subsampling features also reduces variance; the number of subsampled features is controlled with max_features. Small values significantly reduce runtime.
SGB allows out-of-bag (OOB) estimates of the test deviance by computing the improvement in deviance on the examples not included in the subsample. The per-iteration improvements are stored in oob_improvement_. They can be used, for example, for model selection (e.g. choosing the number of boosting iterations), as sketched below.
OOB estimates are usually very pessimistic. Use cross-validation instead - resort to OOB only if CV is too time-consuming.
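A sketch of reading oob_improvement_ to pick a stopping iteration (requires subsample < 1.0; the cumulative-sum heuristic here is illustrative):
import numpy as np
from sklearn.datasets import make_hastie_10_2
from sklearn.ensemble import GradientBoostingClassifier as GBC
X_o, y_o = make_hastie_10_2(n_samples=4000, random_state=0)
clf_oob = GBC(n_estimators=200, subsample=0.5, random_state=0).fit(X_o[:2000], y_o[:2000])
cum_oob = np.cumsum(clf_oob.oob_improvement_) # cumulative OOB improvement per stage
print(int(np.argmax(cum_oob)) + 1) # iteration with the largest cumulative improvement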
Individual decision trees can be evaluated by visualizing the tree structure. Gradient boosting models include hundreds of regression trees & cannot be easily interpreted by visual inspection. Fortunately, a number of techniques have been proposed to summarize and interpret gradient boosting models.
Features usually do not contribute equally to the target response; in many situations the majority of features are irrelevant.
Individual decision trees perform feature selection by selecting appropriate split points. This can be used to measure the importance of each feature - the more often a feature is used in the split points, the more important that feature is.
This notion can be extended to ensembles by averaging the impurity-based feature importance of each tree.
The feature importance scores of a fitted GB model can be accessed via feature_importances_.
Note: this evaluation is impurity-based. It is distinct from permutation_importance, which is based on feature permutations (see the sketch after the code below).
from sklearn.datasets import make_hastie_10_2
from sklearn.ensemble import GradientBoostingClassifier as GBC
X, y = make_hastie_10_2(random_state=0)
clf = GBC(n_estimators=100,
learning_rate=1.0,
max_depth=1,
random_state=0).fit(X, y)
clf.feature_importances_
array([0.10684213, 0.10461707, 0.11265447, 0.09863589, 0.09469133, 0.10729306, 0.09163753, 0.09718194, 0.09581415, 0.09063242])
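For contrast, a sketch of permutation_importance on the same fitted clf (scored on the training data here purely for illustration):
from sklearn.inspection import permutation_importance
result = permutation_importance(clf, X, y, n_repeats=5, random_state=0)
result.importances_mean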
import numpy as np
import matplotlib.pyplot as plt
from sklearn import ensemble
from sklearn import datasets
X, y = datasets.make_hastie_10_2(n_samples=12000,
random_state=1)
# map labels from {-1, 1} to {0, 1}
X = X.astype(np.float32)
labels, y = np.unique(y, return_inverse=True)
X_train, X_test = X[:2000], X[2000:]
y_train, y_test = y[:2000], y[2000:]
original_params = {'n_estimators': 1000,
'max_leaf_nodes': 4,
'max_depth': None,
'random_state': 2,
'min_samples_split': 5}
plt.figure()
for label, color, setting in [('No shrinkage', 'orange',
{'learning_rate': 1.0, 'subsample': 1.0}),
('learning_rate=0.1', 'turquoise',
{'learning_rate': 0.1, 'subsample': 1.0}),
('subsample=0.5', 'blue',
{'learning_rate': 1.0, 'subsample': 0.5}),
('learning_rate=0.1, subsample=0.5', 'gray',
{'learning_rate': 0.1, 'subsample': 0.5}),
('learning_rate=0.1, max_features=2', 'magenta',
{'learning_rate': 0.1, 'max_features': 2})]:
params = dict(original_params)
params.update(setting)
clf = ensemble.GradientBoostingClassifier(**params).fit(X_train, y_train)
test_deviance = np.zeros((params['n_estimators'],),
dtype=np.float64)
for i, y_pred in enumerate(clf.staged_decision_function(X_test)):
# clf.loss_ assumes that y_test[i] in {0, 1}
test_deviance[i] = clf.loss_(y_test, y_pred)
plt.plot((np.arange(test_deviance.shape[0]) + 1)[::5],
test_deviance[::5],
'-', color=color, label=label)
plt.legend(loc='upper left')
plt.xlabel('Boosting Iterations')
plt.ylabel('Test Set Deviance')
Classification and regression are supported.
The number of bins is controlled with max_bins. Fewer bins act as extra regularization; the general rule is to use as many bins as possible (the default).
L2 regularization of the leaf values is controlled with l2_regularization.
The loss function is selected with loss.
Early stopping is enabled by default when there are more than 10,000 samples; it is controlled with early_stopping, scoring, validation_fraction, n_iter_no_change and tol.
Missing values are supported natively - no standalone imputation step is needed.
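A sketch pulling these knobs together (Hastie data; all values are illustrative, not tuned):
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingClassifier as HGBC
from sklearn.datasets import make_hastie_10_2
X_h, y_h = make_hastie_10_2(random_state=0)
clf_h = HGBC(max_bins=128, l2_regularization=1.0, early_stopping=True, validation_fraction=0.1, n_iter_no_change=10, random_state=0).fit(X_h[:2000], y_h[:2000])
clf_h.score(X_h[2000:], y_h[2000:])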
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingClassifier as HGBC
import numpy as np
X = np.array([0, 1, 2, np.nan]).reshape(-1, 1)
y = [0, 0, 1, 1]
gbdt = HGBC(min_samples_leaf=1).fit(X, y); gbdt.predict(X)
array([0, 0, 1, 1])
# explicitly require the feature before using
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier as HGBC
from sklearn.datasets import make_hastie_10_2
X, y = make_hastie_10_2(random_state=0)
X_train, X_test = X[:2000], X[2000:]
y_train, y_test = y[:2000], y[2000:]
clf = HGBC(max_iter=100).fit(X_train, y_train); clf.score(X_test, y_test)
0.8965
X = np.array([0, np.nan, 1, 2, np.nan]).reshape(-1, 1)
y = [0, 1, 0, 0, 1]
gbdt = HGBC(min_samples_leaf=1, max_depth=2, learning_rate=1, max_iter=1).fit(X, y)
gbdt.predict(X)
array([0, 1, 0, 0, 1])
X = [[1, 0], [1, 0], [1, 0], [0, 1]]
y = [ 0, 0, 1, 0]
w = [ 0, 0, 1, 1] # ignore 1st 2 samples by setting their weight to 0
gb = HGBC(min_samples_leaf=1)
gb.fit(X, y, sample_weight=w)
print(gb.predict([[1, 0]]))
print(gb.predict_proba([[1, 0]])[0, 1])
[1]
0.9990209190235209
Use categorical_features to indicate which features are categorical, either as a boolean mask or as a list of feature indices:
gbdt = HGBC(categorical_features=[True, False])
gbdt = HGBC(categorical_features=[0])
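A minimal runnable sketch of native categorical support on toy data (column 0 is an ordinally-encoded category; the data here is made up for illustration):
import numpy as np
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingRegressor as HGBR
rng = np.random.RandomState(0)
X_cat = np.c_[rng.randint(0, 3, size=100), rng.rand(100)] # column 0: category codes 0..2, column 1: numeric
y_cat = X_cat[:, 0] + X_cat[:, 1]
gbdt = HGBR(categorical_features=[0]).fit(X_cat, y_cat)
gbdt.predict(X_cat[:3])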
# load Ames housing dataset
from sklearn.datasets import fetch_openml
X, y = fetch_openml(data_id=41211, as_frame=True, return_X_y=True)
n_categorical_features = (X.dtypes == 'category').sum()
n_numerical_features = (X.dtypes == 'float').sum()
print(f"#samples: {X.shape[0]}")
print(f"#features: {X.shape[1]}")
print(f"#categorical features: {n_categorical_features}")
print(f"#numerical features: {n_numerical_features}")
#samples: 2930
#features: 80
#categorical features: 46
#numerical features: 34
# create estimator
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingRegressor as HGBR
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector
dropper = make_column_transformer(
('drop', make_column_selector(dtype_include='category')),
remainder='passthrough')
hist_dropped = make_pipeline(dropper, HGBR(random_state=42))
# one-hot encoding
from sklearn.preprocessing import OneHotEncoder as OHE
one_hot_encoder = make_column_transformer(
(OHE(sparse=False, handle_unknown='ignore'),
make_column_selector(dtype_include='category')),
remainder='passthrough')
hist_one_hot = make_pipeline(one_hot_encoder, HGBR(random_state=42))
# ordinal encoding
from sklearn.preprocessing import OrdinalEncoder as OE
import numpy as np
ordinal_encoder = make_column_transformer(
(OE(handle_unknown='use_encoded_value', unknown_value=np.nan),
make_column_selector(dtype_include='category')),
remainder='passthrough')
hist_ordinal = make_pipeline(ordinal_encoder, HGBR(random_state=42))
# native category support
categorical_mask = ([True]*n_categorical_features + [False]*n_numerical_features)
hist_native = make_pipeline(
ordinal_encoder, HGBR(random_state=42, categorical_features=categorical_mask))
# compare with cross validation
from sklearn.model_selection import cross_validate as CV
import matplotlib.pyplot as plt
scoring = "neg_mean_absolute_percentage_error"
dropped_result = CV(hist_dropped, X, y, cv=3, scoring=scoring)
one_hot_result = CV(hist_one_hot, X, y, cv=3, scoring=scoring)
ordinal_result = CV(hist_ordinal, X, y, cv=3, scoring=scoring)
native_result = CV(hist_native, X, y, cv=3, scoring=scoring)
def plot_results(figure_title):
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))
plot_info = [('fit_time', 'Fit times (s)', ax1, None),
('test_score', 'Mean Absolute Percentage Error', ax2,
(0, 0.20))]
x, width = np.arange(4), 0.9
for key, title, ax, y_limit in plot_info:
items = [dropped_result[key], one_hot_result[key], ordinal_result[key],
native_result[key]]
ax.bar(x, [np.mean(np.abs(item)) for item in items],
width, yerr=[np.std(item) for item in items],
color=['C0', 'C1', 'C2', 'C3'])
ax.set(xlabel='Model', title=title, xticks=x,
xticklabels=["Dropped", "One Hot", "Ordinal", "Native"],
ylim=y_limit)
fig.suptitle(figure_title)
plot_results("Gradient Boosting on Adult Census")
# limiting the #of splits
# rerun analysis with artificially low split count by limiting #trees & tree depth
for pipe in (hist_dropped, hist_one_hot, hist_ordinal, hist_native):
pipe.set_params(histgradientboostingregressor__max_depth=3,
histgradientboostingregressor__max_iter=15)
dropped_result = CV(hist_dropped, X, y, cv=3, scoring=scoring)
one_hot_result = CV(hist_one_hot, X, y, cv=3, scoring=scoring)
ordinal_result = CV(hist_ordinal, X, y, cv=3, scoring=scoring)
native_result = CV(hist_native, X, y, cv=3, scoring=scoring)
plot_results("Gradient Boosting on Adult Census (few and small trees)")
plt.show()
monotonic_cst controls the constraint per feature: 0 = no constraint, -1 = monotonically decreasing (negative) constraint, +1 = monotonically increasing (positive) constraint.
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingRegressor as HGBR
from sklearn.inspection import plot_partial_dependence as PPD
import numpy as np
import matplotlib.pyplot as plt
rng = np.random.RandomState(0)
n = 5000
f_0 = rng.rand(n) # positive correlation with y
f_1 = rng.rand(n) # negative correlation with y
X = np.c_[f_0, f_1]
noise = rng.normal(loc=0.0, scale=0.01, size=n)
y = (5*f_0 + np.sin(10*np.pi*f_0) -
5*f_1 - np.cos(10*np.pi*f_1) + noise)
fig, ax = plt.subplots()
# Without any constraint
gbdt = HGBR()
gbdt.fit(X, y)
disp = PPD(gbdt, X, features=[0, 1],
line_kw={"linewidth": 4, "label": "unconstrained", "color": "tab:blue"},
ax=ax)
# With positive and negative constraints
gbdt = HGBR(monotonic_cst=[1, -1])
gbdt.fit(X, y)
PPD(gbdt, X, features=[0, 1],
feature_names=(
"First feature\nPositive constraint",
"Second feature\nNegtive constraint",
),
line_kw={"linewidth": 4, "label": "constrained", "color": "tab:orange"},
ax=disp.axes_)
for f_idx in (0, 1):
disp.axes_[0, f_idx].plot(
X[:, f_idx], y, "o", alpha=0.3, zorder=-1, color="tab:green"
)
disp.axes_[0, f_idx].set_ylim(-6, 6)
plt.legend()
fig.suptitle("Monotonic constraints illustration")
plt.show()
estimators contains the list of stacked (parallel) estimators. final_estimator uses their combined predictions to produce the final output.
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.neighbors import KNeighborsRegressor as KNR
estimators = [('ridge', RidgeCV()),
('lasso', LassoCV(random_state=42)),
('knr', KNR(n_neighbors=20, metric='euclidean'))]
from sklearn.ensemble import GradientBoostingRegressor as GBR
from sklearn.ensemble import StackingRegressor as SR
final_estimator = GBR(n_estimators=25,
subsample=0.5,
min_samples_leaf=25,
max_features=1,
random_state=42)
reg = SR(estimators=estimators,
final_estimator=final_estimator)
from sklearn.datasets import load_diabetes
X, y = load_diabetes(return_X_y=True)
from sklearn.model_selection import train_test_split as TTS
X_train, X_test, y_train, y_test = TTS(X, y, random_state=42)
reg.fit(X_train, y_train)
StackingRegressor(estimators=[('ridge', RidgeCV(alphas=array([ 0.1, 1. , 10. ]))), ('lasso', LassoCV(random_state=42)), ('knr', KNeighborsRegressor(metric='euclidean', n_neighbors=20))], final_estimator=GradientBoostingRegressor(max_features=1, min_samples_leaf=25, n_estimators=25, random_state=42, subsample=0.5))
y_pred = reg.predict(X_test)
from sklearn.metrics import r2_score
print('R2 score: {:.2f}'.format(r2_score(y_test, y_pred)))
R2 score: 0.53
# getting output of stacked estimators:
reg.transform(X_test[:5])
array([[142.36214074, 138.30765507, 146.1 ], [179.70207217, 182.90046333, 151.75 ], [139.89924327, 132.47007083, 158.25 ], [286.94742491, 292.65164781, 225.4 ], [126.88190192, 124.11964797, 164.65 ]])
from sklearn.ensemble import RandomForestRegressor as RFR
final_layer_rfr = RFR(n_estimators=10, max_features=1, max_leaf_nodes=5, random_state=42)
final_layer_gbr = GBR(n_estimators=10, max_features=1, max_leaf_nodes=5,random_state=42)
final_layer = SR(
estimators=[('rf', final_layer_rfr),
('gbrt', final_layer_gbr)],
final_estimator=RidgeCV()
)
multi_layer_regressor = SR(
estimators=[('ridge', RidgeCV()),
('lasso', LassoCV(random_state=42)),
('knr', KNR(n_neighbors=20, metric='euclidean'))],
final_estimator=final_layer
)
multi_layer_regressor.fit(X_train, y_train)
print('R2 score: {:.2f}'.format(multi_layer_regressor.score(X_test, y_test)))
R2 score: 0.53