The `estimators` parameter controls the list of stacked estimators. It is a list of (name, estimator) pairs.

from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.neighbors import KNeighborsRegressor as KNR
# Base learners of the stack, each given as a (name, estimator) pair.
ridge_entry = ('ridge', RidgeCV())
lasso_entry = ('lasso', LassoCV(random_state=42))
knr_entry = ('knr', KNR(n_neighbors=20, metric='euclidean'))
estimators = [ridge_entry, lasso_entry, knr_entry]
# final estimator definition
from sklearn.ensemble import GradientBoostingRegressor as GBR
from sklearn.ensemble import StackingRegressor as SR
# Settings for the gradient-boosting model used as the final estimator.
gbr_settings = dict(
    n_estimators=25,
    subsample=0.5,
    min_samples_leaf=25,
    max_features=1,
    random_state=42,
)
final_estimator = GBR(**gbr_settings)

# Combine the base estimators with the final estimator into one regressor.
reg = SR(estimators=estimators, final_estimator=final_estimator)
# training
from sklearn.datasets import load_diabetes
# Load the diabetes data directly as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)
from sklearn.model_selection import train_test_split as TTS
# Split into train and test parts; the fixed seed keeps the split repeatable.
split_parts = TTS(X, y, random_state=42)
X_train, X_test, y_train, y_test = split_parts
# Fit the stacked regressor on the training part only.
reg.fit(X_train, y_train)
StackingRegressor(estimators=[('ridge', RidgeCV(alphas=array([ 0.1, 1. , 10. ]))), ('lasso', LassoCV(random_state=42)), ('knr', KNeighborsRegressor(metric='euclidean', n_neighbors=20))], final_estimator=GradientBoostingRegressor(max_features=1, min_samples_leaf=25, n_estimators=25, random_state=42, subsample=0.5))
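During training, the estimators are fitted on the whole training data (`X_train`).
The output of the estimators is controlled by `stack_method`. `stack_method="auto"` will select a method from `predict_proba`, `decision_function`, and `predict`.
The output of the stacked estimators can be obtained with the `transform` method.

y_pred = reg.predict(X_test)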
from sklearn.metrics import r2_score
# R2 of the stacked regressor's predictions on the test split, to two decimals.
test_r2 = r2_score(y_test, y_pred)
score_line = 'R2 score: {:.2f}'.format(test_r2)
print(score_line)

# Output of the stacked estimators for the first five test rows.
first_rows = X_test[:5]
print(reg.transform(first_rows))
R2 score: 0.53
[[142.36214074 138.30765507 146.1       ]
 [179.70207217 182.90046333 151.75      ]
 [139.89924327 132.47007083 158.25      ]
 [286.94742491 292.65164781 225.4       ]
 [126.88190192 124.11964797 164.65      ]]
Multiple stacking layers can be achieved by assigning `final_estimator` to a stacking classifier or regressor.

from sklearn.ensemble import RandomForestRegressor as RFR
from sklearn.ensemble import GradientBoostingRegressor as GBR
from sklearn.ensemble import StackingRegressor as SR
from sklearn.neighbors import KNeighborsRegressor as KNR
# Both second-layer tree ensembles are built with the same settings.
tree_settings = dict(
    n_estimators=10,
    max_features=1,
    max_leaf_nodes=5,
    random_state=42,
)
final_layer_rfr = RFR(**tree_settings)
final_layer_gbr = GBR(**tree_settings)

# Second layer: a stacking regressor over the two tree ensembles, with
# RidgeCV as its own final estimator.
second_layer_estimators = [
    ('rf', final_layer_rfr),
    ('gbrt', final_layer_gbr),
]
final_layer = SR(
    estimators=second_layer_estimators,
    final_estimator=RidgeCV(),
)
# First layer: the same three base learners, as (name, estimator) pairs.
first_layer_estimators = [
    ('ridge', RidgeCV()),
    ('lasso', LassoCV(random_state=42)),
    ('knr', KNR(n_neighbors=20, metric='euclidean')),
]

# Passing a stacking regressor as the final estimator adds a second layer.
multi_layer_regressor = SR(
    estimators=first_layer_estimators,
    final_estimator=final_layer,
)
# Fit the two-layer stack on the training split.
multi_layer_regressor.fit(X_train, y_train)

# Score on the test split and report it to two decimals.
multi_layer_r2 = multi_layer_regressor.score(X_test, y_test)
print('R2 score: {:.2f}'.format(multi_layer_r2))
R2 score: 0.53