# Column-wise (axis=0) mean and standard deviation of X_scaled.
print(X_scaled.mean(axis=0))
print(X_scaled.std(axis=0))

[0. 0. 0.]
[1. 1. 1.]


from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Synthetic classification data, split into train and test parts.
X, y = make_classification(random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Chain the scaler and the classifier, then fit the whole chain on the
# training split.
pipe = make_pipeline(StandardScaler(), LogisticRegression())
pipe.fit(X_train, y_train)

# Score the fitted pipeline on the held-out split.
pipe.score(X_test, y_test)

0.96


# Toy training matrix: 3 rows x 3 columns.
X_train = np.array([
    [1.0, -1.0, 2.0],
    [2.0, 0.0, 0.0],
    [0.0, 1.0, -1.0],
])

# Fit the scaler on X_train, then transform that same data.
min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler.fit(X_train)
X_train_minmax = min_max_scaler.transform(X_train)
X_train_minmax

array([[0.5       , 0.        , 1.        ],
       [1.        , 0.5       , 0.33333333],
       [0.        , 1.        , 0.        ]])


# Reuse the scaler fitted on X_train for a new, single-row sample.
X_test = np.array([[-3.0, -1.0, 4.0]])
X_test_minmax = min_max_scaler.transform(X_test)
X_test_minmax

array([[-1.5       ,  0.        ,  1.66666667]])


# Inspect the fitted scale_ and min_ attributes of min_max_scaler.
print(min_max_scaler.scale_)
print(min_max_scaler.min_)

[0.5        0.5        0.33333333]
[0.         0.5        0.33333333]


from sklearn.preprocessing import RobustScaler
X = [
    [1.0, -2.0, 2.0],
    [-2.0, 1.0, 3.0],
    [4.0, 1.0, -2.0],
]

# Fit the scaler on X, then transform the same rows.
transformer = RobustScaler()
transformer.fit(X)
transformer.transform(X)

array([[ 0. , -2. ,  0. ],
       [-1. ,  0. ,  0.4],
       [ 1. ,  0. , -1.6]])


from sklearn.preprocessing import KernelCenterer
from sklearn.metrics.pairwise import pairwise_kernels

X = [
    [1.0, -2.0, 2.0],
    [-2.0, 1.0, 3.0],
    [4.0, 1.0, -2.0],
]

# Pairwise linear kernel between the rows of X.
K = pairwise_kernels(X, metric='linear')
print(K)

# Fit the centerer on the kernel matrix and apply it to that same matrix.
transformer = KernelCenterer()
transformer.fit(K)
transformer

transformer.transform(K)

[[  9.   2.  -2.]
 [  2.  14. -13.]
 [ -2. -13.  21.]]

array([[  5.,   0.,  -5.],
       [  0.,  14., -14.],
       [ -5., -14.,  19.]])


from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split as TTS
from sklearn.preprocessing import QuantileTransformer as QT

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = TTS(X, y, random_state=0)

# Fit on the training split only; the test split is transformed with the
# quantiles learned from the training data.
# NOTE: n_quantiles is left at its default here, which is why the recorded
# run emits a UserWarning about n_quantiles exceeding the sample count.
qt = QT(random_state=0)
X_train_trans = qt.fit_transform(X_train)
X_test_trans = qt.transform(X_test)

# Min, quartiles and max of the first (untransformed) training feature.
percentile_levels = [0, 25, 50, 75, 100]
np.percentile(X_train[:, 0], percentile_levels)

/home/bjpcjp/.local/lib/python3.8/site-packages/sklearn/preprocessing/_data.py:2612: UserWarning: n_quantiles (1000) is greater than the total number of samples (112). n_quantiles is set to n_samples.
  warnings.warn("n_quantiles (%s) is greater than the total number "

array([4.3, 5.1, 5.8, 6.5, 7.9])


import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import PowerTransformer as PT
from sklearn.preprocessing import QuantileTransformer as QT
from sklearn.model_selection import train_test_split as TTS


N_SAMPLES = 1000
FONT_SIZE = 9
BINS = 30

# One shared random generator drives both the sampling below and the
# quantile transformer.
rng = np.random.RandomState(304)
bc = PT(method='box-cox')
yj = PT(method='yeo-johnson')
# n_quantiles is set to the training set size rather than the default value
# to avoid a warning being raised by this example
qt = QT(n_quantiles=500, output_distribution='normal', random_state=rng)
size = (N_SAMPLES, 1)

# distributions (drawn in this fixed order from the shared generator)
X_lognormal = rng.lognormal(size=size)
X_chisq = rng.chisquare(df=3, size=size)
X_weibull = rng.weibull(a=50, size=size)
X_gaussian = rng.normal(loc=100, size=size)
X_uniform = rng.uniform(low=0, high=1, size=size)
X_a = rng.normal(loc=100, size=size)
X_b = rng.normal(loc=105, size=size)
X_bimodal = np.concatenate([X_a, X_b], axis=0)

# (title, data) pairs, plotted in this order.
distributions = [
    ('Lognormal', X_lognormal),
    ('Chi-squared', X_chisq),
    ('Weibull', X_weibull),
    ('Gaussian', X_gaussian),
    ('Uniform', X_uniform),
    ('Bimodal', X_bimodal),
]

# One colour per distribution.
colors = [
    '#D81B60', '#0188FF', '#FFC107',
    '#B7A2FF', '#000000', '#2EC5AC',
]

# 8 x 3 grid, flattened so that each distribution can pick four axes
# (original + three transforms) by flat index.
fig, axes = plt.subplots(nrows=8, ncols=3, figsize=(10, 20))
axes = axes.flatten()
axes_idxs = [
    (0, 3, 6, 9), (1, 4, 7, 10), (2, 5, 8, 11),
    (12, 15, 18, 21), (13, 16, 19, 22), (14, 17, 20, 23),
]
axes_list = [tuple(axes[idx] for idx in group) for group in axes_idxs]

# For every distribution: fit the three transformers on one half of the data
# and histogram the transformed other half next to the raw training half.
# NOTE: the loop variable ``axes`` keeps its original name and therefore
# rebinds the flattened axes array; ``X`` stays bound to the last dataset
# after the loop, which the following cell relies on.
for distribution, color, axes in zip(distributions, colors, axes_list):
    name, X = distribution
    X_train, X_test = TTS(X, test_size=.5)

    # perform power transforms and quantile transform
    X_trans_bc = bc.fit(X_train).transform(X_test)
    X_trans_yj = yj.fit(X_train).transform(X_test)
    X_trans_qt = qt.fit(X_train).transform(X_test)

    # Fitted lambda of the single feature, rounded for the subplot titles.
    lmbda_bc = round(bc.lambdas_[0], 2)
    lmbda_yj = round(yj.lambdas_[0], 2)

    ax_original, ax_bc, ax_yj, ax_qt = axes
    ax_original.hist(X_train, color=color, bins=BINS)
    ax_original.set_title(name, fontsize=FONT_SIZE)
    ax_original.tick_params(axis='both', which='major', labelsize=FONT_SIZE)

    # The quantile transform has no lambda, hence the trailing None.
    for ax, X_trans, meth_name, lmbda in zip(
            (ax_bc, ax_yj, ax_qt),
            (X_trans_bc, X_trans_yj, X_trans_qt),
            ('Box-Cox', 'Yeo-Johnson', 'Quantile transform'),
            (lmbda_bc, lmbda_yj, None)):
        ax.hist(X_trans, color=color, bins=BINS)
        title = 'After {}'.format(meth_name)
        if lmbda is not None:
            # Must not be a raw string: r'\n' is a literal backslash + 'n'
            # and would be drawn verbatim instead of starting a new line.
            title += '\n$\\lambda$ = {}'.format(lmbda)
        ax.set_title(title, fontsize=FONT_SIZE)
        ax.tick_params(axis='both', which='major', labelsize=FONT_SIZE)
        ax.set_xlim([-3.5, 3.5])

plt.tight_layout()


# X is whatever the preceding plotting loop left bound (its last dataset).
qt = QT(output_distribution='normal', random_state=0)
X_trans = qt.fit_transform(X)

# Show the quantiles stored by the fitted transformer.
print(qt.quantiles_)

[[ 96.87146878]
 [ 97.36869589]
 [ 97.44161038]
 [ 97.66766296]
 [ 97.76395443]
 [ 97.84308546]
 [ 97.89998854]
 [ 97.94780408]
 [ 98.01976535]
 [ 98.06714001]
 [ 98.12758478]
 [ 98.18402141]
 [ 98.20336658]
 [ 98.20808321]
 [ 98.21968918]
 [ 98.2290644 ]
 [ 98.24487676]
 [ 98.27428825]
 [ 98.29010168]
 [ 98.32456892]
 [ 98.34896539]
 [ 98.36658477]
 [ 98.40316074]
 [ 98.40861504]
 [ 98.46878547]
 [ 98.53351882]
 [ 98.5627032 ]
 [ 98.57312525]
 [ 98.57755366]
 [ 98.59628204]
 [ 98.61045419]
 [ 98.62232126]
 [ 98.62642224]
 [ 98.64620207]
 [ 98.65164177]
 [ 98.65806511]
 [ 98.66597467]
 [ 98.67885972]
 [ 98.68554714]
 [ 98.7185098 ]
 [ 98.73718624]
 [ 98.74926564]
 [ 98.76154933]
 [ 98.78446872]
 [ 98.79674209]
 [ 98.81062763]
 [ 98.82947932]
 [ 98.83557198]
 [ 98.83793393]
 [ 98.83942389]
 [ 98.84137921]
 [ 98.85504734]
 [ 98.86901985]
 [ 98.87513607]
 [ 98.88896597]
 [ 98.89121561]
 [ 98.89807706]
 [ 98.89982972]
 [ 98.90449052]
 [ 98.90815477]
 [ 98.91994158]
 [ 98.9275831 ]
 [ 98.93704353]
 [ 98.94655469]
 [ 98.95256436]
 [ 98.96227765]
 [ 98.96438282]
 [ 98.96894502]
 [ 98.98226624]
 [ 98.99509023]
 [ 99.00825738]
 [ 99.01462213]
 [ 99.01646174]
 [ 99.02000082]
 [ 99.02896925]
 [ 99.03576822]
 [ 99.04403643]
 [ 99.05072809]
 [ 99.05548091]
 [ 99.06381698]
 [ 99.07570707]
 [ 99.07755079]
 [ 99.09210767]
 [ 99.09469663]
 [ 99.09677484]
 [ 99.09934563]
 [ 99.10600422]
 [ 99.11445922]
 [ 99.11733914]
 [ 99.12550626]
 [ 99.13241879]
 [ 99.13548886]
 [ 99.14211493]
 [ 99.14870832]
 [ 99.16563472]
 [ 99.17736512]
 [ 99.17966448]
 [ 99.18131757]
 [ 99.18382827]
 [ 99.18745413]
 [ 99.19102632]
 [ 99.19816417]
 [ 99.20303136]
 [ 99.20555064]
 [ 99.21821279]
 [ 99.2208516 ]
 [ 99.22548808]
 [ 99.23258941]
 [ 99.23495782]
 [ 99.24306345]
 [ 99.24538773]
 [ 99.24592936]
 [ 99.25522827]
 [ 99.25762176]
 [ 99.26345068]
 [ 99.27189691]
 [ 99.27411014]
 [ 99.28014081]
 [ 99.28261832]
 [ 99.28666823]
 [ 99.28998137]
 [ 99.30147148]
 [ 99.30365331]
 [ 99.31271713]
 [ 99.31689138]
 [ 99.3320804 ]
 [ 99.33896351]
 [ 99.34536255]
 [ 99.35053619]
 [ 99.35958229]
 [ 99.36093293]
 [ 99.3691498 ]
 [ 99.37465489]
 [ 99.37752816]
 [ 99.38326035]
 [ 99.38731825]
 [ 99.39134224]
 [ 99.39390924]
 [ 99.40372935]
 [ 99.40965689]
 [ 99.41324161]
 [ 99.41595275]
 [ 99.41946107]
 [ 99.42628299]
 [ 99.43163562]
 [ 99.43876474]
 [ 99.44362713]
 [ 99.44763612]
 [ 99.46280987]
 [ 99.46395778]
 [ 99.46817292]
 [ 99.47109552]
 [ 99.48187139]
 [ 99.49080645]
 [ 99.49229896]
 [ 99.49325652]
 [ 99.50334509]
 [ 99.50958013]
 [ 99.51222903]
 [ 99.52165825]
 [ 99.52926723]
 [ 99.54204792]
 [ 99.5452066 ]
 [ 99.54613645]
 [ 99.54886467]
 [ 99.5528844 ]
 [ 99.55828364]
 [ 99.56075126]
 [ 99.56386489]
 [ 99.57186354]
 [ 99.57580139]
 [ 99.58303785]
 [ 99.59586642]
 [ 99.59898223]
 [ 99.60575331]
 [ 99.61508295]
 [ 99.61831349]
 [ 99.63294748]
 [ 99.63672027]
 [ 99.63887051]
 [ 99.64285705]
 [ 99.65391793]
 [ 99.65816661]
 [ 99.66195728]
 [ 99.66512566]
 [ 99.66579191]
 [ 99.66730973]
 [ 99.67314652]
 [ 99.69015751]
 [ 99.70021023]
 [ 99.70499307]
 [ 99.72058796]
 [ 99.72213266]
 [ 99.72559412]
 [ 99.7271614 ]
 [ 99.73171841]
 [ 99.73751999]
 [ 99.7460929 ]
 [ 99.75131644]
 [ 99.75532076]
 [ 99.75916867]
 [ 99.77187449]
 [ 99.77548022]
 [ 99.78341242]
 [ 99.78673864]
 [ 99.79332569]
 [ 99.7979775 ]
 [ 99.80973737]
 [ 99.81559585]
 [ 99.81976553]
 [ 99.82318612]
 [ 99.82603118]
 [ 99.82849325]
 [ 99.83472526]
 [ 99.83763113]
 [ 99.84693421]
 [ 99.85464087]
 [ 99.85797634]
 [ 99.86567847]
 [ 99.87706707]
 [ 99.88613253]
 [ 99.89159333]
 [ 99.90199729]
 [ 99.90381537]
 [ 99.90577775]
 [ 99.90693677]
 [ 99.91013882]
 [ 99.91255079]
 [ 99.91636489]
 [ 99.93026947]
 [ 99.93248148]
 [ 99.94025593]
 [ 99.94336956]
 [ 99.94912371]
 [ 99.95403832]
 [ 99.96886654]
 [ 99.97389626]
 [ 99.97872669]
 [ 99.98440639]
 [ 99.99017796]
 [ 99.99056214]
 [ 99.99488323]
 [100.00153153]
 [100.00717504]
 [100.01045319]
 [100.01104184]
 [100.01188466]
 [100.01724113]
 [100.02187492]
 [100.0229598 ]
 [100.03507222]
 [100.04180979]
 [100.04736559]
 [100.05059778]
 [100.05492061]
 [100.05823252]
 [100.06130253]
 [100.06882303]
 [100.07023695]
 [100.07363115]
 [100.07655089]
 [100.08146782]
 [100.09063144]
 [100.09577763]
 [100.09699653]
 [100.0999343 ]
 [100.10958957]
 [100.11215023]
 [100.11900962]
 [100.12483164]
 [100.13530442]
 [100.14452965]
 [100.15400202]
 [100.15521351]
 [100.15587289]
 [100.1586817 ]
 [100.16790404]
 [100.17641183]
 [100.1788725 ]
 [100.18242972]
 [100.1866265 ]
 [100.18723767]
 [100.19345109]
 [100.1947246 ]
 [100.20115673]
 [100.21136303]
 [100.21847278]
 [100.22206484]
 [100.23131436]
 [100.2421285 ]
 [100.2438263 ]
 [100.24619146]
 [100.24773341]
 [100.25438697]
 [100.25748679]
 [100.26048523]
 [100.26844877]
 [100.27190073]
 [100.27748488]
 [100.28220988]
 [100.28509817]
 [100.28792358]
 [100.29217801]
 [100.29380332]
 [100.2959329 ]
 [100.29822206]
 [100.30361343]
 [100.30787317]
 [100.31021263]
 [100.3111769 ]
 [100.31312011]
 [100.31698561]
 [100.31932445]
 [100.32511081]
 [100.33058964]
 [100.33338659]
 [100.3337823 ]
 [100.33666975]
 [100.34491567]
 [100.35114208]
 [100.35322163]
 [100.35407243]
 [100.35795593]
 [100.36077392]
 [100.37418416]
 [100.37607351]
 [100.37948831]
 [100.38452591]
 [100.3907214 ]
 [100.39669143]
 [100.40380013]
 [100.40625564]
 [100.4097989 ]
 [100.41798227]
 [100.42170184]
 [100.43491769]
 [100.44646185]
 [100.45575547]
 [100.46105668]
 [100.46568786]
 [100.47055721]
 [100.48013789]
 [100.48396315]
 [100.49442944]
 [100.49597702]
 [100.50273889]
 [100.51067412]
 [100.51714866]
 [100.51863055]
 [100.51979101]
 [100.52145276]
 [100.52243225]
 [100.52796847]
 [100.53132506]
 [100.53530194]
 [100.54034768]
 [100.5418776 ]
 [100.55213009]
 [100.55404294]
 [100.55952065]
 [100.56351538]
 [100.57264337]
 [100.58025795]
 [100.58189835]
 [100.59069279]
 [100.59239753]
 [100.59693449]
 [100.606058  ]
 [100.61322165]
 [100.6338534 ]
 [100.63618886]
 [100.64109484]
 [100.64565371]
 [100.64680692]
 [100.65472057]
 [100.65914533]
 [100.66021672]
 [100.66160247]
 [100.66712716]
 [100.67125844]
 [100.67982748]
 [100.68719572]
 [100.70324637]
 [100.70523319]
 [100.70989619]
 [100.71593776]
 [100.72736094]
 [100.72801278]
 [100.73253153]
 [100.73585305]
 [100.73776128]
 [100.74613375]
 [100.75163874]
 [100.75652047]
 [100.7612883 ]
 [100.76688004]
 [100.7711027 ]
 [100.77302758]
 [100.78734009]
 [100.79304895]
 [100.79977334]
 [100.80327545]
 [100.812918  ]
 [100.81677271]
 [100.82525499]
 [100.82835423]
 [100.83833854]
 [100.8467093 ]
 [100.84753249]
 [100.85583914]
 [100.8697842 ]
 [100.88171913]
 [100.89285549]
 [100.89722041]
 [100.90101114]
 [100.90861622]
 [100.91867612]
 [100.926387  ]
 [100.93247918]
 [100.93816627]
 [100.94020219]
 [100.95222729]
 [100.95808321]
 [100.96650558]
 [100.96888985]
 [100.97421184]
 [100.98215617]
 [100.98708108]
 [100.99411384]
 [101.00967314]
 [101.0167916 ]
 [101.02521024]
 [101.03119952]
 [101.05488307]
 [101.06645037]
 [101.07523017]
 [101.09380424]
 [101.10272043]
 [101.11487407]
 [101.12036627]
 [101.12250128]
 [101.12834666]
 [101.13473127]
 [101.15065807]
 [101.16407227]
 [101.17134221]
 [101.17889585]
 [101.19553193]
 [101.20830729]
 [101.21627389]
 [101.23163602]
 [101.23573211]
 [101.25047096]
 [101.25588971]
 [101.27091366]
 [101.27367392]
 [101.28365247]
 [101.29092617]
 [101.3053029 ]
 [101.33168023]
 [101.34546385]
 [101.36040088]
 [101.37316815]
 [101.38287385]
 [101.39934904]
 [101.41204826]
 [101.44216852]
 [101.45820176]
 [101.47450723]
 [101.48887376]
 [101.4948088 ]
 [101.50715602]
 [101.53187094]
 [101.54580325]
 [101.55801359]
 [101.57190396]
 [101.59717043]
 [101.60813684]
 [101.61625087]
 [101.64023951]
 [101.68708264]
 [101.7094543 ]
 [101.72388024]
 [101.73744567]
 [101.75844922]
 [101.79146953]
 [101.84308555]
 [101.89561866]
 [101.94624784]
 [102.0557121 ]
 [102.09607112]
 [102.11007864]
 [102.13562221]
 [102.16000502]
 [102.20025565]
 [102.2291444 ]
 [102.29246   ]
 [102.41640111]
 [102.49280123]
 [102.54008131]
 [102.56961025]
 [102.66068064]
 [102.73576462]
 [102.78364945]
 [102.8212558 ]
 [102.85235453]
 [102.86144875]
 [102.95296178]
 [102.97072559]
 [103.01366978]
 [103.06660592]
 [103.08746936]
 [103.10774066]
 [103.18386065]
 [103.1974555 ]
 [103.21942494]
 [103.25628239]
 [103.27551544]
 [103.28444714]
 [103.32567628]
 [103.34293317]
 [103.34805115]
 [103.35087887]
 [103.36876911]
 [103.3802271 ]
 [103.39218645]
 [103.40087679]
 [103.40761259]
 [103.433298  ]
 [103.43469473]
 [103.44215478]
 [103.46353066]
 [103.48473198]
 [103.50300287]
 [103.51150623]
 [103.52625105]
 [103.53904534]
 [103.55558203]
 [103.56954784]
 [103.58818009]
 [103.62748604]
 [103.64020453]
 [103.6460137 ]
 [103.65413174]
 [103.66431069]
 [103.67496135]
 [103.67959746]
 [103.68972995]
 [103.70540889]
 [103.71072539]
 [103.72643026]
 [103.7387804 ]
 [103.75053223]
 [103.76726951]
 [103.77097425]
 [103.78151425]
 [103.79218615]
 [103.79974346]
 [103.8053964 ]
 [103.82310169]
 [103.83017286]
 [103.83695119]
 [103.85429998]
 [103.86651389]
 [103.87198796]
 [103.8755917 ]
 [103.87819252]
 [103.89039364]
 [103.90742097]
 [103.91502758]
 [103.92142579]
 [103.92873513]
 [103.93075752]
 [103.93591991]
 [103.94235892]
 [103.94712484]
 [103.95467387]
 [103.97186737]
 [103.98758498]
 [104.00222588]
 [104.00909979]
 [104.01026939]
 [104.01823861]
 [104.02335834]
 [104.03019505]
 [104.03833934]
 [104.04116164]
 [104.05059421]
 [104.06096964]
 [104.07581408]
 [104.0830009 ]
 [104.09334124]
 [104.09774913]
 [104.10276591]
 [104.10940203]
 [104.11797941]
 [104.12134766]
 [104.12502316]
 [104.13111029]
 [104.14250446]
 [104.15782763]
 [104.17189144]
 [104.17494104]
 [104.18148807]
 [104.18330754]
 [104.19624935]
 [104.20321126]
 [104.21580145]
 [104.22022836]
 [104.22687317]
 [104.23270659]
 [104.23833523]
 [104.24270712]
 [104.25074373]
 [104.25972283]
 [104.26455508]
 [104.26811074]
 [104.27976089]
 [104.28642847]
 [104.29595622]
 [104.32163747]
 [104.32676421]
 [104.32935765]
 [104.33752607]
 [104.34096665]
 [104.34370548]
 [104.34646461]
 [104.35194728]
 [104.36125276]
 [104.37462754]
 [104.37753593]
 [104.37846885]
 [104.38421819]
 [104.3891483 ]
 [104.39102852]
 [104.39918716]
 [104.40327224]
 [104.41187851]
 [104.41409015]
 [104.41926622]
 [104.42317624]
 [104.42502016]
 [104.42943485]
 [104.43805722]
 [104.44548409]
 [104.45125601]
 [104.45458261]
 [104.45702049]
 [104.45757247]
 [104.45847016]
 [104.45956103]
 [104.4637195 ]
 [104.46807216]
 [104.47108469]
 [104.48212837]
 [104.48720286]
 [104.49075758]
 [104.49354847]
 [104.49641255]
 [104.50329096]
 [104.50391262]
 [104.50933105]
 [104.51310765]
 [104.51481069]
 [104.52224746]
 [104.52552349]
 [104.52870643]
 [104.53400011]
 [104.5374438 ]
 [104.54405065]
 [104.55657944]
 [104.56704536]
 [104.57248277]
 [104.57912958]
 [104.58602991]
 [104.58750754]
 [104.58869331]
 [104.5892115 ]
 [104.59388725]
 [104.59962199]
 [104.60389381]
 [104.61526896]
 [104.62438356]
 [104.63220376]
 [104.63883049]
 [104.64398644]
 [104.64577133]
 [104.64789793]
 [104.65054534]
 [104.6555658 ]
 [104.66429473]
 [104.67156847]
 [104.67789286]
 [104.68342484]
 [104.68929638]
 [104.69709219]
 [104.69993556]
 [104.70462457]
 [104.70798393]
 [104.71762995]
 [104.72786378]
 [104.73348191]
 [104.74290948]
 [104.74669045]
 [104.75233897]
 [104.75632635]
 [104.75904651]
 [104.76274801]
 [104.76495634]
 [104.77139936]
 [104.77530262]
 [104.77706217]
 [104.78032574]
 [104.7853258 ]
 [104.78948506]
 [104.79428705]
 [104.80132648]
 [104.80562329]
 [104.81965355]
 [104.82275197]
 [104.82855141]
 [104.83217868]
 [104.83872642]
 [104.84152532]
 [104.84234998]
 [104.84526783]
 [104.85268041]
 [104.85646403]
 [104.85935479]
 [104.86224124]
 [104.86333151]
 [104.86925412]
 [104.87545747]
 [104.87956291]
 [104.88428036]
 [104.89149895]
 [104.89793648]
 [104.90228365]
 [104.9063931 ]
 [104.90998766]
 [104.91147202]
 [104.92266036]
 [104.92848585]
 [104.94153325]
 [104.9447166 ]
 [104.95072478]
 [104.95376709]
 [104.95815215]
 [104.96060681]
 [104.96292709]
 [104.97390186]
 [104.97695455]
 [104.98079441]
 [104.98319189]
 [104.98969234]
 [104.99315316]
 [105.00342933]
 [105.00558349]
 [105.00680353]
 [105.00848288]
 [105.01656302]
 [105.0380369 ]
 [105.04255558]
 [105.04910392]
 [105.05199485]
 [105.05436428]
 [105.05791747]
 [105.059141  ]
 [105.06169737]
 [105.06495652]
 [105.07230193]
 [105.07971086]
 [105.0837342 ]
 [105.08793748]
 [105.09512689]
 [105.09750732]
 [105.10020188]
 [105.1069159 ]
 [105.10911915]
 [105.11990179]
 [105.12291614]
 [105.12350927]
 [105.13286495]
 [105.14892296]
 [105.15375202]
 [105.15703377]
 [105.15934564]
 [105.1629746 ]
 [105.16860627]
 [105.17405465]
 [105.17515461]
 [105.18201387]
 [105.18862692]
 [105.19348464]
 [105.19632428]
 [105.20763857]
 [105.21408683]
 [105.21918674]
 [105.22071327]
 [105.2286184 ]
 [105.23127896]
 [105.23414638]
 [105.23865928]
 [105.240734  ]
 [105.2441326 ]
 [105.24953496]
 [105.25860996]
 [105.26303157]
 [105.26843374]
 [105.27320345]
 [105.27750014]
 [105.28404726]
 [105.29059379]
 [105.29558394]
 [105.30302532]
 [105.30905016]
 [105.31545487]
 [105.31806906]
 [105.32643529]
 [105.33004279]
 [105.3335749 ]
 [105.33686491]
 [105.34406981]
 [105.34690985]
 [105.35315132]
 [105.35871032]
 [105.36275627]
 [105.36920171]
 [105.3775623 ]
 [105.38206012]
 [105.38452292]
 [105.3864759 ]
 [105.39414001]
 [105.39638172]
 [105.40664586]
 [105.42069361]
 [105.43742153]
 [105.44460071]
 [105.44684615]
 [105.44967752]
 [105.45745609]
 [105.46304598]
 [105.46500253]
 [105.47138044]
 [105.47817089]
 [105.48071739]
 [105.4843876 ]
 [105.48580869]
 [105.49604751]
 [105.49945672]
 [105.50232879]
 [105.50406277]
 [105.51082306]
 [105.51972903]
 [105.52684745]
 [105.53092374]
 [105.53305513]
 [105.53593164]
 [105.54936179]
 [105.5510153 ]
 [105.55362435]
 [105.5587146 ]
 [105.56481448]
 [105.57384994]
 [105.58535817]
 [105.58975368]
 [105.59939469]
 [105.60672079]
 [105.61223613]
 [105.61547735]
 [105.61653577]
 [105.61960059]
 [105.62769003]
 [105.63407597]
 [105.64312303]
 [105.64465797]
 [105.64662769]
 [105.65567738]
 [105.65933895]
 [105.66429752]
 [105.67507155]
 [105.67988467]
 [105.68165387]
 [105.6887387 ]
 [105.6948339 ]
 [105.70191541]
 [105.70962269]
 [105.71367922]
 [105.72284829]
 [105.72540307]
 [105.73007496]
 [105.73452141]
 [105.73632469]
 [105.74448181]
 [105.74882713]
 [105.75268437]
 [105.75606348]
 [105.76327621]
 [105.76948024]
 [105.77079758]
 [105.77384903]
 [105.77752127]
 [105.77990312]
 [105.78426414]
 [105.78919358]
 [105.79242443]
 [105.79803891]
 [105.80878908]
 [105.81124446]
 [105.81767151]
 [105.8199971 ]
 [105.82086749]
 [105.8333895 ]
 [105.85327953]
 [105.86048867]
 [105.86315592]
 [105.87149725]
 [105.87482485]
 [105.87714976]
 [105.87874751]
 [105.89423123]
 [105.90256614]
 [105.90388445]
 [105.90906082]
 [105.91723899]
 [105.9217969 ]
 [105.94340463]
 [105.96241739]
 [105.96558089]
 [105.96721702]
 [105.96905749]
 [105.97541034]
 [105.98314705]
 [105.99401167]
 [105.99903862]
 [106.00936362]
 [106.01568582]
 [106.02161592]
 [106.02922932]
 [106.0344201 ]
 [106.04276544]
 [106.05426389]
 [106.06330133]
 [106.07066554]
 [106.08735299]
 [106.09774699]
 [106.10965516]
 [106.11894293]
 [106.1318016 ]
 [106.15777725]
 [106.19457802]
 [106.20158185]
 [106.21468966]
 [106.22863512]
 [106.23772792]
 [106.24430929]
 [106.26311516]
 [106.27756147]
 [106.28903937]
 [106.3008005 ]
 [106.31518297]
 [106.32531692]
 [106.33263179]
 [106.3521046 ]
 [106.3857955 ]
 [106.40061052]
 [106.41222871]
 [106.44675019]
 [106.4972832 ]
 [106.52193599]
 [106.53237681]
 [106.56949371]
 [106.59239143]
 [106.62398443]
 [106.65687429]
 [106.67078906]
 [106.68513339]
 [106.72518567]
 [106.74708482]
 [106.75421224]
 [106.76905571]
 [106.79350368]
 [106.83658006]
 [106.86383404]
 [106.87564121]
 [106.87960379]
 [106.9023011 ]
 [106.91059839]
 [106.96887818]
 [107.06049536]
 [107.08452843]
 [107.13773194]
 [107.16478988]
 [107.17755065]
 [107.25343046]
 [107.39442513]
 [107.41802732]
 [107.50478648]
 [107.6271605 ]
 [107.79680694]]


from sklearn.preprocessing import normalize as Norm, Normalizer as Normlzr

X = [
    [1.0, -1.0, 2.0],
    [2.0, 0.0, 0.0],
    [0.0, 1.0, -1.0],
]

# Function form: L2-normalise the rows of X.
X_normalized = Norm(X, norm='l2')
print(X_normalized, "\n")

# Estimator form of the same operation.
normalizer = Normlzr()
normalizer.fit(X)  # fit does nothing
normalizer.transform(X)
print(normalizer.transform([[-1.0, 1.0, 0.0]]))

[[ 0.40824829 -0.40824829  0.81649658]
 [ 1.          0.          0.        ]
 [ 0.          0.70710678 -0.70710678]] 

[[-0.70710678  0.70710678  0.        ]]


enc = preprocessing.OrdinalEncoder()
X = [
    ['male', 'from US', 'uses Safari'],
    ['female', 'from Europe', 'uses Firefox'],
]
enc.fit(X)

# Encode one new row using the categories learned from X.
enc.transform([['female', 'from US', 'uses Safari']])

array([[0., 1., 1.]])


enc = preprocessing.OneHotEncoder()
X = [
    ['male', 'from US', 'uses Safari'],
    ['female', 'from Europe', 'uses Firefox'],
]
enc.fit(X)

# Two new rows; .toarray() densifies the encoder output for display.
X_new = [
    ['female', 'from US', 'uses Safari'],
    ['male', 'from Europe', 'uses Safari'],
]
enc.transform(X_new).toarray()

array([[1., 0., 0., 1., 0., 1.],
       [0., 1., 1., 0., 0., 1.]])


# Categories recorded by the fitted encoder, one array per input column.
enc.categories_

[array(['female', 'male'], dtype=object),
 array(['from Europe', 'from US'], dtype=object),
 array(['uses Firefox', 'uses Safari'], dtype=object)]


# Full category lists, supplied up front instead of being inferred from X.
genders = ['female', 'male']
locations = ['from Africa', 'from Asia', 'from Europe', 'from US']
browsers = ['uses Chrome', 'uses Firefox', 'uses IE', 'uses Safari']

enc = preprocessing.OneHotEncoder(categories=[genders, locations, browsers])

# Note: missing categorical values for the 2nd and 3rd feature
X = [
    ['male', 'from US', 'uses Safari'],
    ['female', 'from Europe', 'uses Firefox'],
]

enc.fit(X)
enc.transform([['female', 'from Asia', 'uses Chrome']]).toarray()

array([[1., 0., 0., 1., 0., 0., 1., 0., 0., 0.]])


enc = preprocessing.OneHotEncoder(handle_unknown='ignore')
X = [
    ['male', 'from US', 'uses Safari'],
    ['female', 'from Europe', 'uses Firefox'],
]

# 'from Asia' and 'uses Chrome' do not occur in X.
enc.fit(X)
enc.transform([['female', 'from Asia', 'uses Chrome']]).toarray()

array([[1., 0., 0., 0., 0., 0.]])


X = [
    ['male', 'from US', 'uses Safari'],
    ['female', 'from Europe', 'uses Firefox'],
]

# drop='first': see the printed categories and the resulting encoding.
drop_enc = preprocessing.OneHotEncoder(drop='first')
drop_enc.fit(X)
print(drop_enc.categories_)
print(drop_enc.transform(X).toarray())

[array(['female', 'male'], dtype=object), array(['from Europe', 'from US'], dtype=object), array(['uses Firefox', 'uses Safari'], dtype=object)]
[[1. 1. 1.]
 [0. 0. 0.]]


X = [
    ['male', 'US', 'Safari'],
    ['female', 'Europe', 'Firefox'],
    ['female', 'Asia', 'Chrome'],
]

# drop='if_binary': only the first column has exactly two categories here.
drop_enc = preprocessing.OneHotEncoder(drop='if_binary')
drop_enc.fit(X)
print(drop_enc.categories_)
print(drop_enc.transform(X).toarray())

[array(['female', 'male'], dtype=object), array(['Asia', 'Europe', 'US'], dtype=object), array(['Chrome', 'Firefox', 'Safari'], dtype=object)]
[[1. 0. 0. 1. 0. 0. 1.]
 [0. 0. 1. 0. 0. 1. 0.]
 [0. 1. 0. 0. 1. 0. 0.]]


# One column contains np.nan and the other contains None.
X = [
    ['male', 'Safari'],
    ['female', None],
    [np.nan, 'Firefox'],
]

enc = preprocessing.OneHotEncoder(handle_unknown='error')
enc.fit(X)
print(enc.categories_)
print(enc.transform(X).toarray())

[array(['female', 'male', nan], dtype=object), array(['Firefox', 'Safari', None], dtype=object)]
[[0. 1. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 1.]
 [0. 0. 1. 1. 0. 0.]]


# A single column holding both None and np.nan.
X = [
    ['Safari'],
    [None],
    [np.nan],
    ['Firefox'],
]

enc = preprocessing.OneHotEncoder(handle_unknown='error')
enc.fit(X)
print(enc.categories_)
print(enc.transform(X).toarray())

[array(['Firefox', 'Safari', None, nan], dtype=object)]
[[0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]]


X = np.array([
    [-3.0, 5.0, 15],
    [0.0, 6.0, 14],
    [6.0, 3.0, 11],
])

# Per-column bin counts (3, 2, 2); each value is replaced by its bin index.
est = preprocessing.KBinsDiscretizer(n_bins=[3, 2, 2], encode='ordinal')
est.fit(X)
est.transform(X)

array([[0., 1., 1.],
       [1., 1., 1.],
       [2., 0., 0.]])


import pandas as pd
import numpy as np

# Bin edges and the label attached to each of the five resulting intervals.
bins = [0, 1, 13, 20, 60, np.inf]
labels = ['infant', 'kid', 'teen', 'adult', 'senior citizen']

# Wrap pd.cut so it can be used through the transformer interface.
cut_kwargs = {'bins': bins, 'labels': labels, 'retbins': False}
transformer = preprocessing.FunctionTransformer(pd.cut, kw_args=cut_kwargs)

X = np.array([0.2, 2, 15, 25, 97])
transformer.fit_transform(X)

['infant', 'kid', 'teen', 'adult', 'senior citizen']
Categories (5, object): ['infant' < 'kid' < 'teen' < 'adult' < 'senior citizen']


import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.tree import DecisionTreeRegressor


# Noisy sine samples: 100 random x positions drawn from uniform(-3, 3).
rnd = np.random.RandomState(42)
X = rnd.uniform(-3, 3, size=100)
y = np.sin(X) + rnd.normal(size=len(X)) / 3
X = X.reshape(-1, 1)

# One-hot encoded 10-bin version of the single feature.
enc = KBinsDiscretizer(n_bins=10, encode='onehot')
X_binned = enc.fit_transform(X)

# predict with original dataset
line = np.linspace(-3, 3, 1000, endpoint=False).reshape(-1, 1)
reg = LinearRegression().fit(X, y)

fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=True, figsize=(10, 4))

# Left panel: both models fitted on the raw feature.
ax1.plot(
    line, reg.predict(line),
    linewidth=2, color='green', label="linear regression",
)
reg = DecisionTreeRegressor(min_samples_split=3, random_state=0).fit(X, y)
ax1.plot(
    line, reg.predict(line),
    linewidth=2, color='red', label="decision tree",
)
ax1.plot(X[:, 0], y, 'o', c='k')
ax1.legend(loc="best")
ax1.set_ylabel("Regression output")
ax1.set_xlabel("Input feature")
ax1.set_title("Result before discretization")

# predict with transformed dataset
line_binned = enc.transform(line)
reg = LinearRegression().fit(X_binned, y)

# Right panel: the same two models fitted on the binned feature.
ax2.plot(
    line, reg.predict(line_binned),
    linewidth=2, color='green', linestyle='-', label='linear regression',
)
reg = DecisionTreeRegressor(
    min_samples_split=3, random_state=0,
).fit(X_binned, y)
ax2.plot(
    line, reg.predict(line_binned),
    linewidth=2, color='red', linestyle=':', label='decision tree',
)
ax2.plot(X[:, 0], y, 'o', c='k')
# Faint vertical lines at the bin edges, spanning the current y-limits.
ax2.vlines(enc.bin_edges_[0], *plt.gca().get_ylim(), linewidth=1, alpha=.2)
ax2.legend(loc="best")
ax2.set_xlabel("Input feature")
ax2.set_title("Result after discretization")

plt.tight_layout()


import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning


# Spacing passed to np.arange when the background mesh is built below.
h = .02  # step size in the mesh


def get_name(estimator):
    """Return a display label for *estimator*.

    The label is the estimator's class name. When that class name is
    ``'Pipeline'``, the label is instead built recursively from the object
    stored at index 1 of every entry in ``estimator.steps``, with the
    individual labels joined by ``' + '``.
    """
    class_name = estimator.__class__.__name__
    if class_name != 'Pipeline':
        return class_name
    return ' + '.join(get_name(step[1]) for step in estimator.steps)

# (estimator, parameter grid) pairs; the grids are searched with
# GridSearchCV further down.
classifiers = [
    # plain linear models
    (
        LogisticRegression(random_state=0),
        {'C': np.logspace(-2, 7, 10)},
    ),
    (
        LinearSVC(random_state=0),
        {'C': np.logspace(-2, 7, 10)},
    ),
    # linear models on one-hot encoded bins
    (
        make_pipeline(
            KBinsDiscretizer(encode='onehot'),
            LogisticRegression(random_state=0),
        ),
        {
            'kbinsdiscretizer__n_bins': np.arange(2, 10),
            'logisticregression__C': np.logspace(-2, 7, 10),
        },
    ),
    (
        make_pipeline(
            KBinsDiscretizer(encode='onehot'),
            LinearSVC(random_state=0),
        ),
        {
            'kbinsdiscretizer__n_bins': np.arange(2, 10),
            'linearsvc__C': np.logspace(-2, 7, 10),
        },
    ),
    # remaining models
    (
        GradientBoostingClassifier(n_estimators=50, random_state=0),
        {'learning_rate': np.logspace(-4, 0, 10)},
    ),
    (
        SVC(random_state=0),
        {'C': np.logspace(-2, 7, 10)},
    ),
]

# Display label per entry, in the same order as ``classifiers``.
names = [get_name(estimator) for estimator, _grid in classifiers]

n_samples = 100
datasets = [
    make_moons(n_samples=n_samples, noise=0.2, random_state=0),
    make_circles(n_samples=n_samples, noise=0.2, factor=0.5, random_state=1),
    make_classification(
        n_samples=n_samples,
        n_features=2,
        n_redundant=0,
        n_informative=2,
        random_state=2,
        n_clusters_per_class=1,
    ),
]

# One row per dataset; one column for the input data plus one per classifier.
fig, axes = plt.subplots(
    nrows=len(datasets),
    ncols=len(classifiers) + 1,
    figsize=(21, 9),
)

# Colormap for the decision surface and a two-colour map for the points.
cm = plt.cm.PiYG
cm_bright = ListedColormap(['#b30065', '#178000'])

import warnings

# For every dataset: standardise, split, draw the input points, then run a
# grid search per classifier and draw its decision surface and test score.
# iterate over datasets
for ds_cnt, (X, y) in enumerate(datasets):
    print('\ndataset %d\n---------' % ds_cnt)

    # preprocess dataset, split into training and test part
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=.5, random_state=42)

    # create the grid for background colors
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # plot the dataset first
    ax = axes[ds_cnt, 0]
    if ds_cnt == 0:
        ax.set_title("Input data")
    # plot the training points
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,
               edgecolors='k')
    # and testing points
    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6,
               edgecolors='k')
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())

    # iterate over classifiers
    for est_idx, (name, (estimator, param_grid)) in enumerate(
            zip(names, classifiers)):
        ax = axes[ds_cnt, est_idx + 1]

        clf = GridSearchCV(estimator=estimator, param_grid=param_grid)
        # Silence only ConvergenceWarning while fitting. The stdlib context
        # manager is used instead of the private sklearn.utils._testing
        # helper; the previous warning filters are restored on exit.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=ConvergenceWarning)
            clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        print('%s: %.2f' % (name, score))

        # plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max]*[y_min, y_max].
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

        # put the result into a color plot
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

        # plot the training points
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,
                   edgecolors='k')
        # and testing points
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   edgecolors='k', alpha=0.6)
        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())

        # Column titles only on the first row; pipeline names are split
        # across lines at ' + '.
        if ds_cnt == 0:
            ax.set_title(name.replace(' + ', '\n'))
        # Test score in the lower-right corner, without the leading zero.
        ax.text(0.95, 0.06, ('%.2f' % score).lstrip('0'), size=15,
                bbox=dict(boxstyle='round', alpha=0.8, facecolor='white'),
                transform=ax.transAxes, horizontalalignment='right')


plt.tight_layout()

# Add suptitles above the figure
plt.subplots_adjust(top=0.90)
suptitles = [
    'Linear classifiers',
    'Feature discretization and linear classifiers',
    'Non-linear classifiers',
]
# Each group heading is anchored to the first-row axes in columns 1, 3, 5.
for i, suptitle in zip([1, 3, 5], suptitles):
    ax = axes[0, i]
    ax.text(1.05, 1.25, suptitle, transform=ax.transAxes,
            horizontalalignment='center', size='x-large')

dataset 0
---------
LogisticRegression: 0.86
LinearSVC: 0.86
KBinsDiscretizer + LogisticRegression: 0.86
KBinsDiscretizer + LinearSVC: 0.92
GradientBoostingClassifier: 0.90
SVC: 0.94

dataset 1
---------
LogisticRegression: 0.40
LinearSVC: 0.40
KBinsDiscretizer + LogisticRegression: 0.88
KBinsDiscretizer + LinearSVC: 0.86
GradientBoostingClassifier: 0.80
SVC: 0.84

dataset 2
---------
LogisticRegression: 0.98
LinearSVC: 0.98
KBinsDiscretizer + LogisticRegression: 0.94
KBinsDiscretizer + LinearSVC: 0.94
GradientBoostingClassifier: 0.88
SVC: 0.98


import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import KBinsDiscretizer
from sklearn.datasets import make_blobs


# KBinsDiscretizer binning strategies to compare.
strategies = ['uniform', 'quantile', 'kmeans']

n_samples = 200
random_state = 42

# Blob centres for the two clustered datasets.
centers_0 = np.array([[0, 0], [0, 5], [2, 4], [8, 8]])
centers_1 = np.array([[0, 0], [3, 1]])

# Dataset 0: points drawn uniformly from [-3, 3) in both features.
uniform_points = np.random.RandomState(random_state).uniform(
    -3, 3, size=(n_samples, 2))

# Dataset 1: four blobs of unequal size (10% / 40% / 10% / 40% of n_samples).
# make_blobs returns the samples first; only those are kept.
small_share = n_samples // 10
large_share = n_samples * 4 // 10
four_blobs = make_blobs(
    n_samples=[small_share, large_share, small_share, large_share],
    centers=centers_0,
    cluster_std=0.5,
    random_state=random_state,
)[0]

# Dataset 2: two blobs of unequal size (20% / 80% of n_samples).
two_blobs = make_blobs(
    n_samples=[n_samples // 5, n_samples * 4 // 5],
    centers=centers_1,
    cluster_std=0.5,
    random_state=random_state,
)[0]

# construct the datasets
X_list = [uniform_points, four_blobs, two_blobs]


figure = plt.figure(figsize=(14, 9))

# Grid layout: one row per dataset; one column for the raw input plus one
# column per binning strategy.
n_rows = len(X_list)
n_cols = len(strategies) + 1


def _frame_axes(ax, xx, yy):
    """Clip *ax* to the extent of the mesh and hide both sets of ticks."""
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())


for row, data in enumerate(X_list):
    # plt.subplot indices are 1-based and run row by row.
    row_start = row * n_cols + 1

    # First panel of the row: the samples as they are.
    ax = plt.subplot(n_rows, n_cols, row_start)
    ax.scatter(data[:, 0], data[:, 1], edgecolors='k')
    if row == 0:
        ax.set_title("Input data", size=14)

    # 300 x 300 mesh spanning the min/max of each feature; the flattened
    # mesh points are what gets discretized below.
    x_axis = np.linspace(data[:, 0].min(), data[:, 0].max(), 300)
    y_axis = np.linspace(data[:, 1].min(), data[:, 1].max(), 300)
    xx, yy = np.meshgrid(x_axis, y_axis)
    grid = np.c_[xx.ravel(), yy.ravel()]

    _frame_axes(ax, xx, yy)

    # Remaining panels: fit the discretizer on the samples, then encode the
    # mesh to visualise where the bins fall.
    for offset, strategy in enumerate(strategies, start=1):
        enc = KBinsDiscretizer(n_bins=4, encode='ordinal', strategy=strategy)
        grid_encoded = enc.fit(data).transform(grid)

        ax = plt.subplot(n_rows, n_cols, row_start + offset)

        # Overlay the bin index of each feature in turn (first feature,
        # then second), both semi-transparent so the two show through.
        for feature in (0, 1):
            bin_index = grid_encoded[:, feature].reshape(xx.shape)
            ax.contourf(xx, yy, bin_index, alpha=.5)

        ax.scatter(data[:, 0], data[:, 1], edgecolors='k')
        _frame_axes(ax, xx, yy)
        if row == 0:
            ax.set_title("strategy='%s'" % (strategy, ), size=14)

plt.tight_layout()


X = [[ 1., -1.,  2.],
     [ 2.,  0.,  0.],
     [ 0.,  1., -1.]]

# Binarizer with its default threshold. fit() is called only for API
# symmetry -- it does nothing for this transformer.
binarizer = preprocessing.Binarizer()
binarizer = binarizer.fit(X)
print(binarizer)
default_result = binarizer.transform(X)
print(default_result)

# Same data with a raised threshold; this instance is used without
# calling fit() first.
binarizer = preprocessing.Binarizer(threshold=1.1)
raised_result = binarizer.transform(X)
print(raised_result)

Binarizer()
[[1. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]]
[[0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 0.]]


import numpy as np
from sklearn.preprocessing import PolynomialFeatures

# Full degree-2 expansion of a 3 x 2 integer matrix.
X = np.arange(6).reshape(3, 2)
print(X, "\n")
quadratic = PolynomialFeatures(2)
poly = quadratic.fit_transform(X)
print(poly)
print()

# in some cases, only feature interaction terms are needed.
X = np.arange(9).reshape(3, 3)
print(X, "\n")
interactions = PolynomialFeatures(degree=3, interaction_only=True)
poly = interactions.fit_transform(X)
print(poly)

[[0 1]
 [2 3]
 [4 5]] 

[[ 1.  0.  1.  0.  0.  1.]
 [ 1.  2.  3.  4.  6.  9.]
 [ 1.  4.  5. 16. 20. 25.]]

[[0 1 2]
 [3 4 5]
 [6 7 8]] 

[[  1.   0.   1.   2.   0.   0.   2.   0.]
 [  1.   3.   4.   5.  12.  15.  20.  60.]
 [  1.   6.   7.   8.  42.  48.  56. 336.]]


import warnings
import numpy as np
from sklearn.preprocessing import FunctionTransformer as FT

# Turn any UserWarning whose message mentions "check_inverse" into an
# exception. The filter pattern is a regular expression matched from the
# start of the warning text, so it needs ".*" on both sides of the keyword
# to accept the keyword anywhere in the message. append=False inserts the
# filter at the front of the filter list so it takes precedence.
warnings.filterwarnings("error",
                        message=".*check_inverse.*",
                        category=UserWarning,
                        append=False)

# Wrap np.log1p as a transformer; validate=True asks FunctionTransformer to
# check the input before applying the function.
transformer = FT(np.log1p, validate=True)
X = np.array([[0, 1], [2, 3]])
transformer.transform(X)

array([[0.        , 0.69314718],
       [1.09861229, 1.38629436]])

Preprocessing

Standard Scaling

Min-Max Scaling and Max Abs Scaling

Scaling sparse data

Scaling with outliers with Robust Scaler

Scaling kernel matrices with KernelCenterer

Quantile Transforms

Quantile Mapping to a Uniform [0..1] Distribution

Power Mapping to a Gaussian Distribution

Example: Map data to Normal Distributions (Box-Cox, Yeo-Johnson)

Normalization

Categories to Integers

Categories to one-of-K ("One Hot")

Quantization, aka Binning

Example: Binning Continuous Features with KBinsDiscretizer

Example: Feature discretization

KBinsDiscretizer strategy comparisons

Feature Binarization

Generating polynomial features

Custom Transformers

Min-Max Scaling and Max Abs Scaling

Scaling sparse data

Scaling with outliers with Robust Scaler

Scaling kernel matrices with KernelCenterer

Quantile Mapping to a Uniform [0..1] Distribution

Power Mapping to a Gaussian Distribution

Quantization, aka Binning

Min-Max Scaling and Max Abs Scaling

Scaling with outliers with Robust Scaler

Scaling kernel matrices with KernelCenterer