Measures the similarity of the ground-truth labels (labels_true) vs a clustering algorithm's assigned labels (labels_pred).
Definition: the Rand index counts the pairs of samples that are assigned to the same cluster in both labels_pred & labels_true, or are different in both.
Advantages: the adjusted Rand index corrects for chance, so random (uniform) label assignments score close to 0.0 for any n_clusters and n_samples.
Drawbacks: requires knowledge of the ground-truth classes, which is rarely available in practice.
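As a sanity check on this definition, the pairwise count can be evaluated by brute force. The sketch below is an illustration (not part of the library API) and should agree with metrics.rand_score:
from itertools import combinations

def rand_index(a, b):
    # a pair "agrees" when it is co-clustered in both labelings,
    # or separated in both labelings
    pairs = list(combinations(range(len(a)), 2))
    agree = sum((a[i] == a[j]) == (b[i] == b[j]) for i, j in pairs)
    return agree / len(pairs)

print(rand_index([0, 0, 0, 1, 1, 1], [0, 0, 1, 1, 2, 2]))  # 0.666..., matching rand_score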
from sklearn import metrics
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]
labels_pred2 = [1, 1, 0, 0, 3, 3]
# ignoring permutations:
print(metrics.rand_score(labels_true, labels_pred))
# adjusted rand index - corrects for chance
print(metrics.adjusted_rand_score(labels_true, labels_pred))
# you can swap 0s for 1s and replace 2s with 3s in the predictions - same score
print(metrics.rand_score(labels_true, labels_pred2))
print(metrics.adjusted_rand_score(labels_true, labels_pred2))
# perfect labeling scores 1.0; both metrics are also symmetric
# (swapping the arguments has no effect), so they can be used for consensus scoring
labels_pred = labels_true[:]
print(metrics.rand_score(labels_true, labels_pred))
print(metrics.adjusted_rand_score(labels_true, labels_pred))
# poor (independent) labelings = low scores:
# the adjusted RI will be negative or near 0; the unadjusted RI, while lower,
# is not necessarily near 0
labels_true = [0, 0, 0, 0, 0, 0, 1, 1]
labels_pred = [0, 1, 2, 3, 4, 5, 5, 6]
print(metrics.rand_score(labels_true, labels_pred))
print(metrics.adjusted_rand_score(labels_true, labels_pred))
0.6666666666666666
0.24242424242424243
0.6666666666666666
0.24242424242424243
1.0
1.0
0.39285714285714285
-0.07207207207207207
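The "corrected for chance" property can be verified empirically. This minimal sketch (an illustration that previews the fuller random-labelings experiment later in this section) averages both scores over many random labelings; only the adjusted index lands near 0:
import numpy as np
from sklearn import metrics

rng = np.random.RandomState(0)
ref = rng.randint(0, 3, size=300)                        # arbitrary reference labeling
runs = [rng.randint(0, 3, size=300) for _ in range(100)]
print(np.mean([metrics.adjusted_rand_score(ref, r) for r in runs]))  # ~0.0
print(np.mean([metrics.rand_score(ref, r) for r in runs]))           # well above 0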
Mutual-information-based scores measure the agreement between the ground-truth assignments labels_true and the algorithm's assignments labels_pred, ignoring permutations.
from sklearn.metrics import adjusted_mutual_info_score as AMIS
from sklearn.metrics import mutual_info_score as MIS
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]
print(AMIS(labels_true, labels_pred))
# swap 0s & 1s; rename 2s to 3s: get same score:
labels_pred = [1, 1, 0, 0, 3, 3]
print(AMIS(labels_true, labels_pred))
# swapping arguments = no effect. can be used for consensus scoring.
print(AMIS(labels_pred, labels_true))
# perfect labeling = 1.0
labels_pred = labels_true[:]
print(AMIS(labels_true, labels_pred))
# (not true for mutual_info_score, which is not normalized & thus harder to judge)
print(MIS(labels_true, labels_pred))
# bad (independent) labelings can score below 0
labels_true = [0, 1, 2, 0, 3, 4, 5, 1]
labels_pred = [1, 1, 0, 0, 2, 2, 2, 2]
print(AMIS(labels_true, labels_pred))
0.2987924581708901
0.2987924581708901
0.2987924581708903
1.0
0.6931471805599452
-0.16666666666666655
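For intuition, the mutual information can be computed directly from the contingency matrix of the two labelings (shown at the end of this section). A minimal sketch, assuming the natural logarithm that mutual_info_score uses by default:
import numpy as np
from sklearn.metrics.cluster import contingency_matrix

a = [0, 0, 0, 1, 1, 1]
b = [0, 0, 1, 1, 2, 2]
P = contingency_matrix(a, b) / len(a)      # joint distribution of the two labelings
pa = P.sum(axis=1, keepdims=True)          # marginal of a
pb = P.sum(axis=0, keepdims=True)          # marginal of b
nz = P > 0                                 # skip empty cells (0 * log 0 = 0)
print((P[nz] * np.log(P[nz] / (pa @ pb)[nz])).sum())  # matches MIS(a, b)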
from sklearn import metrics
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]
# homogeneity: each cluster contains only members of a single class
print(metrics.homogeneity_score(labels_true, labels_pred))
# completeness: all members of a given class are assigned to the same cluster
print(metrics.completeness_score(labels_true, labels_pred))
# v-measure: harmonic mean of homogeneity and completeness
print(metrics.v_measure_score(labels_true, labels_pred))
# all three at once
print(metrics.homogeneity_completeness_v_measure(labels_true, labels_pred))
0.6666666666666669
0.420619835714305
0.5158037429793889
(0.6666666666666669, 0.420619835714305, 0.5158037429793889)
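Because the V-measure is the harmonic mean of homogeneity and completeness, the third value can be re-derived from the first two; a quick check using the scores above:
h, c = 0.6666666666666669, 0.420619835714305
print(2 * h * c / (h + c))  # reproduces v_measure_score: 0.5158...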
import numpy as np
import matplotlib.pyplot as plt
from time import time
from sklearn import metrics
# Compute the score between two random uniform cluster labelings.
# Both random labelings have the same number of clusters, for each value
# in n_clusters_range.
# When fixed_n_classes is not None, the first labeling is held fixed and
# plays the role of a ground-truth assignment with that many classes.
def uniform_labelings_scores(score_func, n_samples, n_clusters_range,
fixed_n_classes=None, n_runs=5, seed=42):
random_labels = np.random.RandomState(seed).randint
scores = np.zeros((len(n_clusters_range), n_runs))
if fixed_n_classes is not None:
labels_a = random_labels(low=0, high=fixed_n_classes, size=n_samples)
for i, k in enumerate(n_clusters_range):
for j in range(n_runs):
if fixed_n_classes is None:
labels_a = random_labels(low=0, high=k, size=n_samples)
labels_b = random_labels(low=0, high=k, size=n_samples)
scores[i, j] = score_func(labels_a, labels_b)
return scores
def ami_score(U, V):
return metrics.adjusted_mutual_info_score(U, V)
score_funcs = [
metrics.adjusted_rand_score,
metrics.v_measure_score,
ami_score,
metrics.mutual_info_score]
n_samples = 100
n_clusters_range = np.linspace(2, n_samples, 10).astype(int)
plots = []
names = []
for score_func in score_funcs:
print("Computing %s for %d values of n_clusters and n_samples=%d"
% (score_func.__name__, len(n_clusters_range), n_samples))
t0 = time()
scores = uniform_labelings_scores(score_func, n_samples, n_clusters_range)
print("done in %0.3fs" % (time() - t0))
plots.append(plt.errorbar(
n_clusters_range, np.median(scores, axis=1), scores.std(axis=1))[0])
names.append(score_func.__name__)
plt.title("Clustering measures for 2 random uniform labelings\n"
"with equal number of clusters")
plt.xlabel('Number of clusters (Number of samples is fixed to %d)' % n_samples)
plt.ylabel('Score value')
plt.legend(plots, names)
plt.ylim(bottom=-0.05, top=1.05)
# Random labeling with varying n_clusters against ground class labels
# with fixed number of clusters
n_samples = 1000
n_clusters_range = np.linspace(2, 100, 10).astype(int)
n_classes = 10
plt.figure(2)
plots = []
names = []
for score_func in score_funcs:
print("Computing %s for %d values of n_clusters and n_samples=%d"
% (score_func.__name__, len(n_clusters_range), n_samples))
t0 = time()
scores = uniform_labelings_scores(score_func, n_samples, n_clusters_range,
fixed_n_classes=n_classes)
print("done in %0.3fs" % (time() - t0))
plots.append(plt.errorbar(
n_clusters_range, scores.mean(axis=1), scores.std(axis=1))[0])
names.append(score_func.__name__)
plt.xlabel('#clusters (#samples is fixed to %d)' % n_samples)
plt.ylabel('Score value')
plt.ylim(bottom=-0.05, top=1.05)
plt.legend(plots, names)
plt.show()
Computing adjusted_rand_score for 10 values of n_clusters and n_samples=100
done in 0.051s
Computing v_measure_score for 10 values of n_clusters and n_samples=100
done in 0.055s
Computing ami_score for 10 values of n_clusters and n_samples=100
done in 0.598s
Computing mutual_info_score for 10 values of n_clusters and n_samples=100
done in 0.044s
Computing adjusted_rand_score for 10 values of n_clusters and n_samples=1000
done in 0.033s
Computing v_measure_score for 10 values of n_clusters and n_samples=1000
done in 0.050s
Computing ami_score for 10 values of n_clusters and n_samples=1000
done in 0.159s
Computing mutual_info_score for 10 values of n_clusters and n_samples=1000
done in 0.041s
from sklearn.metrics import fowlkes_mallows_score as FMI
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]
print(FMI(labels_true, labels_pred))
# permuting the predicted labels has no effect
labels_pred = [1, 1, 0, 0, 3, 3]
print(FMI(labels_true, labels_pred))
# perfect labeling scores 1.0
labels_pred = labels_true[:]
print(FMI(labels_true, labels_pred))
# bad (independent) labelings score near 0
labels_true = [0, 1, 2, 0, 3, 4, 5, 1]
labels_pred = [1, 1, 0, 0, 2, 2, 2, 2]
print(FMI(labels_true, labels_pred))
0.4714045207910317
0.4714045207910317
1.0
0.0
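FMI is the geometric mean of pairwise precision and recall. As an illustration (building on the pair confusion matrix covered at the end of this section), the score can be re-derived from the pair counts:
import numpy as np
from sklearn.metrics.cluster import pair_confusion_matrix

a = [0, 0, 0, 1, 1, 1]
b = [0, 0, 1, 1, 2, 2]
(tn, fp), (fn, tp) = pair_confusion_matrix(a, b)  # pair-level confusion counts
print(tp / np.sqrt((tp + fp) * (tp + fn)))        # matches FMI(a, b): 0.4714...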
from sklearn import metrics
from sklearn import datasets
from sklearn.cluster import KMeans
X, y = datasets.load_iris(return_X_y=True)
kmeans = KMeans(n_clusters=3, random_state=1).fit(X)
labels = kmeans.labels_
print(metrics.silhouette_score(X, labels, metric='euclidean'))
0.5528190123564091
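The score above is the mean of the per-sample silhouette coefficients, each defined as s = (b - a) / max(a, b), where a is the mean intra-cluster distance and b is the mean distance to the nearest other cluster. A quick check of that relationship, reusing X and labels from above:
from sklearn.metrics import silhouette_samples
print(silhouette_samples(X, labels).mean())  # reproduces silhouette_score: 0.5528...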
n_clusters values of 3, 5, and 6 are bad picks, due to clusters with below-average silhouette scores (and wide fluctuations in the sizes of the silhouette plots). n_clusters values of 2 and 4 appear ambivalent.
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
X, y = make_blobs(n_samples=500,
n_features=2,
centers=4,
cluster_std=1,
center_box=(-10.0, 10.0),
shuffle=True,
random_state=1) # For reproducibility
range_n_clusters = [2, 3, 4, 5, 6]
for n_clusters in range_n_clusters:
# Create a subplot with 1 row and 2 columns
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_size_inches(18, 7)
# The 1st subplot is the silhouette plot
    # The silhouette coefficient ranges from -1 to 1, but in this example all
    # values lie within [-0.1, 1]
    ax1.set_xlim([-0.1, 1])
# The (n_clusters+1)*10 is for inserting blank space between silhouette
# plots of individual clusters, to demarcate them clearly.
ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])
# Initialize clusterer
clusterer = KMeans(n_clusters=n_clusters, random_state=10)
cluster_labels = clusterer.fit_predict(X)
silhouette_avg = silhouette_score(X, cluster_labels)
print("For n_clusters =", n_clusters,
"Avg silhouette_score:\t", silhouette_avg)
# Silhouette scores per sample
sample_silhouette_values = silhouette_samples(X, cluster_labels)
y_lower = 10
for i in range(n_clusters):
# Aggregate & sort silhouette scores for samples in cluster i
ith_cluster_silhouette_values = \
sample_silhouette_values[cluster_labels == i]
ith_cluster_silhouette_values.sort()
size_cluster_i = ith_cluster_silhouette_values.shape[0]
y_upper = y_lower + size_cluster_i
color = cm.nipy_spectral(float(i) / n_clusters)
ax1.fill_betweenx(np.arange(y_lower, y_upper),
0, ith_cluster_silhouette_values,
facecolor=color, edgecolor=color, alpha=0.7)
# Label silhouette plots with cluster numbers at the middle
ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
# Compute the new y_lower for next plot
        y_lower = y_upper + 10  # leave 10 blank rows between consecutive cluster plots
ax1.set_title("Silhouette plot")
ax1.set_xlabel("Silhouette coefficients")
ax1.set_ylabel("Cluster label")
# The vertical line for average silhouette score of all the values
ax1.axvline(x=silhouette_avg, color="red", linestyle="--")
ax1.set_yticks([]) # Clear the yaxis labels / ticks
ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
# 2nd Plot showing the actual clusters formed
colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)
ax2.scatter(X[:, 0],
X[:, 1],
marker='.', s=30, lw=0, alpha=0.7, c=colors, edgecolor='k')
# Label clusters - white circles in centers
centers = clusterer.cluster_centers_
ax2.scatter(centers[:, 0], centers[:, 1], marker='o',
c="white", alpha=1, s=200, edgecolor='k')
for i, c in enumerate(centers):
ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1,
s=50, edgecolor='k')
ax2.set_title("Visualization.")
ax2.set_xlabel("1st feature")
ax2.set_ylabel("2nd feature")
plt.suptitle(("Silhouette analysis, KMeans, n_clusters = %d" % n_clusters),
fontsize=14, fontweight='bold')
plt.show()
For n_clusters = 2 Avg silhouette_score: 0.7049787496083262
For n_clusters = 3 Avg silhouette_score: 0.5882004012129721
For n_clusters = 4 Avg silhouette_score: 0.6505186632729437
For n_clusters = 5 Avg silhouette_score: 0.56376469026194
For n_clusters = 6 Avg silhouette_score: 0.4504666294372765
from sklearn import metrics
from sklearn import datasets
from sklearn.cluster import KMeans
X, y = datasets.load_iris(return_X_y=True)
model = KMeans(n_clusters=3, random_state=1).fit(X)
labels = model.labels_
print(metrics.calinski_harabasz_score(X, labels))
561.62775662962
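For intuition, the Calinski-Harabasz index is the ratio of between-cluster to within-cluster dispersion, scaled by (n_samples - k) / (k - 1); higher is better. A minimal sketch of that definition, reusing X and labels from the cell above:
import numpy as np
k = 3
overall_mean = X.mean(axis=0)
groups = [X[labels == i] for i in range(k)]
# between-cluster dispersion: size-weighted squared distances of centroids to the grand mean
between = sum(len(g) * ((g.mean(axis=0) - overall_mean) ** 2).sum() for g in groups)
# within-cluster dispersion: squared distances of points to their own centroid
within = sum(((g - g.mean(axis=0)) ** 2).sum() for g in groups)
print((between / within) * (len(X) - k) / (k - 1))  # reproduces the score above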
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
iris = datasets.load_iris()
X = iris.data
kmeans = KMeans(n_clusters=3, random_state=1).fit(X)
labels = kmeans.labels_
print(davies_bouldin_score(X, labels))
0.6619715465007511
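Davies-Bouldin averages, over clusters, the worst-case "similarity" to another cluster, where similarity compares within-cluster scatter to the distance between centroids; lower is better, with 0 as the minimum. A minimal sketch of the definition, reusing X and labels from above and assuming Euclidean distances:
import numpy as np
k = 3
cents = np.array([X[labels == i].mean(axis=0) for i in range(k)])
# s[i]: mean distance of cluster i's points to their centroid
s = np.array([np.linalg.norm(X[labels == i] - cents[i], axis=1).mean() for i in range(k)])
print(np.mean([max((s[i] + s[j]) / np.linalg.norm(cents[i] - cents[j])
                   for j in range(k) if j != i) for i in range(k)]))  # reproduces the score above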
from sklearn.metrics.cluster import contingency_matrix
x = ["a", "a", "a", "b", "b", "b"]
y = [0, 0, 1, 1, 2, 2]
print(contingency_matrix(x, y))
[[2 1 0]
 [0 1 2]]
from sklearn.metrics.cluster import pair_confusion_matrix as PCM
print(PCM([0, 0, 1, 1], [0, 0, 1, 1]))  # perfectly matched labelings
print(PCM([0, 0, 1, 1], [1, 1, 0, 0]))
[[8 0]
 [0 4]]
[[8 0]
 [0 4]]
print(PCM([0, 0, 1, 2], [0, 0, 1, 1]))
print(PCM([0, 0, 1, 1], [0, 0, 1, 2]))
[[8 2]
 [0 2]]
[[8 0]
 [2 2]]
# class members completely split across different clusters:
# the assignment is totally incomplete,
# hence the matrix has all-zero diagonal entries
print(PCM([0, 0, 0, 0], [0, 1, 2, 3]))
[[ 0  0]
 [12  0]]
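As a closing cross-check, the Rand index from the start of this section can be recovered from the pair confusion matrix: it is the fraction of pairs on the diagonal. A quick illustration with the same labelings as the Rand-index example:
import numpy as np
from sklearn.metrics import rand_score
C = PCM([0, 0, 0, 1, 1, 1], [0, 0, 1, 1, 2, 2])
print(np.trace(C) / C.sum())                               # 0.666...
print(rand_score([0, 0, 0, 1, 1, 1], [0, 0, 1, 1, 2, 2]))  # same value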