Measures the similarity of the ground-truth labels (labels_true) vs a clustering algorithm's assigned labels (labels_pred).
Definition: the Rand index counts the pairs of samples that are assigned to the same cluster in both labels_pred & labels_true, or are different in both.
Advantages: the adjusted Rand index corrects for chance, so random (uniform) label assignments score close to 0.0 for any n_clusters and n_samples.
Drawbacks: requires knowledge of the ground-truth classes, which is rarely available in practice.
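As a sanity check on this definition, the pairwise count can be evaluated by brute force. The sketch below is an illustration (not part of the library API) and should agree with metrics.rand_score:
from itertools import combinations

def rand_index(a, b):
    # a pair "agrees" when it is co-clustered in both labelings,
    # or separated in both labelings
    pairs = list(combinations(range(len(a)), 2))
    agree = sum((a[i] == a[j]) == (b[i] == b[j]) for i, j in pairs)
    return agree / len(pairs)

print(rand_index([0, 0, 0, 1, 1, 1], [0, 0, 1, 1, 2, 2]))  # 0.666..., matching rand_score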
from sklearn import metrics
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]
labels_pred2 = [1, 1, 0, 0, 3, 3]
# ignoring permutations:
print(metrics.rand_score(labels_true, labels_pred))
# adjusted rand index - corrects for chance
print(metrics.adjusted_rand_score(labels_true, labels_pred))
# you can swap 0s for 1s and replace 2s with 3s in the predictions - same score
print(metrics.rand_score(labels_true, labels_pred2))
print(metrics.adjusted_rand_score(labels_true, labels_pred2))
# perfect labeling scores 1.0; both metrics are also symmetric
# (swapping the arguments has no effect), so they can be used for consensus scoring
labels_pred = labels_true[:]
print(metrics.rand_score(labels_true, labels_pred))
print(metrics.adjusted_rand_score(labels_true, labels_pred))
# poor (independent) labelings = low scores:
# the adjusted RI will be negative or near 0; the unadjusted RI, while lower,
# is not necessarily near 0
labels_true = [0, 0, 0, 0, 0, 0, 1, 1]
labels_pred = [0, 1, 2, 3, 4, 5, 5, 6]
print(metrics.rand_score(labels_true, labels_pred))
print(metrics.adjusted_rand_score(labels_true, labels_pred))
0.6666666666666666
0.24242424242424243
0.6666666666666666
0.24242424242424243
1.0
1.0
0.39285714285714285
-0.07207207207207207
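The "corrected for chance" property can be verified empirically. This minimal sketch (an illustration that previews the fuller random-labelings experiment later in this section) averages both scores over many random labelings; only the adjusted index lands near 0:
import numpy as np
from sklearn import metrics

rng = np.random.RandomState(0)
ref = rng.randint(0, 3, size=300)                        # arbitrary reference labeling
runs = [rng.randint(0, 3, size=300) for _ in range(100)]
print(np.mean([metrics.adjusted_rand_score(ref, r) for r in runs]))  # ~0.0
print(np.mean([metrics.rand_score(ref, r) for r in runs]))           # well above 0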
Mutual-information-based scores measure the agreement between the ground-truth assignments labels_true and the algorithm's assignments labels_pred, ignoring permutations.
from sklearn.metrics import adjusted_mutual_info_score as AMIS
from sklearn.metrics import mutual_info_score as MIS
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]
print(AMIS(labels_true, labels_pred))
# swap 0s & 1s; rename 2s to 3s: get same score:
labels_pred = [1, 1, 0, 0, 3, 3]
print(AMIS(labels_true, labels_pred))
# swapping arguments = no effect. can be used for consensus scoring.
print(AMIS(labels_pred, labels_true))
# perfect labeling = 1.0
labels_pred = labels_true[:]
print(AMIS(labels_true, labels_pred))
# (not true for mutual_info_score, which is not normalized & thus harder to judge)
print(MIS(labels_true, labels_pred))
# bad (independent) labelings can score below 0
labels_true = [0, 1, 2, 0, 3, 4, 5, 1]
labels_pred = [1, 1, 0, 0, 2, 2, 2, 2]
print(AMIS(labels_true, labels_pred))
0.2987924581708901
0.2987924581708901
0.2987924581708903
1.0
0.6931471805599452
-0.16666666666666655
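For intuition, the mutual information can be computed directly from the contingency matrix of the two labelings (shown at the end of this section). A minimal sketch, assuming the natural logarithm that mutual_info_score uses by default:
import numpy as np
from sklearn.metrics.cluster import contingency_matrix

a = [0, 0, 0, 1, 1, 1]
b = [0, 0, 1, 1, 2, 2]
P = contingency_matrix(a, b) / len(a)      # joint distribution of the two labelings
pa = P.sum(axis=1, keepdims=True)          # marginal of a
pb = P.sum(axis=0, keepdims=True)          # marginal of b
nz = P > 0                                 # skip empty cells (0 * log 0 = 0)
print((P[nz] * np.log(P[nz] / (pa @ pb)[nz])).sum())  # matches MIS(a, b)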
from sklearn import metrics
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]
# homogeneity: each cluster contains only members of a single class
print(metrics.homogeneity_score(labels_true, labels_pred))
# completeness: all members of a given class are assigned to the same cluster
print(metrics.completeness_score(labels_true, labels_pred))
# v-measure: harmonic mean of homogeneity and completeness
print(metrics.v_measure_score(labels_true, labels_pred))
# all three at once
print(metrics.homogeneity_completeness_v_measure(labels_true, labels_pred))
0.6666666666666669
0.420619835714305
0.5158037429793889
(0.6666666666666669, 0.420619835714305, 0.5158037429793889)
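Because the V-measure is the harmonic mean of homogeneity and completeness, the third value can be re-derived from the first two; a quick check using the scores above:
h, c = 0.6666666666666669, 0.420619835714305
print(2 * h * c / (h + c))  # reproduces v_measure_score: 0.5158...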
import numpy as np
import matplotlib.pyplot as plt
from time import time
from sklearn import metrics
# Compute the score between two random uniform cluster labelings.
# Both random labelings have the same number of clusters, for each value
# in n_clusters_range.
# When fixed_n_classes is not None, the first labeling is held fixed and
# plays the role of a ground-truth assignment with that many classes.
def uniform_labelings_scores(score_func, n_samples, n_clusters_range,
fixed_n_classes=None, n_runs=5, seed=42):
random_labels = np.random.RandomState(seed).randint
scores = np.zeros((len(n_clusters_range), n_runs))
if fixed_n_classes is not None:
labels_a = random_labels(low=0, high=fixed_n_classes, size=n_samples)
for i, k in enumerate(n_clusters_range):
for j in range(n_runs):
if fixed_n_classes is None:
labels_a = random_labels(low=0, high=k, size=n_samples)
labels_b = random_labels(low=0, high=k, size=n_samples)
scores[i, j] = score_func(labels_a, labels_b)
return scores
def ami_score(U, V):
return metrics.adjusted_mutual_info_score(U, V)
score_funcs = [
metrics.adjusted_rand_score,
metrics.v_measure_score,
ami_score,
metrics.mutual_info_score]
n_samples = 100
n_clusters_range = np.linspace(2, n_samples, 10).astype(int)
plots = []
names = []
for score_func in score_funcs:
print("Computing %s for %d values of n_clusters and n_samples=%d"
% (score_func.__name__, len(n_clusters_range), n_samples))
t0 = time()
scores = uniform_labelings_scores(score_func, n_samples, n_clusters_range)
print("done in %0.3fs" % (time() - t0))
plots.append(plt.errorbar(
n_clusters_range, np.median(scores, axis=1), scores.std(axis=1))[0])
names.append(score_func.__name__)
plt.title("Clustering measures for 2 random uniform labelings\n"
"with equal number of clusters")
plt.xlabel('Number of clusters (Number of samples is fixed to %d)' % n_samples)
plt.ylabel('Score value')
plt.legend(plots, names)
plt.ylim(bottom=-0.05, top=1.05)
# Random labeling with varying n_clusters against ground class labels
# with fixed number of clusters
n_samples = 1000
n_clusters_range = np.linspace(2, 100, 10).astype(int)
n_classes = 10
plt.figure(2)
plots = []
names = []
for score_func in score_funcs:
print("Computing %s for %d values of n_clusters and n_samples=%d"
% (score_func.__name__, len(n_clusters_range), n_samples))
t0 = time()
scores = uniform_labelings_scores(score_func, n_samples, n_clusters_range,
fixed_n_classes=n_classes)
print("done in %0.3fs" % (time() - t0))
plots.append(plt.errorbar(
n_clusters_range, scores.mean(axis=1), scores.std(axis=1))[0])
names.append(score_func.__name__)
plt.xlabel('#clusters (#samples is fixed to %d)' % n_samples)
plt.ylabel('Score value')
plt.ylim(bottom=-0.05, top=1.05)
plt.legend(plots, names)
plt.show()
Computing adjusted_rand_score for 10 values of n_clusters and n_samples=100
done in 0.051s
Computing v_measure_score for 10 values of n_clusters and n_samples=100
done in 0.055s
Computing ami_score for 10 values of n_clusters and n_samples=100
done in 0.598s
Computing mutual_info_score for 10 values of n_clusters and n_samples=100
done in 0.044s
Computing adjusted_rand_score for 10 values of n_clusters and n_samples=1000
done in 0.033s
Computing v_measure_score for 10 values of n_clusters and n_samples=1000
done in 0.050s
Computing ami_score for 10 values of n_clusters and n_samples=1000
done in 0.159s
Computing mutual_info_score for 10 values of n_clusters and n_samples=1000
done in 0.041s
from sklearn.metrics import fowlkes_mallows_score as FMI
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]
print(FMI(labels_true, labels_pred))
# permuting the predicted labels has no effect
labels_pred = [1, 1, 0, 0, 3, 3]
print(FMI(labels_true, labels_pred))
# perfect labeling scores 1.0
labels_pred = labels_true[:]
print(FMI(labels_true, labels_pred))
# bad (independent) labelings score near 0
labels_true = [0, 1, 2, 0, 3, 4, 5, 1]
labels_pred = [1, 1, 0, 0, 2, 2, 2, 2]
print(FMI(labels_true, labels_pred))
0.4714045207910317
0.4714045207910317
1.0
0.0
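FMI is the geometric mean of pairwise precision and recall. As an illustration (building on the pair confusion matrix covered at the end of this section), the score can be re-derived from the pair counts:
import numpy as np
from sklearn.metrics.cluster import pair_confusion_matrix

a = [0, 0, 0, 1, 1, 1]
b = [0, 0, 1, 1, 2, 2]
(tn, fp), (fn, tp) = pair_confusion_matrix(a, b)  # pair-level confusion counts
print(tp / np.sqrt((tp + fp) * (tp + fn)))        # matches FMI(a, b): 0.4714...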
from sklearn import metrics
from sklearn import datasets
from sklearn.cluster import KMeans
X, y = datasets.load_iris(return_X_y=True)
kmeans = KMeans(n_clusters=3, random_state=1).fit(X)
labels = kmeans.labels_
print(metrics.silhouette_score(X, labels, metric='euclidean'))
0.5528190123564091
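The score above is the mean of the per-sample silhouette coefficients, each defined as s = (b - a) / max(a, b), where a is the mean intra-cluster distance and b is the mean distance to the nearest other cluster. A quick check of that relationship, reusing X and labels from above:
from sklearn.metrics import silhouette_samples
print(silhouette_samples(X, labels).mean())  # reproduces silhouette_score: 0.5528...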
n_clusters values of 3, 5, and 6 are bad picks, due to clusters with below-average silhouette scores (and wide fluctuations in the sizes of the silhouette plots). n_clusters values of 2 and 4 appear ambivalent.
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
X, y = make_blobs(n_samples=500,
n_features=2,
centers=4,
cluster_std=1,
center_box=(-10.0, 10.0),
shuffle=True,
random_state=1) # For reproducibility
range_n_clusters = [2, 3, 4, 5, 6]
for n_clusters in range_n_clusters:
# Create a subplot with 1 row and 2 columns
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_size_inches(18, 7)
# The 1st subplot is the silhouette plot
    # The silhouette coefficient ranges from -1 to 1, but in this example all
    # values lie within [-0.1, 1]
    ax1.set_xlim([-0.1, 1])
# The (n_clusters+1)*10 is for inserting blank space between silhouette
# plots of individual clusters, to demarcate them clearly.
ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])
# Initialize clusterer
clusterer = KMeans(n_clusters=n_clusters, random_state=10)
cluster_labels = clusterer.fit_predict(X)
silhouette_avg = silhouette_score(X, cluster_labels)
print("For n_clusters =", n_clusters,
"Avg silhouette_score:\t", silhouette_avg)
# Silhouette scores per sample
sample_silhouette_values = silhouette_samples(X, cluster_labels)
y_lower = 10
for i in range(n_clusters):
# Aggregate & sort silhouette scores for samples in cluster i
ith_cluster_silhouette_values = \
sample_silhouette_values[cluster_labels == i]
ith_cluster_silhouette_values.sort()
size_cluster_i = ith_cluster_silhouette_values.shape[0]
y_upper = y_lower + size_cluster_i
color = cm.nipy_spectral(float(i) / n_clusters)
ax1.fill_betweenx(np.arange(y_lower, y_upper),
0, ith_cluster_silhouette_values,
facecolor=color, edgecolor=color, alpha=0.7)
# Label silhouette plots with cluster numbers at the middle
ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
# Compute the new y_lower for next plot
        y_lower = y_upper + 10  # leave 10 blank rows between consecutive cluster plots
ax1.set_title("Silhouette plot")
ax1.set_xlabel("Silhouette coefficients")
ax1.set_ylabel("Cluster label")
# The vertical line for average silhouette score of all the values
ax1.axvline(x=silhouette_avg, color="red", linestyle="--")
ax1.set_yticks([]) # Clear the yaxis labels / ticks
ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
# 2nd Plot showing the actual clusters formed
colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)
ax2.scatter(X[:, 0],
X[:, 1],
marker='.', s=30, lw=0, alpha=0.7, c=colors, edgecolor='k')
# Label clusters - white circles in centers
centers = clusterer.cluster_centers_
ax2.scatter(centers[:, 0], centers[:, 1], marker='o',
c="white", alpha=1, s=200, edgecolor='k')
for i, c in enumerate(centers):
ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1,
s=50, edgecolor='k')
ax2.set_title("Visualization.")
ax2.set_xlabel("1st feature")
ax2.set_ylabel("2nd feature")
plt.suptitle(("Silhouette analysis, KMeans, n_clusters = %d" % n_clusters),
fontsize=14, fontweight='bold')
plt.show()
For n_clusters = 2 Avg silhouette_score: 0.7049787496083262
For n_clusters = 3 Avg silhouette_score: 0.5882004012129721
For n_clusters = 4 Avg silhouette_score: 0.6505186632729437
For n_clusters = 5 Avg silhouette_score: 0.56376469026194
For n_clusters = 6 Avg silhouette_score: 0.4504666294372765
from sklearn import metrics
from sklearn import datasets
from sklearn.cluster import KMeans
X, y = datasets.load_iris(return_X_y=True)
model = KMeans(n_clusters=3, random_state=1).fit(X)
labels = model.labels_
print(metrics.calinski_harabasz_score(X, labels))
561.62775662962
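For intuition, the Calinski-Harabasz index is the ratio of between-cluster to within-cluster dispersion, scaled by (n_samples - k) / (k - 1); higher is better. A minimal sketch of that definition, reusing X and labels from the cell above:
import numpy as np
k = 3
overall_mean = X.mean(axis=0)
groups = [X[labels == i] for i in range(k)]
# between-cluster dispersion: size-weighted squared distances of centroids to the grand mean
between = sum(len(g) * ((g.mean(axis=0) - overall_mean) ** 2).sum() for g in groups)
# within-cluster dispersion: squared distances of points to their own centroid
within = sum(((g - g.mean(axis=0)) ** 2).sum() for g in groups)
print((between / within) * (len(X) - k) / (k - 1))  # reproduces the score above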
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
iris = datasets.load_iris()
X = iris.data
kmeans = KMeans(n_clusters=3, random_state=1).fit(X)
labels = kmeans.labels_
print(davies_bouldin_score(X, labels))
0.6619715465007511
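Davies-Bouldin averages, over clusters, the worst-case "similarity" to another cluster, where similarity compares within-cluster scatter to the distance between centroids; lower is better, with 0 as the minimum. A minimal sketch of the definition, reusing X and labels from above and assuming Euclidean distances:
import numpy as np
k = 3
cents = np.array([X[labels == i].mean(axis=0) for i in range(k)])
# s[i]: mean distance of cluster i's points to their centroid
s = np.array([np.linalg.norm(X[labels == i] - cents[i], axis=1).mean() for i in range(k)])
print(np.mean([max((s[i] + s[j]) / np.linalg.norm(cents[i] - cents[j])
                   for j in range(k) if j != i) for i in range(k)]))  # reproduces the score above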
from sklearn.metrics.cluster import contingency_matrix
x = ["a", "a", "a", "b", "b", "b"]
y = [0, 0, 1, 1, 2, 2]
print(contingency_matrix(x, y))
[[2 1 0]
 [0 1 2]]
from sklearn.metrics.cluster import pair_confusion_matrix as PCM
print(PCM([0, 0, 1, 1], [0, 0, 1, 1]))  # perfectly matched labelings
print(PCM([0, 0, 1, 1], [1, 1, 0, 0]))
[[8 0]
 [0 4]]
[[8 0]
 [0 4]]
print(PCM([0, 0, 1, 2], [0, 0, 1, 1]))
print(PCM([0, 0, 1, 1], [0, 0, 1, 2]))
[[8 2]
 [0 2]]
[[8 0]
 [2 2]]
# class members completely split across different clusters:
# the assignment is totally incomplete,
# hence the matrix has all-zero diagonal entries
print(PCM([0, 0, 0, 0], [0, 1, 2, 3]))
[[ 0  0]
 [12  0]]
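As a closing cross-check, the Rand index from the start of this section can be recovered from the pair confusion matrix: it is the fraction of pairs on the diagonal. A quick illustration with the same labelings as the Rand-index example:
import numpy as np
from sklearn.metrics import rand_score
C = PCM([0, 0, 0, 1, 1, 1], [0, 0, 1, 1, 2, 2])
print(np.trace(C) / C.sum())                               # 0.666...
print(rand_score([0, 0, 0, 1, 1, 1], [0, 0, 1, 1, 2, 2]))  # same value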