import matplotlib.pyplot as plt
# Demonstrates plt.subplot(nrows, ncols, index) on a 2 x 3 grid of axes.
# Increase the vertical and horizontal gaps between the subplots.
plt.subplots_adjust(hspace=0.7, wspace=0.7)
# Cell 1 of the grid: a single large red circle marker at the origin.
plt.subplot(2, 3, 1)
plt.title("(2, 3, 1)")
plt.plot(0, 0, 'ro', ms=50)
# Cell 2 of the grid: a single large blue plus marker at the origin.
plt.subplot(2, 3, 2)
plt.title("(2, 3, 2)")
plt.plot(0, 0, 'b+', ms=50)
# Cell 3 of the grid: empty axes, only a title.
plt.subplot(2, 3, 3)
plt.title("(2, 3, 3)")
# Cell 5 of the grid: empty axes; cells 4 and 6 are never created.
plt.subplot(2, 3, 5)
plt.title("(2, 3, 5)")
plt.show()
from ipywidgets import interact, fixed
def multiply(a, b):
    """
    Returns the product of a and b.
    """
    product = a * b
    return product
# Each (min, max) integer tuple is turned into an integer slider for that argument.
interact(multiply, a=(1, 10), b=(1, 10));
interactive(children=(IntSlider(value=5, description='a', max=10, min=1), IntSlider(value=5, description='b', …
# fixed(10) pins b to the value 10, so only a gets a slider.
interact(multiply, a=(1, 10), b=fixed(10));
interactive(children=(IntSlider(value=5, description='a', max=10, min=1), Output()), _dom_classes=('widget-int…
import numpy as np
def f(n, color, sin, cos):
    """
    Plots sin(n*x) and/or cos(n*x) on 1000 evenly spaced points of [0, 7].
    n:
        Number multiplying x inside the trigonometric functions.
    color:
        Matplotlib color used for both curves.
    sin:
        If true, the sine curve is drawn with a dashed line.
    cos:
        If true, the cosine curve is drawn with a solid line.
    """
    x = np.linspace(0, 7, 1000)
    # (switch, function, extra positional format arguments) for each curve
    curves = ((sin, np.sin, ('--',)), (cos, np.cos, ()))
    for enabled, func, fmt in curves:
        if enabled:
            plt.plot(x, func(n*x), *fmt, color=color)
    plt.show()
# Static call first: both curves, in red, with n = 10.
f(n=10, color='r', sin=True, cos=True)
# Interactive version of the same plot:
#   n     - (min, max, step) float tuple -> float slider
#   color - dict of display name -> value passed to f -> dropdown
#   sin, cos - booleans -> checkboxes
interact(f, 
         n=(0., 10., 0.1),
         color={
             "red": 'r',
             "blue": 'b',
             "green": 'g',
             "black": 'k'
         },
         sin=True,
         cos=True
);
interactive(children=(FloatSlider(value=5.0, description='n', max=10.0), Dropdown(description='color', options…
Supervised learning:
Unsupervised learning:
from sklearn.datasets import make_blobs
# Generate 200 two-dimensional sample points grouped around 4 centers.
# The labels returned by make_blobs are discarded (unsupervised setting);
# random_state is fixed so that the same points are produced on every run.
X, _ = make_blobs(n_samples=200,
                  n_features=2,
                  centers=4, 
                  cluster_std=1.3,
                  random_state=10
)
# Plot the first coordinate against the second one.
plt.scatter(X[:, 0], X[:, 1]);
from sklearn.datasets import make_moons
# Generate 200 points of the "two moons" dataset with a little noise;
# the labels are discarded. No random_state is given, so the points
# differ from run to run.
X, _ = make_moons(n_samples=200, noise=0.07)
plt.scatter(X[:, 0], X[:, 1]);
Note: With this algorithm we need to specify how many clusters we want to get.
Choose k data points as the initial cluster centroids.
%config InlineBackend.figure_format = 'retina'
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
def get_distances(X, Y):
    """
    Given 2-dimensional numpy arrays X and Y
    returns a 2-dimensional array whose (i, j)-entry
    is the square of the Euclidean distance between
    the i-th row of X and the j-th row of Y.

    The inputs are converted to floating point before the computation,
    so integer data (for example uint8 pixel values) does not wrap
    around when rows are subtracted and squared.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    # Broadcasting: X becomes (rows_X, 1, dim), Y becomes (1, rows_Y, dim),
    # so the difference holds every pair of rows; summing the squares over
    # the last axis gives the (rows_X, rows_Y) matrix of squared distances.
    diff = X[:, np.newaxis, :] - Y[np.newaxis, :, :]
    return np.sum(diff**2, axis=-1)
def kmeans(X, n):
    """
    Implements the k-means algorithm.
    X:
        2-dimensional numpy array whose rows are coordinates of data points.
        Integer arrays are accepted; the computation is done on a
        floating point copy, X itself is not modified.
    n:
        Integer, the number of clusters.
    Returns:
        A list of tuples (labels, centers), one tuple for each iteration step.
        labels is a 1-dimensional array with labels of points in X, centers is
        a 2-dimensional array with coordinates of centroids of clusters.
        The last tuple on the list is the final clustering result.
    """
    # Work on a floating point copy: with integer data the centroid means
    # would be truncated when stored, and distances could overflow.
    X = np.array(X, dtype=float)
    h = X.shape[0]
    # Initial centroids: n distinct rows of X selected at random.
    centers = X[np.random.choice(h, size=n, replace=False)]
    steps = []
    while True:
        # Assign every point to its nearest centroid.
        d = get_distances(X, centers)
        labels = np.argmin(d, axis=1)
        steps.append((labels, centers))
        # Start from the current centroids so that a cluster which received
        # no points keeps its position. Taking the mean of an empty cluster
        # would give NaN, and NaN centroids never compare equal, so the
        # loop below would never terminate.
        new_centers = centers.copy()
        for j in range(n):
            members = X[labels == j]
            if len(members) > 0:
                new_centers[j] = np.mean(members, axis=0)
        # Converged once the centroids stop moving.
        if np.array_equal(centers, new_centers):
            break
        centers = new_centers
    return steps
def plot_clusters(X, labels=None, centers=None):
    """
    Draws a scatter plot of clusters together with their centroids.
    X:
        2-dimensional numpy array whose rows are coordinates of data points.
    labels:
        1-dimensional numpy array with labels of points in X. If omitted,
        a one-color colormap is built.
    centers:
        2-dimensional numpy array whose rows are coordinates of cluster
        centroids. If omitted, no centroids are drawn.
    """
    palette = ['tab:blue', 'tab:red', 'tab:green', 'tab:orange', 'tab:purple']
    # One color per distinct label (the palette has at most five colors).
    n_colors = 1 if labels is None else len(set(labels))
    colors = ListedColormap(palette[:n_colors])

    plt.figure(figsize=(6, 6))
    xs, ys = X[:, 0], X[:, 1]
    plt.scatter(xs, ys, c=labels, marker="+", cmap=colors)

    if centers is not None:
        # Centroids are colored by their row index, drawn as large
        # circles with a thick black edge.
        center_style = dict(c=range(centers.shape[0]),
                            s=250,
                            linewidths=3,
                            edgecolors="k",
                            cmap=colors)
        plt.scatter(centers[:, 0], centers[:, 1], **center_style)
    plt.show()
def plot_iteration(X, steps, k=0):
    """
    Shows the clustering obtained in one iteration step of k-means.
    X:
        2-dimensional numpy array whose rows are coordinates of data points.
    steps:
        A list of (labels, centers) tuples, as returned by the kmeans function.
    k:
        Index into the steps list selecting the iteration to plot.
    """
    step = steps[k]
    plot_clusters(X, step[0], step[1])
# Number of clusters requested from kmeans below.
n = 3
# 600 two-dimensional points around 3 centers; random_state is fixed so
# the same points are produced on every run.
X, y = make_blobs(n_samples=600,
                  n_features=2,
                  centers=3, 
                  cluster_std=2,
                  random_state=10
                 )
# Show the raw, unlabeled points.
plt.figure(figsize=(6, 6))
plt.plot(X[:, 0], X[:, 1], "+");
# Run k-means; steps holds one (labels, centers) tuple per iteration.
steps = kmeans(X, n)
# Inspect the third iteration (index 2).
steps[2]
(array([2, 2, 2, 2, 2, 0, 2, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 2, 0, 0, 1, 2,
        2, 2, 2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 1, 0, 1, 2, 1, 1, 2, 2, 2, 1,
        1, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 1, 2, 2, 0, 0, 1, 0, 1, 1, 2, 0,
        2, 0, 1, 2, 1, 1, 2, 1, 0, 2, 0, 1, 2, 1, 2, 1, 0, 0, 2, 2, 1, 1,
        2, 2, 2, 1, 0, 0, 1, 0, 2, 1, 1, 0, 2, 0, 0, 0, 2, 1, 0, 0, 1, 0,
        0, 2, 0, 1, 2, 1, 1, 2, 1, 0, 1, 2, 1, 1, 2, 1, 0, 0, 0, 0, 0, 1,
        0, 0, 0, 1, 0, 2, 1, 0, 2, 0, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 0, 0,
        1, 2, 2, 0, 2, 0, 2, 2, 1, 1, 2, 1, 1, 0, 1, 0, 1, 0, 1, 0, 2, 2,
        0, 1, 0, 0, 1, 0, 2, 1, 2, 2, 1, 1, 1, 2, 0, 2, 0, 1, 1, 0, 1, 0,
        0, 2, 2, 0, 0, 2, 1, 2, 1, 0, 1, 2, 0, 2, 2, 0, 0, 2, 1, 2, 2, 2,
        2, 0, 0, 1, 0, 1, 2, 0, 1, 0, 1, 1, 1, 2, 2, 0, 0, 0, 0, 2, 2, 1,
        2, 0, 1, 1, 0, 0, 2, 2, 2, 0, 0, 2, 2, 2, 1, 1, 2, 2, 2, 0, 1, 0,
        0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 2, 2, 2, 0, 2, 2, 1, 1, 1, 0, 2, 2,
        1, 2, 1, 1, 2, 0, 0, 2, 2, 2, 1, 0, 2, 0, 1, 1, 2, 0, 1, 0, 1, 0,
        2, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2, 0, 1, 1, 0, 2, 2, 2, 0, 2, 1, 0,
        2, 1, 1, 2, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 2, 2, 0, 1, 2, 2, 1,
        0, 1, 0, 1, 0, 0, 1, 0, 2, 0, 2, 0, 2, 0, 1, 2, 2, 0, 0, 2, 0, 1,
        0, 0, 2, 2, 1, 0, 2, 2, 1, 1, 0, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 0,
        1, 2, 0, 0, 2, 1, 2, 1, 0, 0, 0, 2, 0, 2, 2, 2, 0, 2, 0, 1, 0, 2,
        2, 1, 0, 0, 2, 2, 2, 1, 0, 2, 2, 2, 0, 2, 0, 0, 0, 2, 0, 1, 2, 2,
        2, 0, 1, 0, 1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1,
        0, 2, 1, 1, 1, 2, 1, 2, 0, 0, 1, 1, 2, 1, 2, 0, 2, 0, 1, 2, 0, 1,
        0, 0, 2, 0, 0, 1, 1, 0, 2, 1, 1, 1, 0, 0, 0, 0, 2, 1, 1, 1, 1, 0,
        0, 1, 0, 1, 0, 0, 0, 1, 2, 1, 2, 2, 2, 0, 2, 1, 2, 2, 1, 2, 0, 1,
        0, 1, 0, 2, 1, 1, 2, 0, 0, 1, 0, 0, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2,
        2, 0, 1, 0, 0, 2, 2, 1, 0, 1, 0, 0, 0, 2, 2, 2, 1, 1, 2, 0, 0, 1,
        0, 2, 2, 0, 2, 0, 1, 1, 0, 0, 0, 1, 0, 2, 0, 1, 1, 1, 1, 1, 2, 2,
        1, 2, 1, 2, 0, 1]),
 array([[-0.09439861, -5.58928855],
        [ 2.57971185,  4.96332167],
        [ 5.8159796 , -9.67547057]]))
# Plot the clustering at iteration index 3.
plot_iteration(X, steps, k=3)
# Slider over all recorded iterations; X and steps are passed through
# unchanged via fixed(), only k is controlled by the widget.
interact(plot_iteration,
        X = fixed(X), 
        steps=fixed(steps),
        k=(0, len(steps)-1)
        );
interactive(children=(IntSlider(value=0, description='k', max=4), Output()), _dom_classes=('widget-interact',)…
# Same experiment on the "two moons" dataset with 2 clusters.
moons, _ = make_moons(n_samples=400, noise=0.07)
steps = kmeans(moons, 2)
# Browse the iterations with a slider; only k is interactive.
interact(plot_iteration,
        X = fixed(moons), 
        steps=fixed(steps),
        k=(0, len(steps)-1)
        );
interactive(children=(IntSlider(value=0, description='k', max=4), Output()), _dom_classes=('widget-interact',)…
from sklearn.cluster import KMeans
KMeans?
# scikit-learn's k-means estimator, configured for 3 clusters.
km = KMeans(n_clusters=3)
# Same generator arguments (600 samples, 3 centers, cluster_std=2,
# random_state=10) as the earlier hand-written k-means experiment.
X, y = make_blobs(n_samples=600,
                  centers=3,
                  cluster_std=2,
                  random_state=10
)
X
array([[  6.24961769,  -9.00934014],
       [  4.95624841, -12.10661662],
       [  3.33614779, -11.18097865],
       ...,
       [  4.96651958, -11.8440634 ],
       [ -0.0815766 ,  -7.75275153],
       [  2.22743256,   6.09982258]])
y
array([0, 0, 0, 0, 0, 2, 0, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 0, 2, 2, 1, 0,
       0, 0, 0, 1, 2, 0, 1, 2, 0, 1, 2, 2, 1, 2, 1, 0, 1, 1, 0, 0, 0, 1,
       1, 0, 2, 2, 2, 0, 0, 1, 1, 0, 0, 1, 0, 0, 2, 2, 1, 2, 1, 1, 2, 2,
       0, 2, 1, 0, 1, 1, 0, 1, 2, 0, 2, 1, 0, 1, 0, 1, 2, 2, 0, 0, 1, 1,
       0, 0, 0, 1, 2, 2, 1, 2, 0, 1, 1, 2, 0, 2, 2, 2, 0, 1, 2, 2, 1, 2,
       2, 0, 2, 1, 0, 1, 1, 0, 1, 2, 1, 0, 1, 1, 0, 1, 2, 2, 2, 2, 2, 1,
       2, 2, 2, 1, 2, 0, 1, 2, 0, 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 2, 2,
       1, 0, 0, 2, 0, 2, 0, 2, 1, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 2, 0, 0,
       2, 1, 2, 2, 1, 2, 0, 1, 0, 0, 1, 1, 1, 0, 2, 0, 2, 1, 1, 2, 1, 2,
       2, 0, 0, 2, 2, 0, 1, 0, 1, 2, 1, 0, 2, 0, 0, 2, 2, 0, 1, 0, 0, 0,
       0, 2, 2, 1, 2, 1, 0, 2, 1, 2, 1, 1, 1, 0, 0, 2, 2, 0, 2, 0, 0, 1,
       0, 2, 1, 1, 2, 2, 0, 0, 0, 2, 2, 0, 0, 0, 1, 1, 0, 0, 0, 2, 1, 2,
       2, 2, 2, 2, 0, 0, 2, 2, 2, 1, 0, 0, 0, 2, 0, 0, 1, 1, 1, 2, 0, 0,
       1, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 2, 0, 2, 1, 1, 0, 2, 1, 2, 1, 2,
       0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 2, 1, 1, 2, 0, 0, 0, 2, 0, 1, 2,
       0, 1, 1, 0, 1, 2, 2, 2, 2, 1, 0, 1, 2, 1, 1, 0, 0, 2, 1, 0, 0, 1,
       2, 1, 2, 1, 2, 2, 1, 2, 0, 2, 0, 2, 0, 2, 1, 0, 2, 2, 2, 0, 2, 1,
       2, 2, 0, 0, 1, 2, 0, 0, 1, 1, 2, 2, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2,
       1, 0, 2, 2, 0, 1, 0, 1, 2, 2, 2, 0, 2, 0, 0, 0, 2, 0, 2, 1, 2, 0,
       0, 1, 2, 2, 0, 0, 0, 1, 2, 0, 0, 0, 2, 0, 2, 0, 2, 0, 2, 1, 2, 0,
       0, 2, 1, 2, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 2, 0, 1, 1, 1, 1, 1, 1,
       2, 0, 1, 1, 1, 0, 1, 0, 2, 2, 1, 1, 0, 1, 0, 2, 0, 2, 1, 0, 2, 1,
       2, 2, 0, 2, 2, 1, 1, 2, 0, 1, 1, 1, 2, 2, 2, 2, 0, 1, 1, 1, 1, 2,
       2, 1, 2, 1, 0, 2, 2, 1, 0, 1, 2, 0, 0, 2, 0, 1, 0, 0, 1, 0, 2, 1,
       2, 1, 2, 0, 1, 1, 0, 2, 2, 1, 2, 2, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0,
       0, 2, 1, 2, 2, 0, 0, 1, 2, 1, 2, 2, 2, 0, 0, 0, 1, 1, 0, 2, 2, 1,
       2, 0, 0, 2, 0, 2, 1, 1, 2, 2, 2, 1, 2, 0, 2, 1, 1, 1, 1, 1, 0, 0,
       1, 0, 1, 0, 2, 1])
km.fit(X)
KMeans(n_clusters=3)
km.labels_
array([2, 2, 2, 2, 2, 0, 2, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 2, 0, 0, 1, 2,
       2, 2, 2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 1, 0, 1, 2, 1, 1, 2, 2, 2, 1,
       1, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 1, 2, 2, 0, 0, 1, 0, 1, 1, 2, 0,
       2, 0, 1, 2, 1, 1, 2, 1, 0, 2, 0, 1, 2, 1, 2, 1, 0, 0, 2, 2, 1, 1,
       2, 2, 2, 1, 0, 0, 1, 0, 2, 1, 1, 0, 2, 0, 0, 0, 2, 1, 0, 0, 1, 0,
       0, 2, 0, 1, 2, 1, 1, 2, 1, 0, 1, 2, 1, 1, 2, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 2, 1, 0, 2, 0, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 0, 0,
       1, 2, 2, 0, 2, 0, 2, 2, 1, 1, 2, 1, 1, 0, 1, 0, 1, 0, 1, 0, 2, 2,
       0, 1, 0, 0, 1, 0, 2, 1, 2, 2, 1, 1, 1, 2, 0, 2, 0, 1, 1, 0, 1, 0,
       0, 2, 2, 0, 0, 2, 1, 2, 1, 0, 1, 2, 0, 2, 2, 0, 0, 2, 1, 2, 2, 2,
       2, 0, 0, 1, 0, 1, 2, 0, 1, 0, 1, 1, 1, 2, 2, 0, 0, 0, 0, 2, 2, 1,
       2, 0, 1, 1, 0, 0, 2, 2, 2, 0, 0, 2, 2, 2, 1, 1, 2, 2, 2, 0, 1, 0,
       0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 2, 2, 2, 0, 2, 2, 1, 1, 1, 0, 2, 2,
       1, 2, 1, 1, 2, 0, 0, 2, 2, 2, 1, 0, 2, 0, 1, 1, 2, 0, 1, 0, 1, 0,
       2, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2, 0, 1, 1, 0, 2, 2, 2, 0, 2, 1, 0,
       2, 1, 1, 2, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 2, 2, 0, 1, 2, 2, 1,
       0, 1, 0, 1, 0, 0, 1, 0, 2, 0, 2, 0, 2, 0, 1, 2, 2, 0, 0, 2, 0, 1,
       0, 0, 2, 2, 1, 0, 2, 2, 1, 1, 0, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 0,
       1, 2, 0, 0, 2, 1, 2, 1, 0, 0, 0, 2, 0, 2, 2, 2, 0, 2, 0, 1, 0, 2,
       2, 1, 0, 0, 2, 2, 2, 1, 0, 2, 2, 2, 2, 2, 0, 0, 0, 2, 0, 1, 2, 2,
       2, 0, 1, 0, 1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1,
       0, 2, 1, 1, 1, 2, 1, 2, 0, 0, 1, 1, 2, 1, 2, 0, 2, 0, 1, 2, 0, 1,
       0, 0, 2, 0, 0, 1, 1, 0, 2, 1, 1, 1, 0, 0, 0, 0, 2, 1, 1, 1, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 2, 1, 2, 2, 2, 0, 2, 1, 2, 2, 1, 2, 0, 1,
       0, 1, 0, 2, 1, 1, 2, 0, 0, 1, 0, 0, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2,
       2, 0, 1, 0, 0, 2, 2, 1, 0, 1, 0, 0, 0, 2, 2, 2, 1, 1, 2, 0, 0, 1,
       0, 2, 2, 0, 2, 0, 1, 1, 0, 0, 0, 1, 0, 2, 0, 1, 1, 1, 1, 1, 2, 2,
       1, 2, 1, 2, 0, 1], dtype=int32)
km.cluster_centers_
array([[-0.18630907, -5.52019381],
       [ 2.57971185,  4.96332167],
       [ 5.73051427, -9.62182535]])
plot_clusters(X, km.labels_, km.cluster_centers_)
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=5)
X
array([[  6.24961769,  -9.00934014],
       [  4.95624841, -12.10661662],
       [  3.33614779, -11.18097865],
       ...,
       [  4.96651958, -11.8440634 ],
       [ -0.0815766 ,  -7.75275153],
       [  2.22743256,   6.09982258]])
y
array([0, 0, 0, 0, 0, 2, 0, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 0, 2, 2, 1, 0,
       0, 0, 0, 1, 2, 0, 1, 2, 0, 1, 2, 2, 1, 2, 1, 0, 1, 1, 0, 0, 0, 1,
       1, 0, 2, 2, 2, 0, 0, 1, 1, 0, 0, 1, 0, 0, 2, 2, 1, 2, 1, 1, 2, 2,
       0, 2, 1, 0, 1, 1, 0, 1, 2, 0, 2, 1, 0, 1, 0, 1, 2, 2, 0, 0, 1, 1,
       0, 0, 0, 1, 2, 2, 1, 2, 0, 1, 1, 2, 0, 2, 2, 2, 0, 1, 2, 2, 1, 2,
       2, 0, 2, 1, 0, 1, 1, 0, 1, 2, 1, 0, 1, 1, 0, 1, 2, 2, 2, 2, 2, 1,
       2, 2, 2, 1, 2, 0, 1, 2, 0, 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 2, 2,
       1, 0, 0, 2, 0, 2, 0, 2, 1, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 2, 0, 0,
       2, 1, 2, 2, 1, 2, 0, 1, 0, 0, 1, 1, 1, 0, 2, 0, 2, 1, 1, 2, 1, 2,
       2, 0, 0, 2, 2, 0, 1, 0, 1, 2, 1, 0, 2, 0, 0, 2, 2, 0, 1, 0, 0, 0,
       0, 2, 2, 1, 2, 1, 0, 2, 1, 2, 1, 1, 1, 0, 0, 2, 2, 0, 2, 0, 0, 1,
       0, 2, 1, 1, 2, 2, 0, 0, 0, 2, 2, 0, 0, 0, 1, 1, 0, 0, 0, 2, 1, 2,
       2, 2, 2, 2, 0, 0, 2, 2, 2, 1, 0, 0, 0, 2, 0, 0, 1, 1, 1, 2, 0, 0,
       1, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 2, 0, 2, 1, 1, 0, 2, 1, 2, 1, 2,
       0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 2, 1, 1, 2, 0, 0, 0, 2, 0, 1, 2,
       0, 1, 1, 0, 1, 2, 2, 2, 2, 1, 0, 1, 2, 1, 1, 0, 0, 2, 1, 0, 0, 1,
       2, 1, 2, 1, 2, 2, 1, 2, 0, 2, 0, 2, 0, 2, 1, 0, 2, 2, 2, 0, 2, 1,
       2, 2, 0, 0, 1, 2, 0, 0, 1, 1, 2, 2, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2,
       1, 0, 2, 2, 0, 1, 0, 1, 2, 2, 2, 0, 2, 0, 0, 0, 2, 0, 2, 1, 2, 0,
       0, 1, 2, 2, 0, 0, 0, 1, 2, 0, 0, 0, 2, 0, 2, 0, 2, 0, 2, 1, 2, 0,
       0, 2, 1, 2, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 2, 0, 1, 1, 1, 1, 1, 1,
       2, 0, 1, 1, 1, 0, 1, 0, 2, 2, 1, 1, 0, 1, 0, 2, 0, 2, 1, 0, 2, 1,
       2, 2, 0, 2, 2, 1, 1, 2, 0, 1, 1, 1, 2, 2, 2, 2, 0, 1, 1, 1, 1, 2,
       2, 1, 2, 1, 0, 2, 2, 1, 0, 1, 2, 0, 0, 2, 0, 1, 0, 0, 1, 0, 2, 1,
       2, 1, 2, 0, 1, 1, 0, 2, 2, 1, 2, 2, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0,
       0, 2, 1, 2, 2, 0, 0, 1, 2, 1, 2, 2, 2, 0, 0, 0, 1, 1, 0, 2, 2, 1,
       2, 0, 0, 2, 0, 2, 1, 1, 2, 2, 2, 1, 2, 0, 2, 1, 1, 1, 1, 1, 0, 0,
       1, 0, 1, 0, 2, 1])
knn.fit(X, y)
KNeighborsClassifier()
# A single query point given as a plain list of coordinates.
p = [0, 1]
# reshape(1, -1) turns it into a 2-dimensional array with one row,
# i.e. a data set consisting of a single sample.
ap = np.array(p).reshape(1, -1)
ap
array([[0, 1]])
ap.shape
(1, 2)
p = [0, 1]
# predict is called on a one-row 2-dimensional array; [0] extracts the
# label of that single point from the returned array.
knn.predict(np.array(p).reshape(1, -1))[0]
1
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, Normalize
def plot_clusters(X, y, p=None, p_label=None, neighbors=None):
    """
    Plots labeled points and, optionally, a query point and its
    nearest neighbors.
    X:
        A 2-dimensional numpy array with coordinates
        of points in clusters
    y:
        A 1-dimensional numpy array with labels of points
    p:
        An array with coordinates of the point whose
        neighbors will be plotted. It is drawn as a star.
    p_label:
        The predicted label of the point p. If given, the star is
        filled with the color of that label, otherwise it is black.
    neighbors:
        A list of row numbers of X which are the nearest
        neighbors of the point p. These points are circled.
    """
    # build a custom colormap
    col_list = ['dodgerblue', 'limegreen', 'red', 'orange', 'fuchsia']
    colors = ListedColormap(col_list)
    # Normalizing labels to [0, len(col_list)] is intended to make the
    # integer label i select the i-th color of col_list.
    norm = Normalize(0, len(col_list))
    # Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8...'
    # and later releases no longer accept the old names; use the new name
    # when it exists and fall back to the old one otherwise.
    if 'seaborn-v0_8' in plt.style.available:
        style_name = 'seaborn-v0_8'
    else:
        style_name = 'seaborn'
    plt.figure(figsize=(8, 8))
    with plt.style.context(style_name):
        scatter = plt.scatter(X[:, 0],
                              X[:, 1],
                              c=y,
                              s=90,
                              cmap=colors,
                              norm=norm,
                              label=y)
        if p is not None:
            p_col = "k" if p_label is None else colors(norm(p_label))
            plt.plot(p[0], p[1], marker="*", mfc=p_col, mec="k", ms=30, mew=2)
        if neighbors is not None:
            # Hollow black circles drawn around the neighbor points.
            plt.scatter(X[neighbors, 0],
                        X[neighbors, 1],
                        edgecolors='black',
                        linewidth=3,
                        facecolors="None",
                        s=300)
        # One legend entry per label value, taken from the main scatter plot.
        plt.legend(*scatter.legend_elements(),
                   markerscale=1.5,
                   prop={
                       "size": 12,
                       "weight": "normal"
                   })
    plt.show()
# Classify a single query point and plot it, colored by its predicted label.
p = [3, -8]
label = knn.predict(np.array(p).reshape(1, -1))[0]
plot_clusters(X, y, p=p, p_label=label)
# 600 labeled points around 4 centers; the large cluster_std makes the
# clusters spread widely.
X, y = make_blobs(n_samples=600,
                  centers=4, 
                  cluster_std=3, 
                  random_state=100
                 )
# Manual split: the first N samples are used for training, the
# remaining 100 for testing.
N = 500
train_X = X[:N]
train_y = y[:N]
test_X = X[N:]
test_y = y[N:]
# Fit a 5-nearest-neighbors classifier on the training part only.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(train_X, train_y)
KNeighborsClassifier()
predictions = knn.predict(test_X)
predictions
array([2, 3, 0, 2, 3, 3, 3, 1, 0, 1, 2, 0, 1, 3, 1, 2, 1, 1, 0, 1, 0, 2,
       1, 3, 0, 2, 3, 2, 0, 0, 1, 3, 2, 1, 1, 1, 1, 1, 3, 0, 1, 2, 3, 0,
       2, 0, 0, 0, 1, 0, 3, 1, 0, 1, 2, 0, 3, 0, 0, 0, 1, 3, 2, 3, 1, 1,
       3, 0, 1, 0, 2, 1, 0, 1, 0, 0, 2, 1, 1, 3, 2, 1, 0, 2, 2, 1, 1, 0,
       1, 1, 1, 1, 3, 0, 2, 0, 3, 0, 1, 0])
test_y
array([2, 3, 0, 2, 1, 3, 1, 1, 0, 3, 2, 0, 1, 3, 3, 2, 1, 1, 0, 1, 0, 2,
       1, 1, 0, 2, 3, 2, 3, 0, 1, 3, 2, 1, 1, 1, 1, 3, 1, 0, 3, 2, 3, 0,
       2, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 0, 3, 0, 0, 0, 1, 3, 2, 3, 3, 3,
       3, 0, 1, 3, 2, 1, 3, 1, 0, 0, 2, 1, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0,
       1, 1, 1, 1, 3, 0, 2, 0, 1, 0, 3, 0])
predictions == test_y
array([ True,  True,  True,  True, False,  True, False,  True,  True,
       False,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True, False,  True,  True,  True,  True,  True,  True,  True,
        True, False, False,  True, False,  True,  True,  True,  True,
        True,  True,  True, False,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False, False,  True,  True,  True, False,  True,  True,
       False,  True,  True,  True,  True,  True, False,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False,  True, False,
        True])
(predictions == test_y).sum()
82
# Accuracy: the fraction of test points whose predicted label equals
# the true label.
accuracy = (predictions == test_y).sum()/len(test_y)
accuracy
0.82
# zip pairs up the elements of the two lists position by position.
for p in zip([1, 2, 3], ['a', 'b', 'c']):
    print(p)
(1, 'a') (2, 'b') (3, 'c')
# 4 x 4 integer matrix of zeros, to be filled with counts of
# (true label, predicted label) pairs.
confusion = np.zeros((4, 4), dtype=int)
confusion
array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])
# Tally every test point: the row index is the true label, the column
# index is the predicted label.
for true_label, predicted_label in zip(test_y, predictions):
    confusion[true_label, predicted_label] += 1
confusion
array([[27,  1,  0,  0],
       [ 0, 25,  0,  6],
       [ 0,  0, 18,  0],
       [ 3,  8,  0, 12]])
import seaborn as sns
# Draw the confusion matrix as a heatmap, with the count written in
# each (square) cell.
sns.heatmap(confusion,
            annot=True,
            annot_kws={"fontsize": 16},
            linewidth=2, 
            square=True
           );
# Seeded random generator, so the arrays below are reproducible.
rng = np.random.default_rng(0)
# 4 x 4 array of random integers from 0 to 99 (the upper bound is exclusive).
a = rng.integers(0, 100, (4, 4))
a
array([[85, 63, 51, 26],
       [30,  4,  7,  1],
       [17, 81, 64, 91],
       [50, 60, 97, 72]])
# Display the 2-dimensional array as a grayscale image.
plt.imshow(a, cmap="gray");
# 4 x 4 x 3 array of random integers. The upper bound is exclusive, so the
# values lie in 0..254.
# NOTE(review): use 256 as the bound if the full 0..255 range is intended.
a = rng.integers(0, 255, (4, 4, 3))
a
array([[[161, 138, 142],
        [238,  70, 208],
        [171,   0, 100],
        [218, 141,   8]],
       [[195, 186, 215],
        [ 44,  22, 220],
        [  5, 138,  20],
        [ 76, 122, 107]],
       [[102,   7,   1],
        [ 31,   2, 171],
        [134, 165,  65],
        [156, 194,  97]],
       [[117, 254, 205],
        [250,  96, 174],
        [242, 165, 214],
        [175, 179,  99]]])
# A 3-dimensional array whose last axis has length 3 is displayed as an
# RGB image.
plt.imshow(a);
plt.figure(figsize=(16, 16))
# Lay the same 16 pixels out in a single row: shape (1, 16, 3).
plt.imshow(a.astype(int).reshape(1, -1, 3));
# Read an image file into a numpy array.
tiger = plt.imread("tiger.jpg")
tiger
array([[[243, 224, 194],
        [242, 223, 193],
        [236, 217, 187],
        ...,
        [241, 218, 184],
        [245, 222, 188],
        [250, 227, 193]],
       [[246, 229, 199],
        [246, 229, 199],
        [241, 224, 194],
        ...,
        [247, 224, 190],
        [252, 229, 195],
        [252, 229, 195]],
       [[249, 232, 204],
        [251, 234, 206],
        [249, 232, 204],
        ...,
        [253, 230, 198],
        [255, 233, 201],
        [248, 225, 193]],
       ...,
       [[254, 251, 244],
        [254, 251, 244],
        [254, 251, 244],
        ...,
        [254, 247, 237],
        [254, 247, 237],
        [254, 247, 237]],
       [[254, 251, 244],
        [254, 251, 244],
        [254, 251, 244],
        ...,
        [254, 247, 237],
        [254, 247, 237],
        [254, 247, 237]],
       [[254, 251, 244],
        [254, 251, 244],
        [254, 251, 244],
        ...,
        [254, 247, 237],
        [254, 247, 237],
        [254, 247, 237]]], dtype=uint8)
tiger.shape
(644, 1000, 3)
plt.figure(figsize=(16, 16))
plt.imshow(tiger);
# Flatten the image into a 2-dimensional array with one pixel per row
# and the three color channels as columns.
pixels = tiger.reshape(-1, 3)
pixels
array([[243, 224, 194],
       [242, 223, 193],
       [236, 217, 187],
       ...,
       [254, 247, 237],
       [254, 247, 237],
       [254, 247, 237]], dtype=uint8)
pixels.shape
(644000, 3)
import numpy as np
# Unique rows of pixels are the distinct colors in the image; the first
# entry of the shape is their count.
np.unique(pixels, axis=0).shape
(85796, 3)