import matplotlib.pyplot as plt

# Demo of plt.subplot(nrows, ncols, index): each call selects one cell of a
# 2x3 grid on the current figure, and the title echoes the arguments used.

# Widen the vertical/horizontal gaps between subplots so titles do not collide.
plt.subplots_adjust(hspace=0.7, wspace=0.7)

# Cell 1: a single large red circle marker at the origin.
plt.subplot(2, 3, 1)
plt.title("(2, 3, 1)")
plt.plot(0, 0, 'ro', ms=50)

# Cell 2: a single large blue plus marker at the origin.
plt.subplot(2, 3, 2)
plt.title("(2, 3, 2)")
plt.plot(0, 0, 'b+', ms=50)

# Cell 3: titled but left empty.
plt.subplot(2, 3, 3)
plt.title("(2, 3, 3)")

# Cell 5: indices 4 and 6 are never requested, so only this cell of the
# second row gets an axes.
plt.subplot(2, 3, 5)
plt.title("(2, 3, 5)")


plt.show()


from ipywidgets import interact, fixed


def multiply(a, b):
    """Return the product of a and b (whatever `a * b` means for their types)."""
    product = a * b
    return product

# Each (min, max) integer tuple is turned into an integer slider, and
# multiply is re-run with the slider values.
interact(multiply, a=(1, 10), b=(1, 10));

interactive(children=(IntSlider(value=5, description='a', max=10, min=1), IntSlider(value=5, description='b', …


# fixed(10) pins b to 10, so only a gets a slider.
interact(multiply, a=(1, 10), b=fixed(10));

interactive(children=(IntSlider(value=5, description='a', max=10, min=1), Output()), _dom_classes=('widget-int…


import numpy as np


def f(n, color, sin, cos):
    """
    Plots sin(n*x) and/or cos(n*x) for x in [0, 7] and shows the figure.

    n:
        Multiplier applied to x inside the trigonometric functions.
    color:
        Matplotlib color used for every curve that is drawn.
    sin:
        If true, the sine curve is drawn with a dashed line.
    cos:
        If true, the cosine curve is drawn with the default line style.
    """
    grid = np.linspace(0, 7, 1000)

    # Collect (function, extra positional plot arguments) for each requested
    # curve; sine always comes first so the drawing order is stable.
    curves = []
    if sin:
        curves.append((np.sin, ('--',)))
    if cos:
        curves.append((np.cos, ()))

    for func, fmt in curves:
        plt.plot(grid, func(n*grid), *fmt, color=color)
    plt.show()


f(n=10, color='r', sin=True, cos=True)


# Interactive version of f: the (min, max, step) float tuple becomes a float
# slider for n, the dict becomes a dropdown mapping display names to
# matplotlib color codes, and the two booleans set the initial values of
# sin and cos.
interact(f, 
         n=(0., 10., 0.1),
         color={
             "red": 'r',
             "blue": 'b',
             "green": 'g',
             "black": 'k'
         },
         sin=True,
         cos=True
);

interactive(children=(FloatSlider(value=5.0, description='n', max=10.0), Dropdown(description='color', options…


from sklearn.datasets import make_blobs

# Generate 200 two-dimensional sample points grouped around 4 centers; the
# fixed random_state makes the sample reproducible. The second return value
# is discarded because only the coordinates are plotted.
X, _ = make_blobs(n_samples=200,
                  n_features=2,
                  centers=4, 
                  cluster_std=1.3,
                  random_state=10
)

# Scatter plot of first coordinate vs. second coordinate.
plt.scatter(X[:, 0], X[:, 1]);


from sklearn.datasets import make_moons


# Generate 200 noisy "two moons" sample points (no random_state is given, so
# the sample differs between runs) and scatter-plot their two coordinates.
X, _ = make_moons(n_samples=200, noise=0.07)
plt.scatter(X[:, 0], X[:, 1]);


%config InlineBackend.figure_format = 'retina'
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap


def get_distances(X, Y):
    """
    Given 2-dimensional numpy arrays X and Y
    returns a 2-dimensional array whose (i, j)-entry
    is the square of the Euclidean distance between
    the i-th row of X and the j-th row of Y.

    X:
        2-dimensional array-like of shape (a, d).
    Y:
        2-dimensional array-like of shape (b, d).

    Returns:
        A float array of shape (a, b) with squared distances.
    """

    # Compute in floating point: with unsigned or narrow integer inputs
    # (e.g. uint8 image pixels) the subtraction and the squaring would
    # silently wrap around and produce wrong distances.
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)

    # Broadcasting (a, 1, d) against (1, b, d) gives all pairwise differences.
    diff = X[:, np.newaxis, :] - Y[np.newaxis, :, :]
    return np.sum(diff**2, axis=-1)


def kmeans(X, n):
    """
    Implements the k-means algorithm.

    X:
        2-dimensional numpy array whose rows are coordinates of data points.
        The input is not modified; all computations use a floating-point
        copy, so integer data (e.g. uint8 pixels) is handled correctly.
    n:
        Integer, the number of clusters. The initial centroids are n
        distinct rows of X chosen at random, so n must not exceed the
        number of rows (np.random.choice raises ValueError otherwise).

    Returns:
        A list of tuples (labels, centers), one tuple for each iteration step.
        labels is a 1-dimensional array with labels of points in X, centers is
        a 2-dimensional array with coordinates of centroids of clusters.
        The last tuple on the list is the final clustering result.
    """

    # Float copy: keeps the caller's array untouched and prevents centroid
    # means from being truncated to an integer dtype.
    X = np.array(X, dtype=float)
    h = X.shape[0]
    centers = X[np.random.choice(h, size=n, replace=False)]

    steps = []
    while True:
        d = get_distances(X, centers)
        labels = np.argmin(d, axis=1)
        steps.append((labels, centers))

        # Start from the current centroids so that a cluster which received
        # no points keeps its position instead of becoming NaN (a NaN
        # centroid would make the convergence test below fail forever).
        new_centers = centers.copy()
        for j in range(n):
            members = X[labels == j]
            if len(members) > 0:
                new_centers[j] = members.mean(axis=0)

        # Converged once an update leaves every centroid exactly unchanged.
        if np.array_equal(centers, new_centers):
            break
        centers = new_centers
    return steps


def plot_clusters(X, labels=None, centers=None):
    """
    Draws data points colored by cluster, optionally with cluster centroids.

    X:
        2-dimensional numpy array whose rows are coordinates of data points;
        the first two columns are plotted.
    labels:
        1-dimensional numpy array with labels of points in X, or None to
        draw the points without label-based coloring.
    centers:
        2-dimensional numpy array whose rows are coordinates of cluster
        centroids, or None to omit the centroids.
    """

    palette = ['tab:blue', 'tab:red', 'tab:green', 'tab:orange', 'tab:purple']

    # One palette entry per distinct label (a single entry when unlabeled).
    n_colors = 1 if labels is None else len(set(labels))
    cmap = ListedColormap(palette[:n_colors])

    plt.figure(figsize=(6, 6))
    plt.scatter(X[:, 0], X[:, 1], c=labels, marker="+", cmap=cmap)

    if centers is not None:
        # Centroids are colored by their row index and drawn as large
        # black-edged discs on top of the data points.
        cx, cy = centers[:, 0], centers[:, 1]
        plt.scatter(cx,
                    cy,
                    c=range(centers.shape[0]),
                    s=250,
                    linewidths=3,
                    edgecolors="k",
                    cmap=cmap)
    plt.show()


def plot_iteration(X, steps, k=0):
    """
    Shows the clustering recorded at one iteration of k-means.

    X:
        2-dimensional numpy array whose rows are coordinates of data points.
    steps:
        A list with cluster data produced by the kmeans function; each
        entry holds the labels first and the centroids second.
    k:
        An index into the steps list (defaults to the first iteration).
    """

    step = steps[k]
    assignment, centroids = step[0], step[1]
    # Passed positionally on purpose: the call relies only on argument order.
    plot_clusters(X, assignment, centroids)


# Number of clusters requested from kmeans below.
n = 3

# 600 two-dimensional points around 3 centers; the fixed random_state makes
# the sample reproducible.
X, y = make_blobs(n_samples=600,
                  n_features=2,
                  centers=3, 
                  cluster_std=2,
                  random_state=10
                 )
# Plot the raw, unlabeled points with "+" markers.
plt.figure(figsize=(6, 6))
plt.plot(X[:, 0], X[:, 1], "+");


steps = kmeans(X, n)


steps[2]

(array([2, 2, 2, 2, 2, 0, 2, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 2, 0, 0, 1, 2,
        2, 2, 2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 1, 0, 1, 2, 1, 1, 2, 2, 2, 1,
        1, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 1, 2, 2, 0, 0, 1, 0, 1, 1, 2, 0,
        2, 0, 1, 2, 1, 1, 2, 1, 0, 2, 0, 1, 2, 1, 2, 1, 0, 0, 2, 2, 1, 1,
        2, 2, 2, 1, 0, 0, 1, 0, 2, 1, 1, 0, 2, 0, 0, 0, 2, 1, 0, 0, 1, 0,
        0, 2, 0, 1, 2, 1, 1, 2, 1, 0, 1, 2, 1, 1, 2, 1, 0, 0, 0, 0, 0, 1,
        0, 0, 0, 1, 0, 2, 1, 0, 2, 0, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 0, 0,
        1, 2, 2, 0, 2, 0, 2, 2, 1, 1, 2, 1, 1, 0, 1, 0, 1, 0, 1, 0, 2, 2,
        0, 1, 0, 0, 1, 0, 2, 1, 2, 2, 1, 1, 1, 2, 0, 2, 0, 1, 1, 0, 1, 0,
        0, 2, 2, 0, 0, 2, 1, 2, 1, 0, 1, 2, 0, 2, 2, 0, 0, 2, 1, 2, 2, 2,
        2, 0, 0, 1, 0, 1, 2, 0, 1, 0, 1, 1, 1, 2, 2, 0, 0, 0, 0, 2, 2, 1,
        2, 0, 1, 1, 0, 0, 2, 2, 2, 0, 0, 2, 2, 2, 1, 1, 2, 2, 2, 0, 1, 0,
        0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 2, 2, 2, 0, 2, 2, 1, 1, 1, 0, 2, 2,
        1, 2, 1, 1, 2, 0, 0, 2, 2, 2, 1, 0, 2, 0, 1, 1, 2, 0, 1, 0, 1, 0,
        2, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2, 0, 1, 1, 0, 2, 2, 2, 0, 2, 1, 0,
        2, 1, 1, 2, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 2, 2, 0, 1, 2, 2, 1,
        0, 1, 0, 1, 0, 0, 1, 0, 2, 0, 2, 0, 2, 0, 1, 2, 2, 0, 0, 2, 0, 1,
        0, 0, 2, 2, 1, 0, 2, 2, 1, 1, 0, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 0,
        1, 2, 0, 0, 2, 1, 2, 1, 0, 0, 0, 2, 0, 2, 2, 2, 0, 2, 0, 1, 0, 2,
        2, 1, 0, 0, 2, 2, 2, 1, 0, 2, 2, 2, 0, 2, 0, 0, 0, 2, 0, 1, 2, 2,
        2, 0, 1, 0, 1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1,
        0, 2, 1, 1, 1, 2, 1, 2, 0, 0, 1, 1, 2, 1, 2, 0, 2, 0, 1, 2, 0, 1,
        0, 0, 2, 0, 0, 1, 1, 0, 2, 1, 1, 1, 0, 0, 0, 0, 2, 1, 1, 1, 1, 0,
        0, 1, 0, 1, 0, 0, 0, 1, 2, 1, 2, 2, 2, 0, 2, 1, 2, 2, 1, 2, 0, 1,
        0, 1, 0, 2, 1, 1, 2, 0, 0, 1, 0, 0, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2,
        2, 0, 1, 0, 0, 2, 2, 1, 0, 1, 0, 0, 0, 2, 2, 2, 1, 1, 2, 0, 0, 1,
        0, 2, 2, 0, 2, 0, 1, 1, 0, 0, 0, 1, 0, 2, 0, 1, 1, 1, 1, 1, 2, 2,
        1, 2, 1, 2, 0, 1]),
 array([[-0.09439861, -5.58928855],
        [ 2.57971185,  4.96332167],
        [ 5.8159796 , -9.67547057]]))


plot_iteration(X, steps, k=3)


# Browse the k-means iterations with a slider: X and steps are passed
# through unchanged via fixed(), and k ranges over every valid index of steps.
interact(plot_iteration,
        X = fixed(X), 
        steps=fixed(steps),
        k=(0, len(steps)-1)
        );

interactive(children=(IntSlider(value=0, description='k', max=4), Output()), _dom_classes=('widget-interact',)…


# Run the same k-means implementation, with 2 clusters, on the two-moons
# sample points.
moons, _ = make_moons(n_samples=400, noise=0.07)
steps = kmeans(moons, 2)

# Slider over the recorded iterations, as for the blobs data.
interact(plot_iteration,
        X = fixed(moons), 
        steps=fixed(steps),
        k=(0, len(steps)-1)
        );

interactive(children=(IntSlider(value=0, description='k', max=4), Output()), _dom_classes=('widget-interact',)…


from sklearn.cluster import KMeans


KMeans?


km = KMeans(n_clusters=3)


X, y = make_blobs(n_samples=600,
                  centers=3,
                  cluster_std=2,
                  random_state=10
)

X

array([[  6.24961769,  -9.00934014],
       [  4.95624841, -12.10661662],
       [  3.33614779, -11.18097865],
       ...,
       [  4.96651958, -11.8440634 ],
       [ -0.0815766 ,  -7.75275153],
       [  2.22743256,   6.09982258]])

y

array([0, 0, 0, 0, 0, 2, 0, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 0, 2, 2, 1, 0,
       0, 0, 0, 1, 2, 0, 1, 2, 0, 1, 2, 2, 1, 2, 1, 0, 1, 1, 0, 0, 0, 1,
       1, 0, 2, 2, 2, 0, 0, 1, 1, 0, 0, 1, 0, 0, 2, 2, 1, 2, 1, 1, 2, 2,
       0, 2, 1, 0, 1, 1, 0, 1, 2, 0, 2, 1, 0, 1, 0, 1, 2, 2, 0, 0, 1, 1,
       0, 0, 0, 1, 2, 2, 1, 2, 0, 1, 1, 2, 0, 2, 2, 2, 0, 1, 2, 2, 1, 2,
       2, 0, 2, 1, 0, 1, 1, 0, 1, 2, 1, 0, 1, 1, 0, 1, 2, 2, 2, 2, 2, 1,
       2, 2, 2, 1, 2, 0, 1, 2, 0, 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 2, 2,
       1, 0, 0, 2, 0, 2, 0, 2, 1, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 2, 0, 0,
       2, 1, 2, 2, 1, 2, 0, 1, 0, 0, 1, 1, 1, 0, 2, 0, 2, 1, 1, 2, 1, 2,
       2, 0, 0, 2, 2, 0, 1, 0, 1, 2, 1, 0, 2, 0, 0, 2, 2, 0, 1, 0, 0, 0,
       0, 2, 2, 1, 2, 1, 0, 2, 1, 2, 1, 1, 1, 0, 0, 2, 2, 0, 2, 0, 0, 1,
       0, 2, 1, 1, 2, 2, 0, 0, 0, 2, 2, 0, 0, 0, 1, 1, 0, 0, 0, 2, 1, 2,
       2, 2, 2, 2, 0, 0, 2, 2, 2, 1, 0, 0, 0, 2, 0, 0, 1, 1, 1, 2, 0, 0,
       1, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 2, 0, 2, 1, 1, 0, 2, 1, 2, 1, 2,
       0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 2, 1, 1, 2, 0, 0, 0, 2, 0, 1, 2,
       0, 1, 1, 0, 1, 2, 2, 2, 2, 1, 0, 1, 2, 1, 1, 0, 0, 2, 1, 0, 0, 1,
       2, 1, 2, 1, 2, 2, 1, 2, 0, 2, 0, 2, 0, 2, 1, 0, 2, 2, 2, 0, 2, 1,
       2, 2, 0, 0, 1, 2, 0, 0, 1, 1, 2, 2, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2,
       1, 0, 2, 2, 0, 1, 0, 1, 2, 2, 2, 0, 2, 0, 0, 0, 2, 0, 2, 1, 2, 0,
       0, 1, 2, 2, 0, 0, 0, 1, 2, 0, 0, 0, 2, 0, 2, 0, 2, 0, 2, 1, 2, 0,
       0, 2, 1, 2, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 2, 0, 1, 1, 1, 1, 1, 1,
       2, 0, 1, 1, 1, 0, 1, 0, 2, 2, 1, 1, 0, 1, 0, 2, 0, 2, 1, 0, 2, 1,
       2, 2, 0, 2, 2, 1, 1, 2, 0, 1, 1, 1, 2, 2, 2, 2, 0, 1, 1, 1, 1, 2,
       2, 1, 2, 1, 0, 2, 2, 1, 0, 1, 2, 0, 0, 2, 0, 1, 0, 0, 1, 0, 2, 1,
       2, 1, 2, 0, 1, 1, 0, 2, 2, 1, 2, 2, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0,
       0, 2, 1, 2, 2, 0, 0, 1, 2, 1, 2, 2, 2, 0, 0, 0, 1, 1, 0, 2, 2, 1,
       2, 0, 0, 2, 0, 2, 1, 1, 2, 2, 2, 1, 2, 0, 2, 1, 1, 1, 1, 1, 0, 0,
       1, 0, 1, 0, 2, 1])


km.fit(X)

KMeans(n_clusters=3)


km.labels_

array([2, 2, 2, 2, 2, 0, 2, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 2, 0, 0, 1, 2,
       2, 2, 2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 1, 0, 1, 2, 1, 1, 2, 2, 2, 1,
       1, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 1, 2, 2, 0, 0, 1, 0, 1, 1, 2, 0,
       2, 0, 1, 2, 1, 1, 2, 1, 0, 2, 0, 1, 2, 1, 2, 1, 0, 0, 2, 2, 1, 1,
       2, 2, 2, 1, 0, 0, 1, 0, 2, 1, 1, 0, 2, 0, 0, 0, 2, 1, 0, 0, 1, 0,
       0, 2, 0, 1, 2, 1, 1, 2, 1, 0, 1, 2, 1, 1, 2, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 2, 1, 0, 2, 0, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 0, 0,
       1, 2, 2, 0, 2, 0, 2, 2, 1, 1, 2, 1, 1, 0, 1, 0, 1, 0, 1, 0, 2, 2,
       0, 1, 0, 0, 1, 0, 2, 1, 2, 2, 1, 1, 1, 2, 0, 2, 0, 1, 1, 0, 1, 0,
       0, 2, 2, 0, 0, 2, 1, 2, 1, 0, 1, 2, 0, 2, 2, 0, 0, 2, 1, 2, 2, 2,
       2, 0, 0, 1, 0, 1, 2, 0, 1, 0, 1, 1, 1, 2, 2, 0, 0, 0, 0, 2, 2, 1,
       2, 0, 1, 1, 0, 0, 2, 2, 2, 0, 0, 2, 2, 2, 1, 1, 2, 2, 2, 0, 1, 0,
       0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 2, 2, 2, 0, 2, 2, 1, 1, 1, 0, 2, 2,
       1, 2, 1, 1, 2, 0, 0, 2, 2, 2, 1, 0, 2, 0, 1, 1, 2, 0, 1, 0, 1, 0,
       2, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2, 0, 1, 1, 0, 2, 2, 2, 0, 2, 1, 0,
       2, 1, 1, 2, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 2, 2, 0, 1, 2, 2, 1,
       0, 1, 0, 1, 0, 0, 1, 0, 2, 0, 2, 0, 2, 0, 1, 2, 2, 0, 0, 2, 0, 1,
       0, 0, 2, 2, 1, 0, 2, 2, 1, 1, 0, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 0,
       1, 2, 0, 0, 2, 1, 2, 1, 0, 0, 0, 2, 0, 2, 2, 2, 0, 2, 0, 1, 0, 2,
       2, 1, 0, 0, 2, 2, 2, 1, 0, 2, 2, 2, 2, 2, 0, 0, 0, 2, 0, 1, 2, 2,
       2, 0, 1, 0, 1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1,
       0, 2, 1, 1, 1, 2, 1, 2, 0, 0, 1, 1, 2, 1, 2, 0, 2, 0, 1, 2, 0, 1,
       0, 0, 2, 0, 0, 1, 1, 0, 2, 1, 1, 1, 0, 0, 0, 0, 2, 1, 1, 1, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 2, 1, 2, 2, 2, 0, 2, 1, 2, 2, 1, 2, 0, 1,
       0, 1, 0, 2, 1, 1, 2, 0, 0, 1, 0, 0, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2,
       2, 0, 1, 0, 0, 2, 2, 1, 0, 1, 0, 0, 0, 2, 2, 2, 1, 1, 2, 0, 0, 1,
       0, 2, 2, 0, 2, 0, 1, 1, 0, 0, 0, 1, 0, 2, 0, 1, 1, 1, 1, 1, 2, 2,
       1, 2, 1, 2, 0, 1], dtype=int32)


km.cluster_centers_

array([[-0.18630907, -5.52019381],
       [ 2.57971185,  4.96332167],
       [ 5.73051427, -9.62182535]])


plot_clusters(X, km.labels_, km.cluster_centers_)


from sklearn.neighbors import KNeighborsClassifier


knn = KNeighborsClassifier(n_neighbors=5)

X

array([[  6.24961769,  -9.00934014],
       [  4.95624841, -12.10661662],
       [  3.33614779, -11.18097865],
       ...,
       [  4.96651958, -11.8440634 ],
       [ -0.0815766 ,  -7.75275153],
       [  2.22743256,   6.09982258]])

y

array([0, 0, 0, 0, 0, 2, 0, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 0, 2, 2, 1, 0,
       0, 0, 0, 1, 2, 0, 1, 2, 0, 1, 2, 2, 1, 2, 1, 0, 1, 1, 0, 0, 0, 1,
       1, 0, 2, 2, 2, 0, 0, 1, 1, 0, 0, 1, 0, 0, 2, 2, 1, 2, 1, 1, 2, 2,
       0, 2, 1, 0, 1, 1, 0, 1, 2, 0, 2, 1, 0, 1, 0, 1, 2, 2, 0, 0, 1, 1,
       0, 0, 0, 1, 2, 2, 1, 2, 0, 1, 1, 2, 0, 2, 2, 2, 0, 1, 2, 2, 1, 2,
       2, 0, 2, 1, 0, 1, 1, 0, 1, 2, 1, 0, 1, 1, 0, 1, 2, 2, 2, 2, 2, 1,
       2, 2, 2, 1, 2, 0, 1, 2, 0, 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 2, 2,
       1, 0, 0, 2, 0, 2, 0, 2, 1, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 2, 0, 0,
       2, 1, 2, 2, 1, 2, 0, 1, 0, 0, 1, 1, 1, 0, 2, 0, 2, 1, 1, 2, 1, 2,
       2, 0, 0, 2, 2, 0, 1, 0, 1, 2, 1, 0, 2, 0, 0, 2, 2, 0, 1, 0, 0, 0,
       0, 2, 2, 1, 2, 1, 0, 2, 1, 2, 1, 1, 1, 0, 0, 2, 2, 0, 2, 0, 0, 1,
       0, 2, 1, 1, 2, 2, 0, 0, 0, 2, 2, 0, 0, 0, 1, 1, 0, 0, 0, 2, 1, 2,
       2, 2, 2, 2, 0, 0, 2, 2, 2, 1, 0, 0, 0, 2, 0, 0, 1, 1, 1, 2, 0, 0,
       1, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 2, 0, 2, 1, 1, 0, 2, 1, 2, 1, 2,
       0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 2, 1, 1, 2, 0, 0, 0, 2, 0, 1, 2,
       0, 1, 1, 0, 1, 2, 2, 2, 2, 1, 0, 1, 2, 1, 1, 0, 0, 2, 1, 0, 0, 1,
       2, 1, 2, 1, 2, 2, 1, 2, 0, 2, 0, 2, 0, 2, 1, 0, 2, 2, 2, 0, 2, 1,
       2, 2, 0, 0, 1, 2, 0, 0, 1, 1, 2, 2, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2,
       1, 0, 2, 2, 0, 1, 0, 1, 2, 2, 2, 0, 2, 0, 0, 0, 2, 0, 2, 1, 2, 0,
       0, 1, 2, 2, 0, 0, 0, 1, 2, 0, 0, 0, 2, 0, 2, 0, 2, 0, 2, 1, 2, 0,
       0, 2, 1, 2, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 2, 0, 1, 1, 1, 1, 1, 1,
       2, 0, 1, 1, 1, 0, 1, 0, 2, 2, 1, 1, 0, 1, 0, 2, 0, 2, 1, 0, 2, 1,
       2, 2, 0, 2, 2, 1, 1, 2, 0, 1, 1, 1, 2, 2, 2, 2, 0, 1, 1, 1, 1, 2,
       2, 1, 2, 1, 0, 2, 2, 1, 0, 1, 2, 0, 0, 2, 0, 1, 0, 0, 1, 0, 2, 1,
       2, 1, 2, 0, 1, 1, 0, 2, 2, 1, 2, 2, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0,
       0, 2, 1, 2, 2, 0, 0, 1, 2, 1, 2, 2, 2, 0, 0, 0, 1, 1, 0, 2, 2, 1,
       2, 0, 0, 2, 0, 2, 1, 1, 2, 2, 2, 1, 2, 0, 2, 1, 1, 1, 1, 1, 0, 0,
       1, 0, 1, 0, 2, 1])


knn.fit(X, y)

KNeighborsClassifier()


p = [0, 1]
ap = np.array(p).reshape(1, -1)
ap

array([[0, 1]])


ap.shape

(1, 2)


p = [0, 1]
knn.predict(np.array(p).reshape(1, -1))[0]

1


%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, Normalize


def plot_clusters(X, y, p=None, p_label=None, neighbors=None):
    """
    Plots labeled points and, optionally, a query point with its
    nearest neighbors.

    NOTE: this function has the same name as the k-means plotting helper
    defined earlier in this file and replaces it once this definition runs.

    X:
        A 2-dimensional numpy array with coordinates
        of points in clusters
    y:
        A 1-dimensional numpy array with labels of points
    p:
        An array with coordinates of the point whose
        neighbors will be plotted. If None, no point is drawn.
    p_label:
        The predicted label of the point p. If None, the point
        is filled with black.
    neighbors:
        A list of row numbers of X which are the nearest
        neighbors of the point p. If None, no neighbors are marked.
    """

    # build a custom colormap; the normalization range matches the number
    # of colors so that an integer label k is intended to select col_list[k]
    col_list = ['dodgerblue', 'limegreen', 'red', 'orange', 'fuchsia']
    colors = ListedColormap(col_list)
    norm = Normalize(0, len(col_list))

    # Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*'
    # and later removed the old names; use the new name when it exists and
    # fall back to the old one on older Matplotlib versions.
    if 'seaborn-v0_8' in plt.style.available:
        style = 'seaborn-v0_8'
    else:
        style = 'seaborn'

    plt.figure(figsize=(8, 8))
    with plt.style.context(style):
        scatter = plt.scatter(X[:, 0],
                              X[:, 1],
                              c=y,
                              s=90,
                              cmap=colors,
                              norm=norm,
                              label=y)
        if p is not None:
            # Star marker for the query point, filled with its label's color.
            p_col = "k" if p_label is None else colors(norm(p_label))
            plt.plot(p[0], p[1], marker="*", mfc=p_col, mec="k", ms=30, mew=2)
        if neighbors is not None:
            # Hollow black rings around the neighbor points.
            plt.scatter(X[neighbors, 0],
                        X[neighbors, 1],
                        edgecolors='black',
                        linewidth=3,
                        facecolors="None",
                        s=300)
        # One legend entry per label, taken from the main scatter plot.
        plt.legend(*scatter.legend_elements(),
                   markerscale=1.5,
                   prop={
                       "size": 12,
                       "weight": "normal"
                   })
    plt.show()


# Classify one query point and plot it among the labeled data.
p = [3, -8]
# predict expects a 2-dimensional array (one row per sample), hence the
# reshape to a single row; [0] extracts the single predicted label.
label = knn.predict(np.array(p).reshape(1, -1))[0]
plot_clusters(X, y, p=p, p_label=label)


# 600 labeled points around 4 centers, with a larger spread than before.
X, y = make_blobs(n_samples=600,
                  centers=4, 
                  cluster_std=3, 
                  random_state=100
                 )

# Size of the training set: the first N samples are used for training,
# the remaining ones for testing.
N = 500

train_X = X[:N]
train_y = y[:N]

test_X = X[N:]
test_y = y[N:]

# Fit a 5-nearest-neighbors classifier on the training part only.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(train_X, train_y)

KNeighborsClassifier()


predictions = knn.predict(test_X)
predictions

array([2, 3, 0, 2, 3, 3, 3, 1, 0, 1, 2, 0, 1, 3, 1, 2, 1, 1, 0, 1, 0, 2,
       1, 3, 0, 2, 3, 2, 0, 0, 1, 3, 2, 1, 1, 1, 1, 1, 3, 0, 1, 2, 3, 0,
       2, 0, 0, 0, 1, 0, 3, 1, 0, 1, 2, 0, 3, 0, 0, 0, 1, 3, 2, 3, 1, 1,
       3, 0, 1, 0, 2, 1, 0, 1, 0, 0, 2, 1, 1, 3, 2, 1, 0, 2, 2, 1, 1, 0,
       1, 1, 1, 1, 3, 0, 2, 0, 3, 0, 1, 0])


test_y

array([2, 3, 0, 2, 1, 3, 1, 1, 0, 3, 2, 0, 1, 3, 3, 2, 1, 1, 0, 1, 0, 2,
       1, 1, 0, 2, 3, 2, 3, 0, 1, 3, 2, 1, 1, 1, 1, 3, 1, 0, 3, 2, 3, 0,
       2, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 0, 3, 0, 0, 0, 1, 3, 2, 3, 3, 3,
       3, 0, 1, 3, 2, 1, 3, 1, 0, 0, 2, 1, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0,
       1, 1, 1, 1, 3, 0, 2, 0, 1, 0, 3, 0])


predictions == test_y

array([ True,  True,  True,  True, False,  True, False,  True,  True,
       False,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True, False,  True,  True,  True,  True,  True,  True,  True,
        True, False, False,  True, False,  True,  True,  True,  True,
        True,  True,  True, False,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False, False,  True,  True,  True, False,  True,  True,
       False,  True,  True,  True,  True,  True, False,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False,  True, False,
        True])


(predictions == test_y).sum()

82


# Accuracy: the number of test samples whose prediction equals the true
# label, divided by the number of test samples.
accuracy = (predictions == test_y).sum()/len(test_y)
accuracy

0.82


for p in zip([1, 2, 3], ['a', 'b', 'c']):
    print(p)

(1, 'a')
(2, 'b')
(3, 'c')


confusion = np.zeros((4, 4), dtype=int)
confusion

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])


# Fill the confusion matrix: each (true label, predicted label) pair
# increments the entry in the row of the true label and the column of the
# predicted label.
for p in zip(test_y, predictions):
    confusion[p[0], p[1]] += 1


confusion

array([[27,  1,  0,  0],
       [ 0, 25,  0,  6],
       [ 0,  0, 18,  0],
       [ 3,  8,  0, 12]])


import seaborn as sns

# Show the confusion matrix as a heatmap with the count written in each
# cell, square cells, and separating lines between them.
sns.heatmap(confusion,
            annot=True,
            annot_kws={"fontsize": 16},
            linewidth=2, 
            square=True
           );


rng = np.random.default_rng(0)

a = rng.integers(0, 100, (4, 4))
a

array([[85, 63, 51, 26],
       [30,  4,  7,  1],
       [17, 81, 64, 91],
       [50, 60, 97, 72]])


plt.imshow(a, cmap="gray");


# Random 4x4 image with 3 color channels per pixel.
# NOTE(review): Generator.integers excludes the upper bound by default, so
# the values here lie in 0..254; use 256 (or endpoint=True) if the full
# 8-bit range 0..255 is intended.
a = rng.integers(0, 255, (4, 4, 3))
a

array([[[161, 138, 142],
        [238,  70, 208],
        [171,   0, 100],
        [218, 141,   8]],

       [[195, 186, 215],
        [ 44,  22, 220],
        [  5, 138,  20],
        [ 76, 122, 107]],

       [[102,   7,   1],
        [ 31,   2, 171],
        [134, 165,  65],
        [156, 194,  97]],

       [[117, 254, 205],
        [250,  96, 174],
        [242, 165, 214],
        [175, 179,  99]]])


plt.imshow(a);


plt.figure(figsize=(16, 16))
plt.imshow(a.astype(int).reshape(1, -1, 3));


tiger = plt.imread("tiger.jpg")
tiger

array([[[243, 224, 194],
        [242, 223, 193],
        [236, 217, 187],
        ...,
        [241, 218, 184],
        [245, 222, 188],
        [250, 227, 193]],

       [[246, 229, 199],
        [246, 229, 199],
        [241, 224, 194],
        ...,
        [247, 224, 190],
        [252, 229, 195],
        [252, 229, 195]],

       [[249, 232, 204],
        [251, 234, 206],
        [249, 232, 204],
        ...,
        [253, 230, 198],
        [255, 233, 201],
        [248, 225, 193]],

       ...,

       [[254, 251, 244],
        [254, 251, 244],
        [254, 251, 244],
        ...,
        [254, 247, 237],
        [254, 247, 237],
        [254, 247, 237]],

       [[254, 251, 244],
        [254, 251, 244],
        [254, 251, 244],
        ...,
        [254, 247, 237],
        [254, 247, 237],
        [254, 247, 237]],

       [[254, 251, 244],
        [254, 251, 244],
        [254, 251, 244],
        ...,
        [254, 247, 237],
        [254, 247, 237],
        [254, 247, 237]]], dtype=uint8)


tiger.shape

(644, 1000, 3)


plt.figure(figsize=(16, 16))
plt.imshow(tiger);


# Flatten the image into a 2-dimensional array with one row per pixel and
# one column per color channel.
pixels = tiger.reshape(-1, 3)
pixels

array([[243, 224, 194],
       [242, 223, 193],
       [236, 217, 187],
       ...,
       [254, 247, 237],
       [254, 247, 237],
       [254, 247, 237]], dtype=uint8)


pixels.shape

(644000, 3)


import numpy as np

np.unique(pixels, axis=0).shape

(85796, 3)

Week 3¶

Review: matplotlib subplots¶

Sidenote: ipywidgets and interact¶

Types of machine learning:¶

Clustering¶

k-Means¶

K-means with sklearn¶

k-NN with sklearn¶

Clustering colors¶