Week 10 (4/11-4/17)

Notebook

Weekly digest

Resources

1. Marathon data

[1]:
Copy to clipboard
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Load the marathon results and append a "tot_minutes" column holding the
# "Finish" value of each row converted to a duration and expressed in minutes.
df = pd.read_csv(
    "https://www.mth548.org/_static/kde_marathon_results/marathon_results.csv"
).assign(
    tot_minutes=lambda results: pd.to_timedelta(results["Finish"]).dt.total_seconds()/60
)
Copy to clipboard

2. Tip amounts

[ ]:
Copy to clipboard
from ipywidgets import interact, fixed
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

plt.style.use('ggplot')
sns.set_context("notebook")


# The tips data gets its own name: binding it to `df` would overwrite the
# marathon DataFrame created in section 1 when the notebook is run top to bottom.
tips_df = sns.load_dataset('tips')

def tip_plot(frac):
    '''
    Plots tip amount against total bill for the tips dataset, together
    with a straight line showing a tip that is a fixed percentage of the bill.

    frac:
        Tip rate given in percent (e.g. 15 for a 15% tip).
    '''
    frac = frac/100  # percent -> fraction, used for both the line slope and the label
    plt.figure(figsize=(12,7))
    sns.scatterplot(data=tips_df, x="total_bill", y="tip", marker='o')
    x = np.arange(0, 55)
    plt.plot(x, frac*x, c='b', label=f"{frac:.0%} tip")
    plt.ylim(0, 11)
    plt.title("Total bill vs tip amount")
    plt.legend()
    plt.show()

# interact builds a widget for frac from the (10, 20) range and redraws the
# plot whenever the value changes.
interact(tip_plot, frac=(10, 20));
Copy to clipboard

3. Gradient descent

[3]:
Copy to clipboard
import plotly.graph_objects as go
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


def descent(Df, x0, l_rate=0.1, nsteps=1000):
    '''
    Runs a fixed number of gradient descent steps.

    Df:
        Function returning the gradient of the function being
        minimized at a given point.
    x0:
        The starting point.
    l_rate:
        The learning rate (multiplier applied to the gradient
        at every step).
    nsteps:
        Number of iterations to run.

    Returns:
        A list of numpy arrays: the starting point followed by the
        point obtained after each step (nsteps + 1 entries in total).
    '''

    # Work on a float copy so that the caller's x0 is left untouched.
    current = np.array(x0, dtype='float')
    trajectory = [current]
    for _ in range(nsteps):
        gradient = np.array(Df(current))
        # Build a new array on every step; an in-place update would
        # change the arrays already stored in the trajectory.
        current = current - l_rate*gradient
        trajectory.append(current)
    return trajectory


def plot_descent(f, xlim, ylim, path=None, levels=20):
    '''
    Draws a contour plot of a function of two variables and,
    optionally, the path computed by gradient descent on top of it.

    f:
        Function to be plotted. It is called once with a 2-row array
        whose rows hold the x- and y-coordinates of all grid points.
    xlim, ylim:
        Tuples with limits of x- and y-values for the contour
        plot of the function.
    path:
        List of coordinates of points computed by the
        gradient descent algorithm. Nothing is drawn if None.
    levels:
        Specifies levels of the contour plot.
    '''

    plt.figure(figsize=(8, 8))

    # 1000 x 1000 evaluation grid covering the requested rectangle.
    x_ticks = np.linspace(*xlim, 1000)
    y_ticks = np.linspace(*ylim, 1000)
    grid_x, grid_y = np.meshgrid(x_ticks, y_ticks)

    # Evaluate f on the flattened grid, then fold values back to grid shape.
    flat_points = np.vstack([grid_x.ravel(), grid_y.ravel()])
    Z = f(flat_points).reshape(grid_x.shape)

    # Filled contours first, gray contour lines on top.
    plt.contourf(grid_x, grid_y, Z, levels=levels, cmap='bone')
    plt.contour(grid_x, grid_y, Z, levels=levels, colors='gray')

    if path is not None:
        path_x = [point[0] for point in path]
        path_y = [point[1] for point in path]
        plt.plot(path_x, path_y, 'ro-', ms=4)
    plt.show()


def plot_descent_step(f, xlim, ylim, path=None, levels=20, last=None, step=1):
    '''
    Calls plot_descent() with only a selected part of the path.

    f, xlim, ylim, levels:
        Passed unchanged to plot_descent().
    path:
        List of coordinates of points computed by the gradient
        descent algorithm. If None, it is passed on as None and
        no slicing is attempted.
    last:
        Stop index of the slice taken from path (None keeps
        points up to the end of the path).
    step:
        Stride of the slice; every step-th point is kept.
    '''
    # The default path=None cannot be sliced, so guard before slicing.
    shown = None if path is None else path[:last:step]
    plot_descent(f=f,
                 xlim=xlim,
                 ylim=ylim,
                 path=shown,
                 levels=levels)


def plot3d(f, xlim, ylim):
    '''
    Shows a plotly surface plot of a function of two variables.

    f:
        Function to be plotted. It is called once with an array
        whose first entry is the grid of x-values and whose second
        entry is the grid of y-values.
    xlim, ylim:
        Limits of x- and y-values; entries 0 and 1 are used as
        the lower and upper bound.
    '''
    # 400 x 400 evaluation grid over the requested rectangle.
    X, Y = np.meshgrid(
        np.linspace(xlim[0], xlim[1], 400),
        np.linspace(ylim[0], ylim[1], 400),
    )
    heights = f(np.array([X, Y]))

    surface = go.Surface(x=X, y=Y, z=heights, colorscale="picnic")
    fig = go.Figure(surface)
    fig.update_layout(autosize=False, width=800, height=600)
    fig.show()
Copy to clipboard

4. Gradient descent test functions

[4]:
Copy to clipboard
def h(x):
    '''
    Himmelblau's function
    h(x, y) = (x^2 + y - 11)^2 + (x + y^2 - 7)^2

    x:
        Indexable with the x-coordinate at position 0 and
        the y-coordinate at position 1.
    '''
    px, py = x[0], x[1]
    first = px**2 + py - 11
    second = px + py**2 - 7
    return first**2 + second**2

def Dh(x):
    '''
    Gradient of Himmelblau's function h.

    x:
        Indexable with the x-coordinate at position 0 and
        the y-coordinate at position 1.

    Returns:
        A numpy array [dh/dx, dh/dy].
    '''
    px, py = x[0], x[1]
    first = px**2 + py - 11
    second = px + py**2 - 7
    dh_dx = 2 * first * 2 * px + 2 * second
    dh_dy = 2 * first + 2 * second * 2 * py
    return np.array([dh_dx, dh_dy])


def r(x):
    '''
    Rosenbrock function
    r(x, y) = (1-x)^2 + 100(y-x^2)^2

    x:
        Indexable with the x-coordinate at position 0 and
        the y-coordinate at position 1.
    '''
    px, py = x[0], x[1]
    linear_part = (1-px)**2
    valley_part = 100*(py-px**2)**2
    return linear_part + valley_part

def Dr(x):
    '''
    Gradient of the Rosenbrock function r.

    x:
        Indexable with the x-coordinate at position 0 and
        the y-coordinate at position 1.

    Returns:
        A numpy array [dr/dx, dr/dy].
    '''
    px, py = x[0], x[1]
    gap = py-px**2
    dr_dx = -2*(1-px) - 400*gap*px
    dr_dy = 200*gap
    return np.array([dr_dx, dr_dy])
Copy to clipboard

Exercise

  1. Split the marathon runners data into training and test data.

  2. Write a function prob_F() that takes as its argument the finish time of a runner in minutes and returns the probability that the runner was female (based on KDE for male and female runners in the training data and Bayes' theorem). Plot the graph of this function for the range of 120-500 minutes.

  3. Use the function prob_F() to predict whether each runner in the test data was male or female based on their finish time. Compute the accuracy of the predictions.