Week 10 (4/11-4/17)¶
Notebook¶
Download the notebook file: week_10_class.ipynb
Weekly digest¶
Resources¶
1. Marathon data¶
[1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Load the marathon results and append each runner's finish time in minutes,
# computed from the "Finish" column parsed as a time delta.
df = pd.read_csv(
    "https://www.mth548.org/_static/kde_marathon_results/marathon_results.csv"
).assign(
    tot_minutes=lambda results: pd.to_timedelta(results["Finish"]).dt.total_seconds() / 60
)
2. Tip amounts¶
[ ]:
from ipywidgets import interact, fixed
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Use matplotlib's 'ggplot' style sheet for the figures created below.
plt.style.use('ggplot')
# Apply seaborn's "notebook" plotting context.
sns.set_context("notebook")
# Load seaborn's 'tips' dataset; tip_plot() reads its "total_bill" and "tip" columns.
# NOTE: this rebinds the name df, which held the marathon data in the first cell.
df = sns.load_dataset('tips')
def tip_plot(frac):
    '''
    Plots tip amount against total bill, together with a straight line
    showing what a fixed-percentage tip would be.

    frac:
        Tip rate given in percent (e.g. 15 for a 15% tip).

    Reads the module-level DataFrame df, which must contain the
    columns "total_bill" and "tip".
    '''
    # Convert percent to a fraction of the bill without rebinding the argument.
    rate = frac / 100
    bills = np.arange(0, 55)

    plt.figure(figsize=(12, 7))
    sns.scatterplot(data=df, x="total_bill", y="tip", marker='o')
    # Reference line: tip = rate * total bill.
    plt.plot(bills, rate * bills, c='b', label=f"{rate:.0%} tip")
    plt.ylim(0, 11)
    plt.title("Total bill vs tip amount")
    plt.legend()
    plt.show()
# Interactive control for tip_plot: frac is chosen from the range 10..20
# and is interpreted by tip_plot as a percentage.
# The trailing semicolon presumably keeps the notebook from echoing the return value.
interact(tip_plot, frac=(10, 20));
3. Gradient descent¶
[3]:
import plotly.graph_objects as go
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
def descent(Df, x0, l_rate=0.1, nsteps=1000):
    '''
    Performs gradient descent of a given function f.

    Df:
        Differential (gradient) of f. Called with the current point
        as a float numpy array; must return an array-like value that
        can be subtracted from that point.
    x0:
        The starting point. It is copied, so the caller's object
        is never modified.
    l_rate:
        The learning rate.
    nsteps:
        Number of iterations to run.

    Returns:
        A list of nsteps + 1 points (numpy arrays): the starting point
        followed by the point computed at each step of the gradient descent.
    '''
    point = np.array(x0, dtype='float')
    path = [point]
    for _ in range(nsteps):
        # A new array is created on every step, so earlier entries
        # of path are never overwritten.
        point = point - l_rate * np.asarray(Df(point))
        path.append(point)
    return path
def plot_descent(f, xlim, ylim, path=None, levels=20):
    '''
    Creates a contour plot of a function and, optionally, the path
    computed by gradient descent applied to the function.

    f:
        Function to be plotted. It is called once with an array of
        shape (2, N) holding the x- and y-coordinates of the grid
        points and must return N values.
    xlim, ylim:
        Tuples with limits of x- and y-values for the contour
        plot of the function.
    path:
        List of coordinates of points computed by the
        gradient descent algorithm. If None, only the contour
        plot is drawn.
    levels:
        Specifies levels of the contour plot.
    '''
    plt.figure(figsize=(8, 8))
    X, Y = np.meshgrid(np.linspace(*xlim, 1000), np.linspace(*ylim, 1000))
    # Evaluate f on the flattened grid, then restore the grid shape.
    Z = f(np.vstack([X.ravel(), Y.ravel()])).reshape(X.shape)
    plt.contourf(X, Y, Z, levels=levels, cmap='bone')
    plt.contour(X, Y, Z, levels=levels, colors='gray')
    if path is not None:
        # Distinct name for the loop variable so it cannot be confused
        # with the grid coordinates above.
        plt.plot([p[0] for p in path], [p[1] for p in path], 'ro-', ms=4)
    plt.show()
def plot_descent_step(f, xlim, ylim, path=None, levels=20, last=None, step=1):
    '''
    Plots the contour plot of a function together with a thinned-out
    part of a gradient descent path.

    f, xlim, ylim, levels:
        Passed unchanged to plot_descent.
    path:
        List of points computed by the gradient descent algorithm.
        If None, only the contour plot is drawn.
    last:
        Index at which the plotted part of the path stops
        (None plots up to the end of the path).
    step:
        Plot every step-th point of the path.
    '''
    # path defaults to None, which cannot be sliced; pass it through as-is.
    shown = None if path is None else path[:last:step]
    plot_descent(f=f, xlim=xlim, ylim=ylim, path=shown, levels=levels)
def plot3d(f, xlim, ylim):
    '''
    Creates a Plotly 3D surface plot of a function of two variables.

    f:
        Function to be plotted. It is called once with an array of
        shape (2, 400, 400) holding the x- and y-coordinates of the
        grid and must return the 400 x 400 array of function values.
    xlim, ylim:
        Tuples with limits of x- and y-values for the plot.
    '''
    xs = np.linspace(xlim[0], xlim[1], 400)
    ys = np.linspace(ylim[0], ylim[1], 400)
    X, Y = np.meshgrid(xs, ys)
    Z = f(np.array([X, Y]))

    fig = go.Figure(go.Surface(x=X, y=Y, z=Z, colorscale="picnic"))
    # Fixed figure size instead of Plotly's automatic sizing.
    fig.update_layout(autosize=False, width=800, height=600)
    fig.show()
4. Gradient descent test functions¶
[4]:
def h(x):
    '''
    Himmelblau's function
    h(x, y) = (x^2 + y - 11)^2 + (x + y^2 - 7)^2

    x:
        Point at which to evaluate the function; x[0] and x[1]
        are the x- and y-coordinates.
    '''
    first = x[0]**2 + x[1] - 11
    second = x[0] + x[1]**2 - 7
    return first**2 + second**2
def Dh(x):
    '''
    Gradient of Himmelblau's function
    h(x, y) = (x^2 + y - 11)^2 + (x + y^2 - 7)^2

    x:
        Point at which to evaluate the gradient; x[0] and x[1]
        are the x- and y-coordinates.

    Returns:
        A numpy array [dh/dx, dh/dy].
    '''
    first = x[0]**2 + x[1] - 11
    second = x[0] + x[1]**2 - 7
    return np.array([
        4 * first * x[0] + 2 * second,   # dh/dx
        2 * first + 4 * second * x[1],   # dh/dy
    ])
def r(x):
    '''
    Rosenbrock function
    r(x, y) = (1-x)^2 + 100(y-x^2)^2

    x:
        Point at which to evaluate the function; x[0] and x[1]
        are the x- and y-coordinates.
    '''
    offset = x[1] - x[0]**2
    return (1 - x[0])**2 + 100 * offset**2
def Dr(x):
    '''
    Gradient of the Rosenbrock function
    r(x, y) = (1-x)^2 + 100(y-x^2)^2

    x:
        Point at which to evaluate the gradient; x[0] and x[1]
        are the x- and y-coordinates.

    Returns:
        A numpy array [dr/dx, dr/dy].
    '''
    offset = x[1] - x[0]**2
    return np.array([
        -2 * (1 - x[0]) - 400 * offset * x[0],   # dr/dx
        200 * offset,                            # dr/dy
    ])
Exercise¶
Split marathon runners data into training and test data.
Write a function
prob_F()
that takes as its argument the finish time of a runner in minutes and returns the probability that the runner was female (based on KDEs for male and female runners in the training data and Bayes' theorem). Plot the graph of this function for the range of 120-500 minutes. Use the function
prob_F()
to predict whether each runner in the test data was male or female based on their finish time. Compute the accuracy of the predictions.