{ "cells": [ { "cell_type": "markdown", "id": "ac4bf291", "metadata": {}, "source": [ "# Week 5" ] }, { "cell_type": "code", "execution_count": 1, "id": "e87f7edf", "metadata": {}, "outputs": [], "source": [ "import pandas" ] }, { "cell_type": "code", "execution_count": 2, "id": "05aeaf0c", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
016.991.01FemaleNoSunDinner2
110.341.66MaleNoSunDinner3
221.013.50MaleNoSunDinner3
323.683.31MaleNoSunDinner2
424.593.61FemaleNoSunDinner4
........................
23929.035.92MaleNoSatDinner3
24027.182.00FemaleYesSatDinner2
24122.672.00MaleYesSatDinner2
24217.821.75MaleNoSatDinner2
24318.783.00FemaleNoThurDinner2
\n", "

244 rows × 7 columns

\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "0 16.99 1.01 Female No Sun Dinner 2\n", "1 10.34 1.66 Male No Sun Dinner 3\n", "2 21.01 3.50 Male No Sun Dinner 3\n", "3 23.68 3.31 Male No Sun Dinner 2\n", "4 24.59 3.61 Female No Sun Dinner 4\n", ".. ... ... ... ... ... ... ...\n", "239 29.03 5.92 Male No Sat Dinner 3\n", "240 27.18 2.00 Female Yes Sat Dinner 2\n", "241 22.67 2.00 Male Yes Sat Dinner 2\n", "242 17.82 1.75 Male No Sat Dinner 2\n", "243 18.78 3.00 Female No Thur Dinner 2\n", "\n", "[244 rows x 7 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import seaborn as sns\n", "\n", "tips = sns.load_dataset(\"tips\")\n", "tips" ] }, { "cell_type": "code", "execution_count": 3, "id": "a2de7f90", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 244 entries, 0 to 243\n", "Data columns (total 7 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 total_bill 244 non-null float64 \n", " 1 tip 244 non-null float64 \n", " 2 sex 244 non-null category\n", " 3 smoker 244 non-null category\n", " 4 day 244 non-null category\n", " 5 time 244 non-null category\n", " 6 size 244 non-null int64 \n", "dtypes: category(4), float64(2), int64(1)\n", "memory usage: 7.4 KB\n" ] } ], "source": [ "tips.info()" ] }, { "cell_type": "code", "execution_count": 4, "id": "98b2e19b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Female', 'Male']\n", "Categories (2, object): ['Male', 'Female']" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips[\"sex\"].unique()" ] }, { "cell_type": "code", "execution_count": 5, "id": "135a3810", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Sun', 'Sat', 'Thur', 'Fri']\n", "Categories (4, object): ['Thur', 'Fri', 'Sat', 'Sun']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips[\"day\"].unique()" ] }, { "cell_type": "code", "execution_count": 6, "id": "8c280219", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Dinner', 'Lunch']\n", "Categories (2, object): ['Lunch', 'Dinner']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips[\"time\"].unique()" ] }, { "cell_type": "code", "execution_count": 7, "id": "7edc8c11", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2.9982786885245902" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips[\"tip\"].mean()" ] }, { "cell_type": "code", "execution_count": 8, "id": "7154c5ea", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10.0" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips[\"tip\"].max()" ] }, { "cell_type": "code", "execution_count": 9, "id": "d1969b61", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
5638.013.00MaleYesSatDinner4
5811.241.76MaleYesSatDinner2
6020.293.21MaleYesSatDinner2
6113.812.00MaleYesSatDinner2
6211.021.98MaleYesSatDinner2
........................
23415.533.00MaleYesSatDinner2
23612.601.00MaleYesSatDinner2
23732.831.17MaleYesSatDinner2
24027.182.00FemaleYesSatDinner2
24122.672.00MaleYesSatDinner2
\n", "

93 rows × 7 columns

\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "56 38.01 3.00 Male Yes Sat Dinner 4\n", "58 11.24 1.76 Male Yes Sat Dinner 2\n", "60 20.29 3.21 Male Yes Sat Dinner 2\n", "61 13.81 2.00 Male Yes Sat Dinner 2\n", "62 11.02 1.98 Male Yes Sat Dinner 2\n", ".. ... ... ... ... ... ... ...\n", "234 15.53 3.00 Male Yes Sat Dinner 2\n", "236 12.60 1.00 Male Yes Sat Dinner 2\n", "237 32.83 1.17 Male Yes Sat Dinner 2\n", "240 27.18 2.00 Female Yes Sat Dinner 2\n", "241 22.67 2.00 Male Yes Sat Dinner 2\n", "\n", "[93 rows x 7 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips[tips[\"smoker\"]== \"Yes\"]" ] }, { "cell_type": "code", "execution_count": 11, "id": "a86777a1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
17050.8110.00MaleYesSatDinner3
21248.339.00MaleNoSatDinner4
5948.276.73MaleNoSatDinner4
15648.175.00MaleNoSunDinner6
18245.353.50MaleYesSunDinner3
........................
1497.512.00MaleNoThurLunch2
1117.251.00FemaleNoSatDinner1
1727.255.15MaleYesSunDinner2
925.751.00FemaleYesFriDinner2
673.071.00FemaleYesSatDinner1
\n", "

244 rows × 7 columns

\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "170 50.81 10.00 Male Yes Sat Dinner 3\n", "212 48.33 9.00 Male No Sat Dinner 4\n", "59 48.27 6.73 Male No Sat Dinner 4\n", "156 48.17 5.00 Male No Sun Dinner 6\n", "182 45.35 3.50 Male Yes Sun Dinner 3\n", ".. ... ... ... ... ... ... ...\n", "149 7.51 2.00 Male No Thur Lunch 2\n", "111 7.25 1.00 Female No Sat Dinner 1\n", "172 7.25 5.15 Male Yes Sun Dinner 2\n", "92 5.75 1.00 Female Yes Fri Dinner 2\n", "67 3.07 1.00 Female Yes Sat Dinner 1\n", "\n", "[244 rows x 7 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips.sort_values(by=\"total_bill\", ascending=False)" ] }, { "cell_type": "markdown", "id": "b56c78b6", "metadata": {}, "source": [ "## Groupby" ] }, { "cell_type": "code", "execution_count": 12, "id": "90548dea", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grouped = tips.groupby(\"sex\")\n", "grouped" ] }, { "cell_type": "code", "execution_count": 14, "id": "ec742bd7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
110.341.66MaleNoSunDinner3
221.013.50MaleNoSunDinner3
323.683.31MaleNoSunDinner2
525.294.71MaleNoSunDinner4
68.772.00MaleNoSunDinner2
........................
23612.601.00MaleYesSatDinner2
23732.831.17MaleYesSatDinner2
23929.035.92MaleNoSatDinner3
24122.672.00MaleYesSatDinner2
24217.821.75MaleNoSatDinner2
\n", "

157 rows × 7 columns

\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "1 10.34 1.66 Male No Sun Dinner 3\n", "2 21.01 3.50 Male No Sun Dinner 3\n", "3 23.68 3.31 Male No Sun Dinner 2\n", "5 25.29 4.71 Male No Sun Dinner 4\n", "6 8.77 2.00 Male No Sun Dinner 2\n", ".. ... ... ... ... ... ... ...\n", "236 12.60 1.00 Male Yes Sat Dinner 2\n", "237 32.83 1.17 Male Yes Sat Dinner 2\n", "239 29.03 5.92 Male No Sat Dinner 3\n", "241 22.67 2.00 Male Yes Sat Dinner 2\n", "242 17.82 1.75 Male No Sat Dinner 2\n", "\n", "[157 rows x 7 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grouped.get_group(\"Male\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "6f8cf676", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Male\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
110.341.66MaleNoSunDinner3
221.013.50MaleNoSunDinner3
323.683.31MaleNoSunDinner2
\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "1 10.34 1.66 Male No Sun Dinner 3\n", "2 21.01 3.50 Male No Sun Dinner 3\n", "3 23.68 3.31 Male No Sun Dinner 2" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", "Female\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
016.991.01FemaleNoSunDinner2
424.593.61FemaleNoSunDinner4
1135.265.00FemaleNoSunDinner4
\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "0 16.99 1.01 Female No Sun Dinner 2\n", "4 24.59 3.61 Female No Sun Dinner 4\n", "11 35.26 5.00 Female No Sun Dinner 4" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n" ] } ], "source": [ "for k, df in grouped:\n", " print(k)\n", " display(df.head(3))\n", " print(\"\\n\")" ] }, { "cell_type": "markdown", "id": "9a439ef8", "metadata": {}, "source": [ "### Aggregations" ] }, { "cell_type": "code", "execution_count": 16, "id": "c873302f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsmokerdaytimesize
sex
Male157157157157157157
Female878787878787
\n", "
" ], "text/plain": [ " total_bill tip smoker day time size\n", "sex \n", "Male 157 157 157 157 157 157\n", "Female 87 87 87 87 87 87" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grouped.count()" ] }, { "cell_type": "code", "execution_count": 17, "id": "9be7366f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsize
sex
Male3256.82485.07413
Female1570.95246.51214
\n", "
" ], "text/plain": [ " total_bill tip size\n", "sex \n", "Male 3256.82 485.07 413\n", "Female 1570.95 246.51 214" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grouped.sum()" ] }, { "cell_type": "code", "execution_count": 18, "id": "d17830f6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "sex\n", "Male 50.81\n", "Female 44.30\n", "Name: total_bill, dtype: float64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grouped[\"total_bill\"].max()" ] }, { "cell_type": "code", "execution_count": 20, "id": "2fb24e88", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtip
sex
Male20.7440763.089618
Female18.0568972.833448
\n", "
" ], "text/plain": [ " total_bill tip\n", "sex \n", "Male 20.744076 3.089618\n", "Female 18.056897 2.833448" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grouped[[\"total_bill\", \"tip\"]].mean()" ] }, { "cell_type": "markdown", "id": "8c7d8147", "metadata": {}, "source": [ "### groupby.agg" ] }, { "cell_type": "code", "execution_count": 22, "id": "951738e3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tiptotal_bill
sex
Male20157
Female886
\n", "
" ], "text/plain": [ " tip total_bill\n", "sex \n", "Male 20 157\n", "Female 8 86" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def large_amounts(values, min_value=5):\n", " n = 0\n", " for t in values:\n", " if t >= min_value:\n", " n += 1\n", " return n\n", "\n", "grouped[[\"tip\", \"total_bill\"]].agg(large_amounts)" ] }, { "cell_type": "code", "execution_count": 23, "id": "19974d1a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tiptotal_bill
sex
Male82113
Female3951
\n", "
" ], "text/plain": [ " tip total_bill\n", "sex \n", "Male 82 113\n", "Female 39 51" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grouped[[\"tip\", \"total_bill\"]].agg({\"tip\": lambda x: large_amounts(x, min_value=3),\n", " \"total_bill\": lambda x: large_amounts(x, min_value=15)\n", " \n", "})" ] }, { "cell_type": "markdown", "id": "b68ec83f", "metadata": {}, "source": [ "## groupby.apply" ] }, { "cell_type": "code", "execution_count": 24, "id": "f7191d2b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
sex
Male17050.8110.00MaleYesSatDinner3
21248.339.00MaleNoSatDinner4
2339.427.58MaleNoSatDinner4
Female21428.176.50FemaleYesSatDinner3
5234.815.20FemaleNoSunDinner4
8534.835.17FemaleNoThurLunch4
\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "sex \n", "Male 170 50.81 10.00 Male Yes Sat Dinner 3\n", " 212 48.33 9.00 Male No Sat Dinner 4\n", " 23 39.42 7.58 Male No Sat Dinner 4\n", "Female 214 28.17 6.50 Female Yes Sat Dinner 3\n", " 52 34.81 5.20 Female No Sun Dinner 4\n", " 85 34.83 5.17 Female No Thur Lunch 4" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def large_tips(df):\n", " df = df.sort_values(by=\"tip\", ascending=False)\n", " return df.head(3)\n", "\n", "big_tips = grouped.apply(large_tips)\n", "big_tips" ] }, { "cell_type": "markdown", "id": "bfef5f92", "metadata": {}, "source": [ "### Gouping by multiple columns" ] }, { "cell_type": "code", "execution_count": 25, "id": "dad3e9e2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
016.991.01FemaleNoSunDinner2
110.341.66MaleNoSunDinner3
221.013.50MaleNoSunDinner3
323.683.31MaleNoSunDinner2
424.593.61FemaleNoSunDinner4
........................
23929.035.92MaleNoSatDinner3
24027.182.00FemaleYesSatDinner2
24122.672.00MaleYesSatDinner2
24217.821.75MaleNoSatDinner2
24318.783.00FemaleNoThurDinner2
\n", "

244 rows × 7 columns

\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "0 16.99 1.01 Female No Sun Dinner 2\n", "1 10.34 1.66 Male No Sun Dinner 3\n", "2 21.01 3.50 Male No Sun Dinner 3\n", "3 23.68 3.31 Male No Sun Dinner 2\n", "4 24.59 3.61 Female No Sun Dinner 4\n", ".. ... ... ... ... ... ... ...\n", "239 29.03 5.92 Male No Sat Dinner 3\n", "240 27.18 2.00 Female Yes Sat Dinner 2\n", "241 22.67 2.00 Male Yes Sat Dinner 2\n", "242 17.82 1.75 Male No Sat Dinner 2\n", "243 18.78 3.00 Female No Thur Dinner 2\n", "\n", "[244 rows x 7 columns]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips" ] }, { "cell_type": "code", "execution_count": 28, "id": "54b72998", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsize
daytime
ThurLunch17.6647542.7677052.459016
Dinner18.7800003.0000002.000000
FriLunch12.8457142.3828572.000000
Dinner19.6633332.9400002.166667
SatLunchNaNNaNNaN
Dinner20.4413792.9931032.517241
SunLunchNaNNaNNaN
Dinner21.4100003.2551322.842105
\n", "
" ], "text/plain": [ " total_bill tip size\n", "day time \n", "Thur Lunch 17.664754 2.767705 2.459016\n", " Dinner 18.780000 3.000000 2.000000\n", "Fri Lunch 12.845714 2.382857 2.000000\n", " Dinner 19.663333 2.940000 2.166667\n", "Sat Lunch NaN NaN NaN\n", " Dinner 20.441379 2.993103 2.517241\n", "Sun Lunch NaN NaN NaN\n", " Dinner 21.410000 3.255132 2.842105" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "means = tips.groupby(by=[\"day\", \"time\"]).mean()\n", "means" ] }, { "cell_type": "code", "execution_count": 29, "id": "0d0b462b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [total_bill, tip, sex, smoker, day, time, size]\n", "Index: []" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips[(tips[\"day\"]==\"Sat\") & (tips[\"time\"] == \"Lunch\") ]" ] }, { "cell_type": "code", "execution_count": 30, "id": "0c0824db", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MultiIndex([('Thur', 'Lunch'),\n", " ('Thur', 'Dinner'),\n", " ( 'Fri', 'Lunch'),\n", " ( 'Fri', 'Dinner'),\n", " ( 'Sat', 'Lunch'),\n", " ( 'Sat', 'Dinner'),\n", " ( 'Sun', 'Lunch'),\n", " ( 'Sun', 'Dinner')],\n", " names=['day', 'time'])" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "means.index" ] }, { "cell_type": "code", "execution_count": 31, "id": "33d6eb08", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "total_bill 17.664754\n", "tip 2.767705\n", "size 2.459016\n", "Name: (Thur, Lunch), dtype: float64" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "means.loc[('Thur', 'Lunch')]" ] }, { "cell_type": "markdown", "id": "62bbc6a8", "metadata": {}, "source": [ "## Reindexing" ] }, { "cell_type": "code", "execution_count": 32, "id": "afaf5459", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearmonthpassengers
01949Jan112
11949Feb118
21949Mar132
31949Apr129
41949May121
............
1391960Aug606
1401960Sep508
1411960Oct461
1421960Nov390
1431960Dec432
\n", "

144 rows × 3 columns

\n", "
" ], "text/plain": [ " year month passengers\n", "0 1949 Jan 112\n", "1 1949 Feb 118\n", "2 1949 Mar 132\n", "3 1949 Apr 129\n", "4 1949 May 121\n", ".. ... ... ...\n", "139 1960 Aug 606\n", "140 1960 Sep 508\n", "141 1960 Oct 461\n", "142 1960 Nov 390\n", "143 1960 Dec 432\n", "\n", "[144 rows x 3 columns]" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights = sns.load_dataset(\"flights\")\n", "flights" ] }, { "cell_type": "code", "execution_count": 33, "id": "88d8dd31", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
monthpassengers
year
1949Jan112
1949Feb118
1949Mar132
1949Apr129
1949May121
.........
1960Aug606
1960Sep508
1960Oct461
1960Nov390
1960Dec432
\n", "

144 rows × 2 columns

\n", "
" ], "text/plain": [ " month passengers\n", "year \n", "1949 Jan 112\n", "1949 Feb 118\n", "1949 Mar 132\n", "1949 Apr 129\n", "1949 May 121\n", "... ... ...\n", "1960 Aug 606\n", "1960 Sep 508\n", "1960 Oct 461\n", "1960 Nov 390\n", "1960 Dec 432\n", "\n", "[144 rows x 2 columns]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights = flights.set_index(\"year\")\n", "flights" ] }, { "cell_type": "code", "execution_count": 34, "id": "98bf477e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
monthpassengers
year
1950Jan115
1950Feb126
1950Mar141
1950Apr135
1950May125
1950Jun149
1950Jul170
1950Aug170
1950Sep158
1950Oct133
1950Nov114
1950Dec140
\n", "
" ], "text/plain": [ " month passengers\n", "year \n", "1950 Jan 115\n", "1950 Feb 126\n", "1950 Mar 141\n", "1950 Apr 135\n", "1950 May 125\n", "1950 Jun 149\n", "1950 Jul 170\n", "1950 Aug 170\n", "1950 Sep 158\n", "1950 Oct 133\n", "1950 Nov 114\n", "1950 Dec 140" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights.loc[\"1950\"]" ] }, { "cell_type": "code", "execution_count": 35, "id": "06d97740", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearmonthpassengers
01949Jan112
11949Feb118
21949Mar132
31949Apr129
41949May121
............
1391960Aug606
1401960Sep508
1411960Oct461
1421960Nov390
1431960Dec432
\n", "

144 rows × 3 columns

\n", "
" ], "text/plain": [ " year month passengers\n", "0 1949 Jan 112\n", "1 1949 Feb 118\n", "2 1949 Mar 132\n", "3 1949 Apr 129\n", "4 1949 May 121\n", ".. ... ... ...\n", "139 1960 Aug 606\n", "140 1960 Sep 508\n", "141 1960 Oct 461\n", "142 1960 Nov 390\n", "143 1960 Dec 432\n", "\n", "[144 rows x 3 columns]" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights = flights.reset_index()\n", "flights" ] }, { "cell_type": "code", "execution_count": 37, "id": "d32db15b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
passengers
yearmonth
1949Jan112
Feb118
Mar132
Apr129
May121
.........
1960Aug606
Sep508
Oct461
Nov390
Dec432
\n", "

144 rows × 1 columns

\n", "
" ], "text/plain": [ " passengers\n", "year month \n", "1949 Jan 112\n", " Feb 118\n", " Mar 132\n", " Apr 129\n", " May 121\n", "... ...\n", "1960 Aug 606\n", " Sep 508\n", " Oct 461\n", " Nov 390\n", " Dec 432\n", "\n", "[144 rows x 1 columns]" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights = flights.set_index([\"year\", \"month\"])\n", "flights" ] }, { "cell_type": "code", "execution_count": 39, "id": "7b9b7ec6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MultiIndex([(1949, 'Jan'),\n", " (1949, 'Feb'),\n", " (1949, 'Mar'),\n", " (1949, 'Apr'),\n", " (1949, 'May'),\n", " (1949, 'Jun'),\n", " (1949, 'Jul'),\n", " (1949, 'Aug'),\n", " (1949, 'Sep'),\n", " (1949, 'Oct'),\n", " ...\n", " (1960, 'Mar'),\n", " (1960, 'Apr'),\n", " (1960, 'May'),\n", " (1960, 'Jun'),\n", " (1960, 'Jul'),\n", " (1960, 'Aug'),\n", " (1960, 'Sep'),\n", " (1960, 'Oct'),\n", " (1960, 'Nov'),\n", " (1960, 'Dec')],\n", " names=['year', 'month'], length=144)" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights.index" ] }, { "cell_type": "code", "execution_count": 41, "id": "8d0a799b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "passengers 419\n", "Name: (1960, Mar), dtype: int64" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights.loc[(1960, 'Mar')]" ] }, { "cell_type": "code", "execution_count": 42, "id": "4989f5d9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
passengers
monthJanFebMarAprMayJunJulAugSepOctNovDec
year
1949112118132129121135148148136119104118
1950115126141135125149170170158133114140
1951145150178163172178199199184162146166
1952171180193181183218230242209191172194
1953196196236235229243264272237211180201
1954204188235227234264302293259229203229
1955242233267269270315364347312274237278
1956284277317313318374413405355306271306
1957315301356348355422465467404347305336
1958340318362348363435491505404359310337
1959360342406396420472548559463407362405
1960417391419461472535622606508461390432
\n", "
" ], "text/plain": [ " passengers \n", "month Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec\n", "year \n", "1949 112 118 132 129 121 135 148 148 136 119 104 118\n", "1950 115 126 141 135 125 149 170 170 158 133 114 140\n", "1951 145 150 178 163 172 178 199 199 184 162 146 166\n", "1952 171 180 193 181 183 218 230 242 209 191 172 194\n", "1953 196 196 236 235 229 243 264 272 237 211 180 201\n", "1954 204 188 235 227 234 264 302 293 259 229 203 229\n", "1955 242 233 267 269 270 315 364 347 312 274 237 278\n", "1956 284 277 317 313 318 374 413 405 355 306 271 306\n", "1957 315 301 356 348 355 422 465 467 404 347 305 336\n", "1958 340 318 362 348 363 435 491 505 404 359 310 337\n", "1959 360 342 406 396 420 472 548 559 463 407 362 405\n", "1960 417 391 419 461 472 535 622 606 508 461 390 432" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights = flights.unstack()\n", "flights" ] }, { "cell_type": "code", "execution_count": 43, "id": "1e7b23f1", "metadata": {}, "outputs": [ { "ename": "KeyError", "evalue": "'Jan'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3360\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3361\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3362\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", "\u001b[0;31mKeyError\u001b[0m: 'Jan'", "\nThe above exception was the direct cause of the following exception:\n", "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m/var/folders/vd/9gpvwb493r52y4sgtl_fvtvm0000gn/T/ipykernel_12352/3691583124.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mflights\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Jan\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3455\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_single_key\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3456\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3457\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3458\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3459\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_getitem_multilevel\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3506\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3507\u001b[0m \u001b[0;31m# self.columns is a MultiIndex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3508\u001b[0;31m \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3509\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mslice\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3510\u001b[0m \u001b[0mnew_columns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/multi.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method)\u001b[0m\n\u001b[1;32m 2920\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2921\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2922\u001b[0;31m \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_level_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2923\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_maybe_to_slice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2924\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/multi.py\u001b[0m in \u001b[0;36m_get_level_indexer\u001b[0;34m(self, key, level, indexer)\u001b[0m\n\u001b[1;32m 3202\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3203\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3204\u001b[0;31m \u001b[0midx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_loc_single_level_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlevel_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3205\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3206\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlevel\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lexsort_depth\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/multi.py\u001b[0m in \u001b[0;36m_get_loc_single_level_index\u001b[0;34m(self, level_index, key)\u001b[0m\n\u001b[1;32m 2853\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2854\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2855\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mlevel_index\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2856\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2857\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3361\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3362\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3363\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3364\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3365\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhasnans\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyError\u001b[0m: 'Jan'" ] } ], "source": [ "flights[\"Jan\"]" ] }, { "cell_type": "code", "execution_count": 44, "id": "afbeeb2b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MultiIndex([('passengers', 'Jan'),\n", " ('passengers', 'Feb'),\n", " ('passengers', 'Mar'),\n", " ('passengers', 'Apr'),\n", " ('passengers', 'May'),\n", " ('passengers', 'Jun'),\n", " ('passengers', 'Jul'),\n", " ('passengers', 'Aug'),\n", " ('passengers', 'Sep'),\n", " ('passengers', 'Oct'),\n", " ('passengers', 'Nov'),\n", " ('passengers', 'Dec')],\n", " names=[None, 'month'])" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights.columns" ] }, { "cell_type": "code", "execution_count": 45, "id": "1975263d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "year\n", "1949 112\n", "1950 115\n", "1951 145\n", "1952 171\n", "1953 196\n", "1954 204\n", "1955 242\n", "1956 284\n", "1957 315\n", "1958 340\n", "1959 360\n", "1960 417\n", "Name: (passengers, Jan), dtype: int64" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights[('passengers', 'Jan')]" ] }, { "cell_type": "code", "execution_count": 48, "id": "6d88bb1c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
monthJanFebMarAprMayJunJulAugSepOctNovDec
year
1949112118132129121135148148136119104118
1950115126141135125149170170158133114140
1951145150178163172178199199184162146166
1952171180193181183218230242209191172194
1953196196236235229243264272237211180201
1954204188235227234264302293259229203229
1955242233267269270315364347312274237278
1956284277317313318374413405355306271306
1957315301356348355422465467404347305336
1958340318362348363435491505404359310337
1959360342406396420472548559463407362405
1960417391419461472535622606508461390432
\n", "
" ], "text/plain": [ "month Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec\n", "year \n", "1949 112 118 132 129 121 135 148 148 136 119 104 118\n", "1950 115 126 141 135 125 149 170 170 158 133 114 140\n", "1951 145 150 178 163 172 178 199 199 184 162 146 166\n", "1952 171 180 193 181 183 218 230 242 209 191 172 194\n", "1953 196 196 236 235 229 243 264 272 237 211 180 201\n", "1954 204 188 235 227 234 264 302 293 259 229 203 229\n", "1955 242 233 267 269 270 315 364 347 312 274 237 278\n", "1956 284 277 317 313 318 374 413 405 355 306 271 306\n", "1957 315 301 356 348 355 422 465 467 404 347 305 336\n", "1958 340 318 362 348 363 435 491 505 404 359 310 337\n", "1959 360 342 406 396 420 472 548 559 463 407 362 405\n", "1960 417 391 419 461 472 535 622 606 508 461 390 432" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights = flights.droplevel(0, axis=1)\n", "flights" ] }, { "cell_type": "code", "execution_count": 49, "id": "934ad7de", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "CategoricalIndex(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',\n", " 'Sep', 'Oct', 'Nov', 'Dec'],\n", " categories=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', ...], ordered=False, dtype='category', name='month')" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights.columns" ] }, { "cell_type": "code", "execution_count": 50, "id": "916998fe", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "year\n", "1949 112\n", "1950 115\n", "1951 145\n", "1952 171\n", "1953 196\n", "1954 204\n", "1955 242\n", "1956 284\n", "1957 315\n", "1958 340\n", "1959 360\n", "1960 417\n", "Name: Jan, dtype: int64" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights[\"Jan\"]" ] }, { "cell_type": "code", "execution_count": 51, "id": "9f5a2030", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
monthJanFebMarAprMayJunJulAugSepOctNovDec
year
1949112118132129121135148148136119104118
1950115126141135125149170170158133114140
1951145150178163172178199199184162146166
1952171180193181183218230242209191172194
1953196196236235229243264272237211180201
1954204188235227234264302293259229203229
1955242233267269270315364347312274237278
1956284277317313318374413405355306271306
1957315301356348355422465467404347305336
1958340318362348363435491505404359310337
1959360342406396420472548559463407362405
1960417391419461472535622606508461390432
\n", "
" ], "text/plain": [ "month Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec\n", "year \n", "1949 112 118 132 129 121 135 148 148 136 119 104 118\n", "1950 115 126 141 135 125 149 170 170 158 133 114 140\n", "1951 145 150 178 163 172 178 199 199 184 162 146 166\n", "1952 171 180 193 181 183 218 230 242 209 191 172 194\n", "1953 196 196 236 235 229 243 264 272 237 211 180 201\n", "1954 204 188 235 227 234 264 302 293 259 229 203 229\n", "1955 242 233 267 269 270 315 364 347 312 274 237 278\n", "1956 284 277 317 313 318 374 413 405 355 306 271 306\n", "1957 315 301 356 348 355 422 465 467 404 347 305 336\n", "1958 340 318 362 348 363 435 491 505 404 359 310 337\n", "1959 360 342 406 396 420 472 548 559 463 407 362 405\n", "1960 417 391 419 461 472 535 622 606 508 461 390 432" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights" ] }, { "cell_type": "code", "execution_count": 52, "id": "7e0019ff", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "year month\n", "1949 Jan 112\n", " Feb 118\n", " Mar 132\n", " Apr 129\n", " May 121\n", " ... \n", "1960 Aug 606\n", " Sep 508\n", " Oct 461\n", " Nov 390\n", " Dec 432\n", "Length: 144, dtype: int64" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flights = flights.stack()\n", "flights" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.12" } }, "nbformat": 4, "nbformat_minor": 5 }