{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Discussion 3 Supplemental Notebook\n",
    "\n",
    "## Data 100, Spring 2020\n",
    "\n",
    "### Suraj Rampure (suraj.rampure@berkeley.edu)\n",
    "\n",
    "In this notebook, we'll use dataframes containing (imaginary) students' roster information and grades to demonstrate various types of joins/merges. We'll also use the same dataframes to extract information using `pandas` syntax you should be familiar with."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Demoing Joins\n",
    "\n",
    "These demos use `pandas`, but joining in SQL is the same as merging in `pandas`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "roster = pd.DataFrame()\n",
    "roster['sid'] = [123, 456, 194, 994, 430, 824, 881, 519]\n",
    "roster['name'] = ['amy', 'billy', 'billy', 'sam', 'carly', 'johnson', 'ernie', 'shaq']\n",
    "roster['email'] = ['amy@berkeley.edu', 'billy@berkeley.edu', 'notbilly@stanford.edu', 'sam2@berkeley.edu', 'icarly@berkeley.edu', 'magic@berkeley.edu', 'ern@berkeley.edu', 'big@berkeley.edu']\n",
    "\n",
    "grades = pd.DataFrame()\n",
    "grades['sid'] = [123, 456, 994, 519, 808, 456, 994, 430, 223, 881, 405, 519, 123, 456, 994, 430, 881, 519]\n",
    "grades['assignment'] = ['hw1', 'hw1', 'hw1', 'hw1', 'hw1', 'hw2', 'hw2', 'hw2', 'hw2', 'hw2', 'hw2', 'hw2', 'hw3', 'hw3', 'hw3', 'hw3', 'hw3', 'hw3']\n",
    "grades['scores'] = [94, 93, 91, 88, 75, 53, 100, 100, 89, 95, 96, 100, 82, 90, 91, 100, 25, 49]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sid</th>\n",
       "      <th>name</th>\n",
       "      <th>email</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>194</td>\n",
       "      <td>billy</td>\n",
       "      <td>notbilly@stanford.edu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>824</td>\n",
       "      <td>johnson</td>\n",
       "      <td>magic@berkeley.edu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>881</td>\n",
       "      <td>ernie</td>\n",
       "      <td>ern@berkeley.edu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sid     name                  email\n",
       "0  123      amy       amy@berkeley.edu\n",
       "1  456    billy     billy@berkeley.edu\n",
       "2  194    billy  notbilly@stanford.edu\n",
       "3  994      sam      sam2@berkeley.edu\n",
       "4  430    carly    icarly@berkeley.edu\n",
       "5  824  johnson     magic@berkeley.edu\n",
       "6  881    ernie       ern@berkeley.edu\n",
       "7  519     shaq       big@berkeley.edu"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "roster"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sid</th>\n",
       "      <th>assignment</th>\n",
       "      <th>scores</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>123</td>\n",
       "      <td>hw1</td>\n",
       "      <td>94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>456</td>\n",
       "      <td>hw1</td>\n",
       "      <td>93</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>994</td>\n",
       "      <td>hw1</td>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>519</td>\n",
       "      <td>hw1</td>\n",
       "      <td>88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>808</td>\n",
       "      <td>hw1</td>\n",
       "      <td>75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>456</td>\n",
       "      <td>hw2</td>\n",
       "      <td>53</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>994</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>430</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>223</td>\n",
       "      <td>hw2</td>\n",
       "      <td>89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>881</td>\n",
       "      <td>hw2</td>\n",
       "      <td>95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>405</td>\n",
       "      <td>hw2</td>\n",
       "      <td>96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>519</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>123</td>\n",
       "      <td>hw3</td>\n",
       "      <td>82</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>456</td>\n",
       "      <td>hw3</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>994</td>\n",
       "      <td>hw3</td>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>430</td>\n",
       "      <td>hw3</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>881</td>\n",
       "      <td>hw3</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>519</td>\n",
       "      <td>hw3</td>\n",
       "      <td>49</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    sid assignment  scores\n",
       "0   123        hw1      94\n",
       "1   456        hw1      93\n",
       "2   994        hw1      91\n",
       "3   519        hw1      88\n",
       "4   808        hw1      75\n",
       "5   456        hw2      53\n",
       "6   994        hw2     100\n",
       "7   430        hw2     100\n",
       "8   223        hw2      89\n",
       "9   881        hw2      95\n",
       "10  405        hw2      96\n",
       "11  519        hw2     100\n",
       "12  123        hw3      82\n",
       "13  456        hw3      90\n",
       "14  994        hw3      91\n",
       "15  430        hw3     100\n",
       "16  881        hw3      25\n",
       "17  519        hw3      49"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grades"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You should note that there are some students with no grades (e.g. `johnson` has no grades, since `sid = 824` doesn't appear in `grades`), and there are some submissions in `grades` that belong to students who are not in the roster (e.g. `sid = 808` doesn't appear in `roster`).\n",
    "\n",
    "Let's first perform an inner join:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sid</th>\n",
       "      <th>name</th>\n",
       "      <th>email</th>\n",
       "      <th>assignment</th>\n",
       "      <th>scores</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>82</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>93</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>53</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>881</td>\n",
       "      <td>ernie</td>\n",
       "      <td>ern@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>881</td>\n",
       "      <td>ernie</td>\n",
       "      <td>ern@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>49</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    sid   name                email assignment  scores\n",
       "0   123    amy     amy@berkeley.edu        hw1      94\n",
       "1   123    amy     amy@berkeley.edu        hw3      82\n",
       "2   456  billy   billy@berkeley.edu        hw1      93\n",
       "3   456  billy   billy@berkeley.edu        hw2      53\n",
       "4   456  billy   billy@berkeley.edu        hw3      90\n",
       "5   994    sam    sam2@berkeley.edu        hw1      91\n",
       "6   994    sam    sam2@berkeley.edu        hw2     100\n",
       "7   994    sam    sam2@berkeley.edu        hw3      91\n",
       "8   430  carly  icarly@berkeley.edu        hw2     100\n",
       "9   430  carly  icarly@berkeley.edu        hw3     100\n",
       "10  881  ernie     ern@berkeley.edu        hw2      95\n",
       "11  881  ernie     ern@berkeley.edu        hw3      25\n",
       "12  519   shaq     big@berkeley.edu        hw1      88\n",
       "13  519   shaq     big@berkeley.edu        hw2     100\n",
       "14  519   shaq     big@berkeley.edu        hw3      49"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "roster.merge(grades, left_on = 'sid', right_on = 'sid', how = 'inner')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here, we see that we have all grades of every student in the course, *if they have any assignment submissions.* You'll note that `johnson` doesn't appear in this table, and neither does the `billy` whose email is `notbilly@stanford.edu`. We also see that the student with `sid = 808` is not in this table.\n",
    "\n",
    "What if we want **at least one row for every single student in the roster, whether or not they submitted assignments?** This comes up in practice from the instructor's side when computing grades at the end of the semester – even if you didn't submit something (or anything!), you still need to be assigned a grade. To do this, I can try a left join:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sid</th>\n",
       "      <th>name</th>\n",
       "      <th>email</th>\n",
       "      <th>assignment</th>\n",
       "      <th>scores</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>94.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>82.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>93.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>53.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>90.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>194</td>\n",
       "      <td>billy</td>\n",
       "      <td>notbilly@stanford.edu</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>824</td>\n",
       "      <td>johnson</td>\n",
       "      <td>magic@berkeley.edu</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>881</td>\n",
       "      <td>ernie</td>\n",
       "      <td>ern@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>95.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>881</td>\n",
       "      <td>ernie</td>\n",
       "      <td>ern@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>88.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>49.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    sid     name                  email assignment  scores\n",
       "0   123      amy       amy@berkeley.edu        hw1    94.0\n",
       "1   123      amy       amy@berkeley.edu        hw3    82.0\n",
       "2   456    billy     billy@berkeley.edu        hw1    93.0\n",
       "3   456    billy     billy@berkeley.edu        hw2    53.0\n",
       "4   456    billy     billy@berkeley.edu        hw3    90.0\n",
       "5   194    billy  notbilly@stanford.edu        NaN     NaN\n",
       "6   994      sam      sam2@berkeley.edu        hw1    91.0\n",
       "7   994      sam      sam2@berkeley.edu        hw2   100.0\n",
       "8   994      sam      sam2@berkeley.edu        hw3    91.0\n",
       "9   430    carly    icarly@berkeley.edu        hw2   100.0\n",
       "10  430    carly    icarly@berkeley.edu        hw3   100.0\n",
       "11  824  johnson     magic@berkeley.edu        NaN     NaN\n",
       "12  881    ernie       ern@berkeley.edu        hw2    95.0\n",
       "13  881    ernie       ern@berkeley.edu        hw3    25.0\n",
       "14  519     shaq       big@berkeley.edu        hw1    88.0\n",
       "15  519     shaq       big@berkeley.edu        hw2   100.0\n",
       "16  519     shaq       big@berkeley.edu        hw3    49.0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "roster.merge(grades, how = 'left')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now, we see that rows exist for `johnson` and the other `billy`, with `NaN` for their assignment and scores. (Also, note that we can drop the `left_on` and `right_on` parameters if the column that we're merging on between the two tables has the same name. In practice, this won't be the case, so you should get used to specifying these values.)\n",
    "\n",
    "What if I want to see information for all students that submitted things? I can try a right join:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sid</th>\n",
       "      <th>name</th>\n",
       "      <th>email</th>\n",
       "      <th>assignment</th>\n",
       "      <th>scores</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>82</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>93</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>53</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>881</td>\n",
       "      <td>ernie</td>\n",
       "      <td>ern@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>881</td>\n",
       "      <td>ernie</td>\n",
       "      <td>ern@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>808</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hw1</td>\n",
       "      <td>75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>223</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hw2</td>\n",
       "      <td>89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>405</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hw2</td>\n",
       "      <td>96</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    sid   name                email assignment  scores\n",
       "0   123    amy     amy@berkeley.edu        hw1      94\n",
       "1   123    amy     amy@berkeley.edu        hw3      82\n",
       "2   456  billy   billy@berkeley.edu        hw1      93\n",
       "3   456  billy   billy@berkeley.edu        hw2      53\n",
       "4   456  billy   billy@berkeley.edu        hw3      90\n",
       "5   994    sam    sam2@berkeley.edu        hw1      91\n",
       "6   994    sam    sam2@berkeley.edu        hw2     100\n",
       "7   994    sam    sam2@berkeley.edu        hw3      91\n",
       "8   430  carly  icarly@berkeley.edu        hw2     100\n",
       "9   430  carly  icarly@berkeley.edu        hw3     100\n",
       "10  881  ernie     ern@berkeley.edu        hw2      95\n",
       "11  881  ernie     ern@berkeley.edu        hw3      25\n",
       "12  519   shaq     big@berkeley.edu        hw1      88\n",
       "13  519   shaq     big@berkeley.edu        hw2     100\n",
       "14  519   shaq     big@berkeley.edu        hw3      49\n",
       "15  808    NaN                  NaN        hw1      75\n",
       "16  223    NaN                  NaN        hw2      89\n",
       "17  405    NaN                  NaN        hw2      96"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "roster.merge(grades, how = 'right')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now, note that `johnson` and other `billy` don't appear in the output, but there are rows for `sid = 808` and the other two students who submitted work but aren't in the class (with `NaN` as their name and email).\n",
    "\n",
    "It should be noted that `LEFTJOIN(A, B)` is the same as `RIGHTJOIN(B, A)`. In other words, to generate the above dataframe, I could've used `roster.merge(grades, how = 'right')` OR `grades.merge(grades, how = 'left')`. The resulting dataframes would've been the same, just with the columns in a different order.\n",
    "\n",
    "Lastly, what if I want a combination of the above two dataframes? That is, rows for every student in the course, their assignment grades if they exist, and grades for students not in the class if they happened to submit. This can be achieved with an outer join:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sid</th>\n",
       "      <th>name</th>\n",
       "      <th>email</th>\n",
       "      <th>assignment</th>\n",
       "      <th>scores</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>94.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>82.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>93.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>53.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>90.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>194</td>\n",
       "      <td>billy</td>\n",
       "      <td>notbilly@stanford.edu</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>824</td>\n",
       "      <td>johnson</td>\n",
       "      <td>magic@berkeley.edu</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>881</td>\n",
       "      <td>ernie</td>\n",
       "      <td>ern@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>95.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>881</td>\n",
       "      <td>ernie</td>\n",
       "      <td>ern@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>88.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>49.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>808</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hw1</td>\n",
       "      <td>75.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>223</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hw2</td>\n",
       "      <td>89.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>405</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hw2</td>\n",
       "      <td>96.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    sid     name                  email assignment  scores\n",
       "0   123      amy       amy@berkeley.edu        hw1    94.0\n",
       "1   123      amy       amy@berkeley.edu        hw3    82.0\n",
       "2   456    billy     billy@berkeley.edu        hw1    93.0\n",
       "3   456    billy     billy@berkeley.edu        hw2    53.0\n",
       "4   456    billy     billy@berkeley.edu        hw3    90.0\n",
       "5   194    billy  notbilly@stanford.edu        NaN     NaN\n",
       "6   994      sam      sam2@berkeley.edu        hw1    91.0\n",
       "7   994      sam      sam2@berkeley.edu        hw2   100.0\n",
       "8   994      sam      sam2@berkeley.edu        hw3    91.0\n",
       "9   430    carly    icarly@berkeley.edu        hw2   100.0\n",
       "10  430    carly    icarly@berkeley.edu        hw3   100.0\n",
       "11  824  johnson     magic@berkeley.edu        NaN     NaN\n",
       "12  881    ernie       ern@berkeley.edu        hw2    95.0\n",
       "13  881    ernie       ern@berkeley.edu        hw3    25.0\n",
       "14  519     shaq       big@berkeley.edu        hw1    88.0\n",
       "15  519     shaq       big@berkeley.edu        hw2   100.0\n",
       "16  519     shaq       big@berkeley.edu        hw3    49.0\n",
       "17  808      NaN                    NaN        hw1    75.0\n",
       "18  223      NaN                    NaN        hw2    89.0\n",
       "19  405      NaN                    NaN        hw2    96.0"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = roster.merge(grades, how = 'outer')\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Demoing Pandas Methods\n",
    "\n",
    "Now, let's try some queries. Moving forward we'll use the below merged dataframe."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sid</th>\n",
       "      <th>name</th>\n",
       "      <th>email</th>\n",
       "      <th>assignment</th>\n",
       "      <th>scores</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>94.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>82.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>93.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>53.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>90.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>194</td>\n",
       "      <td>billy</td>\n",
       "      <td>notbilly@stanford.edu</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>824</td>\n",
       "      <td>johnson</td>\n",
       "      <td>magic@berkeley.edu</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>881</td>\n",
       "      <td>ernie</td>\n",
       "      <td>ern@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>95.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>881</td>\n",
       "      <td>ernie</td>\n",
       "      <td>ern@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>88.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>49.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    sid     name                  email assignment  scores\n",
       "0   123      amy       amy@berkeley.edu        hw1    94.0\n",
       "1   123      amy       amy@berkeley.edu        hw3    82.0\n",
       "2   456    billy     billy@berkeley.edu        hw1    93.0\n",
       "3   456    billy     billy@berkeley.edu        hw2    53.0\n",
       "4   456    billy     billy@berkeley.edu        hw3    90.0\n",
       "5   194    billy  notbilly@stanford.edu        NaN     NaN\n",
       "6   994      sam      sam2@berkeley.edu        hw1    91.0\n",
       "7   994      sam      sam2@berkeley.edu        hw2   100.0\n",
       "8   994      sam      sam2@berkeley.edu        hw3    91.0\n",
       "9   430    carly    icarly@berkeley.edu        hw2   100.0\n",
       "10  430    carly    icarly@berkeley.edu        hw3   100.0\n",
       "11  824  johnson     magic@berkeley.edu        NaN     NaN\n",
       "12  881    ernie       ern@berkeley.edu        hw2    95.0\n",
       "13  881    ernie       ern@berkeley.edu        hw3    25.0\n",
       "14  519     shaq       big@berkeley.edu        hw1    88.0\n",
       "15  519     shaq       big@berkeley.edu        hw2   100.0\n",
       "16  519     shaq       big@berkeley.edu        hw3    49.0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = roster.merge(grades, how = 'left')\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### What if I want to know how many submissions there were for each assignment?\n",
    "\n",
    "To do this, I can use `value_counts`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "hw3    6\n",
       "hw2    5\n",
       "hw1    4\n",
       "Name: assignment, dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['assignment'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### What if we want only scores on Homework 2?\n",
    "\n",
    "This requires a use of boolean filtering, which likely have already seen:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sid</th>\n",
       "      <th>name</th>\n",
       "      <th>email</th>\n",
       "      <th>assignment</th>\n",
       "      <th>scores</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>53.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>881</td>\n",
       "      <td>ernie</td>\n",
       "      <td>ern@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>95.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    sid   name                email assignment  scores\n",
       "3   456  billy   billy@berkeley.edu        hw2    53.0\n",
       "7   994    sam    sam2@berkeley.edu        hw2   100.0\n",
       "9   430  carly  icarly@berkeley.edu        hw2   100.0\n",
       "12  881  ernie     ern@berkeley.edu        hw2    95.0\n",
       "15  519   shaq     big@berkeley.edu        hw2   100.0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df['assignment'] == 'hw2']\n",
    "# grades.loc[grades['assignment'] == 'hw2', :]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### What if we want the average homework score for each assignment?\n",
    "\n",
    "To do this, I can group by `assignment`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sid</th>\n",
       "      <th>scores</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>assignment</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>hw1</th>\n",
       "      <td>523.000000</td>\n",
       "      <td>91.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>hw2</th>\n",
       "      <td>656.000000</td>\n",
       "      <td>89.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>hw3</th>\n",
       "      <td>567.166667</td>\n",
       "      <td>72.833333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   sid     scores\n",
       "assignment                       \n",
       "hw1         523.000000  91.500000\n",
       "hw2         656.000000  89.600000\n",
       "hw3         567.166667  72.833333"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('assignment').mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### What if we only want to see students with an average homework score of over 80%?\n",
    "\n",
    "I can create a custom `filter` function in order to help me do this."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sid</th>\n",
       "      <th>name</th>\n",
       "      <th>email</th>\n",
       "      <th>assignment</th>\n",
       "      <th>scores</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>94.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>82.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    sid   name                email assignment  scores\n",
       "0   123    amy     amy@berkeley.edu        hw1    94.0\n",
       "1   123    amy     amy@berkeley.edu        hw3    82.0\n",
       "6   994    sam    sam2@berkeley.edu        hw1    91.0\n",
       "7   994    sam    sam2@berkeley.edu        hw2   100.0\n",
       "8   994    sam    sam2@berkeley.edu        hw3    91.0\n",
       "9   430  carly  icarly@berkeley.edu        hw2   100.0\n",
       "10  430  carly  icarly@berkeley.edu        hw3   100.0"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def filter_fn(r):\n",
    "    # takes in all rows for a given student\n",
    "    # and returns true iff their mean homework score is > 80\n",
    "    return r['scores'].mean() > 80\n",
    "\n",
    "df.groupby(['sid']).filter(filter_fn)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### What if I want a new column with scores converted to letter grades?\n",
    "\n",
    "I can create a mapping between scores and letter grades, and then `apply` this to the scores column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def to_letter(x):\n",
    "    if x >= 90: return 'A'\n",
    "    elif x >= 80: return 'B'\n",
    "    elif x >= 70: return 'C'\n",
    "    elif x >= 60: return 'D'\n",
    "    else: return 'F'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sid</th>\n",
       "      <th>name</th>\n",
       "      <th>email</th>\n",
       "      <th>assignment</th>\n",
       "      <th>scores</th>\n",
       "      <th>letter</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>94.0</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>123</td>\n",
       "      <td>amy</td>\n",
       "      <td>amy@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>82.0</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>93.0</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>53.0</td>\n",
       "      <td>F</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>456</td>\n",
       "      <td>billy</td>\n",
       "      <td>billy@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>90.0</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>194</td>\n",
       "      <td>billy</td>\n",
       "      <td>notbilly@stanford.edu</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>F</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>91.0</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>994</td>\n",
       "      <td>sam</td>\n",
       "      <td>sam2@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>91.0</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>430</td>\n",
       "      <td>carly</td>\n",
       "      <td>icarly@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>100.0</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>824</td>\n",
       "      <td>johnson</td>\n",
       "      <td>magic@berkeley.edu</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>F</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>881</td>\n",
       "      <td>ernie</td>\n",
       "      <td>ern@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>95.0</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>881</td>\n",
       "      <td>ernie</td>\n",
       "      <td>ern@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>25.0</td>\n",
       "      <td>F</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw1</td>\n",
       "      <td>88.0</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw2</td>\n",
       "      <td>100.0</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>519</td>\n",
       "      <td>shaq</td>\n",
       "      <td>big@berkeley.edu</td>\n",
       "      <td>hw3</td>\n",
       "      <td>49.0</td>\n",
       "      <td>F</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    sid     name                  email assignment  scores letter\n",
       "0   123      amy       amy@berkeley.edu        hw1    94.0      A\n",
       "1   123      amy       amy@berkeley.edu        hw3    82.0      B\n",
       "2   456    billy     billy@berkeley.edu        hw1    93.0      A\n",
       "3   456    billy     billy@berkeley.edu        hw2    53.0      F\n",
       "4   456    billy     billy@berkeley.edu        hw3    90.0      A\n",
       "5   194    billy  notbilly@stanford.edu        NaN     NaN      F\n",
       "6   994      sam      sam2@berkeley.edu        hw1    91.0      A\n",
       "7   994      sam      sam2@berkeley.edu        hw2   100.0      A\n",
       "8   994      sam      sam2@berkeley.edu        hw3    91.0      A\n",
       "9   430    carly    icarly@berkeley.edu        hw2   100.0      A\n",
       "10  430    carly    icarly@berkeley.edu        hw3   100.0      A\n",
       "11  824  johnson     magic@berkeley.edu        NaN     NaN      F\n",
       "12  881    ernie       ern@berkeley.edu        hw2    95.0      A\n",
       "13  881    ernie       ern@berkeley.edu        hw3    25.0      F\n",
       "14  519     shaq       big@berkeley.edu        hw1    88.0      B\n",
       "15  519     shaq       big@berkeley.edu        hw2   100.0      A\n",
       "16  519     shaq       big@berkeley.edu        hw3    49.0      F"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['letter'] = df['scores'].apply(to_letter)\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### What if I want a more traditional gradebook?\n",
    "\n",
    "This might be something like the gradebook your teachers in high school or elementary school used, with the rows containing students' names, and the columns containing the various assignments in their course. To do this, we can use `pivot` (which you should've also seen in Data 8):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"4\" halign=\"left\">scores</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>assignment</th>\n",
       "      <th>NaN</th>\n",
       "      <th>hw1</th>\n",
       "      <th>hw2</th>\n",
       "      <th>hw3</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>123</th>\n",
       "      <td>NaN</td>\n",
       "      <td>94.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>82.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>194</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>430</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.0</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>456</th>\n",
       "      <td>NaN</td>\n",
       "      <td>93.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>90.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>519</th>\n",
       "      <td>NaN</td>\n",
       "      <td>88.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>49.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>824</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>881</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.0</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>994</th>\n",
       "      <td>NaN</td>\n",
       "      <td>91.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           scores                    \n",
       "assignment    NaN   hw1    hw2    hw3\n",
       "sid                                  \n",
       "123           NaN  94.0    NaN   82.0\n",
       "194           NaN   NaN    NaN    NaN\n",
       "430           NaN   NaN  100.0  100.0\n",
       "456           NaN  93.0   53.0   90.0\n",
       "519           NaN  88.0  100.0   49.0\n",
       "824           NaN   NaN    NaN    NaN\n",
       "881           NaN   NaN   95.0   25.0\n",
       "994           NaN  91.0  100.0   91.0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.pivot(index = 'sid', columns = 'assignment', values = ['scores'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In practice, this looks a little weird: let's get rid of the `NaN` column and replace all `NaN` values with a 0.\n",
    "   - To get rid of the `NaN` column, we can use `.iloc[:, 1:]` to take all columns starting with `hw1`.\n",
    "   - To get rid of the `NaN` values, we can use `.fillna(0)`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">scores</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>assignment</th>\n",
       "      <th>hw1</th>\n",
       "      <th>hw2</th>\n",
       "      <th>hw3</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>123</th>\n",
       "      <td>94.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>82.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>194</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>430</th>\n",
       "      <td>0.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>456</th>\n",
       "      <td>93.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>90.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>519</th>\n",
       "      <td>88.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>49.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>824</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>881</th>\n",
       "      <td>0.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>994</th>\n",
       "      <td>91.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           scores              \n",
       "assignment    hw1    hw2    hw3\n",
       "sid                            \n",
       "123          94.0    0.0   82.0\n",
       "194           0.0    0.0    0.0\n",
       "430           0.0  100.0  100.0\n",
       "456          93.0   53.0   90.0\n",
       "519          88.0  100.0   49.0\n",
       "824           0.0    0.0    0.0\n",
       "881           0.0   95.0   25.0\n",
       "994          91.0  100.0   91.0"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.pivot(index = 'sid', columns = 'assignment', values = ['scores']).iloc[:, 1:].fillna(0)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}