from datascience import *
import numpy as np
join
ing¶phones = Table().with_columns(
'Model', np.array(['iPhone 12', 'iPhone 12 Pro Max', 'Samsung Galaxy S21', 'OnePlus 8']),
'Price', np.array([799, 1099, 799, 699]),
'Screen Size', np.array([6.1, 6.7, 6.2, 6.6])
)
inventory = Table().with_columns(
'Handset', np.array(['Samsung Galaxy S21', 'iPhone 12', 'iPhone 12', 'OnePlus 8', 'Pixel 5']),
'Units', np.array([50, 40, 10, 100, 25]),
'Store', np.array(['Berkeley', 'Berkeley', 'San Francisco', 'Oakland', 'Oakland'])
)
phones
inventory
phones.join('Model', inventory, 'Handset')
inventory.join('Handset', phones, 'Model')
store = phones.join('Model', inventory, 'Handset')
store
store.column('Price') * store.column('Units')
# Total value of all of the phones in my inventory (that I know the price of)
np.sum(store.column('Price') * store.column('Units'))
# Equivalent to the above
np.dot(store.column('Price'), store.column('Units'))
contacts = Table().with_columns(
'Name', np.array(['Roxanne', 'Sandy', 'Stan', 'Tomas', 'Wilma']),
'Email', np.array(['roxanne@berkeley.edu', 'sandy@nyu.edu', 'stan.vg@gmail.com', 'tomastrain@umich.edu', 'wilma@columbia.edu']),
'Area Code', np.array([510, 212, 734, 734, 212]),
)
codes = Table().with_columns(
'Code', np.array([212, 310, 519, 734]),
'Region', np.array(['New York City', 'Los Angeles', 'Ontario, Canada', 'Metro Detroit'])
)
contacts
codes
# contacts.join(___, ___, ___)
extra_codes = Table().with_columns(
'Code', np.array([212, 212, 519, 734]),
'Region', np.array(['New York City', 'Los Angeles', 'Ontario, Canada', 'Metro Detroit'])
)
contacts
extra_codes
contacts.join('Area Code', extra_codes, 'Code')
# No output – because there are no matches between
# the 'Name' column in contacts and the 'Code' column in codes
contacts.join('Name', codes, 'Code')
roster = Table.read_table('data/roster.csv')
grades = Table.read_table('data/grades.csv')
roster.show()
grades.show()
roster.num_rows
grades.num_rows
grades_merged = roster.join('SID', grades)
grades_merged
grades_merged.num_rows
Let's see if there are any students in the roster who we don't have grades for:
for sid in roster.column('SID'):
if sid not in grades.column('SID'):
display(roster.where('SID', sid))
And vice versa:
for sid in grades.column('SID'):
if sid not in roster.column('SID'):
display(grades.where('SID', sid))
Whoever the above students are, they're not in our roster. (This happens often when students drop a class.)
Now we can do some grade calculations:
grades_merged
assignment_totals = {
'Homework 1': 24,
'Homework 2': 18,
'Quiz 1': 100,
'Quiz 2': 70,
'Final': 90
}
# Divides each assignment column by its denominator in assignment_totals
for assignment in assignment_totals.keys():
grades_merged = grades_merged.with_columns(
assignment + ' Percentage', grades_merged.column(assignment) / assignment_totals[assignment]
)
grades_merged
.row
¶phones
phones.row(1)
type(phones.row(1))
phones.row(1).item(1)
list(phones.row(-1))
.with_rows
¶phones
phones.with_row(['iPhone 12 Mini', 699, 5.8])
phones.with_rows([['iPhone 12 Mini', 699, 5.8],
['Moto RAZR', 459, 3.5]])
codes