from datascience import *
import numpy as np
# Load the pups data from a CSV file into a Table.
pups = Table.read_table('data/pups.csv')
# Bare expression: presumably displays the table when run as a notebook cell.
pups
# Pair a 'human years' label with 7 times each value in the 'age' column.
# The result is not assigned, so the name `pups` still refers to the
# table loaded above.
pups.with_columns(
'human years', pups.column('age') * 7
)
def seven_times(x):
    """Return ``x`` multiplied by 7.

    Parameters:
        x: any value that supports multiplication by an int
           (for example a dog's age in years).

    Returns:
        The product ``7 * x``.
    """
    return 7 * x
# Call seven_times on each value in the 'age' column of pups.
pups.apply(seven_times, 'age')
Note: we wouldn't actually use the example above, since we could simply write `pups.column('age') * 7`.
Here's a more useful example:
def email_from_name(name):
    """Build a lowercase ``first.last@dogschool.edu`` address from a full name.

    Parameters:
        name: a full name made of at least two whitespace-separated
              words, e.g. ``'Champ Major'``.

    Returns:
        The email address as a lowercase string. Only the first and last
        words of the name are used; any words in between are ignored.

    Raises:
        ValueError: if ``name`` contains fewer than two words.
    """
    # split() with no argument collapses runs of whitespace and drops
    # leading/trailing blanks, so stray spaces don't break the unpacking.
    first, *_, last = name.split()
    return f'{first}.{last}@dogschool.edu'.lower()
# Can use email_from_name on a single argument
email_from_name('Champ Major')
# Call email_from_name on each value in the 'name' column.
pups.apply(email_from_name, 'name')
# Same call again, this time paired with an 'email' column label.
# The result is not assigned to a name.
pups.with_columns('email', pups.apply(email_from_name, 'name'))
# Note: the parameter names don't need to match the column labels
# ('age' and 'size') that the function is later applied to.
def human_years_converter(years_old, kind):
    """Convert a dog's age to human years using a size-dependent factor.

    Parameters:
        years_old: the dog's age in years.
        kind: the dog's size category. ``'small'`` uses a factor of 6 and
              ``'medium'`` a factor of 7; every other value uses 8.

    Returns:
        ``years_old`` multiplied by the factor for ``kind``.
    """
    if kind == 'small':
        multiplier = 6
    elif kind == 'medium':
        multiplier = 7
    else:
        # Any category other than 'small' or 'medium' falls through here.
        multiplier = 8
    return years_old * multiplier
# Try the converter on single values first: same age, two size categories.
human_years_converter(11, 'medium')
human_years_converter(11, 'small')
# Two column labels are passed, matching the function's two parameters
# in order: 'age' -> years_old, 'size' -> kind.
pups.apply(human_years_converter, 'age', 'size')
# Same call again, paired with an 'accurate human years' column label.
# The result is not assigned to a name.
pups.with_columns('accurate human years', pups.apply(human_years_converter, 'age', 'size'))
# Large file – this may take ~10 seconds to load
# The CSV is read from a URL rather than a local path.
salary = Table.read_table('https://media.githubusercontent.com/media/dailycal-projects/ucb-faculty-salary/master/data/salary/salary_2015.csv')
salary
# Keep four columns, then only the rows whose 'title' contains 'PROF'.
profs = salary.select('first', 'last', 'title', 'gross').where('title', are.containing('PROF'))
profs
Look at the very last row of the output – that gross income doesn't look right.
# Sort by 'gross' in descending order; the result is not assigned.
profs.sort('gross', descending = True)
It's because the entries in the `'gross'` column are strings, not integers.
# Look at the first entry of the 'gross' column to see its raw value.
profs.column('gross').item(0)
Your job is to fix that!
# Exercise template: every _____ below is a blank left to be filled in.
# fix_income receives one value (`income`) and returns the expression
# written in place of the blank.
# NOTE(review): presumably meant to turn one 'gross' entry into a number;
# the string format is not shown here, so confirm before filling in.
def fix_income(income):
return _____
# Blanks: the function to apply and the column label to apply it to.
fixed_income = profs.apply(_____, _____)
# Reassign profs with a 'gross' column built from the blank's value.
profs = profs.with_columns(
'gross', _____
)
# Minimum percentage needed to earn each letter grade.
grade_bins = {
    'A+': 97,
    'A': 92,
    'B+': 85,
    'B': 79,
    'C+': 74,
    'C': 68,
    'D+': 58,
    'D': 50,
    'F': 0,
}


def pct_to_letter(pct):
    """Return the letter grade earned by a percentage score.

    Parameters:
        pct: the score as a percentage (int or float).

    Returns:
        The letter in ``grade_bins`` with the highest cutoff that ``pct``
        meets or exceeds, or ``None`` if ``pct`` is below every cutoff
        (for example a negative score).
    """
    # Walk the cutoffs from highest to lowest explicitly, so the answer
    # does not depend on the order the entries appear in grade_bins.
    highest_first = sorted(
        grade_bins.items(), key=lambda entry: entry[1], reverse=True
    )
    for letter, cutoff in highest_first:
        if pct >= cutoff:
            return letter
    return None
# Spot-check the converter on two individual scores.
pct_to_letter(59)
pct_to_letter(98)
# Build a small example gradebook with seven rows: a name, a grading
# option ('GRD' or 'PNP') and a numeric score for each student.
gradebook = Table().with_columns(
'Name', np.array(['Carrera', 'Panamera', 'Taycan', 'Cayenne', 'Macan', 'Cayman', 'Boxster']),
'Grading Option', np.array(['GRD', 'PNP', 'PNP', 'GRD', 'GRD', 'GRD', 'PNP']),
'Score', np.array([98, 86, 67.5, 45, 82, 88, 71])
)
gradebook
# Convert every 'Score' to a letter; the grading option is not consulted.
gradebook.apply(pct_to_letter, 'Score')
What if we want to factor in grading options?
def pct_to_letter_option(pct, option):
    """Return the grade for a score, taking the grading option into account.

    Parameters:
        pct: the score as a percentage.
        option: the grading option. ``'GRD'`` means a letter grade; any
                other value is treated as pass/no-pass.

    Returns:
        The result of ``pct_to_letter(pct)`` when ``option`` is ``'GRD'``;
        otherwise ``'P'`` if ``pct`` is at least the ``'C'`` cutoff in
        ``grade_bins`` and ``'NP'`` if it is not.
    """
    # Students enrolled for a letter grade get the regular conversion.
    if option == 'GRD':
        return pct_to_letter(pct)
    # Otherwise, passing nominally requires at least a C-; the C cutoff
    # is used here because grade_bins has no C- entry.
    return 'P' if pct >= grade_bins['C'] else 'NP'
# Two column labels are passed, matching the function's parameters in
# order: 'Score' -> pct, 'Grading Option' -> option.
gradebook.apply(pct_to_letter_option, 'Score', 'Grading Option')
# Reassign gradebook with the results under a 'Letter Grade' label.
gradebook = gradebook.with_columns(
'Letter Grade', gradebook.apply(pct_to_letter_option, 'Score', 'Grading Option')
)
gradebook
# Boolean indexing on a numpy array: the list has one True/False per
# element, and only the elements at True positions (15 and 1) are kept.
numbers = np.array([15, 14, -2, 1, 9])
numbers[[True, False, False, True, False]]
gradebook
# The same idea with a table: one boolean per row (seven in total).
# NOTE(review): presumably keeps the rows marked True - confirm against
# the datascience `Table.where` documentation.
gradebook.where([True, False, False, True, True, False, False])
Run the following cell – you can ignore the `lambda` parts:
# Load the raw country data from disk.
countries = Table.read_table('data/countries.csv')

# Give the three long column labels shorter names.
countries = (
    countries
    .relabeled('Country(or dependent territory)', 'Country')
    .relabeled('% of world', '%')
    .relabeled('Source(official or UN)', 'Source')
)

# Tidy three columns:
#   'Country'    - drop everything from the first '[' onward, then lowercase
#   'Population' - strip commas and convert to int
#   '%'          - strip the percent sign and convert to float
countries = countries.with_columns(
    'Country',
    countries.apply(lambda label: label.partition('[')[0].lower(), 'Country'),
    'Population',
    countries.apply(lambda count: int(count.replace(',', '')), 'Population'),
    '%',
    countries.apply(lambda share: float(share.replace('%', '')), '%'),
)
countries
def starts_or_ends_with_a(name):
    """Return True if ``name`` begins or ends with a lowercase ``'a'``.

    Parameters:
        name: the string to check. The comparison is case-sensitive, so
              an uppercase ``'A'`` does not count.

    Returns:
        ``True`` if the first or last character is ``'a'``, otherwise
        ``False``. An empty string yields ``False``.
    """
    # startswith/endswith are safe on an empty string, unlike name[0].
    return name.startswith('a') or name.endswith('a')
# Evaluate the predicate for every value in the 'Country' column.
countries.apply(starts_or_ends_with_a, 'Country')
# Pass those per-row results to where().
# NOTE(review): presumably keeps only the rows where the predicate is
# True - confirm against the datascience `Table.where` documentation.
countries.where(countries.apply(starts_or_ends_with_a, 'Country'))