from datascience import *
import numpy as np
import plotly.express as px
sat = Table.read_table('data/sat2014-lecture.csv')
sat
sat.num_rows, sat.num_columns
It would be nice to have a combined score too.
sat.column('Critical Reading') + sat.column('Math') + sat.column('Writing')
sat = sat.with_columns(
'Combined', sat.column('Critical Reading') + sat.column('Math') + sat.column('Writing')
)
sat
sort
¶Which states had the highest combined scores? The lowest?
sat.sort('Combined')
# By default, descending is False
sat.sort('Combined', descending = True)
We can sort by any column:
sat.sort('Participation Rate')
sat.sort('Math', descending = True)
# Can also sort alphabetical columns; in this case the table was sorted by state name by default
sat.sort('State')
take
¶sat
sat.take(2)
sat.take(np.array([1, 4, 3]))
np.arange(5)
sat.take(np.arange(5))
When we combine sort
and take
, we can get some pretty powerful answers.
What are the top 5 states according to math scores?
sat.sort('Combined', descending = True).take(np.arange(5))
What are the top 8 states in terms of participation?
sat.sort('Participation Rate', descending = True).take(np.arange(8))
Note: .take
works on arrays too, not just tables!
sat.column('State').take(np.arange(5))
sat.take(np.arange(5)).column('State')
animals = Table.read_table('data/animals.csv')
animals
# animals._____(_____).column(_____).take(_____)
where
¶sat
sat.where('Combined', are.above(1800))
sat.where('State', are.equal_to('California'))
sat.where('State', are.containing('Dakota'))
sat.where('Math', are.between(580, 600))
are.equal_to
¶sat.where('State', are.equal_to('Pennsylvania'))
sat.where('State', 'Pennsylvania')
sat
sat.where('Participation Rate', are.above(20)).where('Combined', are.above(1500))
sat.where('Participation Rate', are.below(10)).where('Combined', are.above(1600))
deep_south = np.array(['Alabama', 'Georgia', 'Louisiana', 'Mississippi', 'South Carolina'])
sat.where('State', are.contained_in(deep_south))
sat.where('State', are.contained_in(deep_south)) \
.where('Participation Rate', are.below(10)) \
.where('Combined', are.above(1600))
px.scatter(data_frame = sat.to_df(),
x = 'Combined',
y = 'Participation Rate',
hover_data = {'State': True},
title = 'Participation Rate vs. Combined SAT Score for States in 2014')
wnba = Table.read_table('data/wnba-2020.csv').select('Player', 'Tm', 'Pos', 'G', 'PTS')
wnba
# wnba.where(____, ____) \
# .where('G',____) \
# .column(____).mean()