Lecture 16 – Table Fundamentals

Data 94, Spring 2021

In [1]:
from datascience import *
import numpy as np
In [2]:
schools = Table.read_table('data/cal_unis.csv')
In [3]:
schools
Out[3]:
Name City County Enrollment Founded
University of California, Berkeley Berkeley Alameda 42,519 1869
University of California, Davis Davis Yolo 39,152 1905
University of California, Irvine Irvine Orange 35,220 1965
University of California, Los Angeles Los Angeles Los Angeles 45,428 1882
University of California, Merced Merced Merced 8,544 2005
University of California, Riverside Riverside Riverside 23,278 1954
University of California, San Diego San Diego San Diego 38,798 1960
University of California, Santa Barbara Santa Barbara Santa Barbara 24,346 1891
University of California, Santa Cruz Santa Cruz Santa Cruz 19,700 1965
California State University Maritime Academy Vallejo Solano 1,017 1929

... (22 rows omitted)

In [4]:
schools.num_rows
Out[4]:
32
In [5]:
schools.num_columns
Out[5]:
5
In [6]:
# A subset of schools just for illustration purposes
some_schools = schools.take(np.arange(5))
In [8]:
some_schools
Out[8]:
Name City County Enrollment Founded
University of California, Berkeley Berkeley Alameda 42,519 1869
University of California, Davis Davis Yolo 39,152 1905
University of California, Irvine Irvine Orange 35,220 1965
University of California, Los Angeles Los Angeles Los Angeles 45,428 1882
University of California, Merced Merced Merced 8,544 2005
In [9]:
some_schools.column('City')
Out[9]:
array(['Berkeley', 'Davis', 'Irvine', 'Los Angeles', 'Merced'],
      dtype='<U15')
In [10]:
some_schools.column(2)
Out[10]:
array(['Alameda', 'Yolo', 'Orange', 'Los Angeles', 'Merced'], dtype='<U15')

Quick Check 1

In [11]:
countries = Table.read_table('data/us-state-capitals.csv')
In [12]:
countries
Out[12]:
name description latitude longitude
Alabama Montgomery 32.3777 -86.3006
Alaska Juneau 58.3016 -134.42
Arizona Phoenix 33.4481 -112.097
Arkansas Little Rock 34.7466 -92.289
California Sacramento 38.5767 -121.494
Colorado Denver 39.7392 -104.985
Connecticut Hartford
41.764 -72.6822
Delaware Dover 39.1573 -75.5197
Hawaii Honolulu 21.3074 -157.857
Florida Tallahassee 30.4381 -84.2813

... (40 rows omitted)

In [ ]:
# countries.column(___)
In [ ]:
 

select and drop

In [13]:
some_schools
Out[13]:
Name City County Enrollment Founded
University of California, Berkeley Berkeley Alameda 42,519 1869
University of California, Davis Davis Yolo 39,152 1905
University of California, Irvine Irvine Orange 35,220 1965
University of California, Los Angeles Los Angeles Los Angeles 45,428 1882
University of California, Merced Merced Merced 8,544 2005
In [14]:
some_schools.select('Name', 'Enrollment')
Out[14]:
Name Enrollment
University of California, Berkeley 42,519
University of California, Davis 39,152
University of California, Irvine 35,220
University of California, Los Angeles 45,428
University of California, Merced 8,544
In [15]:
some_schools.drop('Founded', 'County')
Out[15]:
Name City Enrollment
University of California, Berkeley Berkeley 42,519
University of California, Davis Davis 39,152
University of California, Irvine Irvine 35,220
University of California, Los Angeles Los Angeles 45,428
University of California, Merced Merced 8,544
In [16]:
some_schools
Out[16]:
Name City County Enrollment Founded
University of California, Berkeley Berkeley Alameda 42,519 1869
University of California, Davis Davis Yolo 39,152 1905
University of California, Irvine Irvine Orange 35,220 1965
University of California, Los Angeles Los Angeles Los Angeles 45,428 1882
University of California, Merced Merced Merced 8,544 2005

with_columns

Adding columns

In [17]:
some_schools
Out[17]:
Name City County Enrollment Founded
University of California, Berkeley Berkeley Alameda 42,519 1869
University of California, Davis Davis Yolo 39,152 1905
University of California, Irvine Irvine Orange 35,220 1965
University of California, Los Angeles Los Angeles Los Angeles 45,428 1882
University of California, Merced Merced Merced 8,544 2005
In [18]:
some_schools.with_columns(
    'Nickname', np.array(['Cal', 'UCD', 'UCI', 'UCLA', 'UCM'])
)
Out[18]:
Name City County Enrollment Founded Nickname
University of California, Berkeley Berkeley Alameda 42,519 1869 Cal
University of California, Davis Davis Yolo 39,152 1905 UCD
University of California, Irvine Irvine Orange 35,220 1965 UCI
University of California, Los Angeles Los Angeles Los Angeles 45,428 1882 UCLA
University of California, Merced Merced Merced 8,544 2005 UCM
In [19]:
some_schools.with_columns(
    'Nickname', np.array(['Cal', 'UCD', 'UCI', 'UCLA', 'UCM']),
    'Years Old', 2021 - some_schools.column('Founded')
)
Out[19]:
Name City County Enrollment Founded Nickname Years Old
University of California, Berkeley Berkeley Alameda 42,519 1869 Cal 152
University of California, Davis Davis Yolo 39,152 1905 UCD 116
University of California, Irvine Irvine Orange 35,220 1965 UCI 56
University of California, Los Angeles Los Angeles Los Angeles 45,428 1882 UCLA 139
University of California, Merced Merced Merced 8,544 2005 UCM 16

Creating tables from scratch

In [20]:
Table()
Out[20]:
In [21]:
states = Table().with_columns(
    'State', np.array(['California', 'New York', 'Florida', 'Texas', 'Pennsylvania']),
    'Code', np.array(['CA', 'NY', 'FL', 'TX', 'PA']),
    'Population', np.array([39.3, 19.3, 21.7, 29.3, 12.8])
)
In [22]:
states
Out[22]:
State Code Population
California CA 39.3
New York NY 19.3
Florida FL 21.7
Texas TX 29.3
Pennsylvania PA 12.8

Quick Check 2

In [23]:
states
Out[23]:
State Code Population
California CA 39.3
New York NY 19.3
Florida FL 21.7
Texas TX 29.3
Pennsylvania PA 12.8
In [ ]:
states._____('Population').with_columns(
    ____, ____
)
In [ ]:
 

Additional methods

show

In [24]:
schools.show(3)
Name City County Enrollment Founded
University of California, Berkeley Berkeley Alameda 42,519 1869
University of California, Davis Davis Yolo 39,152 1905
University of California, Irvine Irvine Orange 35,220 1965

... (29 rows omitted)

In [25]:
schools.show()
Name City County Enrollment Founded
University of California, Berkeley Berkeley Alameda 42,519 1869
University of California, Davis Davis Yolo 39,152 1905
University of California, Irvine Irvine Orange 35,220 1965
University of California, Los Angeles Los Angeles Los Angeles 45,428 1882
University of California, Merced Merced Merced 8,544 2005
University of California, Riverside Riverside Riverside 23,278 1954
University of California, San Diego San Diego San Diego 38,798 1960
University of California, Santa Barbara Santa Barbara Santa Barbara 24,346 1891
University of California, Santa Cruz Santa Cruz Santa Cruz 19,700 1965
California State University Maritime Academy Vallejo Solano 1,017 1929
California Polytechnic State University San Luis Obispo San Luis Obispo 21,812 1901
California State Polytechnic University, Pomona Pomona Los Angeles 26,443 1938
California State University, Bakersfield Bakersfield Kern 10,493 1965
California State University Channel Islands Camarillo Ventura 7,095 2002
California State University, Chico Chico Butte 17,488 1887
California State University, Dominguez Hills Carson Los Angeles 15,741 1960
California State University, East Bay Hayward Alameda 14,525 1959
California State University, Fresno Fresno Fresno 24,995 1911
California State University, Fullerton Fullerton Orange 39,774 1957
California State University, Long Beach Long Beach Los Angeles 36,846 1949
California State University, Los Angeles Los Angeles Los Angeles 27,685 1947
California State University, Monterey Bay Seaside-Marina Monterey 7,079 1994
California State University, Northridge Northridge Los Angeles 38,716 1958
California State University, Sacramento Sacramento Sacramento 31,131 1947
California State University, San Bernardino San Bernardino San Bernardino 19,973 1965
California State University San Marcos San Marcos San Diego 14,511 1988
California State University, Stanislaus Turlock Stanislaus 10,214 1957
Humboldt State University Arcata Humboldt 7,774 1913
San Diego State University San Diego San Diego 34,881 1897
San Francisco State University San Francisco San Francisco 29,586 1899
San Jose State University San Jose Santa Clara 32,828 1857
Sonoma State University Rohnert Park Sonoma 9,201 1960

labels

In [26]:
schools.show(5)
Name City County Enrollment Founded
University of California, Berkeley Berkeley Alameda 42,519 1869
University of California, Davis Davis Yolo 39,152 1905
University of California, Irvine Irvine Orange 35,220 1965
University of California, Los Angeles Los Angeles Los Angeles 45,428 1882
University of California, Merced Merced Merced 8,544 2005

... (27 rows omitted)

In [27]:
# The result is a "tuple" – think of it as a basic list
schools.labels
Out[27]:
('Name', 'City', 'County', 'Enrollment', 'Founded')
In [28]:
schools.relabeled('Name', 'University').show(5)
University City County Enrollment Founded
University of California, Berkeley Berkeley Alameda 42,519 1869
University of California, Davis Davis Yolo 39,152 1905
University of California, Irvine Irvine Orange 35,220 1965
University of California, Los Angeles Los Angeles Los Angeles 45,428 1882
University of California, Merced Merced Merced 8,544 2005

... (27 rows omitted)

Example: WNBA data

In [29]:
wnba = Table.read_table('data/wnba-2020.csv')
In [30]:
wnba
Out[30]:
Player Tm Pos G GS MP FG FGA FG% 3P 3PA 3P% 2P 2PA 2P% eFG% FT FTA FT% ORB TRB AST STL BLK TOV PF PTS
Natalie Achonwa IND F 18 11 364 57 115 0.496 0 10 0 57 105 0.543 0.496 26 32 0.813 26 99 31 9 11 33 46 140
Jaylyn Agnew ATL F 12 0 71 4 15 0.267 3 13 0.231 1 2 0.5 0.367 4 5 0.8 0 5 3 0 0 5 7 15
Bella Alarie DAL C-F 22 3 309 24 66 0.364 1 13 0.077 23 53 0.434 0.371 10 12 0.833 23 63 11 14 18 11 35 59
Kayla Alexander MIN C 16 0 89 16 30 0.533 0 0 nan 16 30 0.533 0.533 5 8 0.625 9 15 3 2 3 6 14 37
Julie Allemand IND G 22 22 716 61 134 0.455 44 92 0.478 17 42 0.405 0.619 22 30 0.733 8 100 128 25 9 57 57 188
Lindsay Allen LVA G 21 21 284 28 66 0.424 6 17 0.353 22 49 0.449 0.47 8 10 0.8 6 23 51 7 1 16 20 70
Kristine Anigwe LAS F-C 17 1 197 32 53 0.604 0 0 nan 32 53 0.604 0.604 14 26 0.538 18 45 4 11 7 16 33 78
Ariel Atkins WAS G 22 22 682 110 251 0.438 44 107 0.411 66 144 0.458 0.526 62 70 0.886 18 64 53 40 4 42 58 326
Seimone Augustus LAS G-F 21 0 332 54 110 0.491 12 22 0.545 42 88 0.477 0.545 4 6 0.667 3 37 25 12 2 8 20 124
Rachel Banham MIN G 20 1 339 48 104 0.462 25 53 0.472 23 51 0.451 0.582 16 20 0.8 2 25 47 9 2 20 40 137

... (152 rows omitted)

In [32]:
wnba.num_rows, wnba.num_columns
Out[32]:
(162, 27)
In [33]:
wnba_pts = wnba.select('Player', 'Tm', 'Pos', 'G', 'PTS')
In [34]:
wnba_pts
Out[34]:
Player Tm Pos G PTS
Natalie Achonwa IND F 18 140
Jaylyn Agnew ATL F 12 15
Bella Alarie DAL C-F 22 59
Kayla Alexander MIN C 16 37
Julie Allemand IND G 22 188
Lindsay Allen LVA G 21 70
Kristine Anigwe LAS F-C 17 78
Ariel Atkins WAS G 22 326
Seimone Augustus LAS G-F 21 124
Rachel Banham MIN G 20 137

... (152 rows omitted)

In [37]:
# Computes number of points scored per game
wnba_pts.column('PTS') / wnba_pts.column('G')
Out[37]:
array([ 7.77777778,  1.25      ,  2.68181818,  2.3125    ,  8.54545455,
        3.33333333,  4.58823529, 14.81818182,  5.9047619 ,  6.85      ,
        8.5       ,  9.81818182, 19.72727273,  0.8       ,  3.        ,
        6.41176471,  0.30769231,  7.22727273,  7.85      ,  0.        ,
        6.59090909,  3.18181818,  3.71428571,  4.3       , 17.4375    ,
        5.38095238,  4.5       , 11.47368421,  9.95454545,  2.5       ,
        2.66666667, 16.13636364,  1.76470588,  6.95      , 14.86363636,
        3.57142857,  4.95238095, 16.23809524, 12.86363636,  0.        ,
        6.84615385,  5.86363636, 17.72727273,  6.26666667,  1.5       ,
       12.54545455, 14.57142857,  1.83333333,  2.72222222,  3.16666667,
       13.05      , 14.        ,  1.4       , 17.66666667,  2.41666667,
        1.4       , 13.        ,  6.38461538,  3.76190476,  6.80952381,
       14.61538462,  8.52941176,  5.68181818,  6.09090909, 17.        ,
        4.88888889,  2.94736842,  9.5       ,  0.        ,  0.        ,
        0.        , 18.33333333,  5.        ,  5.        ,  0.        ,
        0.        ,  4.        ,  1.6       ,  4.72222222,  5.11764706,
        0.        ,  5.11111111,  9.2       ,  4.        , 11.19047619,
       10.8       ,  6.90909091, 17.18181818,  1.38461538,  5.52631579,
       15.45454545, 10.        ,  6.54545455,  2.        ,  5.        ,
        3.5       , 12.5       ,  0.        ,  2.68421053,  2.55      ,
       14.40909091, 10.90909091, 13.        , 17.90909091,  9.5       ,
       12.73684211,  2.28571429,  3.375     ,  3.53846154, 12.19047619,
        5.5       , 22.77272727, 13.33333333, 14.72727273, 13.35      ,
        4.        ,  3.76470588,  3.55555556,  2.46153846, 16.33333333,
        4.        ,  4.26666667, 15.36363636,  7.40909091,  3.40909091,
        2.88235294,  3.54545455, 13.875     ,  5.        ,  4.05      ,
        9.38461538,  3.85714286,  6.10526316, 11.53846154, 19.7       ,
        5.72727273,  6.13636364,  2.83333333,  2.86363636, 10.0952381 ,
       18.73684211, 15.47619048, 10.21052632,  7.31818182,  1.7       ,
        7.22727273, 13.63636364,  6.13636364,  4.83333333,  3.27777778,
        7.28571429,  2.66666667,  8.09090909,  6.84210526, 14.6       ,
       10.13636364,  7.72727273, 10.52380952,  5.81818182, 20.45454545,
       10.95454545,  9.        ])
In [38]:
wnba_pts = wnba_pts.with_columns(
    'PPG', wnba_pts.column('PTS') / wnba_pts.column('G')
)
In [39]:
wnba_pts
Out[39]:
Player Tm Pos G PTS PPG
Natalie Achonwa IND F 18 140 7.77778
Jaylyn Agnew ATL F 12 15 1.25
Bella Alarie DAL C-F 22 59 2.68182
Kayla Alexander MIN C 16 37 2.3125
Julie Allemand IND G 22 188 8.54545
Lindsay Allen LVA G 21 70 3.33333
Kristine Anigwe LAS F-C 17 78 4.58824
Ariel Atkins WAS G 22 326 14.8182
Seimone Augustus LAS G-F 21 124 5.90476
Rachel Banham MIN G 20 137 6.85

... (152 rows omitted)