Lecture 29 – Fun with Plotly

Data 94, Spring 2021

In [1]:
# The star import supplies the Table class used throughout this notebook.
from datascience import *
import numpy as np
# Switch datascience's Table plotting into its interactive mode
# (NOTE(review): presumably plotly-backed — confirm against the datascience docs).
Table.interactive_plots()
# Plotly Express draws every figure in the cells below.
import plotly.express as px
# seaborn is used in the visible cells only to load the penguins sample dataset.
import seaborn as sns

Animated scatter plots

In [2]:
# Load the Gapminder sample data bundled with Plotly Express and wrap it in a
# datascience Table. Per the preview, each row is one (country, year) observation
# with life expectancy, population, GDP per capita and ISO country codes.
world = Table.from_df(px.data.gapminder())
world
Out[2]:
country continent year lifeExp pop gdpPercap iso_alpha iso_num
Afghanistan Asia 1952 28.801 8425333 779.445 AFG 4
Afghanistan Asia 1957 30.332 9240934 820.853 AFG 4
Afghanistan Asia 1962 31.997 10267083 853.101 AFG 4
Afghanistan Asia 1967 34.02 11537966 836.197 AFG 4
Afghanistan Asia 1972 36.088 13079460 739.981 AFG 4
Afghanistan Asia 1977 38.438 14880372 786.113 AFG 4
Afghanistan Asia 1982 39.854 12881816 978.011 AFG 4
Afghanistan Asia 1987 40.822 13867957 852.396 AFG 4
Afghanistan Asia 1992 41.674 16317921 649.341 AFG 4
Afghanistan Asia 1997 41.763 22227415 635.341 AFG 4

... (1694 rows omitted)

In [3]:
# Animated bubble chart: GDP per capita (log-scaled x) against life expectancy,
# with one animation frame per value of 'year'.
px.scatter(
    data_frame=world.to_df(),
    # Position
    x='gdpPercap',
    y='lifeExp',
    log_x=True,
    range_y=[30, 90],  # pin the y-axis so every frame shares the same range
    # Marker appearance
    color='continent',
    size='pop',
    size_max=60,
    hover_name='country',
    # Animation and labelling
    animation_frame='year',
    title='Life Expectancy, GDP Per Capita, and Population over Time',
)

Animated histograms

In [4]:
# Animated histogram of life expectancy, one frame per value of 'year'.
# Both axis ranges are pinned so the frames are directly comparable.
px.histogram(
    data_frame=world.to_df(),
    x='lifeExp',
    range_x=[20, 90],
    range_y=[0, 50],
    animation_frame='year',
    title='Distribution of Life Expectancy over Time',
)

Boxplots

In [5]:
# Keep only the rows whose year is 2007
# (the name suggests this is the most recent year in the data — TODO confirm).
world_latest = world.where('year', are.equal_to(2007))
world_latest
Out[5]:
country continent year lifeExp pop gdpPercap iso_alpha iso_num
Afghanistan Asia 2007 43.828 31889923 974.58 AFG 4
Albania Europe 2007 76.423 3600523 5937.03 ALB 8
Algeria Africa 2007 72.301 33333216 6223.37 DZA 12
Angola Africa 2007 42.731 12420476 4797.23 AGO 24
Argentina Americas 2007 75.32 40301927 12779.4 ARG 32
Australia Oceania 2007 81.235 20434176 34435.4 AUS 36
Austria Europe 2007 79.829 8199783 36126.5 AUT 40
Bahrain Asia 2007 75.635 708573 29796 BHR 48
Bangladesh Asia 2007 64.062 150448339 1391.25 BGD 50
Belgium Europe 2007 79.441 10392226 33692.6 BEL 56

... (132 rows omitted)

In [6]:
# One box per continent showing the spread of 2007 life expectancy;
# hovering over a point shows the country name.
px.box(
    data_frame=world_latest.to_df(),
    x='continent',
    y='lifeExp',
    color='continent',
    hover_name='country',
    title='Distribution of Life Expectancy in 2007 by Continent',
)

Pie charts

In [7]:
# Preview the 2007 rows for countries in the Americas.
world_latest.where('continent', are.equal_to('Americas'))
Out[7]:
country continent year lifeExp pop gdpPercap iso_alpha iso_num
Argentina Americas 2007 75.32 40301927 12779.4 ARG 32
Bolivia Americas 2007 65.554 9119152 3822.14 BOL 68
Brazil Americas 2007 72.39 190010647 9065.8 BRA 76
Canada Americas 2007 80.653 33390141 36319.2 CAN 124
Chile Americas 2007 78.553 16284741 13171.6 CHL 152
Colombia Americas 2007 72.889 44227550 7006.58 COL 170
Costa Rica Americas 2007 78.782 4133884 9645.06 CRI 188
Cuba Americas 2007 78.273 11416987 8948.1 CUB 192
Dominican Republic Americas 2007 72.235 9319622 6025.37 DOM 214
Ecuador Americas 2007 74.994 13755680 6873.26 ECU 218

... (15 rows omitted)

In [8]:
# Pie chart of population within the Americas: one slice per country,
# sized by that country's 'pop' value in world_latest.
px.pie(
    data_frame=world_latest.where('continent', 'Americas').to_df(),
    names='country',
    values='pop',
    title='Population of the Americas',
)
In [9]:
# Total population per continent, taken from the rows of world_latest.
# Select the two needed columns *before* grouping so that `sum` is applied only
# to 'pop', instead of aggregating every column (year, lifeExp, gdpPercap, ...)
# and then discarding all but one. The result is the same two-column table:
# 'continent' and 'pop sum'.
world_for_pie = (
    world_latest
    .select('continent', 'pop')
    .group('continent', sum)
)

world_for_pie
Out[9]:
continent pop sum
Africa 929539692
Americas 898871184
Asia 3811953827
Europe 586098529
Oceania 24549947
In [10]:
# Pie chart of population by continent: one slice per row of world_for_pie,
# sized by the summed-population column.
px.pie(
    data_frame=world_for_pie.to_df(),
    names='continent',
    values='pop sum',
    title='World Population by Continent',
)

Timelines (also known as Gantt charts)

In [11]:
# Each entry is [phase name, start date, end date, location]; dates are
# ISO-formatted 'YYYY-MM-DD' strings. The order matches the column labels below.
phases = [
    ['Newborn', '1998-11-26', '1999-11-26', 'Canada'],
    ['Toddler, Preschooler', '1999-11-26', '2005-09-03', 'US'],
    ['Elementary School Student', '2005-09-03', '2009-06-30', 'Canada'],
    ['Middle School Student', '2009-09-15', '2012-06-15', 'Canada'],
    ['High School Student', '2012-09-05', '2016-05-30', 'Canada'],
    ['Undergrad @ UC Berkeley', '2016-08-22', '2020-05-15', 'US'],
    ['Masters @ UC Berkeley', '2020-08-25', '2021-05-14', 'Canada'],
    ['Teaching Data 94', '2021-01-20', '2021-05-14', 'Canada'],
]

# Start from an empty, labelled Table and append one row per phase.
phases_table = Table(
    labels=['Phase', 'Start', 'End', 'Location']
).with_rows(phases)
phases_table
Out[11]:
Phase Start End Location
Newborn 1998-11-26 1999-11-26 Canada
Toddler, Preschooler 1999-11-26 2005-09-03 US
Elementary School Student 2005-09-03 2009-06-30 Canada
Middle School Student 2009-09-15 2012-06-15 Canada
High School Student 2012-09-05 2016-05-30 Canada
Undergrad @ UC Berkeley 2016-08-22 2020-05-15 US
Masters @ UC Berkeley 2020-08-25 2021-05-14 Canada
Teaching Data 94 2021-01-20 2021-05-14 Canada
In [12]:
# Timeline (Gantt-style) chart: one horizontal bar per phase spanning Start..End,
# labelled with the Location text.
(
    px.timeline(
        data_frame=phases_table.to_df(),
        x_start='Start',
        x_end='End',
        y='Phase',
        text='Location',
        title='My Life Trajectory',
    )
    # Reverse the y-axis direction relative to Plotly's default ordering.
    .update_yaxes(autorange='reversed')
)

Choropleths

In [13]:
# Re-display the 2007 table as a reminder of its columns; 'iso_alpha' holds the
# three-letter country codes (e.g. AFG, ALB) that the world choropleth keys on.
world_latest
Out[13]:
country continent year lifeExp pop gdpPercap iso_alpha iso_num
Afghanistan Asia 2007 43.828 31889923 974.58 AFG 4
Albania Europe 2007 76.423 3600523 5937.03 ALB 8
Algeria Africa 2007 72.301 33333216 6223.37 DZA 12
Angola Africa 2007 42.731 12420476 4797.23 AGO 24
Argentina Americas 2007 75.32 40301927 12779.4 ARG 32
Australia Oceania 2007 81.235 20434176 34435.4 AUS 36
Austria Europe 2007 79.829 8199783 36126.5 AUT 40
Bahrain Asia 2007 75.635 708573 29796 BHR 48
Bangladesh Asia 2007 64.062 150448339 1391.25 BGD 50
Belgium Europe 2007 79.441 10392226 33692.6 BEL 56

... (132 rows omitted)

In [14]:
# World map shaded by life expectancy. Countries are matched to map regions
# through the three-letter codes in 'iso_alpha'.
px.choropleth(
    data_frame=world_latest.to_df(),
    locations='iso_alpha',
    color='lifeExp',
    color_continuous_scale=px.colors.sequential.tempo,
    hover_name='country',
    title='Life Expectancy Per Country',
)
In [15]:
# Read the Walmart store data from a CSV at a path relative to the notebook's
# working directory. Per the preview, rows carry a store number, opening date,
# address fields, state (STRSTATE) and LAT/LON coordinates.
wm = Table.read_table('data/walmart.csv')
wm
Out[15]:
storenum OPENDATE date_super conversion st county STREETADDR STRCITY STRSTATE ZIPCODE type_store LAT LON MONTH DAY YEAR
1 7/1/62 3/1/97 1 5 7 2110 WEST WALNUT Rogers AR 72756 Supercenter 36.3422 -94.0714 7 1 1962
2 8/1/64 3/1/96 1 5 9 1417 HWY 62/65 N Harrison AR 72601 Supercenter 36.237 -93.0935 8 1 1964
4 8/1/65 3/1/02 1 5 7 2901 HWY 412 EAST Siloam Springs AR 72761 Supercenter 36.1799 -94.5021 8 1 1965
8 10/1/67 3/1/93 1 5 29 1621 NORTH BUSINESS 9 Morrilton AR 72110 Supercenter 35.1565 -92.7586 10 1 1967
7 10/1/67 nan nan 5 119 3801 CAMP ROBINSON RD. North Little Rock AR 72118 Wal-Mart 34.8133 -92.3023 10 1 1967
10 7/1/68 3/1/98 1 40 21 2020 SOUTH MUSKOGEE Tahlequah OK 74464 Supercenter 35.9237 -94.9719 7 1 1968
13 11/1/68 3/1/96 1 29 97 2705 GRAND AVE Carthage MO 64836 Supercenter 37.169 -94.3116 11 1 1968
12 7/1/68 3/1/94 1 40 131 1500 LYNN RIGGS BLVD Claremore OK 74017 Supercenter 36.3271 -95.6119 7 1 1968
11 3/1/68 2/20/02 1 5 5 65 WAL-MART DRIVE Mountain Home AR 72653 Supercenter 36.329 -92.3578 3 1 1968
9 3/1/68 3/1/00 1 29 143 1303 SOUTH MAIN Sikeston MO 63801 Supercenter 36.8912 -89.5836 3 1 1968

... (2982 rows omitted)

In [16]:
# Number of Walmarts per state
wm_per_state = wm.group('STRSTATE')
wm_per_state
Out[16]:
STRSTATE count
AL 90
AR 81
AZ 55
CA 159
CO 56
DE 8
FL 175
GA 114
IA 55
ID 17

... (31 rows omitted)

In [17]:
# US map shaded by store count. 'STRSTATE' holds two-letter state abbreviations,
# so locations are interpreted in 'USA-states' mode and the map is limited to the USA.
px.choropleth(
    data_frame=wm_per_state.to_df(),
    locations='STRSTATE',
    locationmode='USA-states',
    scope='usa',
    color='count',
    title='Number of Walmarts Per State',
)

3D scatter plots

In [18]:
# Load seaborn's 'penguins' sample dataset and wrap it in a datascience Table.
# Note from the preview: some rows contain nan in the measurement and/or sex columns.
penguins = Table.from_df(sns.load_dataset('penguins'))
penguins
Out[18]:
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
Adelie Torgersen 39.1 18.7 181 3750 Male
Adelie Torgersen 39.5 17.4 186 3800 Female
Adelie Torgersen 40.3 18 195 3250 Female
Adelie Torgersen nan nan nan nan nan
Adelie Torgersen 36.7 19.3 193 3450 Female
Adelie Torgersen 39.3 20.6 190 3650 Male
Adelie Torgersen 38.9 17.8 181 3625 Female
Adelie Torgersen 39.2 19.6 195 4675 Male
Adelie Torgersen 34.1 18.1 193 3475 nan
Adelie Torgersen 42 20.2 190 4250 nan

... (334 rows omitted)

In [19]:
# 3D scatter of three penguin body measurements, coloured by species;
# hovering over a point shows the island it was recorded on.
px.scatter_3d(
    data_frame=penguins.to_df(),
    x='bill_length_mm',
    y='bill_depth_mm',
    z='flipper_length_mm',
    color='species',
    hover_name='island',
    title='Flipper Length vs. Bill Depth vs. Bill Length',
)