from datascience import *
import numpy as np
import seaborn as sns
# Turn on interactive plotting for Table charts before any chart is drawn.
Table.interactive_plots()
# Run this cell.
# Load seaborn's Titanic dataset, discard every row containing a missing
# value, and keep only the three columns used below.
titanic_df = sns.load_dataset('titanic').dropna()
full_titanic = Table.from_df(titanic_df)
titanic = full_titanic.select('sex', 'age', 'fare')
titanic
# Visualizations are for humans!
# Scatter plot of fare against age; show=False leaves the figure as the
# cell's value instead of displaying it from inside the call.
age_and_fare = titanic.select('age', 'fare')
age_and_fare.scatter(
    'age',
    'fare',
    width=500,
    height=500,
    title='Fare vs. age for Titanic passengers',
    show=False,
)
# Visualize, then quantify!
# Load seaborn's 'anscombe' dataset and wrap it in a datascience Table.
anscombe_df = sns.load_dataset('anscombe')
ans = Table.from_df(anscombe_df)
ans
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Draw each of the four Anscombe datasets as its own scatter trace on a
# 2x2 grid of subplots.
fig = make_subplots(rows=2, cols=2, start_cell="bottom-left")

# (row, col) slot for each dataset label.
# NOTE(review): with start_cell="bottom-left" row 1 is presumably the bottom
# row, which would put I and II on top -- confirm against the plotly docs.
panel_for = {'I': (2, 1), 'II': (2, 2), 'III': (1, 1), 'IV': (1, 2)}

for ds, (row, col) in panel_for.items():
    subset = ans.where('dataset', ds)
    trace = go.Scatter(
        x=subset.column('x'),
        y=subset.column('y'),
        mode='markers',
        name='Dataset ' + ds,
    )
    fig.add_trace(trace, row=row, col=col)

fig.update_layout(title="Anscombe's Quartet")
fig.show()
# Small example table pairing three cities with their area codes.
cities = np.array(['Berkeley', 'San Francisco', 'Palo Alto'])
area_codes = np.array([510, 415, 650])
bay_codes = Table().with_columns('city', cities, 'area code', area_codes)
bay_codes
# Group passengers by sex, aggregating the other columns with np.mean, then
# draw the 'age mean' column as a horizontal bar chart.
mean_by_sex = titanic.group('sex', np.mean)
mean_by_sex.select('sex', 'age mean').barh(
    'sex',
    'age mean',
    title='Average age of females and males on the Titanic',
    show=False,
)