from datascience import *
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
Run the following cell to load in our full dataset.
cars = Table.read_table('data/models-2021.csv')
Here we'll take a subset of the rows and columns for illustration.
gm = cars.where('Manufacturer', 'General Motors').select('Brand', 'Model', 'Cylinders', 'MPG').take([0, 1, 9, 16, 20, 30, 31, 35, -1]).take([1, 2, 4, 8, 5, 6, 3, 7, 0])
gm
Brand | Model | Cylinders | MPG |
---|---|---|---|
Buick | ENCLAVE FWD | 6 | 21 |
Cadillac | CT4 AWD | 4 | 26 |
Cadillac | XT5 AWD | 4 | 23 |
GMC | YUKON XL 4WD | 6 | 22 |
Chevrolet | CAMARO | 4 | 25 |
Chevrolet | COLORADO 2WD | 4 | 22 |
Cadillac | ESCALADE 2WD | 6 | 23 |
Chevrolet | EQUINOX AWD | 4 | 27 |
Buick | ENCLAVE AWD | 6 | 20 |
group
¶gm
Brand | Model | Cylinders | MPG |
---|---|---|---|
Buick | ENCLAVE FWD | 6 | 21 |
Cadillac | CT4 AWD | 4 | 26 |
Cadillac | XT5 AWD | 4 | 23 |
GMC | YUKON XL 4WD | 6 | 22 |
Chevrolet | CAMARO | 4 | 25 |
Chevrolet | COLORADO 2WD | 4 | 22 |
Cadillac | ESCALADE 2WD | 6 | 23 |
Chevrolet | EQUINOX AWD | 4 | 27 |
Buick | ENCLAVE AWD | 6 | 20 |
gm.group('Brand')
Brand | count |
---|---|
Buick | 2 |
Cadillac | 3 |
Chevrolet | 3 |
GMC | 1 |
gm.group('Cylinders')
Cylinders | count |
---|---|
4 | 5 |
6 | 4 |
# shuffles the rows in the table; returns a new table
cars.shuffle()
Manufacturer | Brand | Model | Displacement | Cylinders | MPG | Wheel |
---|---|---|---|---|---|---|
Volkswagen Group of | Audi | R8 Spyder | 5.2 | 10 | 16 | All Wheel Drive |
Toyota | TOYOTA | TACOMA 4WD D-CAB MT TRD-ORP/PRO | 3.5 | 6 | 18 | Part-time 4-Wheel Drive |
Volkswagen Group of | Volkswagen | Tiguan | 2 | 4 | 25 | 2-Wheel Drive, Front |
Ford Motor Company | Ford | MUSTANG HO COUPE | 2.3 | 4 | 23 | 2-Wheel Drive, Rear |
Kia | KIA MOTORS CORPORATION | Rio | 1.6 | 4 | 36 | 2-Wheel Drive, Front |
Volkswagen Group of | Audi | S6 | 2.9 | 6 | 22 | All Wheel Drive |
Porsche | Porsche | Panamera 4 Executive | 2.9 | 6 | 19 | All Wheel Drive |
General Motors | Chevrolet | SPARK | 1.4 | 4 | 33 | 2-Wheel Drive, Front |
General Motors | GMC | TERRAIN AWD | 1.5 | 4 | 26 | All Wheel Drive |
Ford Motor Company | Ford | EXPEDITION 2WD | 3.5 | 6 | 19 | 2-Wheel Drive, Rear |
... (764 rows omitted)
cars.group('Brand').sort('count', descending = True)
Brand | count |
---|---|
Mercedes-Benz | 84 |
BMW | 76 |
TOYOTA | 52 |
Porsche | 45 |
Ford | 42 |
Audi | 41 |
LEXUS | 35 |
NISSAN | 29 |
Chevrolet | 27 |
HYUNDAI MOTOR COMPANY | 22 |
... (31 rows omitted)
collect
function¶gm.group('Brand', np.mean)
Brand | Model mean | Cylinders mean | MPG mean |
---|---|---|---|
Buick | 6 | 20.5 | |
Cadillac | 4.66667 | 24 | |
Chevrolet | 4 | 24.6667 | |
GMC | 6 | 22 |
How does this work under the hood?
gm.where('Brand', 'Buick')
Brand | Model | Cylinders | MPG |
---|---|---|---|
Buick | ENCLAVE FWD | 6 | 21 |
Buick | ENCLAVE AWD | 6 | 20 |
print('mean of Cylinders: ', gm.where('Brand', 'Buick').column('Cylinders').mean())
print('mean of MPG: ', gm.where('Brand', 'Buick').column('MPG').mean())
mean of Cylinders: 6.0 mean of MPG: 20.5
gm.where('Brand', 'Cadillac')
Brand | Model | Cylinders | MPG |
---|---|---|---|
Cadillac | CT4 AWD | 4 | 26 |
Cadillac | XT5 AWD | 4 | 23 |
Cadillac | ESCALADE 2WD | 6 | 23 |
print('mean of Cylinders: ', gm.where('Brand', 'Cadillac').column('Cylinders').mean())
print('mean of MPG: ', gm.where('Brand', 'Cadillac').column('MPG').mean())
mean of Cylinders: 4.666666666666667 mean of MPG: 24.0
gm.where('Brand', 'Chevrolet')
Brand | Model | Cylinders | MPG |
---|---|---|---|
Chevrolet | CAMARO | 4 | 25 |
Chevrolet | COLORADO 2WD | 4 | 22 |
Chevrolet | EQUINOX AWD | 4 | 27 |
print('mean of Cylinders: ', gm.where('Brand', 'Chevrolet').column('Cylinders').mean())
print('mean of MPG: ', gm.where('Brand', 'Chevrolet').column('MPG').mean())
mean of Cylinders: 4.0 mean of MPG: 24.666666666666668
gm.where('Brand', 'GMC')
Brand | Model | Cylinders | MPG |
---|---|---|---|
GMC | YUKON XL 4WD | 6 | 22 |
print('mean of Cylinders: ', gm.where('Brand', 'GMC').column('Cylinders').mean())
print('mean of MPG: ', gm.where('Brand', 'GMC').column('MPG').mean())
mean of Cylinders: 6.0 mean of MPG: 22.0
If you want a more concise way of doing the above:
for brand in np.unique(gm.column('Brand')):
brand_only = gm.where('Brand', brand)
print(brand)
print('mean of Cylinders: ', brand_only.column('Cylinders').mean())
print('mean of MPG: ', brand_only.column('MPG').mean())
print('\n')
Buick mean of Cylinders: 6.0 mean of MPG: 20.5 Cadillac mean of Cylinders: 4.666666666666667 mean of MPG: 24.0 Chevrolet mean of Cylinders: 4.0 mean of MPG: 24.666666666666668 GMC mean of Cylinders: 6.0 mean of MPG: 22.0
What if we use other collect
functions?
gm
Brand | Model | Cylinders | MPG |
---|---|---|---|
Buick | ENCLAVE FWD | 6 | 21 |
Cadillac | CT4 AWD | 4 | 26 |
Cadillac | XT5 AWD | 4 | 23 |
GMC | YUKON XL 4WD | 6 | 22 |
Chevrolet | CAMARO | 4 | 25 |
Chevrolet | COLORADO 2WD | 4 | 22 |
Cadillac | ESCALADE 2WD | 6 | 23 |
Chevrolet | EQUINOX AWD | 4 | 27 |
Buick | ENCLAVE AWD | 6 | 20 |
gm.group('Brand', sum)
Brand | Model sum | Cylinders sum | MPG sum |
---|---|---|---|
Buick | 12 | 41 | |
Cadillac | 14 | 72 | |
Chevrolet | 12 | 74 | |
GMC | 6 | 22 |
gm.group('Brand', list)
/opt/miniconda3/lib/python3.8/site-packages/datascience/tables.py:920: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray values = np.array(tuple(values))
Brand | Model list | Cylinders list | MPG list |
---|---|---|---|
Buick | ['ENCLAVE FWD', 'ENCLAVE AWD'] | [6, 6] | [21, 20] |
Cadillac | ['CT4 AWD', 'XT5 AWD', 'ESCALADE 2WD'] | [4, 4, 6] | [26, 23, 23] |
Chevrolet | ['CAMARO', 'COLORADO 2WD', 'EQUINOX AWD'] | [4, 4, 4] | [25, 22, 27] |
GMC | ['YUKON XL 4WD'] | [6] | [22] |
gm.group('Brand', len)
Brand | Model len | Cylinders len | MPG len |
---|---|---|---|
Buick | 2 | 2 | 2 |
Cadillac | 3 | 3 | 3 |
Chevrolet | 3 | 3 | 3 |
GMC | 1 | 1 | 1 |
gm.group('Brand', max)
Brand | Model max | Cylinders max | MPG max |
---|---|---|---|
Buick | ENCLAVE FWD | 6 | 21 |
Cadillac | XT5 AWD | 6 | 26 |
Chevrolet | EQUINOX AWD | 4 | 27 |
GMC | YUKON XL 4WD | 6 | 22 |
cars.shuffle()
Manufacturer | Brand | Model | Displacement | Cylinders | MPG | Wheel |
---|---|---|---|---|---|---|
Toyota | LEXUS | ES 300h | 2.5 | 4 | 44 | 2-Wheel Drive, Front |
Ford Motor Company | Ford | EXPLORER AWD | 2.3 | 4 | 23 | Part-time 4-Wheel Drive |
Mercedes-Benz | Mercedes-Benz | AMG E53 4MATIC+ (Convertible) | 3 | 6 | 23 | 4-Wheel Drive |
Volkswagen Group of | Volkswagen | Atlas | 2 | 4 | 22 | 2-Wheel Drive, Front |
Rolls-Royce | Rolls-Royce Motor Cars Limited | Ghost | 6.7 | 12 | 14 | All Wheel Drive |
Nissan | NISSAN | ALTIMA SV/SL | 2.5 | 4 | 31 | 2-Wheel Drive, Front |
Mercedes-Benz | Mercedes-Benz | AMG S 63 4MATIC+ (convertible) | 4 | 8 | 17 | 4-Wheel Drive |
BMW | BMW | X3 M | 3 | 6 | 16 | All Wheel Drive |
Ford Motor Company | Ford | EXPLORER FFV AWD | 3.3 | 6 | 19 | Part-time 4-Wheel Drive |
Toyota | TOYOTA | C-HR | 2 | 4 | 29 | 2-Wheel Drive, Front |
... (764 rows omitted)
# cars.group('Cylinders', ____) \
# .where(____, 6) \
# .column(____) \
# .____
cars.group('Cylinders', np.mean).bar('Cylinders', 'MPG mean')
group
ing by multiple columns¶cars
Manufacturer | Brand | Model | Displacement | Cylinders | MPG | Wheel |
---|---|---|---|---|---|---|
BMW | BMW | 228i Gran Coupe | 2 | 4 | 28 | 2-Wheel Drive, Front |
BMW | BMW | 228i xDrive Gran Coupe | 2 | 4 | 27 | All Wheel Drive |
BMW | BMW | 230i Convertible | 2 | 4 | 27 | 2-Wheel Drive, Rear |
BMW | BMW | 230i Coupe | 2 | 4 | 28 | 2-Wheel Drive, Rear |
BMW | BMW | 230i xDrive Convertible | 2 | 4 | 24 | All Wheel Drive |
BMW | BMW | 230i xDrive Coupe | 2 | 4 | 24 | All Wheel Drive |
BMW | BMW | 330i | 2 | 4 | 30 | 2-Wheel Drive, Rear |
BMW | BMW | 330i xDrive | 2 | 4 | 28 | All Wheel Drive |
BMW | BMW | 430i Coupe | 2 | 4 | 29 | 2-Wheel Drive, Rear |
BMW | BMW | 430i xDrive Coupe | 2 | 4 | 27 | All Wheel Drive |
... (764 rows omitted)
cars.group(['Manufacturer', 'Brand']).show()
Manufacturer | Brand | count |
---|---|---|
BMW | BMW | 76 |
BMW | Mini | 17 |
BMW | TOYOTA | 2 |
FCA US LLC | ALFA ROMEO | 4 |
FCA US LLC | Chrysler | 5 |
FCA US LLC | Dodge | 12 |
FCA US LLC | FIAT | 1 |
FCA US LLC | Jeep | 20 |
FCA US LLC | RAM | 7 |
Ferrari | Ferrari North America, Inc. | 7 |
Ford Motor Company | Ford | 42 |
Ford Motor Company | Lincoln | 9 |
General Motors | Buick | 8 |
General Motors | Cadillac | 20 |
General Motors | Chevrolet | 27 |
General Motors | GMC | 15 |
Honda | Acura | 10 |
Honda | Honda | 17 |
Hyundai | GENESIS | 8 |
Hyundai | HYUNDAI MOTOR COMPANY | 22 |
Jaguar Land Rover L | Jaguar | 10 |
Jaguar Land Rover L | Land Rover | 19 |
Kia | KIA MOTORS CORPORATION | 22 |
Lotus | Lotus Cars Ltd | 1 |
MAZDA | MAZDA | 15 |
Maserati | MASERATI | 11 |
Mercedes-Benz | Mercedes-Benz | 84 |
Mitsubishi Motors Co | Mitsubishi Motors Corporation | 4 |
Nissan | INFINITI | 12 |
Nissan | NISSAN | 29 |
Porsche | Porsche | 45 |
Rolls-Royce | Rolls-Royce Motor Cars Limited | 10 |
Subaru | Subaru | 11 |
Toyota | LEXUS | 35 |
Toyota | TOYOTA | 50 |
Volkswagen Group of | Audi | 41 |
Volkswagen Group of | Bentley | 4 |
Volkswagen Group of | Bugatti | 2 |
Volkswagen Group of | Lamborghini | 6 |
Volkswagen Group of | Volkswagen | 14 |
Volvo | Volvo Cars of North America, LLC | 14 |
aston martin | Aston Martin Lagonda Ltd | 6 |
cars.group(['Brand', 'Cylinders'], np.mean)
Brand | Cylinders | Manufacturer mean | Model mean | Displacement mean | MPG mean | Wheel mean |
---|---|---|---|---|---|---|
ALFA ROMEO | 4 | 2 | 25.5 | |||
Acura | 4 | 2.04444 | 24.4444 | |||
Acura | 6 | 3.5 | 21 | |||
Aston Martin Lagonda Ltd | 8 | 4 | 18 | |||
Aston Martin Lagonda Ltd | 12 | 5.2 | 17.5 | |||
Audi | 4 | 2 | 25.6875 | |||
Audi | 5 | 2.5 | 24 | |||
Audi | 6 | 2.97143 | 21.6429 | |||
Audi | 8 | 4 | 16.5 | |||
Audi | 10 | 5.2 | 16.5 |
... (83 rows omitted)
cars.group(['Manufacturer', 'Brand', 'Displacement'])
Manufacturer | Brand | Displacement | count |
---|---|---|---|
BMW | BMW | 2 | 22 |
BMW | BMW | 3 | 34 |
BMW | BMW | 4.4 | 19 |
BMW | BMW | 6.6 | 1 |
BMW | Mini | 1.5 | 5 |
BMW | Mini | 2 | 12 |
BMW | TOYOTA | 2 | 1 |
BMW | TOYOTA | 3 | 1 |
FCA US LLC | ALFA ROMEO | 2 | 4 |
FCA US LLC | Chrysler | 3.6 | 5 |
... (144 rows omitted)
pivot
¶cars.group(['Brand', 'Cylinders']).show()
Brand | Cylinders | count |
---|---|---|
ALFA ROMEO | 4 | 4 |
Acura | 4 | 9 |
Acura | 6 | 1 |
Aston Martin Lagonda Ltd | 8 | 4 |
Aston Martin Lagonda Ltd | 12 | 2 |
Audi | 4 | 16 |
Audi | 5 | 1 |
Audi | 6 | 14 |
Audi | 8 | 6 |
Audi | 10 | 4 |
BMW | 4 | 22 |
BMW | 6 | 34 |
BMW | 8 | 19 |
BMW | 12 | 1 |
Bentley | 8 | 4 |
Bugatti | 16 | 2 |
Buick | 3 | 2 |
Buick | 4 | 4 |
Buick | 6 | 2 |
Cadillac | 4 | 16 |
Cadillac | 6 | 4 |
Chevrolet | 3 | 2 |
Chevrolet | 4 | 15 |
Chevrolet | 6 | 9 |
Chevrolet | 8 | 1 |
Chrysler | 6 | 5 |
Dodge | 6 | 6 |
Dodge | 8 | 6 |
FIAT | 4 | 1 |
Ferrari North America, Inc. | 8 | 5 |
Ferrari North America, Inc. | 12 | 2 |
Ford | 3 | 4 |
Ford | 4 | 20 |
Ford | 6 | 16 |
Ford | 8 | 2 |
GENESIS | 4 | 6 |
GENESIS | 6 | 2 |
GMC | 4 | 8 |
GMC | 6 | 7 |
HYUNDAI MOTOR COMPANY | 4 | 20 |
HYUNDAI MOTOR COMPANY | 6 | 2 |
Honda | 4 | 10 |
Honda | 6 | 7 |
INFINITI | 4 | 2 |
INFINITI | 6 | 8 |
INFINITI | 8 | 2 |
Jaguar | 4 | 1 |
Jaguar | 6 | 6 |
Jaguar | 8 | 3 |
Jeep | 4 | 10 |
Jeep | 6 | 8 |
Jeep | 8 | 2 |
KIA MOTORS CORPORATION | 4 | 19 |
KIA MOTORS CORPORATION | 6 | 3 |
LEXUS | 4 | 11 |
LEXUS | 6 | 19 |
LEXUS | 8 | 5 |
Lamborghini | 8 | 1 |
Lamborghini | 10 | 3 |
Lamborghini | 12 | 2 |
Land Rover | 4 | 5 |
Land Rover | 6 | 9 |
Land Rover | 8 | 5 |
Lincoln | 4 | 4 |
Lincoln | 6 | 5 |
Lotus Cars Ltd | 6 | 1 |
MASERATI | 6 | 7 |
MASERATI | 8 | 4 |
MAZDA | 4 | 15 |
Mercedes-Benz | 4 | 30 |
Mercedes-Benz | 6 | 23 |
Mercedes-Benz | 8 | 31 |
Mini | 3 | 5 |
Mini | 4 | 12 |
Mitsubishi Motors Corporation | 3 | 2 |
Mitsubishi Motors Corporation | 4 | 2 |
NISSAN | 4 | 17 |
NISSAN | 6 | 7 |
NISSAN | 8 | 5 |
Porsche | 4 | 7 |
Porsche | 6 | 30 |
Porsche | 8 | 8 |
RAM | 4 | 1 |
RAM | 6 | 5 |
RAM | 8 | 1 |
Rolls-Royce Motor Cars Limited | 12 | 10 |
Subaru | 4 | 11 |
TOYOTA | 4 | 35 |
TOYOTA | 6 | 12 |
TOYOTA | 8 | 5 |
Volkswagen | 4 | 13 |
Volkswagen | 6 | 1 |
Volvo Cars of North America, LLC | 4 | 14 |
cars.pivot('Cylinders', 'Brand', 'MPG', np.mean)
Brand | 3 | 4 | 5 | 6 | 8 | 10 | 12 | 16 |
---|---|---|---|---|---|---|---|---|
ALFA ROMEO | 0 | 25.5 | 0 | 0 | 0 | 0 | 0 | 0 |
Acura | 0 | 24.4444 | 0 | 21 | 0 | 0 | 0 | 0 |
Aston Martin Lagonda Ltd | 0 | 0 | 0 | 0 | 18 | 0 | 17.5 | 0 |
Audi | 0 | 25.6875 | 24 | 21.6429 | 16.5 | 16.5 | 0 | 0 |
BMW | 0 | 26.8636 | 0 | 22.3529 | 17.4211 | 0 | 16 | 0 |
Bentley | 0 | 0 | 0 | 0 | 18.25 | 0 | 0 | 0 |
Bugatti | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10.5 |
Buick | 28.5 | 26 | 0 | 20.5 | 0 | 0 | 0 | 0 |
Cadillac | 0 | 23.5625 | 0 | 21.75 | 0 | 0 | 0 | 0 |
Chevrolet | 28.5 | 25.5333 | 0 | 20 | 19 | 0 | 0 | 0 |
... (31 rows omitted)
cars
Manufacturer | Brand | Model | Displacement | Cylinders | MPG | Wheel |
---|---|---|---|---|---|---|
BMW | BMW | 228i Gran Coupe | 2 | 4 | 28 | 2-Wheel Drive, Front |
BMW | BMW | 228i xDrive Gran Coupe | 2 | 4 | 27 | All Wheel Drive |
BMW | BMW | 230i Convertible | 2 | 4 | 27 | 2-Wheel Drive, Rear |
BMW | BMW | 230i Coupe | 2 | 4 | 28 | 2-Wheel Drive, Rear |
BMW | BMW | 230i xDrive Convertible | 2 | 4 | 24 | All Wheel Drive |
BMW | BMW | 230i xDrive Coupe | 2 | 4 | 24 | All Wheel Drive |
BMW | BMW | 330i | 2 | 4 | 30 | 2-Wheel Drive, Rear |
BMW | BMW | 330i xDrive | 2 | 4 | 28 | All Wheel Drive |
BMW | BMW | 430i Coupe | 2 | 4 | 29 | 2-Wheel Drive, Rear |
BMW | BMW | 430i xDrive Coupe | 2 | 4 | 27 | All Wheel Drive |
... (764 rows omitted)
# cars.pivot(___, ___, ___, ___)