from datascience import *
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
Run the following cell to load in our full dataset.
cars = Table.read_table('data/models-2021.csv')
Here we'll take a subset of the rows and columns for illustration.
gm = cars.where('Manufacturer', 'General Motors').select('Brand', 'Model', 'Cylinders', 'MPG').take([0, 1, 9, 16, 20, 30, 31, 35, -1]).take([1, 2, 4, 8, 5, 6, 3, 7, 0])
gm
| Brand | Model | Cylinders | MPG |
|---|---|---|---|
| Buick | ENCLAVE FWD | 6 | 21 |
| Cadillac | CT4 AWD | 4 | 26 |
| Cadillac | XT5 AWD | 4 | 23 |
| GMC | YUKON XL 4WD | 6 | 22 |
| Chevrolet | CAMARO | 4 | 25 |
| Chevrolet | COLORADO 2WD | 4 | 22 |
| Cadillac | ESCALADE 2WD | 6 | 23 |
| Chevrolet | EQUINOX AWD | 4 | 27 |
| Buick | ENCLAVE AWD | 6 | 20 |
group¶gm
| Brand | Model | Cylinders | MPG |
|---|---|---|---|
| Buick | ENCLAVE FWD | 6 | 21 |
| Cadillac | CT4 AWD | 4 | 26 |
| Cadillac | XT5 AWD | 4 | 23 |
| GMC | YUKON XL 4WD | 6 | 22 |
| Chevrolet | CAMARO | 4 | 25 |
| Chevrolet | COLORADO 2WD | 4 | 22 |
| Cadillac | ESCALADE 2WD | 6 | 23 |
| Chevrolet | EQUINOX AWD | 4 | 27 |
| Buick | ENCLAVE AWD | 6 | 20 |
gm.group('Brand')
| Brand | count |
|---|---|
| Buick | 2 |
| Cadillac | 3 |
| Chevrolet | 3 |
| GMC | 1 |
gm.group('Cylinders')
| Cylinders | count |
|---|---|
| 4 | 5 |
| 6 | 4 |
# shuffles the rows in the table; returns a new table
cars.shuffle()
| Manufacturer | Brand | Model | Displacement | Cylinders | MPG | Wheel |
|---|---|---|---|---|---|---|
| Volkswagen Group of | Audi | R8 Spyder | 5.2 | 10 | 16 | All Wheel Drive |
| Toyota | TOYOTA | TACOMA 4WD D-CAB MT TRD-ORP/PRO | 3.5 | 6 | 18 | Part-time 4-Wheel Drive |
| Volkswagen Group of | Volkswagen | Tiguan | 2 | 4 | 25 | 2-Wheel Drive, Front |
| Ford Motor Company | Ford | MUSTANG HO COUPE | 2.3 | 4 | 23 | 2-Wheel Drive, Rear |
| Kia | KIA MOTORS CORPORATION | Rio | 1.6 | 4 | 36 | 2-Wheel Drive, Front |
| Volkswagen Group of | Audi | S6 | 2.9 | 6 | 22 | All Wheel Drive |
| Porsche | Porsche | Panamera 4 Executive | 2.9 | 6 | 19 | All Wheel Drive |
| General Motors | Chevrolet | SPARK | 1.4 | 4 | 33 | 2-Wheel Drive, Front |
| General Motors | GMC | TERRAIN AWD | 1.5 | 4 | 26 | All Wheel Drive |
| Ford Motor Company | Ford | EXPEDITION 2WD | 3.5 | 6 | 19 | 2-Wheel Drive, Rear |
... (764 rows omitted)
cars.group('Brand').sort('count', descending = True)
| Brand | count |
|---|---|
| Mercedes-Benz | 84 |
| BMW | 76 |
| TOYOTA | 52 |
| Porsche | 45 |
| Ford | 42 |
| Audi | 41 |
| LEXUS | 35 |
| NISSAN | 29 |
| Chevrolet | 27 |
| HYUNDAI MOTOR COMPANY | 22 |
... (31 rows omitted)
collect function¶gm.group('Brand', np.mean)
| Brand | Model mean | Cylinders mean | MPG mean |
|---|---|---|---|
| Buick | 6 | 20.5 | |
| Cadillac | 4.66667 | 24 | |
| Chevrolet | 4 | 24.6667 | |
| GMC | 6 | 22 |
How does this work under the hood?
gm.where('Brand', 'Buick')
| Brand | Model | Cylinders | MPG |
|---|---|---|---|
| Buick | ENCLAVE FWD | 6 | 21 |
| Buick | ENCLAVE AWD | 6 | 20 |
print('mean of Cylinders: ', gm.where('Brand', 'Buick').column('Cylinders').mean())
print('mean of MPG: ', gm.where('Brand', 'Buick').column('MPG').mean())
mean of Cylinders: 6.0 mean of MPG: 20.5
gm.where('Brand', 'Cadillac')
| Brand | Model | Cylinders | MPG |
|---|---|---|---|
| Cadillac | CT4 AWD | 4 | 26 |
| Cadillac | XT5 AWD | 4 | 23 |
| Cadillac | ESCALADE 2WD | 6 | 23 |
print('mean of Cylinders: ', gm.where('Brand', 'Cadillac').column('Cylinders').mean())
print('mean of MPG: ', gm.where('Brand', 'Cadillac').column('MPG').mean())
mean of Cylinders: 4.666666666666667 mean of MPG: 24.0
gm.where('Brand', 'Chevrolet')
| Brand | Model | Cylinders | MPG |
|---|---|---|---|
| Chevrolet | CAMARO | 4 | 25 |
| Chevrolet | COLORADO 2WD | 4 | 22 |
| Chevrolet | EQUINOX AWD | 4 | 27 |
print('mean of Cylinders: ', gm.where('Brand', 'Chevrolet').column('Cylinders').mean())
print('mean of MPG: ', gm.where('Brand', 'Chevrolet').column('MPG').mean())
mean of Cylinders: 4.0 mean of MPG: 24.666666666666668
gm.where('Brand', 'GMC')
| Brand | Model | Cylinders | MPG |
|---|---|---|---|
| GMC | YUKON XL 4WD | 6 | 22 |
print('mean of Cylinders: ', gm.where('Brand', 'GMC').column('Cylinders').mean())
print('mean of MPG: ', gm.where('Brand', 'GMC').column('MPG').mean())
mean of Cylinders: 6.0 mean of MPG: 22.0
If you want a more concise way of doing the above:
for brand in np.unique(gm.column('Brand')):
brand_only = gm.where('Brand', brand)
print(brand)
print('mean of Cylinders: ', brand_only.column('Cylinders').mean())
print('mean of MPG: ', brand_only.column('MPG').mean())
print('\n')
Buick mean of Cylinders: 6.0 mean of MPG: 20.5 Cadillac mean of Cylinders: 4.666666666666667 mean of MPG: 24.0 Chevrolet mean of Cylinders: 4.0 mean of MPG: 24.666666666666668 GMC mean of Cylinders: 6.0 mean of MPG: 22.0
What if we use other collect functions?
gm
| Brand | Model | Cylinders | MPG |
|---|---|---|---|
| Buick | ENCLAVE FWD | 6 | 21 |
| Cadillac | CT4 AWD | 4 | 26 |
| Cadillac | XT5 AWD | 4 | 23 |
| GMC | YUKON XL 4WD | 6 | 22 |
| Chevrolet | CAMARO | 4 | 25 |
| Chevrolet | COLORADO 2WD | 4 | 22 |
| Cadillac | ESCALADE 2WD | 6 | 23 |
| Chevrolet | EQUINOX AWD | 4 | 27 |
| Buick | ENCLAVE AWD | 6 | 20 |
gm.group('Brand', sum)
| Brand | Model sum | Cylinders sum | MPG sum |
|---|---|---|---|
| Buick | 12 | 41 | |
| Cadillac | 14 | 72 | |
| Chevrolet | 12 | 74 | |
| GMC | 6 | 22 |
gm.group('Brand', list)
/opt/miniconda3/lib/python3.8/site-packages/datascience/tables.py:920: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray values = np.array(tuple(values))
| Brand | Model list | Cylinders list | MPG list |
|---|---|---|---|
| Buick | ['ENCLAVE FWD', 'ENCLAVE AWD'] | [6, 6] | [21, 20] |
| Cadillac | ['CT4 AWD', 'XT5 AWD', 'ESCALADE 2WD'] | [4, 4, 6] | [26, 23, 23] |
| Chevrolet | ['CAMARO', 'COLORADO 2WD', 'EQUINOX AWD'] | [4, 4, 4] | [25, 22, 27] |
| GMC | ['YUKON XL 4WD'] | [6] | [22] |
gm.group('Brand', len)
| Brand | Model len | Cylinders len | MPG len |
|---|---|---|---|
| Buick | 2 | 2 | 2 |
| Cadillac | 3 | 3 | 3 |
| Chevrolet | 3 | 3 | 3 |
| GMC | 1 | 1 | 1 |
gm.group('Brand', max)
| Brand | Model max | Cylinders max | MPG max |
|---|---|---|---|
| Buick | ENCLAVE FWD | 6 | 21 |
| Cadillac | XT5 AWD | 6 | 26 |
| Chevrolet | EQUINOX AWD | 4 | 27 |
| GMC | YUKON XL 4WD | 6 | 22 |
cars.shuffle()
| Manufacturer | Brand | Model | Displacement | Cylinders | MPG | Wheel |
|---|---|---|---|---|---|---|
| Toyota | LEXUS | ES 300h | 2.5 | 4 | 44 | 2-Wheel Drive, Front |
| Ford Motor Company | Ford | EXPLORER AWD | 2.3 | 4 | 23 | Part-time 4-Wheel Drive |
| Mercedes-Benz | Mercedes-Benz | AMG E53 4MATIC+ (Convertible) | 3 | 6 | 23 | 4-Wheel Drive |
| Volkswagen Group of | Volkswagen | Atlas | 2 | 4 | 22 | 2-Wheel Drive, Front |
| Rolls-Royce | Rolls-Royce Motor Cars Limited | Ghost | 6.7 | 12 | 14 | All Wheel Drive |
| Nissan | NISSAN | ALTIMA SV/SL | 2.5 | 4 | 31 | 2-Wheel Drive, Front |
| Mercedes-Benz | Mercedes-Benz | AMG S 63 4MATIC+ (convertible) | 4 | 8 | 17 | 4-Wheel Drive |
| BMW | BMW | X3 M | 3 | 6 | 16 | All Wheel Drive |
| Ford Motor Company | Ford | EXPLORER FFV AWD | 3.3 | 6 | 19 | Part-time 4-Wheel Drive |
| Toyota | TOYOTA | C-HR | 2 | 4 | 29 | 2-Wheel Drive, Front |
... (764 rows omitted)
# cars.group('Cylinders', ____) \
# .where(____, 6) \
# .column(____) \
# .____
cars.group('Cylinders', np.mean).bar('Cylinders', 'MPG mean')
grouping by multiple columns¶cars
| Manufacturer | Brand | Model | Displacement | Cylinders | MPG | Wheel |
|---|---|---|---|---|---|---|
| BMW | BMW | 228i Gran Coupe | 2 | 4 | 28 | 2-Wheel Drive, Front |
| BMW | BMW | 228i xDrive Gran Coupe | 2 | 4 | 27 | All Wheel Drive |
| BMW | BMW | 230i Convertible | 2 | 4 | 27 | 2-Wheel Drive, Rear |
| BMW | BMW | 230i Coupe | 2 | 4 | 28 | 2-Wheel Drive, Rear |
| BMW | BMW | 230i xDrive Convertible | 2 | 4 | 24 | All Wheel Drive |
| BMW | BMW | 230i xDrive Coupe | 2 | 4 | 24 | All Wheel Drive |
| BMW | BMW | 330i | 2 | 4 | 30 | 2-Wheel Drive, Rear |
| BMW | BMW | 330i xDrive | 2 | 4 | 28 | All Wheel Drive |
| BMW | BMW | 430i Coupe | 2 | 4 | 29 | 2-Wheel Drive, Rear |
| BMW | BMW | 430i xDrive Coupe | 2 | 4 | 27 | All Wheel Drive |
... (764 rows omitted)
cars.group(['Manufacturer', 'Brand']).show()
| Manufacturer | Brand | count |
|---|---|---|
| BMW | BMW | 76 |
| BMW | Mini | 17 |
| BMW | TOYOTA | 2 |
| FCA US LLC | ALFA ROMEO | 4 |
| FCA US LLC | Chrysler | 5 |
| FCA US LLC | Dodge | 12 |
| FCA US LLC | FIAT | 1 |
| FCA US LLC | Jeep | 20 |
| FCA US LLC | RAM | 7 |
| Ferrari | Ferrari North America, Inc. | 7 |
| Ford Motor Company | Ford | 42 |
| Ford Motor Company | Lincoln | 9 |
| General Motors | Buick | 8 |
| General Motors | Cadillac | 20 |
| General Motors | Chevrolet | 27 |
| General Motors | GMC | 15 |
| Honda | Acura | 10 |
| Honda | Honda | 17 |
| Hyundai | GENESIS | 8 |
| Hyundai | HYUNDAI MOTOR COMPANY | 22 |
| Jaguar Land Rover L | Jaguar | 10 |
| Jaguar Land Rover L | Land Rover | 19 |
| Kia | KIA MOTORS CORPORATION | 22 |
| Lotus | Lotus Cars Ltd | 1 |
| MAZDA | MAZDA | 15 |
| Maserati | MASERATI | 11 |
| Mercedes-Benz | Mercedes-Benz | 84 |
| Mitsubishi Motors Co | Mitsubishi Motors Corporation | 4 |
| Nissan | INFINITI | 12 |
| Nissan | NISSAN | 29 |
| Porsche | Porsche | 45 |
| Rolls-Royce | Rolls-Royce Motor Cars Limited | 10 |
| Subaru | Subaru | 11 |
| Toyota | LEXUS | 35 |
| Toyota | TOYOTA | 50 |
| Volkswagen Group of | Audi | 41 |
| Volkswagen Group of | Bentley | 4 |
| Volkswagen Group of | Bugatti | 2 |
| Volkswagen Group of | Lamborghini | 6 |
| Volkswagen Group of | Volkswagen | 14 |
| Volvo | Volvo Cars of North America, LLC | 14 |
| aston martin | Aston Martin Lagonda Ltd | 6 |
cars.group(['Brand', 'Cylinders'], np.mean)
| Brand | Cylinders | Manufacturer mean | Model mean | Displacement mean | MPG mean | Wheel mean |
|---|---|---|---|---|---|---|
| ALFA ROMEO | 4 | 2 | 25.5 | |||
| Acura | 4 | 2.04444 | 24.4444 | |||
| Acura | 6 | 3.5 | 21 | |||
| Aston Martin Lagonda Ltd | 8 | 4 | 18 | |||
| Aston Martin Lagonda Ltd | 12 | 5.2 | 17.5 | |||
| Audi | 4 | 2 | 25.6875 | |||
| Audi | 5 | 2.5 | 24 | |||
| Audi | 6 | 2.97143 | 21.6429 | |||
| Audi | 8 | 4 | 16.5 | |||
| Audi | 10 | 5.2 | 16.5 |
... (83 rows omitted)
cars.group(['Manufacturer', 'Brand', 'Displacement'])
| Manufacturer | Brand | Displacement | count |
|---|---|---|---|
| BMW | BMW | 2 | 22 |
| BMW | BMW | 3 | 34 |
| BMW | BMW | 4.4 | 19 |
| BMW | BMW | 6.6 | 1 |
| BMW | Mini | 1.5 | 5 |
| BMW | Mini | 2 | 12 |
| BMW | TOYOTA | 2 | 1 |
| BMW | TOYOTA | 3 | 1 |
| FCA US LLC | ALFA ROMEO | 2 | 4 |
| FCA US LLC | Chrysler | 3.6 | 5 |
... (144 rows omitted)
pivot¶cars.group(['Brand', 'Cylinders']).show()
| Brand | Cylinders | count |
|---|---|---|
| ALFA ROMEO | 4 | 4 |
| Acura | 4 | 9 |
| Acura | 6 | 1 |
| Aston Martin Lagonda Ltd | 8 | 4 |
| Aston Martin Lagonda Ltd | 12 | 2 |
| Audi | 4 | 16 |
| Audi | 5 | 1 |
| Audi | 6 | 14 |
| Audi | 8 | 6 |
| Audi | 10 | 4 |
| BMW | 4 | 22 |
| BMW | 6 | 34 |
| BMW | 8 | 19 |
| BMW | 12 | 1 |
| Bentley | 8 | 4 |
| Bugatti | 16 | 2 |
| Buick | 3 | 2 |
| Buick | 4 | 4 |
| Buick | 6 | 2 |
| Cadillac | 4 | 16 |
| Cadillac | 6 | 4 |
| Chevrolet | 3 | 2 |
| Chevrolet | 4 | 15 |
| Chevrolet | 6 | 9 |
| Chevrolet | 8 | 1 |
| Chrysler | 6 | 5 |
| Dodge | 6 | 6 |
| Dodge | 8 | 6 |
| FIAT | 4 | 1 |
| Ferrari North America, Inc. | 8 | 5 |
| Ferrari North America, Inc. | 12 | 2 |
| Ford | 3 | 4 |
| Ford | 4 | 20 |
| Ford | 6 | 16 |
| Ford | 8 | 2 |
| GENESIS | 4 | 6 |
| GENESIS | 6 | 2 |
| GMC | 4 | 8 |
| GMC | 6 | 7 |
| HYUNDAI MOTOR COMPANY | 4 | 20 |
| HYUNDAI MOTOR COMPANY | 6 | 2 |
| Honda | 4 | 10 |
| Honda | 6 | 7 |
| INFINITI | 4 | 2 |
| INFINITI | 6 | 8 |
| INFINITI | 8 | 2 |
| Jaguar | 4 | 1 |
| Jaguar | 6 | 6 |
| Jaguar | 8 | 3 |
| Jeep | 4 | 10 |
| Jeep | 6 | 8 |
| Jeep | 8 | 2 |
| KIA MOTORS CORPORATION | 4 | 19 |
| KIA MOTORS CORPORATION | 6 | 3 |
| LEXUS | 4 | 11 |
| LEXUS | 6 | 19 |
| LEXUS | 8 | 5 |
| Lamborghini | 8 | 1 |
| Lamborghini | 10 | 3 |
| Lamborghini | 12 | 2 |
| Land Rover | 4 | 5 |
| Land Rover | 6 | 9 |
| Land Rover | 8 | 5 |
| Lincoln | 4 | 4 |
| Lincoln | 6 | 5 |
| Lotus Cars Ltd | 6 | 1 |
| MASERATI | 6 | 7 |
| MASERATI | 8 | 4 |
| MAZDA | 4 | 15 |
| Mercedes-Benz | 4 | 30 |
| Mercedes-Benz | 6 | 23 |
| Mercedes-Benz | 8 | 31 |
| Mini | 3 | 5 |
| Mini | 4 | 12 |
| Mitsubishi Motors Corporation | 3 | 2 |
| Mitsubishi Motors Corporation | 4 | 2 |
| NISSAN | 4 | 17 |
| NISSAN | 6 | 7 |
| NISSAN | 8 | 5 |
| Porsche | 4 | 7 |
| Porsche | 6 | 30 |
| Porsche | 8 | 8 |
| RAM | 4 | 1 |
| RAM | 6 | 5 |
| RAM | 8 | 1 |
| Rolls-Royce Motor Cars Limited | 12 | 10 |
| Subaru | 4 | 11 |
| TOYOTA | 4 | 35 |
| TOYOTA | 6 | 12 |
| TOYOTA | 8 | 5 |
| Volkswagen | 4 | 13 |
| Volkswagen | 6 | 1 |
| Volvo Cars of North America, LLC | 4 | 14 |
cars.pivot('Cylinders', 'Brand', 'MPG', np.mean)
| Brand | 3 | 4 | 5 | 6 | 8 | 10 | 12 | 16 |
|---|---|---|---|---|---|---|---|---|
| ALFA ROMEO | 0 | 25.5 | 0 | 0 | 0 | 0 | 0 | 0 |
| Acura | 0 | 24.4444 | 0 | 21 | 0 | 0 | 0 | 0 |
| Aston Martin Lagonda Ltd | 0 | 0 | 0 | 0 | 18 | 0 | 17.5 | 0 |
| Audi | 0 | 25.6875 | 24 | 21.6429 | 16.5 | 16.5 | 0 | 0 |
| BMW | 0 | 26.8636 | 0 | 22.3529 | 17.4211 | 0 | 16 | 0 |
| Bentley | 0 | 0 | 0 | 0 | 18.25 | 0 | 0 | 0 |
| Bugatti | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10.5 |
| Buick | 28.5 | 26 | 0 | 20.5 | 0 | 0 | 0 | 0 |
| Cadillac | 0 | 23.5625 | 0 | 21.75 | 0 | 0 | 0 | 0 |
| Chevrolet | 28.5 | 25.5333 | 0 | 20 | 19 | 0 | 0 | 0 |
... (31 rows omitted)
cars
| Manufacturer | Brand | Model | Displacement | Cylinders | MPG | Wheel |
|---|---|---|---|---|---|---|
| BMW | BMW | 228i Gran Coupe | 2 | 4 | 28 | 2-Wheel Drive, Front |
| BMW | BMW | 228i xDrive Gran Coupe | 2 | 4 | 27 | All Wheel Drive |
| BMW | BMW | 230i Convertible | 2 | 4 | 27 | 2-Wheel Drive, Rear |
| BMW | BMW | 230i Coupe | 2 | 4 | 28 | 2-Wheel Drive, Rear |
| BMW | BMW | 230i xDrive Convertible | 2 | 4 | 24 | All Wheel Drive |
| BMW | BMW | 230i xDrive Coupe | 2 | 4 | 24 | All Wheel Drive |
| BMW | BMW | 330i | 2 | 4 | 30 | 2-Wheel Drive, Rear |
| BMW | BMW | 330i xDrive | 2 | 4 | 28 | All Wheel Drive |
| BMW | BMW | 430i Coupe | 2 | 4 | 29 | 2-Wheel Drive, Rear |
| BMW | BMW | 430i xDrive Coupe | 2 | 4 | 27 | All Wheel Drive |
... (764 rows omitted)
# cars.pivot(___, ___, ___, ___)