pd.crosstab
"Create a cross tabulation using pivot_table." import sys import random import pandas as pd data = [ [42, "Eighth", 4280], [42, "Eighth", 4281], [42, "Eighth", 4282], [42, "Seventh", 4270], [42, "Seventh", 4271], [42, "Seventh", 4272], [42, "Sixth", 4260], [42, "Sixth", 4261], [42, "Sixth", 4262], [43, "Eighth", 4380], [43, "Eighth", 4381], [43, "Eighth", 4382], [43, "Seventh", 4370], [43, "Seventh", 4371], [43, "Seventh", 4372], [43, "Sixth", 4360], [43, "Sixth", 4361], [43, "Sixth", 4362], [44, "Eighth", 4480], [44, "Eighth", 4481], [44, "Eighth", 4482], [44, "Seventh", 4470], [44, "Seventh", 4471], [44, "Seventh", 4472], [44, "Sixth", 4460], [44, "Sixth", 4461], [44, "Sixth", 4462] ] random.shuffle(data) columns = pd.Index(data = ["street", "avenue", "accidents"]) df = pd.DataFrame(data = data, columns = columns) print(df) print() crosstab = df.pivot_table( index = "street", columns = "avenue", aggfunc = "count", margins = True ) print(crosstab) print() crosstab = crosstab.droplevel(axis = 1, level = 0) print(crosstab) sys.exit(0)
accidents avenue Eighth Seventh Sixth All street 42 3 3 3 9 43 3 3 3 9 44 3 3 3 9 All 9 9 9 27 avenue Eighth Seventh Sixth All street 42 3 3 3 9 43 3 3 3 9 44 3 3 3 9 All 9 9 9 27
Three reports have come in about the number of accidents at Eighth Avenue & 42nd Street.
"Create a cross tabulation." import sys import random import pandas as pd data = [ [42, "Eighth", 4280], [42, "Eighth", 4281], [42, "Eighth", 4282], [42, "Seventh", 4270], [42, "Seventh", 4271], [42, "Seventh", 4272], [42, "Sixth", 4260], [42, "Sixth", 4261], [42, "Sixth", 4262], [43, "Eighth", 4380], [43, "Eighth", 4381], [43, "Eighth", 4382], [43, "Seventh", 4370], [43, "Seventh", 4371], [43, "Seventh", 4372], [43, "Sixth", 4360], [43, "Sixth", 4361], [43, "Sixth", 4362], [44, "Eighth", 4480], [44, "Eighth", 4481], [44, "Eighth", 4482], [44, "Seventh", 4470], [44, "Seventh", 4471], [44, "Seventh", 4472], [44, "Sixth", 4460], [44, "Sixth", 4461], [44, "Sixth", 4462] ] random.shuffle(data) columns = pd.Index(data = ["street", "avenue", "accidents"]) df = pd.DataFrame(data = data, columns = columns) print(df) print() crosstab = pd.crosstab( index = df.street, columns = df.avenue, margins = True ) print(crosstab) sys.exit(0)
street avenue accidents 0 42 Seventh 4271 1 42 Seventh 4270 2 43 Eighth 4382 3 44 Eighth 4481 4 43 Seventh 4371 5 43 Sixth 4362 6 42 Eighth 4280 7 43 Sixth 4361 8 43 Sixth 4360 9 42 Sixth 4261 10 42 Eighth 4281 11 44 Eighth 4482 12 44 Seventh 4471 13 44 Sixth 4460 14 44 Eighth 4480 15 43 Seventh 4372 16 42 Sixth 4262 17 42 Eighth 4282 18 44 Seventh 4472 19 43 Seventh 4370 20 43 Eighth 4380 21 42 Sixth 4260 22 43 Eighth 4381 23 42 Seventh 4272 24 44 Sixth 4462 25 44 Seventh 4470 26 44 Sixth 4461 avenue Eighth Seventh Sixth All street 42 3 3 3 9 43 3 3 3 9 44 3 3 3 9 All 9 9 9 27
df.accidents
.
To compute the mean number of accidents per row,
# Aggregate the "accidents" values in each (street, avenue) cell with their
# mean instead of counting records; margins=True still adds the "All" totals.
crosstab = pd.crosstab(
    df["street"],
    df["avenue"],
    values=df["accidents"],
    aggfunc="mean",
    margins=True,
)
print(crosstab)
avenue Eighth Seventh Sixth All street 42 4281 4271 4261 4271 43 4381 4371 4361 4371 44 4481 4471 4461 4471 All 4381 4371 4361 4371
But this is as far as we can go with a
crosstab
.
To compute the mean number of accidents and vehicles per row,
we would have to go back to a
pivot_table
.
[44, "Sixth", 4462]
import matplotlib.pyplot as plt
# Count the records in each (street, avenue) cell.
crosstab = pd.crosstab(
    index=df.street,
    columns=df.avenue,
    margins=False,  # Don't output the "All" row and column.
)
print(crosstab)

# Draw the table as a bar chart with horizontal tick labels.
# Also try crosstab.T.plot.bar(rot=0)
axes = crosstab.plot.bar(rot=0)

# Set the window title through the figure manager:
# FigureCanvasBase.set_window_title() was deprecated in Matplotlib 3.4 and
# removed in 3.6, while the manager method exists in old and new releases.
figure = plt.gcf()
figure.canvas.manager.set_window_title("crosstab")

axes.set_title("Cross Tabulation")
axes.set_ylabel("Number of observations at each intersection")
plt.show()