Author: Chris Albon, @ChrisAlbon
Date: -
import pandas as pd import os %matplotlib inline import matplotlib.pyplot as plt import numpy as np
cur_dir = os.path.dirname(os.path.realpath('file'))
df = pd.read_csv(cur_dir + '/data/cross_raw_data/' + 'bc_full_crossing_data.csv')
Port Name | Year | Month | Trucks | Loaded Truck Containers | Empty Truck Containers | Trains | Loaded Rail Containers | Empty Rail Containers | Train Passengers | Buses | Bus Passengers | Personal Vehicles | Personal Vehicle Passengers | Pedestrians | City | State | |
0 | AZ: Douglas ... | 1995 | 01 - JAN | 2890 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 164803 | 379047 | 40735 | Douglas ... | AZ |
1 | AZ: Douglas ... | 1995 | 02 - FEB | 2849 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 146925 | 337926 | 40463 | Douglas ... | AZ |
2 | AZ: Douglas ... | 1995 | 03 - MAR | 3421 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 139060 | 319838 | 55370 | Douglas ... | AZ |
3 | AZ: Douglas ... | 1995 | 04 - APR | 3676 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 144163 | 331575 | 46279 | Douglas ... | AZ |
4 | AZ: Douglas ... | 1995 | 05 - MAY | 3560 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 154597 | 355573 | 50160 | Douglas ... | AZ |
# Show the exact 'Port Name' string stored for the row labelled 1
# (useful for spotting whitespace padding in the raw data).
df.loc[1, 'Port Name']
'AZ: Douglas '
# Select the rows for the Douglas, AZ port of entry.
# The raw port names carry trailing whitespace, so compare on the stripped
# value instead of relying on the exact amount of padding.
is_douglas = df['Port Name'].str.strip() == 'AZ: Douglas'

# .copy() makes df_douglas an independent frame, so columns can be added to
# it later without pandas raising SettingWithCopyWarning.
df_douglas = df[is_douglas].copy()
Port Name | Year | Month | Trucks | Loaded Truck Containers | Empty Truck Containers | Trains | Loaded Rail Containers | Empty Rail Containers | Train Passengers | Buses | Bus Passengers | Personal Vehicles | Personal Vehicle Passengers | Pedestrians | City | State | |
0 | AZ: Douglas ... | 1995 | 01 - JAN | 2890 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 164803 | 379047 | 40735 | Douglas ... | AZ |
1 | AZ: Douglas ... | 1995 | 02 - FEB | 2849 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 146925 | 337926 | 40463 | Douglas ... | AZ |
2 | AZ: Douglas ... | 1995 | 03 - MAR | 3421 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 139060 | 319838 | 55370 | Douglas ... | AZ |
3 | AZ: Douglas ... | 1995 | 04 - APR | 3676 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 144163 | 331575 | 46279 | Douglas ... | AZ |
4 | AZ: Douglas ... | 1995 | 05 - MAY | 3560 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 154597 | 355573 | 50160 | Douglas ... | AZ |
# Total people crossing each month: passengers of every vehicle type plus
# pedestrians.
total_crossers = (
    df_douglas['Train Passengers']
    + df_douglas['Bus Passengers']
    + df_douglas['Personal Vehicle Passengers']
    + df_douglas['Pedestrians']
)

# assign() returns a new frame, so this is safe even if df_douglas was
# produced by slicing another DataFrame (no SettingWithCopyWarning).
df_douglas = df_douglas.assign(**{'Total Crossers': total_crossers})

# Yearly totals of every numeric measure. numeric_only=True keeps the text
# columns (Port Name, Month, City, State) out of the sum; recent pandas
# versions no longer drop them automatically.
df_douglas_annual_sum = df_douglas.groupby('Year').sum(numeric_only=True)
Trucks | Loaded Truck Containers | Empty Truck Containers | Trains | Loaded Rail Containers | Empty Rail Containers | Train Passengers | Buses | Bus Passengers | Personal Vehicles | Personal Vehicle Passengers | Pedestrians | Total Crossers | |
Year | |||||||||||||
1995 | 36272 | 0 | 0 | 0 | 0 | 0 | 0 | 3249 | 3249 | 1827277 | 4202735 | 567030 | 4773014 |
1996 | 38089 | 8703 | 13811 | 0 | 0 | 0 | 0 | 3353 | 3433 | 1915119 | 4404773 | 547742 | 4955948 |
1997 | 35718 | 10186 | 13119 | 0 | 0 | 0 | 0 | 3651 | 3651 | 1991904 | 4803469 | 599082 | 5406202 |
1998 | 35656 | 14952 | 14106 | 0 | 49 | 57 | 0 | 3650 | 3650 | 2028032 | 5577088 | 641181 | 6221919 |
1999 | 32568 | 14745 | 11720 | 0 | 0 | 0 | 0 | 3650 | 3650 | 2150092 | 5912753 | 704973 | 6621376 |
# Area chart of yearly bus passengers, without grid lines.
df_douglas_annual_sum['Bus Passengers'].plot.area(grid=False)
<matplotlib.axes._subplots.AxesSubplot at 0x1083fd410>
# Area chart of yearly personal-vehicle passengers, without grid lines.
df_douglas_annual_sum['Personal Vehicle Passengers'].plot.area(grid=False)
<matplotlib.axes._subplots.AxesSubplot at 0x1087cfe50>
# Area chart of yearly pedestrian crossings, without grid lines.
df_douglas_annual_sum['Pedestrians'].plot.area(grid=False)
<matplotlib.axes._subplots.AxesSubplot at 0x1088b8590>
# Stacked percentage bar chart: for every year, the share of people crossing
# by bus, by personal vehicle and on foot.

# Create a figure with a single subplot
f, ax = plt.subplots(1, figsize=(10, 5))

# Each bar is one unit wide, so neighbouring years touch
bar_width = 1

# Left edge of each bar: one slot per year in the annual summary
bar_l = list(range(len(df_douglas_annual_sum)))

# x-axis tick positions: the centre of each bar
# (2.0 forces float division, so this also holds under Python 2)
tick_pos = [left + bar_width / 2.0 for left in bar_l]

bus = df_douglas_annual_sum['Bus Passengers']
vehicle = df_douglas_annual_sum['Personal Vehicle Passengers']
pedestrians = df_douglas_annual_sum['Pedestrians']

# People crossing per year, over the three categories charted here
yearly_total = bus + vehicle + pedestrians
totals = yearly_total.tolist()

# Each category as a percentage of that year's total
# (Series division is always true division)
pre_rel = (bus / yearly_total * 100).tolist()
mid_rel = (vehicle / yearly_total * 100).tolist()
post_rel = (pedestrians / yearly_total * 100).tolist()

# Styling shared by the three layers. align='edge' anchors each bar's left
# edge at bar_l, which is what tick_pos (left edge + half a width) assumes.
# NOTE(review): the alpha and colour values are placeholders; adjust to taste.
bar_style = dict(width=bar_width, alpha=0.9, edgecolor='white', align='edge')

# Bottom layer: bus passengers
ax.bar(bar_l, pre_rel, label='Bus Passengers', color='#019600', **bar_style)

# Middle layer: personal vehicle passengers, stacked on the bus share
ax.bar(bar_l, mid_rel, bottom=pre_rel,
       label='Personal Vehicle Passengers', color='#3C5F5A', **bar_style)

# Top layer: pedestrians, stacked on the bus + personal vehicle shares
ax.bar(bar_l, post_rel,
       bottom=[i + j for i, j in zip(pre_rel, mid_rel)],
       label='Pedestrians', color='#219AD8', **bar_style)

# Label each tick with its year
plt.xticks(tick_pos, df_douglas_annual_sum.index)
ax.set_ylabel("Percent Of Total")

# Leave a margin around the bars
plt.xlim([min(tick_pos) - bar_width, max(tick_pos) + bar_width])
plt.ylim(-10, 110)

# Rotate the year labels so they do not overlap
plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right')

# Show the plot
plt.show()
# Scatter plot of the yearly totals for each type of border crosser.

# Create a figure with a single subplot
f, ax = plt.subplots(1, figsize=(10, 5))

# x axis: the years of the annual summary (its index)
years = df_douglas_annual_sum.index

# One series per crosser type, each in its own colour so they can be told
# apart in the legend.
# NOTE(review): the colour choices and the presence of the 'Pedestrians'
# series are assumptions -- confirm against the intended figure.
series_colours = (
    ('Personal Vehicle Passengers', 'b'),
    ('Pedestrians', 'g'),
    ('Bus Passengers', 'r'),
)
for column, colour in series_colours:
    ax.scatter(years, df_douglas_annual_sum[column], color=colour, label=column)

ax.legend(loc='best')

# Chart title
plt.title('Types Of Border Crossers')
# y label
plt.ylabel('Total People')
# x label
plt.xlabel('Year')
<matplotlib.text.Text at 0x109016e10>
Index(['Port Name', 'Year', 'Month', 'Trucks', 'Loaded Truck Containers', 'Empty Truck Containers', 'Trains', 'Loaded Rail Containers', 'Empty Rail Containers', 'Train Passengers', 'Buses', 'Bus Passengers', 'Personal Vehicles', 'Personal Vehicle Passengers', 'Pedestrians', 'City', 'State'], dtype='object')
# Total every numeric measure per port over the whole period.
# numeric_only=True keeps the text columns (Month, City, State) out of the
# sum; recent pandas versions no longer drop them automatically.
# NOTE: 'Year' is numeric and therefore summed as well; that total carries no
# meaning and should be ignored.
df_crossing = df.groupby('Port Name').sum(numeric_only=True)

# The five ports with the most train crossings.
# DataFrame.sort() was removed in pandas 0.20; sort_values() replaces it.
df_crossing.sort_values('Trains', ascending=False).head()
Year | Trucks | Loaded Truck Containers | Empty Truck Containers | Trains | Loaded Rail Containers | Empty Rail Containers | Train Passengers | Buses | Bus Passengers | Personal Vehicles | Personal Vehicle Passengers | Pedestrians | |
Port Name | |||||||||||||
TX: Laredo | 456912 | 27331291 | 16555501 | 9484814 | 62322 | 3060584 | 2141567 | 0 | 670433 | 13725607 | 117969118 | 273240177 | 82942171 |
TX: Eagle Pass | 456912 | 1774624 | 1054386 | 676904 | 31437 | 803534 | 1365715 | 83048 | 42050 | 340218 | 57603495 | 136747990 | 12947972 |
TX: El Paso | 456912 | 13050522 | 6504160 | 5548367 | 23793 | 660056 | 842385 | 49831 | 311136 | 5624071 | 261185949 | 567649753 | 128299884 |
TX: Brownsville | 456912 | 4503798 | 2249740 | 2031792 | 14299 | 155970 | 1243865 | 2592 | 180687 | 1591315 | 119779453 | 263794440 | 55892176 |
AZ: Nogales | 456912 | 5076159 | 3812053 | 1044586 | 11647 | 539083 | 357639 | 35902 | 136006 | 2464601 | 65971957 | 164483990 | 97174209 |
# Total truck containers (loaded + empty) handled by each port.
df_crossing['container_total'] = (
    df_crossing['Loaded Truck Containers']
    + df_crossing['Empty Truck Containers']
)

# Order the ports from fewest to most containers.
# DataFrame.sort(columns=...) was removed in pandas 0.20; use sort_values(by=...).
df_crossing = df_crossing.sort_values(by='container_total')
# Back-to-back horizontal bar chart: loaded truck containers extend to the
# right of zero and empty truck containers to the left, one row per port.

# Input data: loaded and empty truck container totals per port
x1 = df_crossing['Loaded Truck Containers']
x2 = df_crossing['Empty Truck Containers']

# Create the bar labels (the port names)
bar_labels = df_crossing.index

# Create a figure
fig = plt.figure(figsize=(10, 8))

# Set the y position: one row per port
y_pos = np.arange(len(x1)).tolist()
plt.yticks(y_pos, bar_labels, fontsize=10)

# Loaded containers: centred horizontal bars with alpha 0.4, in dark green
# NOTE(review): the exact shades of green are placeholders; only "dark" vs
# "light" is implied by the x-axis label below.
plt.barh(y_pos, x1, align='center', alpha=0.4, color='#263F13')

# Empty containers: the NEGATIVE of x2 so the bars extend to the left of
# zero; centred, alpha 0.4, in light green
plt.barh(y_pos, -x2, align='center', alpha=0.4, color='#77A61D')

# Annotation and labels
plt.xlabel('Empty Containers: Light Green. Full Containers: Dark Green')
t = plt.title('Comparison of Empty And Full Truck Containers Per Border Crossing')