-
Author: Chris Albon, @ChrisAlbon
-
Date: -
-
Note:
import pandas as pd import os %matplotlib inline import matplotlib.pyplot as plt import numpy as np
cur_dir = os.path.dirname(os.path.realpath('file'))
df = pd.read_csv(cur_dir + '/data/cross_raw_data/' + 'bc_full_crossing_data.csv')
df.head()
Port Name | Year | Month | Trucks | Loaded Truck Containers | Empty Truck Containers | Trains | Loaded Rail Containers | Empty Rail Containers | Train Passengers | Buses | Bus Passengers | Personal Vehicles | Personal Vehicle Passengers | Pedestrians | City | State | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AZ: Douglas ... | 1995 | 01 - JAN | 2890 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 164803 | 379047 | 40735 | Douglas ... | AZ |
1 | AZ: Douglas ... | 1995 | 02 - FEB | 2849 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 146925 | 337926 | 40463 | Douglas ... | AZ |
2 | AZ: Douglas ... | 1995 | 03 - MAR | 3421 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 139060 | 319838 | 55370 | Douglas ... | AZ |
3 | AZ: Douglas ... | 1995 | 04 - APR | 3676 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 144163 | 331575 | 46279 | Douglas ... | AZ |
4 | AZ: Douglas ... | 1995 | 05 - MAY | 3560 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 154597 | 355573 | 50160 | Douglas ... | AZ |
df['Port Name'][1]
'AZ: Douglas '
df_douglas = df[df['Port Name'] == 'AZ: Douglas '][:]
df_douglas.head()
Port Name | Year | Month | Trucks | Loaded Truck Containers | Empty Truck Containers | Trains | Loaded Rail Containers | Empty Rail Containers | Train Passengers | Buses | Bus Passengers | Personal Vehicles | Personal Vehicle Passengers | Pedestrians | City | State | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AZ: Douglas ... | 1995 | 01 - JAN | 2890 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 164803 | 379047 | 40735 | Douglas ... | AZ |
1 | AZ: Douglas ... | 1995 | 02 - FEB | 2849 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 146925 | 337926 | 40463 | Douglas ... | AZ |
2 | AZ: Douglas ... | 1995 | 03 - MAR | 3421 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 139060 | 319838 | 55370 | Douglas ... | AZ |
3 | AZ: Douglas ... | 1995 | 04 - APR | 3676 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 144163 | 331575 | 46279 | Douglas ... | AZ |
4 | AZ: Douglas ... | 1995 | 05 - MAY | 3560 | 0 | 0 | 0 | 0 | 0 | 0 | 270 | 270 | 154597 | 355573 | 50160 | Douglas ... | AZ |
df_douglas['Total Crossers'] = df_douglas['Train Passengers'] + df_douglas['Bus Passengers'] + df_douglas['Personal Vehicle Passengers'] + df_douglas['Pedestrians']
df_douglas_annual_sum = df_douglas.groupby(df_douglas['Year']).sum()
df_douglas_annual_sum.head()
Trucks | Loaded Truck Containers | Empty Truck Containers | Trains | Loaded Rail Containers | Empty Rail Containers | Train Passengers | Buses | Bus Passengers | Personal Vehicles | Personal Vehicle Passengers | Pedestrians | Total Crossers | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Year | |||||||||||||
1995 | 36272 | 0 | 0 | 0 | 0 | 0 | 0 | 3249 | 3249 | 1827277 | 4202735 | 567030 | 4773014 |
1996 | 38089 | 8703 | 13811 | 0 | 0 | 0 | 0 | 3353 | 3433 | 1915119 | 4404773 | 547742 | 4955948 |
1997 | 35718 | 10186 | 13119 | 0 | 0 | 0 | 0 | 3651 | 3651 | 1991904 | 4803469 | 599082 | 5406202 |
1998 | 35656 | 14952 | 14106 | 0 | 49 | 57 | 0 | 3650 | 3650 | 2028032 | 5577088 | 641181 | 6221919 |
1999 | 32568 | 14745 | 11720 | 0 | 0 | 0 | 0 | 3650 | 3650 | 2150092 | 5912753 | 704973 | 6621376 |
df_douglas_annual_sum['Bus Passengers'].plot(kind='area', grid=False)
<matplotlib.axes._subplots.AxesSubplot at 0x1083fd410>
df_douglas_annual_sum['Personal Vehicle Passengers'].plot(kind='area', grid=False)
<matplotlib.axes._subplots.AxesSubplot at 0x1087cfe50>
df_douglas_annual_sum['Pedestrians'].plot(kind='area', grid=False)
<matplotlib.axes._subplots.AxesSubplot at 0x1088b8590>
# Create a figure with a single subplot
f, ax = plt.subplots(1, figsize=(10,5))
# Set bar width at 1
bar_width = 1
# positions of the left bar-boundaries
bar_l = [i for i in range(len(df_douglas_annual_sum['Bus Passengers']))]
# positions of the x-axis ticks (center of the bars as bar labels)
tick_pos = [i+(bar_width/2) for i in bar_l]
# Create the total score for each participant
totals = [i+j+k for i,j,k in zip(df_douglas_annual_sum['Bus Passengers'],
df_douglas_annual_sum['Personal Vehicle Passengers'],
df_douglas_annual_sum['Pedestrians'])]
# Create the percentage of the total score the pre_score value for each participant was
pre_rel = [i / j * 100 for i,j in zip(df_douglas_annual_sum['Bus Passengers'], totals)]
# Create the percentage of the total score the mid_score value for each participant was
mid_rel = [i / j * 100 for i,j in zip(df_douglas_annual_sum['Personal Vehicle Passengers'], totals)]
# Create the percentage of the total score the post_score value for each participant was
post_rel = [i / j * 100 for i,j in zip(df_douglas_annual_sum['Pedestrians'], totals)]
# Create a bar chart in position bar_1
ax.bar(bar_l,
# using pre_rel data
pre_rel,
# labeled
label='Bus Passengers',
# with alpha
alpha=0.9,
# with color
color='#019600',
# with bar width
width=bar_width,
# with border color
edgecolor='white'
)
# Create a bar chart in position bar_1
ax.bar(bar_l,
# using mid_rel data
mid_rel,
# with pre_rel
bottom=pre_rel,
# labeled
label='Personal Vehicle Passengers',
# with alpha
alpha=0.9,
# with color
color='#3C5F5A',
# with bar width
width=bar_width,
# with border color
edgecolor='white'
)
# Create a bar chart in position bar_1
ax.bar(bar_l,
# using post_rel data
post_rel,
# with pre_rel and mid_rel on bottom
bottom=[i+j for i,j in zip(pre_rel, mid_rel)],
# labeled
label='Pedestrians',
# with alpha
alpha=0.9,
# with color
color='#219AD8',
# with bar width
width=bar_width,
# with border color
edgecolor='white'
)
# Set the ticks to be first names
plt.xticks(tick_pos, df_douglas_annual_sum.index)
ax.set_ylabel("Percent Of Total")
ax.set_xlabel("Year")
# Let the borders of the graphic
plt.xlim([min(tick_pos)-bar_width, max(tick_pos)+bar_width])
plt.ylim(-10, 110)
# rotate axis labels
plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right')
# shot plot
plt.show()
# Create a figure with a single subplot
f, ax = plt.subplots(1, figsize=(10,5))
# Create a scatterplot of,
# x axis: Latency, and
plt.bar(df_douglas_annual_sum.index,
# y axis: Download, with
df_douglas_annual_sum['Personal Vehicle Passengers'],
# the color assigned as blue
color='b')
plt.bar(df_douglas_annual_sum.index,
# y axis: Download, with
df_douglas_annual_sum['Pedestrians'],
# the color assigned as blue
color='r')
plt.bar(df_douglas_annual_sum.index,
# y axis: Download, with
df_douglas_annual_sum['Bus Passengers'],
# the color assigned as blue
color='g')
# Chart title
plt.title('Types Of Border Crossers')
# y label
plt.ylabel('Total People')
# x label
plt.xlabel('Year')
<matplotlib.text.Text at 0x109016e10>
df.columns
Index(['Port Name', 'Year', 'Month', 'Trucks', 'Loaded Truck Containers', 'Empty Truck Containers', 'Trains', 'Loaded Rail Containers', 'Empty Rail Containers', 'Train Passengers', 'Buses', 'Bus Passengers', 'Personal Vehicles', 'Personal Vehicle Passengers', 'Pedestrians', 'City', 'State'], dtype='object')
df_crossing = df.groupby('Port Name').sum()
df_crossing.sort('Trains', ascending=False).head()
Year | Trucks | Loaded Truck Containers | Empty Truck Containers | Trains | Loaded Rail Containers | Empty Rail Containers | Train Passengers | Buses | Bus Passengers | Personal Vehicles | Personal Vehicle Passengers | Pedestrians | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Port Name | |||||||||||||
TX: Laredo | 456912 | 27331291 | 16555501 | 9484814 | 62322 | 3060584 | 2141567 | 0 | 670433 | 13725607 | 117969118 | 273240177 | 82942171 |
TX: Eagle Pass | 456912 | 1774624 | 1054386 | 676904 | 31437 | 803534 | 1365715 | 83048 | 42050 | 340218 | 57603495 | 136747990 | 12947972 |
TX: El Paso | 456912 | 13050522 | 6504160 | 5548367 | 23793 | 660056 | 842385 | 49831 | 311136 | 5624071 | 261185949 | 567649753 | 128299884 |
TX: Brownsville | 456912 | 4503798 | 2249740 | 2031792 | 14299 | 155970 | 1243865 | 2592 | 180687 | 1591315 | 119779453 | 263794440 | 55892176 |
AZ: Nogales | 456912 | 5076159 | 3812053 | 1044586 | 11647 | 539083 | 357639 | 35902 | 136006 | 2464601 | 65971957 | 164483990 | 97174209 |
df_crossing['container_total'] = df_crossing['Loaded Truck Containers'] + df_crossing['Empty Truck Containers']
df_crossing = df_crossing.sort(columns='container_total')
# input data, specifically the second and
# third rows, skipping the first column
x1 = df_crossing['Loaded Truck Containers']
x2 = df_crossing['Empty Truck Containers']
# Create the bar labels
bar_labels = df_crossing.index
# Create a figure
fig = plt.figure(figsize=(10,8))
# Set the y position
y_pos = np.arange(len(x1))
y_pos = [x for x in y_pos]
plt.yticks(y_pos, bar_labels, fontsize=10)
# Create a horizontal bar in the position y_pos
plt.barh(y_pos,
# using x1 data
x1,
# that is centered
align='center',
# with alpha 0.4
alpha=0.4,
# and color green
color='#263F13')
# Create a horizontal bar in the position y_pos
plt.barh(y_pos,
# using NEGATIVE x2 data
-x2,
# that is centered
align='center',
# with alpha 0.4
alpha=0.4,
# and color green
color='#77A61D')
# annotation and labels
plt.xlabel('Empty Containers: Light Green. Full Containers: Dark Green')
t = plt.title('Comparison of Empty And Full Truck Containers Per Border Crossing')
plt.ylim([-1,len(x1)+0.1])
plt.grid()
plt.show()