-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathprocess_redistricting.Rmd
452 lines (248 loc) · 18.4 KB
/
process_redistricting.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
---
title: "Redistricting 2022"
output: html_notebook
---
```{r}
#NOTES:
# this is code to process the block equivalency files that are released as part of the redistricting process every 10 years (next one will be in 2032) and then do some of the analysis comparing old and new districts
#the code brings in precinct level presidential voting results from the previous election; this will need to be updated for use in 2032
# it brings in population data from the previous census; this will need to be updated for use in 2032
#Some of the work needed to be done in GIS software in 2022 because I couldn't get R to dissolve the block groups into the new districts (had to do that in ArcMap) and then there's also some spatial joining that needed to be done and that also proved much easier in ArcMap (could also be done in QGIS)
#There's a spot partway down the code where it provides advice about what needs to be done in a GIS program (or perhaps by 2032 we can figure out how to make it work all in R and add code to this)
library(tidyverse)
library(sf)
options(scipen=999)# prevents display of scientific notation
library(tidycensus)
library(readxl)
census_api_key(Sys.getenv("CENSUS_API_KEY"))
```
# Convert equivalency files
```{r}
#download: https://www.mncourts.gov/2021RedistrictingPanel
#set these variables to the path and names of the equivalency files
cg_equiv_file <- './equiv_files/Block-Equivalency-Congress.xlsx'
leg_equiv_file <- './equiv_files/Block-Equivalency-House.xlsx'
sen_equiv_file <- './equiv_files/Block-Equivalency-Senate.xlsx'
#https://www.census.gov/cgi-bin/geo/shapefiles/index.php
#download appropriate block shapefile and save to the TIGER folder (no need to unzip)
#uncomment the next line for the first time you run this
#unzip('./TIGER/tl_2020_27_tabblock20.zip', exdir='./TIGER')
#import TIGER block file for MN, 2020
blocks <- "./TIGER/tl_2020_27_tabblock20.shp"
blocks_sf <- st_read(blocks)
#Get census data at the block level to estimate population in the new Congressional Districts
#find the variables that are available by uncommenting the next line and running
#census_variables <- load_variables(2020, "pl", cache = TRUE)
#variable to get total population
vars = c(pop = "P2_001N")
#make the API call - choose the geography and output and year
block_census <- get_decennial(geography = "block", state="MN", variables = vars, year = "2020", output="tidy")
#join the census data to the block shapefile
blocks_sf <- inner_join(blocks_sf, block_census %>% select(GEOID, value), by=c("GEOID20"="GEOID"))
#IMPORT BLOCK EQUIVALENCY FILES
cg_equiv <- read_xlsx(cg_equiv_file) %>% rename(blk = Block, dist = DistrictID) #Congressional
hs_equiv <- read_xlsx(leg_equiv_file) %>% rename(blk = Block, dist = DistrictID) #state house
sen_equiv <- read_xlsx(sen_equiv_file) %>% rename(blk = Block, dist = DistrictID) #state senate
#JOINING TO BLOCK SHAPEFILE
blocks_cg <- left_join(blocks_sf, cg_equiv, by=c("GEOID20"="blk"))
blocks_hs <- left_join(blocks_sf, hs_equiv %>% select(blk, dist), by=c("GEOID20"="blk"))
blocks_sen <- left_join(blocks_sf, sen_equiv %>% select(blk, dist), by=c("GEOID20"="blk"))
```
# Add political vote data to existing districts
```{r}
#CURRENT shapefiles
#https://www.gis.lcc.mn.gov/html/download.html
#bring in current congressional district shapefile
current_cg_file <- "./shapefiles/C2012/c2012.shp"
current_cg_sf <- st_read(current_cg_file)
#bring in current senate district shapefile
current_sen_file <- "./shapefiles/S2012/S2012.shp"
current_sen_sf <- st_read(current_sen_file)
#bring in current house district shapefile
current_hs_file <- "./shapefiles/L2012/L2012-1.shp"
current_hs_sf <- st_read(current_hs_file)
#last presidential election data by precinct
#downloadable files are here: https://electionresults.sos.state.mn.us/Select/MediaFiles/Index?ersElectionId=136
url <- 'https://electionresultsfiles.sos.state.mn.us/20201103/USPresPct.txt'
prez_precincts <- read_delim(url, delim=";", col_names=FALSE, col_types=cols(X6="c", X9="c", X10="c")) %>% rename(state=X1, countyid=X2, precinctnum=X3, officeid=X4, officename=X5, district=X6, candidateid=X7, candidatename=X8, suffix=X9, incumbentcode=X10, partyid=X11, precincts_reporting=X12, total_precincts=X13, candidate_votes=X14, pct_votes=X15, total_votes_office=X16)
precinct_lkup_url <- 'https://electionresultsfiles.sos.state.mn.us/20201103/PrctTbl.txt'
precinct_lkup <- read_delim(precinct_lkup_url, delim=";", col_types=cols(.default = "c"), col_names = FALSE) %>%
rename(countyid=X1, precinctid=X2, precinct_name=X3, cg=X4, leg=X5, cc_dist = X6, jud_dist=X7, soil_dist=X8, mcd_fips = X9, school_dist = X10)
#add leading zero to districts 1-9
precinct_lkup <- precinct_lkup %>% mutate(leg_new = case_when(str_length(leg)==2~ paste("0", leg, sep=""),
str_length(leg)==3~ leg))
#County ID
#Precinct ID
#Precinct Name
#Congressional District
#Legislative District
#County Commissioner District
#Judicial District
#Soil and Water Conservation District
#MCD FIPS code
#School District number (not used in statewide elections)
#join precinct election results with lkup table to get district codes
#create a senate district number (by stripping from the legislative district number) and collapse the parties into R, D and other
prez_precincts <- left_join(prez_precincts, precinct_lkup %>% select(countyid, precinct_name, precinctid, cg, leg_new), by=c("countyid"="countyid", "precinctnum"="precinctid")) %>% mutate(sen_dist = str_sub(leg_new,1,2),
new_party = case_when(partyid=='R' ~ 'R',
partyid=='DFL' ~ 'D',
TRUE ~ 'other')) %>% rename(leg=leg_new)
#SUMMARIZE election results to CONGRESSIONAL DISTRICT
cg_votes <- pivot_wider(prez_precincts %>% group_by(cg, new_party) %>% summarise(party_votes = sum(candidate_votes)), names_from="new_party", values_from="party_votes") %>% mutate(total_votes = D+other+R, pct_D = D/total_votes, pct_other= other/total_votes, pct_R = R/total_votes)
#SUMMARIZE election results to LEGISLATIVE DISTRICT
leg_votes <- pivot_wider(prez_precincts %>% group_by(leg, new_party) %>% summarise(party_votes = sum(candidate_votes)), names_from="new_party", values_from="party_votes") %>% mutate(total_votes = D+other+R, pct_D = D/total_votes, pct_other= other/total_votes, pct_R = R/total_votes)
#SUMMARIZE election results to SENATE DISTRICT
sen_votes <- pivot_wider(prez_precincts %>% group_by(sen_dist, new_party) %>% summarise(party_votes = sum(candidate_votes)), names_from="new_party", values_from="party_votes") %>% mutate(total_votes = D+other+R, pct_D = D/total_votes, pct_other= other/total_votes, pct_R = R/total_votes)
#JOIN cg_votes with CURRENT CONGRESSIONAL SHAPEFILE
current_cg_sf <- left_join(current_cg_sf, cg_votes, by=c("DISTRICT"="cg"))
#st_write(current_cg_sf, "./output/current_cg_sf.shp")
#JOIN leg_votes with CURRENT LEGISLATIVE SHAPEFILE
current_hs_sf <- left_join(current_hs_sf, leg_votes, by=c("DISTRICT"="leg"))
#st_write(current_hs_sf, "./output/current_hs_sf.shp")
#JOIN sen_votes with CURRENT SENATE SHAPEFILE
current_sen_sf <- left_join(current_sen_sf, sen_votes, by=c("DISTRICT"="sen_dist"))
#st_write(current_sen_sf, "./output/current_sen_sf.shp")
```
# Convert precinct results to centroids to merge with new districts
```{r}
precinct_shapefile <- st_read('./shapefiles/mn_vtd.shp')
precinct_votes <- pivot_wider(prez_precincts %>% group_by(countyid, precinctnum, precinct_name, new_party) %>% summarise(party_votes = sum(candidate_votes)), names_from="new_party", values_from="party_votes") %>%
mutate(total_votes = D+other+R,
pct_D = D/total_votes,
pct_other= other/total_votes,
pct_R = R/total_votes,
geoid=paste(countyid, precinctnum, sep=""))
#add vote results
precinct_shapefile <- left_join(precinct_shapefile, precinct_votes, by=c("id"="geoid"))
#need to include this little snippet below in order for the centroid file to be created correctly
sf::sf_use_s2(FALSE)
precinct_roids <- st_centroid(precinct_shapefile)
#export centroids of precinct voting results
st_write(precinct_roids, './output/precinct_roids.shp')
```
# Dissolve and spatial join
```{r}
#Export shapefiles to do additional work in a GIS program
#export block equivalency shapefiles
#these won't work if existing files are already in this directory
st_write(blocks_cg, "./output/blocks_cg.shp")
st_write(blocks_hs, "./output/blocks_hs.shp")
st_write(blocks_sen, "./output/blocks_sen.shp")
#open those shapefiles in a GIS program and run DISSOLVE to create 3 new shapefiles.
#Save the resulting files as 'cg_new_dis.shp' and 'hs_new_dis.shp' and 'sen_new_dis.shp' and save to new_shapefiles folder
#then do spatial joins of each one with the precinct_roids shapefile
# save those files to analysis folder
# new_cg_votes.shp, new_hs_votes.shp, new_sen_votes.shp
# then return to R and continue with the script below
```
# compare political lean of Congressional Districts - new versus old
```{r}
#bring in the new shapefile created in GIS of the new Congressional districts
new_cg_votes <- st_read('./analysis/new_cg_votes.shp')
# make a dataframe table with the last presidential election voting results for the new districts
new_cg_votes_tbl <- new_cg_votes %>% select(dist, Sum_D, Sum_other, Sum_R) %>% st_drop_geometry() %>% mutate(pctD = Sum_D/(Sum_D+Sum_other+Sum_R), pctR = Sum_R/(Sum_D+Sum_other+Sum_R))
# make a dataframe table with the last presidential election voting results for the old district boundaries
old_cg_votes_tbl <- current_cg_sf %>% select(DISTRICT, pct_D, pct_other, pct_R) %>% st_drop_geometry() %>% rename(pctD_old=pct_D, pctR_old = pct_R)
#compare the old voting results to the new voting results (this is shapefile)
cg_vote_compare <- inner_join(old_cg_votes_tbl %>% select(DISTRICT, pctD_old, pctR_old), new_cg_votes_tbl %>% select(dist, pctD, pctR), by=c("DISTRICT"="dist"))
#export that comparison shapefile to create graphics
write.csv(cg_vote_compare, './analysis/cg_vote_compare.csv', row.names = FALSE)
```
# Compare political lean of House districts - new versus old
```{r}
new_hs_votes <- st_read('./analysis/new_hs_votes.shp')
#need to run chunk that joins precinct data to current_hs_sf (above)
new_hs_votes_tbl <- new_hs_votes %>% select(dist, Sum_D, Sum_other, Sum_R) %>% st_drop_geometry() %>% mutate(pctD = Sum_D/(Sum_D+Sum_other+Sum_R), pctR = Sum_R/(Sum_D+Sum_other+Sum_R))
old_hs_votes_tbl <- current_hs_sf %>% select(DISTRICT, pct_D, pct_other, pct_R) %>% st_drop_geometry() %>% rename(pctD_old=pct_D, pctR_old = pct_R)
hs_vote_compare <- inner_join(old_hs_votes_tbl %>% select(DISTRICT, pctD_old, pctR_old), new_hs_votes_tbl %>% select(dist, pctD, pctR), by=c("DISTRICT"="dist"))
#write.csv(hs_vote_compare, './analysis/hs_vote_compare.csv', row.names = FALSE)
#this creates categories that we used for this story:
#https://www.startribune.com/suburbs-remain-legislative-battleground-under-new-minnesota-political-maps/600148648/?refresh=true
#it identifies districts where the political lean has shifted in a way that it's a much tighter margin between Republican and Democratic votes for president (i.e. these are "new battlegrounds")
#important to note that this is comparing old boundaries to new boundaries, not necessarily the same geographic space
new_hs_votes <- inner_join(new_hs_votes %>% select(OBJECTID, dist, Sum_D, Sum_other, Sum_R), old_hs_votes_tbl, by=c("dist"="DISTRICT")) %>%
mutate(pct_d_new = Sum_D/(Sum_D+Sum_other+Sum_R),
pct_r_new = Sum_R/(Sum_D+Sum_other+Sum_R),
diff_D = pct_d_new-pctD_old,
gap_old = case_when(pctD_old>pctR_old ~ pctD_old-pctR_old,
TRUE ~ pctR_old-pctD_old),
gap_new = case_when(pct_d_new>pct_r_new ~ pct_d_new-pct_r_new,
TRUE ~ pct_r_new-pct_d_new),
gap_diff = case_when(gap_old>gap_new ~ gap_old-gap_new,
TRUE ~ gap_new-gap_old),
category = case_when(gap_new<.1 & gap_diff<.1 ~ 'existing battleground',
gap_new<.1 & gap_new<gap_old ~ 'tighter margin-new battleground',
gap_diff<.05 & gap_new>.1 ~ 'unchanged',
gap_new>=.1 & gap_new>gap_old & gap_diff>.05 ~ 'wider margin',
gap_new>=.1 & gap_old>gap_new & gap_diff>=.05 ~'tigter margin',
TRUE ~ 'unk'))
st_write(new_hs_votes, "./analysis/compare_hs_votes.shp")
new_hs_votes_csv <- new_hs_votes %>% select(dist, pctD_old, pctR_old, pct_d_new, pct_r_new, gap_old, gap_new, gap_diff, category) %>% st_drop_geometry()
write.csv(new_hs_votes_csv, './output/new_hs_votes_csv.csv', row.names=FALSE)
```
# compare political lean of Senate districts -- new versus old
```{r}
new_sen_votes <- st_read('./analysis/new_sen_votes.shp')
new_sen_votes_tbl <- new_sen_votes %>% select(dist, Sum_D, Sum_other, Sum_R) %>% st_drop_geometry() %>% mutate(pctD = Sum_D/(Sum_D+Sum_other+Sum_R), pctR = Sum_R/(Sum_D+Sum_other+Sum_R))
old_sen_votes_tbl <- current_sen_sf %>% select(DISTRICT, pct_D, pct_other, pct_R) %>% st_drop_geometry() %>% rename(pctD_old=pct_D, pctR_old = pct_R)
sen_vote_compare <- inner_join(old_sen_votes_tbl %>% select(DISTRICT, pctD_old, pctR_old), new_sen_votes_tbl %>% select(dist, pctD, pctR), by=c("DISTRICT"="dist"))
#write.csv(sen_vote_compare, './analysis/sen_vote_compare.csv', row.names = FALSE)
new_sen_votes <- inner_join(new_sen_votes %>% select(OBJECTID, dist, Sum_D, Sum_other, Sum_R),
old_sen_votes_tbl, by=c("dist"="DISTRICT")) %>%
mutate(pct_d_new = Sum_D/(Sum_D+Sum_other+Sum_R),
pct_r_new = Sum_R/(Sum_D+Sum_other+Sum_R),
diff_D = pct_d_new-pctD_old,
gap_old = case_when(pctD_old>pctR_old ~ pctD_old-pctR_old,
TRUE ~ pctR_old-pctD_old),
gap_new = case_when(pct_d_new>pct_r_new ~ pct_d_new-pct_r_new,
TRUE ~ pct_r_new-pct_d_new),
gap_diff = case_when(gap_old>gap_new ~ gap_old-gap_new,
TRUE ~ gap_new-gap_old),
category = case_when(gap_new<.1 & gap_diff<.1 ~ 'existing battleground',
gap_new<.1 & gap_new<gap_old ~ 'tighter margin-new battleground',
gap_diff<.05 & gap_new>.1 ~ 'unchanged',
gap_new>=.1 & gap_new>gap_old & gap_diff>.05 ~ 'wider margin',
gap_new>=.1 & gap_old>gap_new & gap_diff>=.05 ~'tigter margin',
TRUE ~ 'unk'))
new_sen_votes_csv <- new_sen_votes %>% select(dist, pctD_old, pctR_old, pct_d_new, pct_r_new, gap_old, gap_new, gap_diff, category) %>% st_drop_geometry()
#write.csv(new_sen_votes_csv, './output/new_sen_votes_csv.csv', row.names=FALSE)
st_write(new_sen_votes, "./analysis/compare_sen_votes.shp")
```
# Get block group Census race/ethnicity data
```{r}
#variables to retrieve
race_vars <- c(totpop ="P2_001N",
hispanic = "P2_002N",
whitenh = "P2_005N",
blacknh = "P2_006N",
amindnh = "P2_007N",
asiannh = "P2_008N",
pacislnh = "P2_009N",
othernh ="P2_010N",
tworacenh = "P2_011N")
#make the API call - choose the geography and output
bg_race_2020<- get_decennial(geography = "block group", state="MN", variables = race_vars, year = "2020", output="wide", geometry = TRUE)
bg_race_2020 <- bg_race_2020 %>% clean_names() %>% mutate(pctwhite = whitenh/totpop,
pcthispanic = hispanic/totpop,
pctblack = blacknh/totpop,
pctami = amindnh/totpop,
pctasian = asiannh/totpop,
pct_two = tworacenh/totpop,
pct_other= (pacislnh+othernh)/totpop)
bg_race_2020 <- bg_race_2020 %>% mutate(maxvalue = pmax(bg_race_2020$pctwhite,
bg_race_2020$pcthispanic,
bg_race_2020$pctblack,
bg_race_2020$pctami,
bg_race_2020$pctasian,
bg_race_2020$pct_two,
bg_race_2020$pct_other),
largest_group = case_when(maxvalue==pctwhite ~ 'White',
maxvalue==pcthispanic ~ 'Hispanic',
maxvalue==pctblack ~ 'Black',
maxvalue==pctami ~ 'American Indian',
maxvalue==pctasian ~ 'Asian',
maxvalue==pct_two ~ 'Two or more',
maxvalue==pct_other ~ 'Other',
TRUE ~ 'tie'))
st_write(bg_race_2020, "./output/bg_race_2020.shp")
```