"""
Name: script.py
Label: Construct and map longitudinal data of ecological status of water bodies.
Summary: ThorNoe.GitHub.io/GreenGDP explains the overall approach and methodology.
Rqmts: script_module.py in working directory. ArcGIS Pro 3.2 or later installed.
Usage: This script supports WaterbodiesScriptTool in the gis.tbx toolbox.
See GitHub.com/ThorNoe/GreenGDP for instructions to run or update it all.
License: MIT Copyright (c) 2025
Author: Thor Donsby Noe
"""
########################################################################################
# 0. Imports
########################################################################################
# Import the operating system module (os)
import os
# Import ArcPy (requires ArcGIS Pro installed) and the data/plotting packages used below
import arcpy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from cycler import cycler
from matplotlib.ticker import FuncFormatter
########################################################################################
# 1. Setup
########################################################################################
# Set overwrite option
arcpy.env.overwriteOutput = True
# Specify the parent folder as the working directory of the operating system
# path = r"C:\Users\au687527\GitHub\GreenGDP\gis"
# os.chdir(path)
os.chdir(arcpy.GetParameterAsText(0))
# Specify whether to replace existing feature classes downloaded from WFS service
# wfs_replace = 0
wfs_replace = arcpy.GetParameterAsText(1)
# Specify whether to keep the geodatabase when the script finishes
# keep_gdb = 1
keep_gdb = arcpy.GetParameterAsText(2)
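# Note: the three tool parameters above are read in the order the WaterbodiesScriptTool
# passes them: (0) working directory, (1) wfs_replace, and (2) keep_gdb. To run the
# script outside the toolbox, uncomment the hard-coded alternatives above and comment
# out the GetParameterAsText() calls instead.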
########################################################################################
# 2. Specifications
########################################################################################
# Span of natural capital account (1990 investment value depends on change from 1989)
year_first = 1989
year_last = 2020
# Specify the names of data files for each category of water body and shared statistics
data = {
    "coastal": ["coastal_chlorophyll.xlsx"],
    "lakes": ["lakes_chlorophyll.xlsx"],
    "streams": ["streams_DVFI.xlsx", "streams_1987-2020.xlsx"],
    "shared": ["CPI_NPV.xlsx", "demographics.csv", "geographical.xlsx"],
}
# Specify the names of the corresponding linkage files (and other supporting tables)
linkage = {
    "coastal": ["coastal_stations_VP3.csv", "coastal_chlorophyll_limits.csv"],
    "lakes": ["lakes_stations_VP3.csv", "lakes_stations_XY.csv"],
    "streams": ["streams_stations_VP3.csv"],
}
# WFS service URL for the current water body plan (VP3, covering 2021-2027)
wfs_service = "https://wfs2-miljoegis.mim.dk/vp3endelig2022/ows?service=WFS&request=Getcapabilities"
# For the WFS, specify the name of the feature class (fc) for each type of water body
wfs_fc = {
    "catch": "vp3e2022_kystvand_opland_afg",
    "coastal": "vp3e2022_marin_samlet_1mil",
    "lakes": "vp3e2022_soe_samlet",
    "streams": "vp3e2022_vandloeb_samlet",
}
# For the WFS, specify the names of relevant fields for each type of water body
wfs_fields = {
    "catch": ["op_id", "op_navn"],
    "coastal": ["distr_id", "ov_id", "ov_navn", "ov_typ", "til_oko_fy"],
    "lakes": ["distr_id", "ov_id", "ov_navn", "ov_typ", "til_oko_fy"],
    "streams": ["distr_id", "ov_id", "ov_navn", "ov_typ", "til_oko_bb", "na_kun_stm"],
}
########################################################################################
# 3. Import module and run the functions
########################################################################################
# Import the module with all the homemade functions
import script_module
# Initialize the class for all data processing and mapping functions
c = script_module.Water_Quality(
    year_first,
    year_last,
    data,
    linkage,
    wfs_service,
    wfs_fc,
    wfs_fields,
    wfs_replace,
    keep_gdb,
)
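# Methods of Water_Quality used below (all defined in script_module.py): get_fc_from_WFS,
# observed_indicator, ecological_status, map_book (optional), impute_missing,
# values_by_catchment_area, valuation, and decompose.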
# Dictionaries to store DataFrame, shore length, and stats for each category j
frames_j, shores_j, stats_obs_j, stats_imp_j, stats_imp_MA_j = {}, {}, {}, {}, {}
# Loop over each category j ∈ {coastal, lakes, streams}
for j in ("coastal", "lakes", "streams"):
    # Get the feature class from the WFS service
    c.get_fc_from_WFS(j)
    # DataFrames for the observed biophysical indicator and water body characteristics
    df_ind_obs, df_VP = c.observed_indicator(j)
    # Report ecological status based on observed biophysical indicator
    df_eco_obs, stats_obs_j[j], index_sorted = c.ecological_status(j, df_ind_obs, df_VP)
    # if j == 'streams':
    #     # Create a map book with yearly maps of observed ecological status
    #     c.map_book(j, df_eco_obs)
    # Impute missing values for biophysical indicator and return ecological status
    df_eco_imp, df_eco_imp_MA, stats_imp_j[j], stats_imp_MA_j[j] = c.impute_missing(
        j, df_eco_obs, df_VP, index_sorted
    )
    # DataFrame with variables by coastal catchment area for the Benefit Transfer function
    frames_j[j], shores_j[j] = c.values_by_catchment_area(j, df_eco_imp_MA, df_VP)
    # Optional: Clean up after each iteration of the loop
    if keep_gdb != "true":
        # Delete the feature class for category j
        if arcpy.Exists(j):
            arcpy.Delete_management(j)
# Optional: Clean up the geodatabase
if keep_gdb != "true":
    # Delete all feature classes in the geodatabase
    for fc in arcpy.ListFeatureClasses():
        arcpy.Delete_management(fc)
########################################################################################
# 4.a Stats for all categories j: Shore length and share of it where eco status < Good
########################################################################################
# Set up DataFrame of shore length for each category j ∈ {coastal, lakes, streams}
shores = pd.DataFrame(shores_j)
shores["shores all j"] = shores.sum(axis=1, skipna=True)
shores.to_csv("output\\all_VP_shore length.csv") # save to csv
# Total shore length of each category j
shoresTotal = shores.sum()
# Dictionary of stats for observed, imputed, and imputed with moving average respectively
stats_method = {
    "obs_LessThanGood": stats_obs_j,
    "imp_LessThanGood": stats_imp_j,
    "imp_LessThanGood_MA": stats_imp_MA_j,
}
for key, stats_j in stats_method.items():
    # Set up df of share < good status for each category j ∈ {coastal, lakes, streams}
    stats = pd.DataFrame(stats_j)
    # Plot share of category j with less than good ecological status by year
    for ext in (".pdf", ".png"):
        f1 = (
            stats.loc[list(range(year_first + 1, year_last + 1)), :]
            .plot(ylabel="Share of category with less than good ecological status")
            .get_figure()
        )
        f1.savefig("output\\all_eco_" + key + ext, bbox_inches="tight")
    # Calculate share < good eco status across all categories j weighted by shore length
    stats["all j"] = (
        stats["coastal"] * shoresTotal["coastal"]
        + stats["lakes"] * shoresTotal["lakes"]
        + stats["streams"] * shoresTotal["streams"]
    ) / shoresTotal["shores all j"]
    # Add df including the "all j" column to the dictionary of stats by method
    stats_method[key] = stats
# Concatenate stats for observed, imputed, and imputed with moving average respectively
dfStats = pd.concat(stats_method, axis=1)
dfStats.to_excel("output\\all_eco_LessThanGood.xlsx") # manually delete row 3 in Excel
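# Note: with MultiIndex columns (method, category j), pandas writes an extra header row
# to Excel, which is presumably the "row 3" to delete manually (see comment above).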
########################################################################################
# 4.b Set up comprehensive DataFrame with variables for the Benefit Transfer function
########################################################################################
# Concatenate DataFrames for each category j ∈ {coastal, lakes, streams}
df_BT = pd.concat(frames_j)
df_BT.index.names = ["j", "t", "v"]
df_BT.to_csv("output\\all_eco_imp.csv") # save to csv
########################################################################################
# 4.c Real cost of water pollution and investment in water quality for journal article
########################################################################################
# Costs of Water Pollution (CWP) in real terms (million DKK, 2023 prices) by t, v, and j
CWP_vj, k = c.valuation(df_BT) # k is factor for estimated income ε relative to unitary
# Costs of Water Pollution (CWP) in real terms (million DKK, 2023 prices) by t and j
CWP_j = CWP_vj.groupby("t").sum().rename_axis(None).rename_axis(None, axis=1)
CWP_label = "Cost of current water pollution (million DKK, 2023 prices)"
fig = CWP_j.plot(ylabel=CWP_label).get_figure()
fig.savefig("output\\all_cost.pdf", bbox_inches="tight") # save figure as PDF
plt.close(fig) # close figure to free up memory
# Investment Value of water quality improvement in real terms (million DKK, 2023 prices)
IV_vj = c.valuation(df_BT, investment=True)
# IV of water quality improvement in real terms (million DKK, 2023 prices) by t and j
IV_j = IV_vj.groupby("t").sum().rename_axis(None).rename_axis(None, axis=1)
# Totals using real prices and the same declining discount rate for all years
for a, b in zip([CWP_j, IV_j], ["cost of pollution", "investment in water quality"]):
    a["total"] = a.sum(axis=1)  # sum of CWP or IV over all categories j (by year)
    print("Average yearly", b, "(million DKK, 2023 prices)\n")
    print(a.mean())  # average yearly CWP or IV over all years (by category j & total)
CWP_vj["total"] = CWP_vj.sum(axis=1) # sum of CWP over all categories j (by year & v)
IV_vj["total"] = IV_vj.sum(axis=1) # sum of IV over all categories j (by year & v)
# Save tables as CSV
CWP_j.to_csv("output\\all_cost.csv") # cost by t and j
IV_j.to_csv("output\\all_investment.csv") # IV by t and j
# Function to format the y-axis ticks with thousands delimiter (for IV plots)
def y_f(x, pos):
    return f"{int(x):,}"
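# Illustrative example: y_f(50000, None) returns "50,000", so FuncFormatter(y_f) below
# renders y-axis ticks with a thousands delimiter.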
# IV line plot (total) and stacked bar plot (coastal, lakes, and streams)
IV_label = "Investment in water quality improvement (million DKK, 2023 prices)"
fig, ax = plt.subplots()  # create figure and axis
IV_j.iloc[:, -1].plot(  # line plot for total IV
    kind="line",
    ax=ax,
    color="black",
    use_index=False,  # ignore DatetimeIndex units for x-axis
)
IV_j.iloc[:, :-1].plot(kind="bar", stacked=True, ax=ax)  # stacked bar plot (same axis)
ax.set_ylabel(IV_label) # set y-axis label
ax.set_ylim(bottom=-75000) # set y-axis to begin at -75,000 (to match figures)
ax.yaxis.set_major_formatter(FuncFormatter(y_f)) # add thousands delimiter to y-axis
ax.legend() # add legend for both plots
fig.savefig("output\\all_investment.pdf", bbox_inches="tight") # save figure as PDF
plt.close(fig) # close figure to free up memory
########################################################################################
# 5. Decompose development by holding everything else equal at 1990 level
########################################################################################
# Cost of water pollution and investment value by each driver (other things equal)
CWP_driver_v, CWP_driver_j, CWP_driver, IV_driver_v, IV_driver = c.decompose(df_BT, k)
# Catchment area names
Geo = pd.read_excel("data\\" + data["shared"][2], sheet_name="ID_108", index_col=0)
Geo.index.name = "v"
names = Geo.iloc[:108, 0]
# Total real cost of water pollution (CWP) decomposed by driver (other things equal)
for d, df, suffix in zip([CWP_driver, CWP_driver_v], [CWP_j, CWP_vj], ["", "_v"]):
    # Add total CWP using all drivers (i.e., before decomposition)
    d.columns = ["coastal", "lakes", "streams", "income", "age", "households"]
    d["total"] = df["total"].copy()  # total CWP (before decomposition)
    d = d.rename_axis("driver", axis=1)
    g = ["g (million DKK, 2023 prices)", "g (%)", "g rate (%)"]  # names for d/df index
    if suffix != "_v":
        # Figure for total CWP decomposed by driver (other things equal at 1990 level)
        ax = CWP_driver.plot(ylabel=CWP_label)
        fig = ax.get_figure()
        fig.savefig("output\\all_cost_decomposed.pdf", bbox_inches="tight")  # save PDF
        plt.close(fig)  # close figure to free up memory
        # Growth (both in 2023 prices and in %) and growth rate of total CWP by driver;
        # the growth rate is the compound annual rate over the 30 years from 1990 to 2020
        d.loc[g[0], :] = d.loc[2020, :] - d.loc[1990, :]
        d.loc[g[1], :] = 100 * d.loc[g[0], :] / d.loc[1990, :]
        d.loc[g[2], :] = 100 * (d.loc[2020, :] / d.loc[1990, :]) ** (1 / 30) - 100
        growth = d.tail(3).T
        growth.columns = ["growth (million DKK)", "growth (\%)", "growth rate (\%)"]
        f = {
            col: "{:0,.0f}".format if col == growth.columns[0] else "{:0.2f}".format
            for col in growth.columns
        }
        col_f = "lrrr"  # right-aligned column format; match number of columns
        print("Growth in total CWP due to driver (other things equal at 1990 level)")
        print(d.tail(3), "\n")
    d.to_csv("output\\all_cost_decomposed" + suffix + ".csv")  # save table as CSV
    if suffix == "_v":
        # Growth and growth rate of CWP by driver for each catchment area v
        for v in d.index.get_level_values("v").unique():
            d.loc[(g[0], v), :] = d.loc[(2020, v), :] - d.loc[(1990, v), :]
        for v in d.index.get_level_values("v").unique():
            d.loc[(g[1], v), :] = 100 * d.loc[(g[0], v), :] / d.loc[(1990, v), :]
        for v in d.index.get_level_values("v").unique():
            d.loc[(g[2], v), :] = (
                100 * (d.loc[(2020, v), :] / d.loc[(1990, v), :]) ** (1 / 30) - 100
            )
        growth = d[d.index.get_level_values("t") == "g (%)"].describe().drop("count").T
        growth.columns = ["mean", "std", "min", "25\%", "50\%", "75\%", "max"]
        f = {col: "{:0.2f}".format for col in growth.columns}  # two decimals
        col_f = "lrrrrrrr"  # right-aligned column format; match number of columns
        d = d.merge(names, left_on="v", right_index=True)  # merge with catchment names
        d = d.rename_axis("driver", axis=1)
        print("The 15 catchment areas with largest reduction in total CWP (g in %)")
        print(d.loc["g (%)", :].nsmallest(15, ("total")), "\n")
        print("The 15 catchment areas with largest increase in total CWP (g in %)")
        print(d.loc["g (%)", :].nlargest(15, ("total")), "\n")
    with open("output\\all_cost_decomposed_growth" + suffix + ".tex", "w") as tf:
        tf.write(growth.apply(f).to_latex(column_format=col_f))  # column alignment
# Mean real investment value (IV) decomposed by driver (other things equal at 1990 level)
IV_j_driver = pd.DataFrame() # df to store decomposed mean IV for each category j
for d, df, suffix in zip([IV_driver, IV_driver_v], [IV_j, IV_vj], ["", "_v"]):
    # Add IV due to demographics, i.e. residual after water quality improvements
    d["demographics"] = df["total"].copy() - d.sum(axis=1)
    d["total"] = df["total"].copy()  # Total IV (no decomposition)
    if suffix != "_v":
        # Figure for total IV decomposed by driver (other things equal at 1990 level)
        fig, ax = plt.subplots()
        d.iloc[:, -1].plot(  # line plot for total IV by driver
            kind="line",
            ax=ax,
            color="black",
            use_index=False,  # ignore DatetimeIndex units for x-axis
        )
        d.iloc[:, :-1].plot(kind="bar", stacked=True, ax=ax)  # stacked bar plot
        ax.set_ylabel(IV_label)  # set y-axis label
        ax.set_ylim(bottom=-75000)  # set y-axis to begin at -75,000 (to match figures)
        ax.yaxis.set_major_formatter(FuncFormatter(y_f))  # y-axis thousands delimiter
        ax.legend()  # add legend for both plots
        fig.savefig("output\\all_investment_decomposed.pdf", bbox_inches="tight")
        plt.close(fig)  # close figure to free up memory
        # How many % of total IV is due to demographics?
        for dataFrame in d, df:
            dataFrame.loc["mean IV", :] = dataFrame.mean()  # mean yearly IV by driver
        d["demographics (%)"] = 100 * d["demographics"] / d["total"]  # % of total IV
        print("Yearly IV due to demographics (residual after water qual. improvements)")
        print(d.loc["mean IV", "demographics (%)"], "\n")
        # How many % of IV for category j is due to demographics?
        dict_j = {}  # dictionary to store df of decomposed IV for each category j
        for j in ("coastal", "lakes", "streams"):
            IV_driver_j = pd.DataFrame()
            IV_driver_j["water quality"] = d[j]
            IV_driver_j["demographics"] = df[j] - d[j]
            IV_driver_j["total"] = df[j]
            IV_driver_j["demographics (\%)"] = 100 * (1 - d[j] / df[j])
            dict_j[j] = IV_driver_j
        IV_j_driver = pd.concat(dict_j, axis=1, names=["j", "driver"])
        for col in IV_j_driver.columns:
            if col[1] == "demographics (\%)":
                print(
                    "{0}% of yearly IV due to demographics for {1}\n".format(
                        round(IV_j_driver.loc["mean IV", col]), col[0]
                    )
                )
        # Mean yearly IV for each category j decomposed by water quality and demographics
        mean = IV_j_driver.loc["mean IV", :].unstack(level=0)
        f = {
            "demographics": "{:0.2f}",
            "water quality": "{:0,.0f}",
            "total": "{:0,.0f}",
            "demographics (\%)": "{:0.2f}",
        }
        for row in mean.index:
            if row in f:
                mean.loc[row] = mean.loc[row].map(lambda x: f[row].format(x))
        with open("output\\all_investment_decomposed.tex", "w") as tf:  # save TeX file
            tf.write(mean.to_latex(column_format="lrrr"))
    d.to_csv("output\\all_investment_decomposed" + suffix + ".csv")  # save as CSV
    if suffix == "_v":
        # Decomposition of mean yearly IV per household in v (DKK, 2023 prices)
        N = df_BT[
            (df_BT.index.get_level_values("j") == "coastal")
            & (df_BT.index.get_level_values("t") != 1989)
        ]["N"]
        d = 1e6 * d.div(N, axis=0)  # decomposed IV per household (DKK, 2023 prices)
        df = 1e6 * df.div(N, axis=0)  # IV per household (DKK, 2023 prices)
        for v in d.index.get_level_values("v").unique():
            d.loc[("mean IV", v), :] = d[d.index.get_level_values("v") == v].mean()
            df.loc[("mean IV", v), :] = df[df.index.get_level_values("v") == v].mean()
        mean = d[d.index.get_level_values("t") == "mean IV"].describe().drop("count").T
        mean.columns = ["mean", "std", "min", "25\%", "50\%", "75\%", "max"]
        f = {col: "{:0,.0f}".format for col in mean.columns}
        with open("output\\all_investment_decomposed_v.tex", "w") as tf:
            tf.write(mean.apply(f).to_latex(column_format="lrrrrrrr"))  # col alignment
        d["demographics (%)"] = 100 * d["demographics"] / d["total"]  # % of total IV
        d = d.merge(names, left_on="v", right_index=True)  # catchment area names
        d = d.rename_axis("driver", axis=1)
        print("The 15 catchment areas with lowest mean total IV per household")
        print(d.loc["mean IV", :].nsmallest(15, ("total")), "\n")
        print("The 15 catchment areas with highest mean total IV per household")
        print(d.loc["mean IV", :].nlargest(15, ("total")), "\n")
        for j in ("coastal", "lakes", "streams"):
            m = pd.DataFrame()  # df to store decomposed mean IV for each category j
            m["water quality"] = d.loc["mean IV", j]
            m["demographics"] = df.loc["mean IV", j] - m["water quality"]
            m["total for j"] = df.loc["mean IV", j]
            m["demographics (%)"] = 100 * (1 - m["water quality"] / m["total for j"])
            m["Catchment area name"] = names
            print("The 10 catchment areas with lowest mean IV per household for", j)
            print(m.nsmallest(10, ("total for j")), "\n")
            print("The 10 catchment areas with highest mean IV per household for", j)
            print(m.nlargest(10, ("total for j")), "\n")
# Colors and line styles by category j - matching those used for total CWP decomposition
cl1 = ["#4477AA", "#CCBB44", "#EE6677", "#AA3377", "black"] # 5 colors for coastal
cl2 = ["#66CCEE", "#CCBB44", "#EE6677", "#AA3377", "black"] # 5 colors for lakes
cl3 = ["#228833", "#CCBB44", "#EE6677", "#AA3377", "black"] # 5 colors for streams
lc1 = ["-", "-", "--", ":", "-."] # 5 line styles for coastal
lc2 = ["--", "-", "--", ":", "-."] # 5 line styles for lakes
lc3 = [":", "-", "--", ":", "-."] # 5 line styles for streams
categories = {
    "coastal": (cl1, ["#4477AA", "#CCBB44"], lc1),
    "lakes": (cl2, ["#66CCEE", "#CCBB44"], lc2),
    "streams": (cl3, ["#228833", "#CCBB44"], lc3),
}
# Real cost of water pollution of category j decomposed by driver (other things equal)
for j, (c1, c2, ls) in categories.items():
    # CWP by category j - matching the colors and line styles used for total CWP
    d = CWP_driver_j.loc[:, CWP_driver_j.columns.get_level_values(1) == j].copy()
    d.columns = [j, "income", "age", "households"]
    d["total"] = CWP_j.loc[:, j]
    d = d.rename_axis("driver", axis=1)
    fig, ax = plt.subplots()  # create a new figure and axes
    ax.set_prop_cycle(cycler(color=c1) + cycler(linestyle=ls))  # set the property cycle
    ax = d.plot(ax=ax, ylabel=CWP_label)
    fig.savefig(f"output\\{j}_cost_decomposed.pdf", bbox_inches="tight")
    plt.close(fig)  # close figure to free up memory
    # Figure: IV by category j - matching the colors used for total IV
    df = IV_j_driver.loc[:, IV_j_driver.columns.get_level_values(0) == j].copy()
    df = df.iloc[:-1, :2][j]  # drop "mean IV", "total", and "demographics (\%)"
    fig, ax = plt.subplots()  # create a new figure and axes
    df.plot(kind="bar", stacked=True, ax=ax, color=c2, ylabel=IV_label)  # stacked bars
    ax.yaxis.set_major_formatter(FuncFormatter(y_f))  # y-axis thousands delimiter
    fig.savefig(f"output\\{j}_investment_decomposed.pdf", bbox_inches="tight")
    plt.close(fig)  # close figure to free up memory
    # Calculate growth (in 2023 prices and in %) and growth rate of CWP of j by driver
    d.loc["g (million DKK, 2023 prices)", :] = d.loc[2020, :] - d.loc[1990, :]
    d.loc["g (%)", :] = 100 * d.loc["g (million DKK, 2023 prices)", :] / d.loc[1990, :]
    d.loc["g yearly (%)", :] = 100 * (d.loc[2020, :] / d.loc[1990, :]) ** (1 / 30) - 100
    d.to_csv("output\\" + j + "_cost_decomposed.csv")  # save table as CSV
    print("Growth in CWP of", j, "due to driver (other things equal at 1990 level)")
    print(d.tail(3), "\n")
########################################################################################
# 6. Descriptive statistics for geographical variables; Box plot demographics
########################################################################################
# Geographical data (assumed time-invariant)
SL = pd.read_excel(  # total shore length by category j (regardless of water quality)
    "data\\" + data["shared"][2], sheet_name="all_VP_shore length", index_col=0
)
SL.columns = [
    "Coastline",
    "Shore length of lakes in VP3",
    "Shore length of streams in VP3",
    "Shore length of all water bodies in VP3",
]
PAL = pd.read_excel(
    "data\\" + data["shared"][2], index_col=0
)  # proportion of arable land
PAL["Proportion of arable land"] = np.exp(PAL["ln PAL"])
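# (The geographical Excel file stores the natural log "ln PAL"; exponentiating recovers
# the proportion itself, which is merged with the shore lengths by catchment area "v"
# for the descriptive statistics below.)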
# Descriptive statistics for geographical variables
Geo = SL.merge(PAL[["Proportion of arable land"]], on="v").describe().T
Geo.rename(
    columns={"count": "n", "25%": "25\%", "50%": "50\%", "75%": "75\%"}, inplace=True
)
f = {col: "{:0,.0f}".format if col == "n" else "{:0.2f}".format for col in Geo.columns}
col_f = "lrrrrrrrr" # right-aligned column format; match number of columns
with open("output\\all_VP_shore length_stats.tex", "w") as tf:
    tf.write(Geo.apply(f).to_latex(column_format=col_f))  # column alignment
# Demographics by catchment area v and year t (using extrapolated data for 2019-2020)
Dem = pd.read_csv("output\\all_demographics.csv") # reset index (no v & t multiindex)
# Box plot for mean real income
plt.figure()
sns.boxplot(x="t", y="y", data=Dem, palette=["#CCBB44"])
plt.title("Distribution of mean real household income over catchment areas")
plt.xlabel("") # omit x-axis label
plt.ylabel("Mean real household income (100,000 DKK, 2018 prices)")
plt.xticks(rotation=90)
plt.tight_layout()
plt.savefig("output\\all_demographics_y.pdf", bbox_inches="tight")
plt.close() # close plot to free up memory
# Box plot for number of households
Dem["N"] = Dem["N"] / 100000 # convert number of households to 100,000
plt.figure()
sns.boxplot(x="t", y="N", data=Dem, palette=["#AA3377"])
plt.title("Distribution of households over catchment areas")
plt.xlabel("") # omit x-axis label
plt.ylabel("Number of households (100,000)")
plt.xticks(rotation=90)
plt.tight_layout()
plt.savefig("output\\all_demographics_N.pdf", bbox_inches="tight")
plt.close() # close plot to free up memory
# Box plot for mean age
plt.figure()
sns.boxplot(x="t", y="age", data=Dem, palette=["#EE6677"])
plt.title("Distribution of mean age over catchment areas")
plt.xlabel("") # omit x-axis label
plt.ylabel("Mean age")
plt.yticks([35, 40, 45, 50, 55]) # set specific tick positions on y-axis
plt.axhline(y=45, color="black", linestyle=":")
plt.xticks(rotation=90)
plt.tight_layout()
plt.savefig("output\\all_demographics_age.pdf", bbox_inches="tight")
plt.close() # close plot to free up memory
n = Dem.groupby("t")["D age"].sum() # number of catchment areas w. mean age > 45 years
print(pd.DataFrame({"n": n, "s (%)": 100 * n / 108}))  # number and share: mean age > 45 years