From dbcce25d8c14402102641ed564862a7955be7467 Mon Sep 17 00:00:00 2001
From: Anton Achhammer <anton.achhammer@oth-regensburg.de>
Date: Wed, 31 Jan 2024 15:53:18 +0100
Subject: [PATCH 1/2] fix: stop the ISO-2 country code "NA" (Namibia) from
 being parsed as NaN when reading CSV files

---
 scripts/build_industry_demand.py |  8 +++++---
 scripts/helpers.py               | 14 ++++++++++++++
 scripts/prepare_energy_totals.py | 13 +++++++------
 3 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/scripts/build_industry_demand.py b/scripts/build_industry_demand.py
index 23acd49c..f99302ce 100644
--- a/scripts/build_industry_demand.py
+++ b/scripts/build_industry_demand.py
@@ -11,6 +11,8 @@
 
 import numpy as np
 import pandas as pd
+from helpers import sets_path_to_root, three_2_two_digits_country, read_csv_nafix
+
 
 
 def calculate_end_values(df):
@@ -64,14 +66,14 @@ def country_to_nodal(industrial_production, keys):
         snakemake.config["demand_data"]["base_year"]
     )
 
-    cagr = pd.read_csv("data/demand/industry_growth_cagr.csv", index_col=0)
+    cagr = read_csv_nafix("data/demand/industry_growth_cagr.csv", index_col=0)
 
     countries = snakemake.config["countries"]
     # countries = ["EG", "BH"]
 
     growth_factors = calculate_end_values(cagr)
 
-    industry_base_totals = pd.read_csv(
+    industry_base_totals = read_csv_nafix(
         snakemake.input["base_industry_totals"], index_col=[0, 1]
     )
 
@@ -148,7 +150,7 @@ def match_technology(df):
 
     geo_locs = match_technology(geo_locs).loc[countries]
 
-    AL = pd.read_csv("data/AL_production.csv", index_col=0)
+    AL = read_csv_nafix("data/AL_production.csv", index_col=0)
     AL_prod_tom = AL["production[ktons/a]"].loc[countries]
     AL_emissions = AL_prod_tom * emission_factors["non-ferrous metals"]
 
diff --git a/scripts/helpers.py b/scripts/helpers.py
index 903ff951..f851f8ba 100644
--- a/scripts/helpers.py
+++ b/scripts/helpers.py
@@ -16,6 +16,8 @@
 from shapely.geometry import Point
 from vresutils.costdata import annuity
 
+# list of recognised nan values (NA and na excluded as may be confused with Namibia 2-letter country code)
+NA_VALUES = ["NULL", "", "N/A", "NAN", "NaN", "nan", "Nan", "n/a", "null"]
 
 def sets_path_to_root(root_directory_name):  # Imported from pypsa-africa
     """
@@ -717,3 +719,15 @@ def get_last_commit_message(path):
 
     os.chdir(backup_cwd)
     return last_commit_message
+
+def read_csv_nafix(file, **kwargs):
+    "Function to open a csv as pandas file and standardize the na value"
+    if "keep_default_na" not in kwargs:
+        kwargs["keep_default_na"] = False
+    if "na_values" not in kwargs:
+        kwargs["na_values"] = NA_VALUES
+
+    if os.stat(file).st_size > 0:
+        return pd.read_csv(file, **kwargs)
+    else:
+        return pd.DataFrame()
\ No newline at end of file
diff --git a/scripts/prepare_energy_totals.py b/scripts/prepare_energy_totals.py
index a6727c5b..67d2c5a5 100644
--- a/scripts/prepare_energy_totals.py
+++ b/scripts/prepare_energy_totals.py
@@ -13,7 +13,8 @@
 import pandas as pd
 import py7zr
 import requests
-from helpers import sets_path_to_root, three_2_two_digits_country
+from helpers import sets_path_to_root, three_2_two_digits_country, read_csv_nafix
+
 
 
 def get(item, investment_year=None):
@@ -48,15 +49,15 @@ def calculate_end_values(df):
     investment_year = int(snakemake.wildcards.planning_horizons)
     demand_sc = snakemake.wildcards.demand  # loading the demand scenrario wildcard
 
-    base_energy_totals = pd.read_csv("data/energy_totals_base.csv", index_col=0)
-    growth_factors_cagr = pd.read_csv(
+    base_energy_totals = read_csv_nafix("data/energy_totals_base.csv", index_col=0)
+    growth_factors_cagr = read_csv_nafix(
         "data/demand/growth_factors_cagr.csv", index_col=0
     )
-    efficiency_gains_cagr = pd.read_csv(
+    efficiency_gains_cagr = read_csv_nafix(
         "data/demand/efficiency_gains_cagr.csv", index_col=0
     )
-    fuel_shares = pd.read_csv("data/demand/fuel_shares.csv", index_col=0)
-    district_heating = pd.read_csv("data/demand/district_heating.csv", index_col=0)
+    fuel_shares = read_csv_nafix("data/demand/fuel_shares.csv", index_col=0)
+    district_heating = read_csv_nafix("data/demand/district_heating.csv", index_col=0)
 
     no_years = int(snakemake.wildcards.planning_horizons) - int(
         snakemake.config["demand_data"]["base_year"]

From c7460e65e3d62cc7a27e90664345ffca288808ca Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 31 Jan 2024 14:54:21 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 scripts/build_industry_demand.py | 3 +--
 scripts/helpers.py               | 4 +++-
 scripts/prepare_energy_totals.py | 3 +--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/build_industry_demand.py b/scripts/build_industry_demand.py
index f99302ce..5fb0ed4b 100644
--- a/scripts/build_industry_demand.py
+++ b/scripts/build_industry_demand.py
@@ -11,8 +11,7 @@
 
 import numpy as np
 import pandas as pd
-from helpers import sets_path_to_root, three_2_two_digits_country, read_csv_nafix
-
+from helpers import read_csv_nafix, sets_path_to_root, three_2_two_digits_country
 
 
 def calculate_end_values(df):
diff --git a/scripts/helpers.py b/scripts/helpers.py
index f851f8ba..ee33d6b5 100644
--- a/scripts/helpers.py
+++ b/scripts/helpers.py
@@ -19,6 +19,7 @@
 # list of recognised nan values (NA and na excluded as may be confused with Namibia 2-letter country code)
 NA_VALUES = ["NULL", "", "N/A", "NAN", "NaN", "nan", "Nan", "n/a", "null"]
 
+
 def sets_path_to_root(root_directory_name):  # Imported from pypsa-africa
     """
     Search and sets path to the given root directory (root/path/file).
@@ -720,6 +721,7 @@ def get_last_commit_message(path):
     os.chdir(backup_cwd)
     return last_commit_message
 
+
 def read_csv_nafix(file, **kwargs):
     "Function to open a csv as pandas file and standardize the na value"
     if "keep_default_na" not in kwargs:
@@ -730,4 +732,4 @@ def read_csv_nafix(file, **kwargs):
     if os.stat(file).st_size > 0:
         return pd.read_csv(file, **kwargs)
     else:
-        return pd.DataFrame()
\ No newline at end of file
+        return pd.DataFrame()
diff --git a/scripts/prepare_energy_totals.py b/scripts/prepare_energy_totals.py
index 67d2c5a5..a1f063da 100644
--- a/scripts/prepare_energy_totals.py
+++ b/scripts/prepare_energy_totals.py
@@ -13,8 +13,7 @@
 import pandas as pd
 import py7zr
 import requests
-from helpers import sets_path_to_root, three_2_two_digits_country, read_csv_nafix
-
+from helpers import read_csv_nafix, sets_path_to_root, three_2_two_digits_country
 
 
 def get(item, investment_year=None):