From dbcce25d8c14402102641ed564862a7955be7467 Mon Sep 17 00:00:00 2001 From: Anton Achhammer Date: Wed, 31 Jan 2024 15:53:18 +0100 Subject: [PATCH 1/2] fix: fix incorrect recognition of the ISO-2 country code "NA" as NaN value --- scripts/build_industry_demand.py | 8 +++++--- scripts/helpers.py | 14 ++++++++++++++ scripts/prepare_energy_totals.py | 13 +++++++------ 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/scripts/build_industry_demand.py b/scripts/build_industry_demand.py index 23acd49c..f99302ce 100644 --- a/scripts/build_industry_demand.py +++ b/scripts/build_industry_demand.py @@ -11,6 +11,8 @@ import numpy as np import pandas as pd +from helpers import sets_path_to_root, three_2_two_digits_country, read_csv_nafix + def calculate_end_values(df): @@ -64,14 +66,14 @@ def country_to_nodal(industrial_production, keys): snakemake.config["demand_data"]["base_year"] ) - cagr = pd.read_csv("data/demand/industry_growth_cagr.csv", index_col=0) + cagr = read_csv_nafix("data/demand/industry_growth_cagr.csv", index_col=0) countries = snakemake.config["countries"] # countries = ["EG", "BH"] growth_factors = calculate_end_values(cagr) - industry_base_totals = pd.read_csv( + industry_base_totals = read_csv_nafix( snakemake.input["base_industry_totals"], index_col=[0, 1] ) @@ -148,7 +150,7 @@ def match_technology(df): geo_locs = match_technology(geo_locs).loc[countries] - AL = pd.read_csv("data/AL_production.csv", index_col=0) + AL = read_csv_nafix("data/AL_production.csv", index_col=0) AL_prod_tom = AL["production[ktons/a]"].loc[countries] AL_emissions = AL_prod_tom * emission_factors["non-ferrous metals"] diff --git a/scripts/helpers.py b/scripts/helpers.py index 903ff951..f851f8ba 100644 --- a/scripts/helpers.py +++ b/scripts/helpers.py @@ -16,6 +16,8 @@ from shapely.geometry import Point from vresutils.costdata import annuity +# list of recognised nan values (NA and na excluded as may be confused with Namibia 2-letter country code) +NA_VALUES = ["NULL", "", "N/A", "NAN", "NaN", "nan", "Nan", "n/a", "null"] def sets_path_to_root(root_directory_name): # Imported from pypsa-africa """ @@ -717,3 +719,15 @@ def get_last_commit_message(path): os.chdir(backup_cwd) return last_commit_message + +def read_csv_nafix(file, **kwargs): + "Function to open a csv as pandas file and standardize the na value" + if "keep_default_na" not in kwargs: + kwargs["keep_default_na"] = False + if "na_values" not in kwargs: + kwargs["na_values"] = NA_VALUES + + if os.stat(file).st_size > 0: + return pd.read_csv(file, **kwargs) + else: + return pd.DataFrame() \ No newline at end of file diff --git a/scripts/prepare_energy_totals.py b/scripts/prepare_energy_totals.py index a6727c5b..67d2c5a5 100644 --- a/scripts/prepare_energy_totals.py +++ b/scripts/prepare_energy_totals.py @@ -13,7 +13,8 @@ import pandas as pd import py7zr import requests -from helpers import sets_path_to_root, three_2_two_digits_country +from helpers import sets_path_to_root, three_2_two_digits_country, read_csv_nafix + def get(item, investment_year=None): @@ -48,15 +49,15 @@ def calculate_end_values(df): investment_year = int(snakemake.wildcards.planning_horizons) demand_sc = snakemake.wildcards.demand # loading the demand scenrario wildcard - base_energy_totals = pd.read_csv("data/energy_totals_base.csv", index_col=0) - growth_factors_cagr = pd.read_csv( + base_energy_totals = read_csv_nafix("data/energy_totals_base.csv", index_col=0) + growth_factors_cagr = read_csv_nafix( "data/demand/growth_factors_cagr.csv", index_col=0 ) - efficiency_gains_cagr = pd.read_csv( + efficiency_gains_cagr = read_csv_nafix( "data/demand/efficiency_gains_cagr.csv", index_col=0 ) - fuel_shares = pd.read_csv("data/demand/fuel_shares.csv", index_col=0) - district_heating = pd.read_csv("data/demand/district_heating.csv", index_col=0) + fuel_shares = read_csv_nafix("data/demand/fuel_shares.csv", index_col=0) + district_heating = read_csv_nafix("data/demand/district_heating.csv", index_col=0) no_years = int(snakemake.wildcards.planning_horizons) - int( snakemake.config["demand_data"]["base_year"] From c7460e65e3d62cc7a27e90664345ffca288808ca Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 31 Jan 2024 14:54:21 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/build_industry_demand.py | 3 +-- scripts/helpers.py | 4 +++- scripts/prepare_energy_totals.py | 3 +-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/build_industry_demand.py b/scripts/build_industry_demand.py index f99302ce..5fb0ed4b 100644 --- a/scripts/build_industry_demand.py +++ b/scripts/build_industry_demand.py @@ -11,8 +11,7 @@ import numpy as np import pandas as pd -from helpers import sets_path_to_root, three_2_two_digits_country, read_csv_nafix - +from helpers import read_csv_nafix, sets_path_to_root, three_2_two_digits_country def calculate_end_values(df): diff --git a/scripts/helpers.py b/scripts/helpers.py index f851f8ba..ee33d6b5 100644 --- a/scripts/helpers.py +++ b/scripts/helpers.py @@ -19,6 +19,7 @@ # list of recognised nan values (NA and na excluded as may be confused with Namibia 2-letter country code) NA_VALUES = ["NULL", "", "N/A", "NAN", "NaN", "nan", "Nan", "n/a", "null"] + def sets_path_to_root(root_directory_name): # Imported from pypsa-africa """ Search and sets path to the given root directory (root/path/file). @@ -720,6 +721,7 @@ def get_last_commit_message(path): os.chdir(backup_cwd) return last_commit_message + def read_csv_nafix(file, **kwargs): "Function to open a csv as pandas file and standardize the na value" if "keep_default_na" not in kwargs: @@ -730,4 +732,4 @@ def read_csv_nafix(file, **kwargs): if os.stat(file).st_size > 0: return pd.read_csv(file, **kwargs) else: - return pd.DataFrame() \ No newline at end of file + return pd.DataFrame() diff --git a/scripts/prepare_energy_totals.py b/scripts/prepare_energy_totals.py index 67d2c5a5..a1f063da 100644 --- a/scripts/prepare_energy_totals.py +++ b/scripts/prepare_energy_totals.py @@ -13,8 +13,7 @@ import pandas as pd import py7zr import requests -from helpers import sets_path_to_root, three_2_two_digits_country, read_csv_nafix - +from helpers import read_csv_nafix, sets_path_to_root, three_2_two_digits_country def get(item, investment_year=None):