Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fix incorrect recognition of the ISO-2 country code "NA" as NaN … #273

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions scripts/build_industry_demand.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import numpy as np
import pandas as pd
from helpers import read_csv_nafix, sets_path_to_root, three_2_two_digits_country


def calculate_end_values(df):
Expand Down Expand Up @@ -64,14 +65,14 @@ def country_to_nodal(industrial_production, keys):
snakemake.config["demand_data"]["base_year"]
)

cagr = pd.read_csv("data/demand/industry_growth_cagr.csv", index_col=0)
cagr = read_csv_nafix("data/demand/industry_growth_cagr.csv", index_col=0)

countries = snakemake.config["countries"]
# countries = ["EG", "BH"]

growth_factors = calculate_end_values(cagr)

industry_base_totals = pd.read_csv(
industry_base_totals = read_csv_nafix(
snakemake.input["base_industry_totals"], index_col=[0, 1]
)

Expand Down Expand Up @@ -148,7 +149,7 @@ def match_technology(df):

geo_locs = match_technology(geo_locs).loc[countries]

AL = pd.read_csv("data/AL_production.csv", index_col=0)
AL = read_csv_nafix("data/AL_production.csv", index_col=0)
AL_prod_tom = AL["production[ktons/a]"].loc[countries]
AL_emissions = AL_prod_tom * emission_factors["non-ferrous metals"]

Expand Down
16 changes: 16 additions & 0 deletions scripts/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
from shapely.geometry import Point
from vresutils.costdata import annuity

# Strings treated as missing values (NaN) when passed as ``na_values``;
# read_csv_nafix uses this list as its default.
# "NA" and "na" are deliberately excluded because they would be confused
# with the 2-letter country code of Namibia.
NA_VALUES = ["NULL", "", "N/A", "NAN", "NaN", "nan", "Nan", "n/a", "null"]


def sets_path_to_root(root_directory_name): # Imported from pypsa-africa
"""
Expand Down Expand Up @@ -717,3 +720,16 @@ def get_last_commit_message(path):

os.chdir(backup_cwd)
return last_commit_message


def read_csv_nafix(file, **kwargs):
    """
    Read a CSV file with pandas using a standardized set of NA markers.

    pandas' default NA markers are disabled and replaced by ``NA_VALUES``,
    so that the string "NA" (the 2-letter country code of Namibia) is read
    as text and not as NaN.

    Parameters
    ----------
    file : str, bytes, os.PathLike or file-like object
        Source handed to ``pd.read_csv``.
    **kwargs
        Forwarded to ``pd.read_csv``. ``keep_default_na`` defaults to
        ``False`` and ``na_values`` defaults to ``NA_VALUES``; values given
        by the caller take precedence.

    Returns
    -------
    pandas.DataFrame
        The parsed table, or an empty ``DataFrame`` when the source holds
        no data (zero-byte file or empty buffer).
    """
    kwargs.setdefault("keep_default_na", False)
    if "na_values" not in kwargs:
        kwargs["na_values"] = NA_VALUES

    try:
        # A zero-byte file would make pandas raise EmptyDataError.
        if os.stat(file).st_size == 0:
            return pd.DataFrame()
    except TypeError:
        # `file` is not something os.stat understands (e.g. an open buffer):
        # let pandas read it directly and map "no data" to an empty frame.
        try:
            return pd.read_csv(file, **kwargs)
        except pd.errors.EmptyDataError:
            return pd.DataFrame()

    return pd.read_csv(file, **kwargs)
12 changes: 6 additions & 6 deletions scripts/prepare_energy_totals.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import pandas as pd
import py7zr
import requests
from helpers import sets_path_to_root, three_2_two_digits_country
from helpers import read_csv_nafix, sets_path_to_root, three_2_two_digits_country


def get(item, investment_year=None):
Expand Down Expand Up @@ -48,15 +48,15 @@ def calculate_end_values(df):
investment_year = int(snakemake.wildcards.planning_horizons)
demand_sc = snakemake.wildcards.demand # loading the demand scenario wildcard

base_energy_totals = pd.read_csv("data/energy_totals_base.csv", index_col=0)
growth_factors_cagr = pd.read_csv(
base_energy_totals = read_csv_nafix("data/energy_totals_base.csv", index_col=0)
growth_factors_cagr = read_csv_nafix(
"data/demand/growth_factors_cagr.csv", index_col=0
)
efficiency_gains_cagr = pd.read_csv(
efficiency_gains_cagr = read_csv_nafix(
"data/demand/efficiency_gains_cagr.csv", index_col=0
)
fuel_shares = pd.read_csv("data/demand/fuel_shares.csv", index_col=0)
district_heating = pd.read_csv("data/demand/district_heating.csv", index_col=0)
fuel_shares = read_csv_nafix("data/demand/fuel_shares.csv", index_col=0)
district_heating = read_csv_nafix("data/demand/district_heating.csv", index_col=0)

no_years = int(snakemake.wildcards.planning_horizons) - int(
snakemake.config["demand_data"]["base_year"]
Expand Down
Loading