From d92b69a2b03b1f596780b900ae783bc8076c3b17 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Tue, 23 Apr 2024 16:06:34 +0200 Subject: [PATCH 001/100] Implemented which uses the overpass API to download power features for individual countries. --- config/config.default.yaml | 5 +- envs/environment.yaml | 2 + rules/build_electricity.smk | 34 +++++++++ scripts/clean_osm_data.py | 39 +++++++++++ scripts/retrieve_osm_data.py | 130 +++++++++++++++++++++++++++++++++++ 5 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 scripts/clean_osm_data.py create mode 100644 scripts/retrieve_osm_data.py diff --git a/config/config.default.yaml b/config/config.default.yaml index 42132f226..05418edf2 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -64,6 +64,10 @@ snapshots: end: "2014-01-01" inclusive: 'left' +osm: + retrieve: true + use-prebuilt: false + # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#enable enable: retrieve: auto @@ -79,7 +83,6 @@ enable: custom_busmap: false drop_leap_day: true - # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#co2-budget co2_budget: 2020: 0.701 diff --git a/envs/environment.yaml b/envs/environment.yaml index ee1d1605a..47dcdd620 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -48,6 +48,7 @@ dependencies: - pyxlsb - graphviz - pre-commit +- geojson # Keep in conda environment when calling ipython - ipython @@ -64,3 +65,4 @@ dependencies: - snakemake-executor-plugin-slurm - snakemake-executor-plugin-cluster-generic - highspy + - overpass diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index ed341d2f8..589dfab6f 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -606,3 +606,37 @@ rule prepare_network: "../envs/environment.yaml" script: "../scripts/prepare_network.py" + + +if config["osm"].get("retrieve", True): + rule retrieve_osm_data: + output: + 
cables_way="data/osm/raw/{country}/cables_way_raw.geojson", + lines_way="data/osm/raw/{country}/lines_way_raw.geojson", + substations_way="data/osm/raw/{country}/substations_way_raw.geojson", + substations_node="data/osm/raw/{country}/substations_node_raw.geojson", + transformers_way="data/osm/raw/{country}/transformers_way_raw.geojson", + transformers_node="data/osm/raw/{country}/transformers_node_raw.geojson", + log: + logs("retrieve_osm_data_{country}.log"), + script: + "../scripts/retrieve_osm_data.py" + +rule clean_osm_data: + # params: + # countries=config["countries"], + input: + cables_way=[f"data/osm/raw/{country}/cables_way_raw.geojson" for country in config["countries"]], + lines_way=[f"data/osm/raw/{country}/lines_way_raw.geojson" for country in config["countries"]], + substations_way=[f"data/osm/raw/{country}/substations_way_raw.geojson" for country in config["countries"]], + substations_node=[f"data/osm/raw/{country}/substations_node_raw.geojson" for country in config["countries"]], + transformers_way=[f"data/osm/raw/{country}/transformers_way_raw.geojson" for country in config["countries"]], + transformers_node=[f"data/osm/raw/{country}/transformers_node_raw.geojson" for country in config["countries"]], + output: + dummy="data/osm/raw/dummy.txt" + # cables="resources/RDIR/cables_clean_.geojson" + # lines= + log: + logs("clean_osm_data.log"), + script: + "../scripts/clean_osm_data.py" \ No newline at end of file diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py new file mode 100644 index 000000000..d4c3ba36e --- /dev/null +++ b/scripts/clean_osm_data.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT +""" +TODO To fill later +""" + +# import geojson +import logging +# import numpy as np +# import overpass as op +# import os +# import pandas as pd +# import pypsa +# import requests + +from _helpers import configure_logging +logger = 
logging.getLogger(__name__) + +def clean_osm_data(output): + with open(output, "w") as file: + file.write("Hello, world!\n") + + +if __name__ == "__main__": + # Detect running outside of snakemake and mock snakemake for testing + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("clean_osm_data") + + configure_logging(snakemake) + logger.info("Dummy log: clean_osm_data()") + + output = str(snakemake.output) + clean_osm_data(output) + + diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py new file mode 100644 index 000000000..47592d296 --- /dev/null +++ b/scripts/retrieve_osm_data.py @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT +""" +TODO To fill later +""" + +import geojson +import logging +import overpass as op +import os +import requests +import time + +from _helpers import configure_logging +logger = logging.getLogger(__name__) + + +def _get_overpass_areas(countries): + # If a single country code is provided, convert it to a list + if not isinstance(countries, list): + countries = [countries] + + # Overpass API endpoint URL + overpass_url = "https://overpass-api.de/api/interpreter" + + osm_areas = [] + for c in countries: + # Overpass query to fetch the relation for the specified country code + overpass_query = f""" + [out:json]; + area["ISO3166-1"="{c}"]; + out; + """ + + # Send the request to Overpass API + response = requests.post(overpass_url, data=overpass_query) + + # Parse the response + data = response.json() + + # Check if the response contains any results + if "elements" in data and len(data["elements"]) > 0: + # Extract the area ID from the relation + osm_area_id = data["elements"][0]["id"] + osm_areas.append(f"area({osm_area_id})") + else: + # Print a warning if no results are found for the country code + logger.info(f"No area code found for the specified country code: {c}. 
Ommitted from the list.") + + # Create a dictionary mapping country codes to their corresponding OSM area codes + op_areas_dict = dict(zip(countries, osm_areas)) + + return op_areas_dict + + +def retrieve_osm_data( + country, + output, + features=[ + "cables_way", + "lines_way", + "substations_way", + "substations_node", + "transformers_way", + "transformers_node", + ]): + + op_area = _get_overpass_areas(country) + + features_dict= { + 'cables_way': 'way["power"="cable"]', + 'lines_way': 'way["power"="line"]', + 'substations_way': 'way["power"="substation"]', + 'substations_node': 'node["power"="substation"]', + 'transformers_way': 'way["power"="transformer"]', + 'transformers_node': 'node["power"="transformer"]', + } + + for f in features: + if f not in features_dict: + raise ValueError(f"Invalid feature: {f}. Supported features: {list(features_dict.keys())}") + logger.info(f"Invalid feature: {f}. Supported features: {list(features_dict.keys())}") + + logger.info(f" - Fetching OSM data for feature '{f}' in {country}...") + # Build the overpass query + op_query = f''' + {op_area[country]}->.searchArea; + ( + {features_dict[f]}(area.searchArea); + ); + out body geom; + ''' + + # Send the request + # response = requests.post(overpass_url, data = op_query) + response = op.API(timeout=300).get(op_query) # returns data in geojson format. Timeout (max.) 
set to 300s + + filepath = output[f] + parentfolder = os.path.dirname(filepath) + if not os.path.exists(parentfolder): + # Create the folder and its parent directories if they don't exist + os.makedirs(parentfolder) + + with open(filepath, mode = "w") as f: + geojson.dump(response,f,indent=2) + # geojson.dump(response.json(),f,indent=2) + logger.info(" - Done.") + time.sleep(5) + + +if __name__ == "__main__": + # Detect running outside of snakemake and mock snakemake for testing + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("retrieve_osm_data", country="BE") + + configure_logging(snakemake) + + # Retrieve the OSM data + country = snakemake.wildcards.country + output = snakemake.output + + # Wait 5 seconds before fetching the OSM data to prevent too many requests error + # TODO pypsa-eur: Add try catch to implement this only when needed + logger.info(f"Waiting 5 seconds... Retrieving OSM data for {country}:") + time.sleep(5) + retrieve_osm_data(country, output) \ No newline at end of file From 6352c03c75993fe19a69bbd0d4119d2c1d125646 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Wed, 24 Apr 2024 17:30:09 +0200 Subject: [PATCH 002/100] Extended rule by input. 
--- rules/build_electricity.smk | 28 +++++++++++++++++++++------- scripts/clean_osm_data.py | 11 +++++++++-- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 589dfab6f..a23bffc6f 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -622,16 +622,21 @@ if config["osm"].get("retrieve", True): script: "../scripts/retrieve_osm_data.py" +FEATURES = ["cables_way", "lines_way", "substations_way", "substations_node", "transformers_way", "transformers_node"] rule clean_osm_data: # params: # countries=config["countries"], input: - cables_way=[f"data/osm/raw/{country}/cables_way_raw.geojson" for country in config["countries"]], - lines_way=[f"data/osm/raw/{country}/lines_way_raw.geojson" for country in config["countries"]], - substations_way=[f"data/osm/raw/{country}/substations_way_raw.geojson" for country in config["countries"]], - substations_node=[f"data/osm/raw/{country}/substations_node_raw.geojson" for country in config["countries"]], - transformers_way=[f"data/osm/raw/{country}/transformers_way_raw.geojson" for country in config["countries"]], - transformers_node=[f"data/osm/raw/{country}/transformers_node_raw.geojson" for country in config["countries"]], + **{ + f"{country}": [f"data/osm/raw/{country}/{feature}.geojson" for feature in FEATURES] + for country in config["countries"] + }, + # cables_way[country]=[f"data/osm/raw/{country}/cables_way_raw.geojson" for country in config["countries"]], + # lines_way=[f"data/osm/raw/{country}/lines_way_raw.geojson" for country in config["countries"]], + # substations_way=[f"data/osm/raw/{country}/substations_way_raw.geojson" for country in config["countries"]], + # substations_node=[f"data/osm/raw/{country}/substations_node_raw.geojson" for country in config["countries"]], + # transformers_way=[f"data/osm/raw/{country}/transformers_way_raw.geojson" for country in config["countries"]], + # 
transformers_node=[f"data/osm/raw/{country}/transformers_node_raw.geojson" for country in config["countries"]], output: dummy="data/osm/raw/dummy.txt" # cables="resources/RDIR/cables_clean_.geojson" @@ -639,4 +644,13 @@ rule clean_osm_data: log: logs("clean_osm_data.log"), script: - "../scripts/clean_osm_data.py" \ No newline at end of file + "../scripts/clean_osm_data.py" + + +# { +# f"{country}": f"{ +# f"{feature}": f"data/osm/raw/{country}/{feature}.geojson" +# }" +# for feature in FEATURES +# for country in config["countries"] +# } \ No newline at end of file diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index d4c3ba36e..305a9fb98 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -7,13 +7,14 @@ """ # import geojson +import geopandas as gpd import logging # import numpy as np -# import overpass as op # import os -# import pandas as pd +import pandas as pd # import pypsa # import requests +import tqdm.auto as tqdm from _helpers import configure_logging logger = logging.getLogger(__name__) @@ -36,4 +37,10 @@ def clean_osm_data(output): output = str(snakemake.output) clean_osm_data(output) + # Create df by iterating over lines_way and append them to df_lines_way + gdf1 = gpd.read_file(snakemake.input["lines_way"]) + + + snakemake.wildcards + snakemake.input["lines_way"].keys() From 87b4ccedf1d5aaf8cae59d4d16f9f7bc0ce279c0 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Mon, 29 Apr 2024 21:03:22 +0200 Subject: [PATCH 003/100] Bug fixes and improvements to clean_osm_data.py. Added in retrieve_osm_data.py. 
--- rules/build_electricity.smk | 36 ++-- scripts/clean_osm_data.py | 361 +++++++++++++++++++++++++++++++++-- scripts/retrieve_osm_data.py | 25 ++- 3 files changed, 386 insertions(+), 36 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 8d68cfb34..c4c89c472 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -611,32 +611,34 @@ rule prepare_network: if config["osm"].get("retrieve", True): rule retrieve_osm_data: output: - cables_way="data/osm/raw/{country}/cables_way_raw.geojson", - lines_way="data/osm/raw/{country}/lines_way_raw.geojson", - substations_way="data/osm/raw/{country}/substations_way_raw.geojson", - substations_node="data/osm/raw/{country}/substations_node_raw.geojson", - transformers_way="data/osm/raw/{country}/transformers_way_raw.geojson", - transformers_node="data/osm/raw/{country}/transformers_node_raw.geojson", + cables_way="data/osm/raw/{country}/cables_way_raw.json", + lines_way="data/osm/raw/{country}/lines_way_raw.json", + substations_way="data/osm/raw/{country}/substations_way_raw.json", + substations_node="data/osm/raw/{country}/substations_node_raw.json", + transformers_way="data/osm/raw/{country}/transformers_way_raw.json", + transformers_node="data/osm/raw/{country}/transformers_node_raw.json", + relations="data/osm/raw/{country}/relations_raw.json", log: logs("retrieve_osm_data_{country}.log"), script: "../scripts/retrieve_osm_data.py" -FEATURES = ["cables_way", "lines_way", "substations_way", "substations_node", "transformers_way", "transformers_node"] +# FEATURES = ["cables_way", "lines_way", "substations_way", "substations_node", "transformers_way", "transformers_node"] rule clean_osm_data: # params: # countries=config["countries"], input: - **{ - f"{country}": [f"data/osm/raw/{country}/{feature}.geojson" for feature in FEATURES] - for country in config["countries"] - }, - # cables_way[country]=[f"data/osm/raw/{country}/cables_way_raw.geojson" for country in 
config["countries"]], - # lines_way=[f"data/osm/raw/{country}/lines_way_raw.geojson" for country in config["countries"]], - # substations_way=[f"data/osm/raw/{country}/substations_way_raw.geojson" for country in config["countries"]], - # substations_node=[f"data/osm/raw/{country}/substations_node_raw.geojson" for country in config["countries"]], - # transformers_way=[f"data/osm/raw/{country}/transformers_way_raw.geojson" for country in config["countries"]], - # transformers_node=[f"data/osm/raw/{country}/transformers_node_raw.geojson" for country in config["countries"]], + # **{ + # f"{country}": [f"data/osm/raw/{country}/{feature}.geojson" for feature in FEATURES] + # for country in config["countries"] + # }, + cables_way=[f"data/osm/raw/{country}/cables_way_raw.json" for country in config["countries"]], + lines_way=[f"data/osm/raw/{country}/lines_way_raw.json" for country in config["countries"]], + substations_way=[f"data/osm/raw/{country}/substations_way_raw.json" for country in config["countries"]], + substations_node=[f"data/osm/raw/{country}/substations_node_raw.json" for country in config["countries"]], + transformers_way=[f"data/osm/raw/{country}/transformers_way_raw.json" for country in config["countries"]], + transformers_node=[f"data/osm/raw/{country}/transformers_node_raw.json" for country in config["countries"]], + relations=[f"data/osm/raw/{country}/relations_raw.json" for country in config["countries"]], output: dummy="data/osm/raw/dummy.txt" # cables="resources/RDIR/cables_clean_.geojson" diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 305a9fb98..e534801c3 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -6,14 +6,12 @@ TODO To fill later """ -# import geojson import geopandas as gpd +import json import logging -# import numpy as np -# import os import pandas as pd -# import pypsa -# import requests +import re +from shapely.geometry import LineString, Point import tqdm.auto as tqdm from _helpers 
import configure_logging @@ -24,6 +22,125 @@ def clean_osm_data(output): file.write("Hello, world!\n") +def _create_linestring(row): + coords = [(coord['lon'], coord['lat']) for coord in row["geometry"]] + return LineString(coords) + + +def _clean_voltage(column): + """ + Function to clean the raw voltage column: manual fixing and drop nan values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + column = ( + column + .astype(str) + .str.lower() + .str.replace("fixme", "") + .str.replace("(temp 150000)", "") + .str.replace("low", "1000") + .str.replace("minor", "1000") + .str.replace("medium", "33000") + .str.replace("med", "33000") + .str.replace("m", "33000") + .str.replace("high", "150000") + .str.replace("unknown", "") + .str.replace("23000-109000", "109000") + .str.replace("INF", "") + .str.replace("<", "") + .str.replace("?", "") + .str.replace(",", "") + .str.replace(" ", "") + .str.replace("_", "") + .str.replace("kv", "000") + .str.replace("v", "") + .str.replace("/", ";") + .str.replace("nan", "") + .str.replace("", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r'[^0-9;]', '', x)) + + column.dropna(inplace=True) + return column + + +def _clean_circuits(column): + """ + Function to clean the raw circuits column: manual fixing and drop nan values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + column = column.copy() + column = ( + column + .astype(str) + .str.replace("partial", "") + .str.replace("1operator=RTE operator:wikidata=Q2178795", "") + .str.lower() + .str.replace("1/3", "1") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r'[^0-9;]', '', x)) + + column.dropna(inplace=True) + return column.astype(str) + 
+ +def _clean_frequency(column): + column = column.copy() + to_fifty = column.astype(str) != "0" + column[to_fifty] = "50" + + return column + + +def _split_voltage(df): + to_split = df['voltage'].str.contains(';') + new_rows = [] + for index, row in df[to_split].iterrows(): + split_values = row["voltage"].split(';') + new_sub_id_len = int(len(split_values)) + for i, value in enumerate(split_values): + new_sub_id = str(i+1) + new_id = str(row['id']) + '_' + new_sub_id + new_row = { + 'id': new_id, + 'sub_id': new_sub_id, + 'sub_id_len': new_sub_id_len, + 'bounds': row['bounds'], + 'nodes': row['nodes'], + 'geometry': row['geometry'], + 'power': row['power'], + 'cables': row['cables'], + 'circuits': row['circuits'], + 'frequency': row['frequency'], + 'voltage': value, + 'wires': row['wires'],} + new_rows.append(new_row) + + # Create DataFrame from split rows + split_df = pd.DataFrame(new_rows) + df_new = pd.concat([df[~to_split], split_df]) + df_new["sub_id_len"] = df_new["sub_id_len"].astype(int) + + # Append the original DataFrame with split_df + return df_new + + if __name__ == "__main__": # Detect running outside of snakemake and mock snakemake for testing if "snakemake" not in globals(): @@ -34,13 +151,235 @@ def clean_osm_data(output): configure_logging(snakemake) logger.info("Dummy log: clean_osm_data()") - output = str(snakemake.output) - clean_osm_data(output) + # input_path = snakemake.input.lines_way + snakemake.input.cables_way + # input_path = { + # "lines": snakemake.input.lines_way, + # "cables": snakemake.input.cables_way, + # } + + # columns = ["id", "sub_id", "sub_id_len", "bounds", "nodes", "geometry", "power", "cables", "circuits", "frequency", "voltage", "wires"] + # df_lines = pd.DataFrame(columns=columns) + # crs = "EPSG:4326" + + # # using tqdm loop over input path + + # for key in input_path: + # logger.info(f"Processing {key}...") + # for idx, ip in enumerate(input_path[key]): + # if os.path.exists(ip) and os.path.getsize(ip) > 400: # 
unpopulated OSM json is about 51 bytes + # logger.info(f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(input_path[key])).zfill(2)}: {ip}") + # with open(ip, "r") as f: + # data = json.load(f) + + # df = pd.DataFrame(data['elements']) + # df["id"] = df["id"].astype(str) + # df["sub_id"] = "0" # initiate sub_id column with 0 + # df["sub_id_len"] = 0 # initiate sub_id column with 0 + + # col_tags = ["power", "cables", "circuits", "frequency", "voltage", "wires"] + + # tags = pd.json_normalize(df["tags"]) \ + # .map(lambda x: str(x) if pd.notnull(x) else x) + + # for ct in col_tags: + # if ct not in tags.columns: + # tags[ct] = pd.NA + + # tags = tags.loc[:, col_tags] + + # df = pd.concat([df, tags], axis="columns") + # df.drop(columns=["type", "tags"], inplace=True) + + # df_lines = pd.concat([df_lines, df], axis="rows") + + # else: + # logger.info(f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(input_path[key])).zfill(2)} (empty): {ip}") + # continue + # logger.info("---") + + # # Drop duplicates + # df_lines.drop_duplicates(subset="id", inplace=True) + + # df_lines["voltage"] = _clean_voltage(df_lines["voltage"]) + # # drop voltage = "" + # df_lines = _split_voltage(df_lines) + # df_lines = df_lines[df_lines["voltage"] != ""] + # df_lines["voltage"] = df_lines["voltage"].astype(int, errors="ignore") + + # # Drop voltages below 220 kV + # df_lines = df_lines[df_lines["voltage"] >= 220000] - # Create df by iterating over lines_way and append them to df_lines_way - gdf1 = gpd.read_file(snakemake.input["lines_way"]) + # # Clean frequencies + # df_lines["frequency"] = _clean_frequency(df_lines["frequency"]) + # df_lines["frequency"] = df_lines["frequency"].astype(int, errors="ignore") + + # # Clean circuits + # df_lines["circuits"] = _clean_circuits(df_lines["circuits"]) + # # Map correct circuits to lines that where split + + # # Initiate new column for cleaned circuits with values that are already valid: + # # Condition 1: Length of sub_id is 0, the line was not 
split + # # Condition 2: Number of entries in circuits separated by semicolon is 1, value is unique + # # Condition 3: Circuits is not an empty string + # # Condition 4: Circuits is not "0" + # bool_circuits_valid = (df_lines["sub_id_len"] == 0) & \ + # (df_lines["circuits"].apply(lambda x: len(x.split(";"))) == 1) & \ + # (df_lines["circuits"] != "") & \ + # (df_lines["circuits"] != "0") + + # df_lines.loc[bool_circuits_valid, "circuits_clean"] = df_lines.loc[bool_circuits_valid, "circuits"] + # # Boolean to check if sub_id_len is equal to the number of circuits + # bool_equal = df_lines["sub_id_len"] == df_lines["circuits"] \ + # .apply(lambda x: len(x.split(";"))) + # op_equal = lambda row: row["circuits"].split(";")[int(row["sub_id"])-1] + + # df_lines.loc[bool_equal, "circuits_clean"] = df_lines[bool_equal] \ + # .apply(op_equal, axis=1) + + # bool_larger = df_lines["sub_id_len"] > \ + # df_lines["circuits"].apply(lambda x: len(x.split(";"))) + + # pd.set_option('display.max_rows', None) + # df_lines.loc[bool_larger, ["id", "sub_id", "sub_id_len", "cables", "circuits", "circuits_clean", "frequency"]] + + + + + + # df_lines[df_lines["sub_id_len"] > 0]["circuits"] + + + # df_lines["geometry"] = df_lines.apply(_create_linestring, axis=1) + # gdf = gpd.GeoDataFrame( + # df_lines[["id", "sub_id", "sub_id_len", "power", "cables", "circuits", "voltage", "geometry"]], + # geometry = "geometry", crs = "EPSG:4326" + # ) - snakemake.wildcards - snakemake.input["lines_way"].keys() + # gdf.explore() + # df_lines.voltage.unique() + + # df_lines.circuits.apply(lambda x: x.split(";")).explode().unique() + + # ol_lines_way = ["id", "power", "cables", "circuits", "frequency", "voltage"] + + # # gdf = gpd.read_file(lines_way[3]) + # # gdf2 = gpd.GeoDataFrame(gdf, geometry=gdf.geometry) + # # df = gdf.to_json() + + # # gdf.to_file("example.geojson", layer_options={"ID_GENERATE": "YES"}) + + + output = str(snakemake.output) + clean_osm_data(output) + + + + +# # Example DataFrame 
+# data = {'id': ["ID1", "ID2", "ID3", "ID4", "ID5"], +# 'A': ["220000", "380000", ";100000", "220000;220000;380000", "220000;;400000;700000"], +# 'B': [1, 2, 3, 4, 5], +# 'C': [6, 7, 8, 9, 10]} +# df = pd.DataFrame(data) + +# # Split the entries in column A that contain a semicolon +# split_rows = df[df['A'].str.contains(';')] +# split_values = split_rows['A'].str.split(';', expand=True) + +# # Create two copies of the rows containing semicolons, one for each split value +# split_rows_1 = split_rows.copy() +# split_rows_2 = split_rows.copy() + +# # Update column A in the split rows to contain the split values +# split_rows_1['A'] = split_values[0] +# split_rows_2['A'] = split_values[1] + +# # Concatenate the split rows with the original DataFrame, excluding the rows containing semicolons +# result_df = pd.concat([df[~df.index.isin(split_rows.index)], split_rows_1, split_rows_2], ignore_index=True) + +# # Display the result +# print(result_df) + + +# '# Sample DataFrame +# data = {'id': ["ID1", "ID2", "ID3", "ID4", "ID5"], +# 'voltage': ["220000", "380000", ";100000", "220000;220000;380000", "220000;;400000;700000"], +# 'B': [1, 2, 3, 4, 5], +# 'C': [6, 7, 8, 9, 10]} +# df = pd.DataFrame(data) + +# # Find rows to split +# to_split = df['voltage'].str.contains(';') + +# # Splitting entries and creating new rows + + +# new_rows = [] + +# for index, row in df[to_split].iterrows(): +# split_values = row["voltage"].split(';') +# for i, value in enumerate(split_values): +# new_id = str(row['id']) + '_' + str(i+1) +# new_row = { +# 'id': new_id, +# 'bounds': row['bounds'], +# 'nodes': row['nodes'], +# 'geometry': row['geometry'], +# 'cables': row['cables'], +# 'circuits': row['circuits'], +# 'frequency': row['frequency'], +# 'voltage': value, +# 'wires': row['wires'],} +# new_rows.append(new_row) + +# # Create DataFrame from split rows +# split_df = pd.DataFrame(new_rows) + +# # Append the original DataFrame with split_df +# final_df = pd.concat([df[~to_split], split_df]) 
+ +# print(final_df) + + + +# from shapely.geometry import LineString +# import numpy as np +# import matplotlib.pyplot as plt + +# def offset_line(original_line, distance): +# # Compute the direction vector between the two endpoints +# direction_vector = np.array(original_line.coords[1]) - np.array(original_line.coords[0]) + +# # Compute the orthogonal vector +# orthogonal_vector = np.array([-direction_vector[1], direction_vector[0]]) + +# # Normalize the orthogonal vector +# orthogonal_vector /= np.linalg.norm(orthogonal_vector) + +# # Compute the offset LineString +# offset_points = [] +# for point in original_line.coords: +# offset_point = np.array(point) + distance * orthogonal_vector +# offset_points.append((offset_point[0], offset_point[1])) + +# return LineString(offset_points) + +# # Example usage: +# original_line = lines.iloc[5] +# offset_distance = 1.0 +# b = offset_line(original_line, offset_distance) +# # Plot both LineStrings +# fig, ax = plt.subplots() +# x, y = original_line.xy +# ax.plot(x, y, label='Original LineString') +# x, y = offset_line.xy +# ax.plot(x, y, label='Offset LineString') +# ax.set_aspect('equal') +# ax.legend() +# plt.xlabel('X') +# plt.ylabel('Y') +# plt.title('Original and Offset LineStrings') +# plt.grid(True) +# plt.show() \ No newline at end of file diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py index 47592d296..15eec040d 100644 --- a/scripts/retrieve_osm_data.py +++ b/scripts/retrieve_osm_data.py @@ -6,9 +6,9 @@ TODO To fill later """ -import geojson +import json import logging -import overpass as op +# import overpass as op import os import requests import time @@ -43,7 +43,10 @@ def _get_overpass_areas(countries): # Check if the response contains any results if "elements" in data and len(data["elements"]) > 0: # Extract the area ID from the relation - osm_area_id = data["elements"][0]["id"] + if c == "FR": # take second one for France + osm_area_id = data["elements"][1]["id"] + else: + 
osm_area_id = data["elements"][0]["id"] osm_areas.append(f"area({osm_area_id})") else: # Print a warning if no results are found for the country code @@ -65,10 +68,14 @@ def retrieve_osm_data( "substations_node", "transformers_way", "transformers_node", + "relations", ]): op_area = _get_overpass_areas(country) + # Overpass API endpoint URL + overpass_url = "https://overpass-api.de/api/interpreter" + features_dict= { 'cables_way': 'way["power"="cable"]', 'lines_way': 'way["power"="line"]', @@ -76,6 +83,7 @@ def retrieve_osm_data( 'substations_node': 'node["power"="substation"]', 'transformers_way': 'way["power"="transformer"]', 'transformers_node': 'node["power"="transformer"]', + 'relations': 'rel["route"="power"]["type"="route"]' } for f in features: @@ -86,6 +94,7 @@ def retrieve_osm_data( logger.info(f" - Fetching OSM data for feature '{f}' in {country}...") # Build the overpass query op_query = f''' + [out:json]; {op_area[country]}->.searchArea; ( {features_dict[f]}(area.searchArea); @@ -94,8 +103,8 @@ def retrieve_osm_data( ''' # Send the request - # response = requests.post(overpass_url, data = op_query) - response = op.API(timeout=300).get(op_query) # returns data in geojson format. Timeout (max.) set to 300s + response = requests.post(overpass_url, data = op_query) + # response = op.API(timeout=300).get(op_query) # returns data in geojson format. Timeout (max.) 
set to 300s filepath = output[f] parentfolder = os.path.dirname(filepath) @@ -104,10 +113,10 @@ def retrieve_osm_data( os.makedirs(parentfolder) with open(filepath, mode = "w") as f: - geojson.dump(response,f,indent=2) - # geojson.dump(response.json(),f,indent=2) + # geojson.dump(response,f,indent=2) + json.dump(response.json(),f,indent=2) logger.info(" - Done.") - time.sleep(5) + # time.sleep(5) if __name__ == "__main__": From e6c9acce29a2981a2c6bc8830bae7d656847d3b0 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Fri, 3 May 2024 17:14:28 +0200 Subject: [PATCH 004/100] Updated clean_osm_data and retrieve_osm_data to create clean substations. --- rules/build_electricity.smk | 18 +- scripts/clean_osm_data.py | 716 +++++++++++++++++++++++++---------- scripts/retrieve_osm_data.py | 21 +- 3 files changed, 546 insertions(+), 209 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index c4c89c472..23bf99969 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -615,9 +615,10 @@ if config["osm"].get("retrieve", True): lines_way="data/osm/raw/{country}/lines_way_raw.json", substations_way="data/osm/raw/{country}/substations_way_raw.json", substations_node="data/osm/raw/{country}/substations_node_raw.json", - transformers_way="data/osm/raw/{country}/transformers_way_raw.json", - transformers_node="data/osm/raw/{country}/transformers_node_raw.json", - relations="data/osm/raw/{country}/relations_raw.json", + substations_relation="data/osm/raw/{country}/substations_relation_raw.json", + # transformers_way="data/osm/raw/{country}/transformers_way_raw.json", + # transformers_node="data/osm/raw/{country}/transformers_node_raw.json", + # route_relations="data/osm/raw/{country}/route_relations_raw.json", log: logs("retrieve_osm_data_{country}.log"), script: @@ -636,13 +637,12 @@ rule clean_osm_data: lines_way=[f"data/osm/raw/{country}/lines_way_raw.json" for country in config["countries"]], 
substations_way=[f"data/osm/raw/{country}/substations_way_raw.json" for country in config["countries"]], substations_node=[f"data/osm/raw/{country}/substations_node_raw.json" for country in config["countries"]], - transformers_way=[f"data/osm/raw/{country}/transformers_way_raw.json" for country in config["countries"]], - transformers_node=[f"data/osm/raw/{country}/transformers_node_raw.json" for country in config["countries"]], - relations=[f"data/osm/raw/{country}/relations_raw.json" for country in config["countries"]], + substations_relation=[f"data/osm/raw/{country}/substations_relation_raw.json" for country in config["countries"]], + # transformers_way=[f"data/osm/raw/{country}/transformers_way_raw.json" for country in config["countries"]], + # transformers_node=[f"data/osm/raw/{country}/transformers_node_raw.json" for country in config["countries"]], + # route_relations=[f"data/osm/raw/{country}/route_relations_raw.json" for country in config["countries"]], output: - dummy="data/osm/raw/dummy.txt" - # cables="resources/RDIR/cables_clean_.geojson" - # lines= + substations="data/osm/clean/substations.geojson", log: logs("clean_osm_data.log"), script: diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index e534801c3..90e3ca17e 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -6,12 +6,17 @@ TODO To fill later """ +from branca.element import Figure +import folium import geopandas as gpd import json import logging +import os +import numpy as np import pandas as pd import re -from shapely.geometry import LineString, Point +from shapely.geometry import LineString, Point, Polygon +from shapely.ops import linemerge import tqdm.auto as tqdm from _helpers import configure_logging @@ -27,6 +32,29 @@ def _create_linestring(row): return LineString(coords) +def _create_polygon(row): + """ + Create a Shapely Polygon from a list of coordinate dictionaries. 
+ + Parameters: + coords (list): List of dictionaries with 'lat' and 'lon' keys representing coordinates. + + Returns: + shapely.geometry.Polygon: The constructed polygon object. + """ + # Extract coordinates as tuples + point_coords = [(coord['lon'], coord['lat']) for coord in row["geometry"]] + + # Ensure closure by repeating the first coordinate as the last coordinate + if point_coords[0] != point_coords[-1]: + point_coords.append(point_coords[0]) + + # Create Polygon object + polygon = Polygon(point_coords) + + return polygon + + def _clean_voltage(column): """ Function to clean the raw voltage column: manual fixing and drop nan values @@ -37,11 +65,21 @@ def _clean_voltage(column): Returns: - column: pandas Series, the cleaned column """ + column = column.copy() + + column = ( + column + .astype(str) + .str.lower() + .str.replace("400/220/110 kV'", "400000;220000;110000") + .str.replace("400/220/110/20_kv", "400000;220000;110000;20000") + .str.replace("2x25000", "25000;25000") + ) + column = ( column .astype(str) .str.lower() - .str.replace("fixme", "") .str.replace("(temp 150000)", "") .str.replace("low", "1000") .str.replace("minor", "1000") @@ -49,23 +87,20 @@ def _clean_voltage(column): .str.replace("med", "33000") .str.replace("m", "33000") .str.replace("high", "150000") - .str.replace("unknown", "") .str.replace("23000-109000", "109000") - .str.replace("INF", "") - .str.replace("<", "") - .str.replace("?", "") - .str.replace(",", "") - .str.replace(" ", "") - .str.replace("_", "") + .str.replace("380000>220000", "380000;220000") + .str.replace(":", ";") + .str.replace("<", ";") + .str.replace(",", ";") .str.replace("kv", "000") - .str.replace("v", "") + .str.replace("kva", "000") .str.replace("/", ";") .str.replace("nan", "") - .str.replace("", "") + .str.replace("", "") ) # Remove all remaining non-numeric characters except for semicolons - column = column.apply(lambda x: re.sub(r'[^0-9;]', '', x)) + column = column.apply(lambda x: re.sub(r'[^0-9;]', 
'', str(x))) column.dropna(inplace=True) return column @@ -88,8 +123,75 @@ def _clean_circuits(column): .str.replace("partial", "") .str.replace("1operator=RTE operator:wikidata=Q2178795", "") .str.lower() + .str.replace("1,5", "3") # (way 998005838, should be corrected in OSM soon) .str.replace("1/3", "1") - .str.replace("", "") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r'[^0-9;]', '', x)) + + column.dropna(inplace=True) + return column.astype(str) + + +def _clean_cables(column): + """ + Function to clean the raw cables column: manual fixing and drop nan values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + column = column.copy() + column = ( + column + .astype(str) + .str.lower() + .str.replace("1/3", "1") + .str.replace("3x2;2", "3") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r'[^0-9;]', '', x)) + + column.dropna(inplace=True) + return column.astype(str) + + +def _clean_wires(column): + """ + Function to clean the raw wires column: manual fixing and drop nan values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + column = column.copy() + column = ( + column + .astype(str) + .str.lower() + .str.replace("?", "") + .str.replace("trzyprzewodowe", "3") + .str.replace("pojedyƄcze", "1") + .str.replace("single", "1") + .str.replace("double", "2") + .str.replace("triple", "3") + .str.replace("quad", "4") + .str.replace("fivefold", "5") + .str.replace("yes", "3") + .str.replace("1/3", "1") + .str.replace("3x2;2", "3") + .str.replace("_", "") + .str.replace("", "") .str.replace("nan", "") ) @@ -100,7 +202,7 @@ def _clean_circuits(column): return column.astype(str) -def 
_clean_frequency(column): +def _set_frequency(column): column = column.copy() to_fifty = column.astype(str) != "0" column[to_fifty] = "50" @@ -108,6 +210,46 @@ def _clean_frequency(column): return column +def _check_voltage(voltage, list_voltages): + voltages = voltage.split(';') + for v in voltages: + if v in list_voltages: + return True + return False + + +def _clean_frequency(column): + column = column.copy() + """ + Function to clean the raw frequency column: manual fixing and drop nan values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + column = column.copy() + column = ( + column + .astype(str) + .str.lower() + .str.replace("16.67", "16.7") + .str.replace("16,7", "16.7") + .str.replace("?", "") + .str.replace("hz", "") + .str.replace(" ", "") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r'[^0-9;.]', '', x)) + + column.dropna(inplace=True) + return column.astype(str) + + def _split_voltage(df): to_split = df['voltage'].str.contains(';') new_rows = [] @@ -124,6 +266,7 @@ def _split_voltage(df): 'bounds': row['bounds'], 'nodes': row['nodes'], 'geometry': row['geometry'], + 'country': row['country'], 'power': row['power'], 'cables': row['cables'], 'circuits': row['circuits'], @@ -141,6 +284,66 @@ def _split_voltage(df): return df_new +def _split_cells(df, cols=["voltage"]): + """ + Split semicolon separated cells i.e. [66000;220000] and create new + identical rows. 
+ + Parameters + ---------- + df : dataframe + Dataframe under analysis + cols : list + List of target columns over which to perform the analysis + + Example + ------- + Original data: + row 1: '66000;220000', '50' + + After applying split_cells(): + row 1, '66000', '50', 2 + row 2, '220000', '50', 2 + """ + if df.empty: + return df + + # Create a dictionary to store the suffix count for each original ID + suffix_counts = {} + # Create a dictionary to store the number of splits associated with each original ID + num_splits = {} + + # Split cells and create new rows + x = df.assign(**{col: df[col].str.split(";") for col in cols}) + x = x.explode(cols, ignore_index=True) + + # Count the number of splits associated with each original ID + num_splits = x.groupby('id').size().to_dict() + + # Update the 'split_elements' column + x["split_elements"] = x["id"].map(num_splits) + + # Function to generate the new ID with suffix and update the number of splits + def generate_new_id(row): + original_id = row["id"] + if row["split_elements"] == 1: + return original_id + else: + suffix_counts[original_id] = suffix_counts.get(original_id, 0) + 1 + return f"{original_id}_{suffix_counts[original_id]}" + + # Update the ID column with the new IDs + x["id"] = x.apply(generate_new_id, axis=1) + + return x + + +# Function to check if any substring is in valid_strings +def _any_substring_in_list(s, list_strings): + substrings = s.split(';') + return any(sub in list_strings for sub in substrings) + + if __name__ == "__main__": # Detect running outside of snakemake and mock snakemake for testing if "snakemake" not in globals(): @@ -151,235 +354,360 @@ def _split_voltage(df): configure_logging(snakemake) logger.info("Dummy log: clean_osm_data()") - # input_path = snakemake.input.lines_way + snakemake.input.cables_way - # input_path = { - # "lines": snakemake.input.lines_way, - # "cables": snakemake.input.cables_way, - # } - - # columns = ["id", "sub_id", "sub_id_len", "bounds", "nodes", 
"geometry", "power", "cables", "circuits", "frequency", "voltage", "wires"] - # df_lines = pd.DataFrame(columns=columns) - # crs = "EPSG:4326" - - # # using tqdm loop over input path - - # for key in input_path: - # logger.info(f"Processing {key}...") - # for idx, ip in enumerate(input_path[key]): - # if os.path.exists(ip) and os.path.getsize(ip) > 400: # unpopulated OSM json is about 51 bytes - # logger.info(f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(input_path[key])).zfill(2)}: {ip}") - # with open(ip, "r") as f: - # data = json.load(f) + ############# BUSES / SUBSTATIONS ###################### + input_path_substations = { + "substations_way": snakemake.input.substations_way, + "substations_relation": snakemake.input.substations_relation, + } + + cols_substations_way = ["id", "geometry", "country", "power", "substation", "voltage", "frequency"] + cols_substations_relation = ["id", "country", "power", "substation", "voltage", "frequency"] + df_substations_way = pd.DataFrame(columns = cols_substations_way) + df_substations_relation = pd.DataFrame(columns = cols_substations_relation) + + for key in input_path_substations: + logger.info(f"Processing {key}...") + for idx, ip in enumerate(input_path_substations[key]): + if os.path.exists(ip) and os.path.getsize(ip) > 400: # unpopulated OSM json is about 51 bytes + country = os.path.basename(os.path.dirname(input_path_substations[key][idx])) + logger.info(f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(input_path_substations[key])).zfill(2)}: {ip}") + with open(ip, "r") as f: + data = json.load(f) - # df = pd.DataFrame(data['elements']) - # df["id"] = df["id"].astype(str) - # df["sub_id"] = "0" # initiate sub_id column with 0 - # df["sub_id_len"] = 0 # initiate sub_id column with 0 + df = pd.DataFrame(data['elements']) + df["id"] = df["id"].astype(str) + df["country"] = country - # col_tags = ["power", "cables", "circuits", "frequency", "voltage", "wires"] + col_tags = ["power", "substation", "voltage", 
"frequency"] - # tags = pd.json_normalize(df["tags"]) \ - # .map(lambda x: str(x) if pd.notnull(x) else x) + tags = pd.json_normalize(df["tags"]) \ + .map(lambda x: str(x) if pd.notnull(x) else x) - # for ct in col_tags: - # if ct not in tags.columns: - # tags[ct] = pd.NA + for ct in col_tags: + if ct not in tags.columns: + tags[ct] = pd.NA - # tags = tags.loc[:, col_tags] + tags = tags.loc[:, col_tags] - # df = pd.concat([df, tags], axis="columns") - # df.drop(columns=["type", "tags"], inplace=True) - - # df_lines = pd.concat([df_lines, df], axis="rows") + df = pd.concat([df, tags], axis="columns") - # else: - # logger.info(f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(input_path[key])).zfill(2)} (empty): {ip}") - # continue - # logger.info("---") - - # # Drop duplicates - # df_lines.drop_duplicates(subset="id", inplace=True) + if key == "substations_way": + df.drop(columns=["type", "tags", "bounds", "nodes"], inplace=True) + df_substations_way = pd.concat([df_substations_way, df], axis="rows") + elif key == "substations_relation": + df.drop(columns=["type", "tags", "bounds"], inplace=True) + df_substations_relation = pd.concat([df_substations_relation, df], axis="rows") - # df_lines["voltage"] = _clean_voltage(df_lines["voltage"]) - # # drop voltage = "" - # df_lines = _split_voltage(df_lines) - # df_lines = df_lines[df_lines["voltage"] != ""] - # df_lines["voltage"] = df_lines["voltage"].astype(int, errors="ignore") + else: + logger.info(f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(input_path_substations[key])).zfill(2)} (empty): {ip}") + continue + logger.info("---") - # # Drop voltages below 220 kV - # df_lines = df_lines[df_lines["voltage"] >= 220000] + df_substations_way.drop_duplicates(subset='id', keep='first', inplace=True) + df_substations_relation.drop_duplicates(subset='id', keep='first', inplace=True) - # # Clean frequencies - # df_lines["frequency"] = _clean_frequency(df_lines["frequency"]) - # df_lines["frequency"] = 
df_lines["frequency"].astype(int, errors="ignore") + df_substations_way["geometry"] = df_substations_way.apply(_create_polygon, axis=1) - # # Clean circuits - # df_lines["circuits"] = _clean_circuits(df_lines["circuits"]) - # # Map correct circuits to lines that where split + # Normalise the members column of df_substations_relation + cols_members = ["id", "type", "ref", "role", "geometry"] + df_substations_relation_members = pd.DataFrame(columns = cols_members) + + for index, row in df_substations_relation.iterrows(): + col_members = ["type", "ref", "role", "geometry"] + df = pd.json_normalize(row["members"]) + + for cm in col_members: + if cm not in df.columns: + df[cm] = pd.NA + + df = df.loc[:, col_members] + df["id"] = str(row["id"]) + df["ref"] = df["ref"].astype(str) + df = df[df["type"] != "node"] + df = df.dropna(subset=["geometry"]) + df = df[~df["role"].isin(["", "incoming_line", "substation", "inner"])] + df_substations_relation_members = pd.concat([df_substations_relation_members, df], axis="rows") - # # Initiate new column for cleaned circuits with values that are already valid: - # # Condition 1: Length of sub_id is 0, the line was not split - # # Condition 2: Number of entries in circuits separated by semicolon is 1, value is unique - # # Condition 3: Circuits is not an empty string - # # Condition 4: Circuits is not "0" - # bool_circuits_valid = (df_lines["sub_id_len"] == 0) & \ - # (df_lines["circuits"].apply(lambda x: len(x.split(";"))) == 1) & \ - # (df_lines["circuits"] != "") & \ - # (df_lines["circuits"] != "0") - - # df_lines.loc[bool_circuits_valid, "circuits_clean"] = df_lines.loc[bool_circuits_valid, "circuits"] + df_substations_relation_members.reset_index(inplace=True) + df_substations_relation_members["linestring"] = df_substations_relation_members.apply(_create_linestring, axis=1) + df_substations_relation_members_grouped = df_substations_relation_members.groupby('id')['linestring'] \ + .apply(lambda x: 
linemerge(x.tolist())).reset_index() + df_substations_relation_members_grouped["geometry"] = df_substations_relation_members_grouped["linestring"].apply(lambda x: x.convex_hull) - # # Boolean to check if sub_id_len is equal to the number of circuits - # bool_equal = df_lines["sub_id_len"] == df_lines["circuits"] \ - # .apply(lambda x: len(x.split(";"))) - # op_equal = lambda row: row["circuits"].split(";")[int(row["sub_id"])-1] - - # df_lines.loc[bool_equal, "circuits_clean"] = df_lines[bool_equal] \ - # .apply(op_equal, axis=1) + df_substations_relation = df_substations_relation.join( + df_substations_relation_members_grouped.set_index('id'), + on='id', how='left' + ).drop(columns=["members", "linestring"]) \ + .dropna(subset=["geometry"]) - # bool_larger = df_lines["sub_id_len"] > \ - # df_lines["circuits"].apply(lambda x: len(x.split(";"))) + # reorder columns and concatenate + df_substations_relation = df_substations_relation[cols_substations_way] + df_substations = pd.concat([df_substations_way, df_substations_relation], axis="rows") + + # Create centroids from geometries + df_substations.loc[:, "geometry"] = df_substations["geometry"].apply(lambda x: x.centroid) + df_substations.loc[:, "lon"] = df_substations["geometry"].apply(lambda x: x.x) + df_substations.loc[:, "lat"] = df_substations["geometry"].apply(lambda x: x.y) + + # Clean columns + df_substations["voltage"] = _clean_voltage(df_substations["voltage"]) + df_substations["frequency"] = _clean_frequency(df_substations["frequency"]) + df_substations["frequency"] = df_substations["frequency"].astype(str, errors="ignore") + + list_voltages = df_substations["voltage"].str.split(";").explode().unique().astype(str) + list_voltages = list_voltages[np.vectorize(len)(list_voltages) >= 6] + list_voltages = list_voltages[~np.char.startswith(list_voltages, '1')] + + bool_voltages = df_substations["voltage"].apply(_check_voltage, list_voltages=list_voltages) + df_substations = df_substations[bool_voltages] + + 
df_substations = _split_cells(df_substations) + bool_voltages = df_substations["voltage"].apply(_check_voltage, list_voltages=list_voltages) + df_substations = df_substations[bool_voltages] + df_substations["split_count"] = df_substations["id"].apply(lambda x: x.split("_")[1] if "_" in x else "0") + df_substations["split_count"] = df_substations["split_count"].astype(int) + + bool_split = df_substations["split_elements"] > 1 + bool_frequency_len = df_substations["frequency"].apply(lambda x: len(x.split(";"))) == df_substations["split_elements"] + df_substations.loc[bool_frequency_len & bool_split, "frequency"] = df_substations.loc[bool_frequency_len & bool_split, "frequency"] \ - # pd.set_option('display.max_rows', None) - # df_lines.loc[bool_larger, ["id", "sub_id", "sub_id_len", "cables", "circuits", "circuits_clean", "frequency"]] - - - + op_freq = lambda row: row["frequency"].split(";")[row["split_count"]-1] - - # df_lines[df_lines["sub_id_len"] > 0]["circuits"] - - - # df_lines["geometry"] = df_lines.apply(_create_linestring, axis=1) - # gdf = gpd.GeoDataFrame( - # df_lines[["id", "sub_id", "sub_id_len", "power", "cables", "circuits", "voltage", "geometry"]], - # geometry = "geometry", crs = "EPSG:4326" - # ) + df_substations.loc[bool_frequency_len & bool_split, ["frequency"]] = df_substations.loc[bool_frequency_len & bool_split, ] \ + .apply(op_freq, axis=1) + + df_substations = _split_cells(df_substations, cols=["frequency"]) + bool_invalid_frequency = df_substations["frequency"].apply(lambda x: x not in ["50", "0"]) + df_substations.loc[bool_invalid_frequency, "frequency"] = "50" + df_substations["power"] = "substation" + df_substations["substation"] = "transmission" + df_substations["dc"] = False + df_substations.loc[df_substations["frequency"] == "0", "dc"] = True + df_substations["under_construction"] = False + df_substations["station_id"] = None + df_substations["tag_area"] = None + + # rename columns + df_substations.rename( + columns={ + "id": 
"bus_id", + "power": "symbol", + "substation":"tag_substation", + }, inplace=True) - # gdf.explore() - # df_lines.voltage.unique() + df_substations = df_substations[[ + "bus_id", + "symbol", + "tag_substation", + "voltage", + "lon", + "lat", + "dc", + "under_construction", + "station_id", + "tag_area", + "country", + "geometry", + ]] + + gdf_substations = gpd.GeoDataFrame(df_substations, geometry = "geometry", crs = "EPSG:4326") - # df_lines.circuits.apply(lambda x: x.split(";")).explode().unique() + filepath_substations = snakemake.output["substations"] + # save substations output + logger.info(f"Exporting clean substations to {filepath_substations}") + parentfolder_substations = os.path.dirname(filepath_substations) + if not os.path.exists(parentfolder_substations): + # Create the folder and its parent directories if they don't exist + os.makedirs(parentfolder_substations) - # ol_lines_way = ["id", "power", "cables", "circuits", "frequency", "voltage"] + gdf_substations.to_file(filepath_substations, driver="GeoJSON") - # # gdf = gpd.read_file(lines_way[3]) - # # gdf2 = gpd.GeoDataFrame(gdf, geometry=gdf.geometry) - # # df = gdf.to_json() + ############# LINES AND CABLES ###################### - # # gdf.to_file("example.geojson", layer_options={"ID_GENERATE": "YES"}) + input_path_lines_cables = { + "lines": snakemake.input.lines_way, + "cables": snakemake.input.cables_way, + } + columns = ["id", "sub_id", "sub_id_len", "bounds", "nodes", "geometry", "country", "power", "cables", "circuits", "frequency", "voltage", "wires"] + df_lines = pd.DataFrame(columns=columns) + crs = "EPSG:4326" - output = str(snakemake.output) - clean_osm_data(output) + # using tqdm loop over input path + for key in input_path_lines_cables: + logger.info(f"Processing {key}...") + for idx, ip in enumerate(input_path_lines_cables[key]): + if os.path.exists(ip) and os.path.getsize(ip) > 400: # unpopulated OSM json is about 51 bytes + country = 
os.path.basename(os.path.dirname(input_path_lines_cables[key][idx])) + + logger.info(f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(input_path_lines_cables[key])).zfill(2)}: {ip}") + with open(ip, "r") as f: + data = json.load(f) + + df = pd.DataFrame(data['elements']) + df["id"] = df["id"].astype(str) + df["sub_id"] = "0" # initiate sub_id column with 0 + df["sub_id_len"] = 0 # initiate sub_id column with 0 + df["country"] = country + col_tags = ["power", "cables", "circuits", "frequency", "voltage", "wires"] + tags = pd.json_normalize(df["tags"]) \ + .map(lambda x: str(x) if pd.notnull(x) else x) + + for ct in col_tags: + if ct not in tags.columns: + tags[ct] = pd.NA + + tags = tags.loc[:, col_tags] -# # Example DataFrame -# data = {'id': ["ID1", "ID2", "ID3", "ID4", "ID5"], -# 'A': ["220000", "380000", ";100000", "220000;220000;380000", "220000;;400000;700000"], -# 'B': [1, 2, 3, 4, 5], -# 'C': [6, 7, 8, 9, 10]} -# df = pd.DataFrame(data) + df = pd.concat([df, tags], axis="columns") + df.drop(columns=["type", "tags"], inplace=True) + + df_lines = pd.concat([df_lines, df], axis="rows") + + else: + logger.info(f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(input_path_lines_cables[key])).zfill(2)} (empty): {ip}") + continue + logger.info("---") + + # Initiate boolean with False, only set to true if all cleaning steps are passed + df_lines["cleaned"] = False + df_lines["voltage"] = _clean_voltage(df_lines["voltage"]) + + list_voltages = df_lines["voltage"].str.split(";").explode().unique().astype(str) + list_voltages = list_voltages[np.vectorize(len)(list_voltages) >= 6] + list_voltages = list_voltages[~np.char.startswith(list_voltages, '1')] + + bool_voltages = df_lines["voltage"].apply(_check_voltage, list_voltages=list_voltages) + df_lines = df_lines[bool_voltages] + + # Additional cleaning + df_lines["circuits"] = _clean_circuits(df_lines["circuits"]) + df_lines["cables"] = _clean_cables(df_lines["cables"]) + df_lines["frequency"] = 
_clean_frequency(df_lines["frequency"]) + df_lines["wires"] = _clean_wires(df_lines["wires"]) + + df_lines = _split_cells(df_lines) + bool_voltages = df_lines["voltage"].apply(_check_voltage, list_voltages=list_voltages) + df_lines = df_lines[bool_voltages] + + bool_ac = df_lines["frequency"] != "0" + bool_dc = ~bool_ac + bool_noinfo = (df_lines["cables"] == "") & (df_lines["circuits"] == "") + valid_frequency = ["50", "0"] + bool_invalid_frequency = df_lines["frequency"].apply(lambda x: x not in valid_frequency) + + # Fill in all values where cables info and circuits does not exist. Assuming 1 circuit + df_lines.loc[bool_noinfo, "circuits"] = "1" + df_lines.loc[bool_noinfo & bool_invalid_frequency, "frequency"] = "50" + df_lines.loc[bool_noinfo, "cleaned"] = True + + df_lines + + df_lines[bool_dc] + + df_lines["geometry"] = df_lines.apply(_create_linestring, axis=1) + gdf_lines = gpd.GeoDataFrame( + df_lines[["id", "power", "cables", "circuits", "voltage", "geometry"]], + geometry = "geometry", crs = "EPSG:4326" + ) + + gdf_lines.explore() -# # Split the entries in column A that contain a semicolon -# split_rows = df[df['A'].str.contains(';')] -# split_values = split_rows['A'].str.split(';', expand=True) + ### Split into AC and DC + df_lines_ac = df_lines[df_lines["frequency"] != "0"].copy() + df_lines_dc = df_lines[df_lines["frequency"] == "0"].copy() -# # Create two copies of the rows containing semicolons, one for each split value -# split_rows_1 = split_rows.copy() -# split_rows_2 = split_rows.copy() + df_lines_dc["cleaned"] = False + + -# # Update column A in the split rows to contain the split values -# split_rows_1['A'] = split_values[0] -# split_rows_2['A'] = split_values[1] -# # Concatenate the split rows with the original DataFrame, excluding the rows containing semicolons -# result_df = pd.concat([df[~df.index.isin(split_rows.index)], split_rows_1, split_rows_2], ignore_index=True) -# # Display the result -# print(result_df) + ######## + ######## + 
######## -# '# Sample DataFrame -# data = {'id': ["ID1", "ID2", "ID3", "ID4", "ID5"], -# 'voltage': ["220000", "380000", ";100000", "220000;220000;380000", "220000;;400000;700000"], -# 'B': [1, 2, 3, 4, 5], -# 'C': [6, 7, 8, 9, 10]} -# df = pd.DataFrame(data) + fig = Figure(width = "50%", height = 600) -# # Find rows to split -# to_split = df['voltage'].str.contains(';') + m = gdf_substations.explore(name = "Buses", color = "red") + m = gdf_lines.explore(m = m, name = "Lines") -# # Splitting entries and creating new rows + folium.LayerControl(collapsed = False).add_to(m) + fig.add_child(m) + m -# new_rows = [] + gdf_substations.explore() + df_lines.voltage.unique() -# for index, row in df[to_split].iterrows(): -# split_values = row["voltage"].split(';') -# for i, value in enumerate(split_values): -# new_id = str(row['id']) + '_' + str(i+1) -# new_row = { -# 'id': new_id, -# 'bounds': row['bounds'], -# 'nodes': row['nodes'], -# 'geometry': row['geometry'], -# 'cables': row['cables'], -# 'circuits': row['circuits'], -# 'frequency': row['frequency'], -# 'voltage': value, -# 'wires': row['wires'],} -# new_rows.append(new_row) + np.set_printoptions(threshold=np.inf) -# # Create DataFrame from split rows -# split_df = pd.DataFrame(new_rows) -# # Append the original DataFrame with split_df -# final_df = pd.concat([df[~to_split], split_df]) + # duplicate_lines = df_lines[df_lines.duplicated(subset=['id'], keep=False)].copy() -# print(final_df) + # grouped_duplicates = duplicate_rows.groupby('id').agg({'country': 'list'}) + a = df_lines[(df_lines["cables"].apply(lambda x: len(x.split(";"))) == 1) & ((df_lines["voltage"].apply(lambda x: len(x.split(";"))) == 1)) & (df_lines["cables"] != "")] + # Drop duplicates + df_lines.drop_duplicates(subset="id", inplace=True) + df_lines["voltage"] = _clean_voltage(df_lines["voltage"]) + # df_lines["frequency"] = _clean_frequency(df_lines["frequency"]) + df_lines["circuits"] = _clean_circuits(df_lines["circuits"]) + + list_voltages = 
df_lines["voltage"].str.split(";").explode().unique().astype(str) + list_voltages = list_voltages[np.vectorize(len)(list_voltages) >= 6] + list_voltages[~np.char.startswith(list_voltages, '1')] -# from shapely.geometry import LineString -# import numpy as np -# import matplotlib.pyplot as plt + # df_lines_subset = df_lines[df_lines["voltage"].apply(_any_substring_in_list, list_voltages)] -# def offset_line(original_line, distance): -# # Compute the direction vector between the two endpoints -# direction_vector = np.array(original_line.coords[1]) - np.array(original_line.coords[0]) + # drop voltage = "" + df_lines = _split_voltage(df_lines) + df_lines = df_lines[df_lines["voltage"] != ""] + df_lines["voltage"] = df_lines["voltage"].astype(int, errors="ignore") -# # Compute the orthogonal vector -# orthogonal_vector = np.array([-direction_vector[1], direction_vector[0]]) + # Drop voltages below 220 kV + df_lines = df_lines[df_lines["voltage"] >= 200000] -# # Normalize the orthogonal vector -# orthogonal_vector /= np.linalg.norm(orthogonal_vector) + # set frequencies + df_lines["frequency"] = _set_frequency(df_lines["frequency"]) + df_lines["frequency"] = df_lines["frequency"].astype(int, errors="ignore") -# # Compute the offset LineString -# offset_points = [] -# for point in original_line.coords: -# offset_point = np.array(point) + distance * orthogonal_vector -# offset_points.append((offset_point[0], offset_point[1])) + # Clean circuits + # Map correct circuits to lines that where split + + # Initiate new column for cleaned circuits with values that are already valid: + # Condition 1: Length of sub_id is 0, the line was not split + # Condition 2: Number of entries in circuits separated by semicolon is 1, value is unique + # Condition 3: Circuits is not an empty string + # Condition 4: Circuits is not "0" + bool_circuits_valid = (df_lines["sub_id_len"] == 0) & \ + (df_lines["circuits"].apply(lambda x: len(x.split(";"))) == 1) & \ + (df_lines["circuits"] != "") & \ + 
(df_lines["circuits"] != "0") + + df_lines.loc[bool_circuits_valid, "circuits_clean"] = df_lines.loc[bool_circuits_valid, "circuits"] + + # Boolean to check if sub_id_len is equal to the number of circuits + bool_equal = df_lines["sub_id_len"] == df_lines["circuits"] \ + .apply(lambda x: len(x.split(";"))) + op_equal = lambda row: row["circuits"].split(";")[int(row["sub_id"])-1] + + df_lines.loc[bool_equal, "circuits_clean"] = df_lines[bool_equal] \ + .apply(op_equal, axis=1) + + bool_larger = df_lines["sub_id_len"] > \ + df_lines["circuits"].apply(lambda x: len(x.split(";"))) + + pd.set_option('display.max_rows', None) + pd.set_option('display.max_columns', None) + df_lines.loc[bool_larger, ["id", "sub_id", "sub_id_len", "cables", "circuits", "circuits_clean", "frequency"]] -# return LineString(offset_points) -# # Example usage: -# original_line = lines.iloc[5] -# offset_distance = 1.0 -# b = offset_line(original_line, offset_distance) -# # Plot both LineStrings -# fig, ax = plt.subplots() -# x, y = original_line.xy -# ax.plot(x, y, label='Original LineString') -# x, y = offset_line.xy -# ax.plot(x, y, label='Offset LineString') -# ax.set_aspect('equal') -# ax.legend() -# plt.xlabel('X') -# plt.ylabel('Y') -# plt.title('Original and Offset LineStrings') -# plt.grid(True) -# plt.show() \ No newline at end of file + output = str(snakemake.output) + clean_osm_data(output) \ No newline at end of file diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py index 15eec040d..9a4526a5f 100644 --- a/scripts/retrieve_osm_data.py +++ b/scripts/retrieve_osm_data.py @@ -66,9 +66,10 @@ def retrieve_osm_data( "lines_way", "substations_way", "substations_node", - "transformers_way", - "transformers_node", - "relations", + "substations_relation", + # "transformers_way", + # "transformers_node", + # "route_relations", ]): op_area = _get_overpass_areas(country) @@ -76,14 +77,22 @@ def retrieve_osm_data( # Overpass API endpoint URL overpass_url = 
"https://overpass-api.de/api/interpreter" + # features_dict= { + # 'cables_way': 'way["power"="cable"]', + # 'lines_way': 'way["power"="line"]', + # 'substations_way': 'way["power"="substation"]', + # 'substations_node': 'node["power"="substation"]', + # 'transformers_way': 'way["power"="transformer"]', + # 'transformers_node': 'node["power"="transformer"]', + # 'route_relations': 'rel["route"="power"]["type"="route"]' + # } + features_dict= { 'cables_way': 'way["power"="cable"]', 'lines_way': 'way["power"="line"]', 'substations_way': 'way["power"="substation"]', 'substations_node': 'node["power"="substation"]', - 'transformers_way': 'way["power"="transformer"]', - 'transformers_node': 'node["power"="transformer"]', - 'relations': 'rel["route"="power"]["type"="route"]' + 'substations_relation': 'relation["power"="substation"]', } for f in features: From 85aa3f1f240c67323d4bc812f16020504c51c9a8 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Mon, 6 May 2024 16:04:37 +0200 Subject: [PATCH 005/100] Finished clean_osm_data function. 
--- rules/build_electricity.smk | 1 + scripts/clean_osm_data.py | 281 +++++++++++++++++++++--------------- 2 files changed, 165 insertions(+), 117 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 23bf99969..4d4495adc 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -643,6 +643,7 @@ rule clean_osm_data: # route_relations=[f"data/osm/raw/{country}/route_relations_raw.json" for country in config["countries"]], output: substations="data/osm/clean/substations.geojson", + lines="data/osm/clean/lines.geojson", log: logs("clean_osm_data.log"), script: diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 90e3ca17e..1ccef3644 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -250,40 +250,6 @@ def _clean_frequency(column): return column.astype(str) -def _split_voltage(df): - to_split = df['voltage'].str.contains(';') - new_rows = [] - for index, row in df[to_split].iterrows(): - split_values = row["voltage"].split(';') - new_sub_id_len = int(len(split_values)) - for i, value in enumerate(split_values): - new_sub_id = str(i+1) - new_id = str(row['id']) + '_' + new_sub_id - new_row = { - 'id': new_id, - 'sub_id': new_sub_id, - 'sub_id_len': new_sub_id_len, - 'bounds': row['bounds'], - 'nodes': row['nodes'], - 'geometry': row['geometry'], - 'country': row['country'], - 'power': row['power'], - 'cables': row['cables'], - 'circuits': row['circuits'], - 'frequency': row['frequency'], - 'voltage': value, - 'wires': row['wires'],} - new_rows.append(new_row) - - # Create DataFrame from split rows - split_df = pd.DataFrame(new_rows) - df_new = pd.concat([df[~to_split], split_df]) - df_new["sub_id_len"] = df_new["sub_id_len"].astype(int) - - # Append the original DataFrame with split_df - return df_new - - def _split_cells(df, cols=["voltage"]): """ Split semicolon separated cells i.e. 
[66000;220000] and create new @@ -338,6 +304,19 @@ def generate_new_id(row): return x +def _distribute_to_circuits(row): + if row["circuits"] != "": + circuits = int(row["circuits"]) + else: + cables = int(row["cables"]) + circuits = cables / 3 + + single_circuit = int(max(1, np.floor_divide(circuits, row["split_elements"]))) + single_circuit = str(single_circuit) + + return single_circuit + + # Function to check if any substring is in valid_strings def _any_substring_in_list(s, list_strings): substrings = s.split(';') @@ -529,7 +508,7 @@ def _any_substring_in_list(s, list_strings): "cables": snakemake.input.cables_way, } - columns = ["id", "sub_id", "sub_id_len", "bounds", "nodes", "geometry", "country", "power", "cables", "circuits", "frequency", "voltage", "wires"] + columns = ["id", "bounds", "nodes", "geometry", "country", "power", "cables", "circuits", "frequency", "voltage", "wires"] df_lines = pd.DataFrame(columns=columns) crs = "EPSG:4326" @@ -547,8 +526,6 @@ def _any_substring_in_list(s, list_strings): df = pd.DataFrame(data['elements']) df["id"] = df["id"].astype(str) - df["sub_id"] = "0" # initiate sub_id column with 0 - df["sub_id_len"] = 0 # initiate sub_id column with 0 df["country"] = country col_tags = ["power", "cables", "circuits", "frequency", "voltage", "wires"] @@ -572,6 +549,18 @@ def _any_substring_in_list(s, list_strings): continue logger.info("---") + # Find duplicates based on id column + duplicate_rows = df_lines[df_lines.duplicated(subset=['id'], keep=False)].copy() + # group rows by id and aggregate the country column to a string split by semicolon + grouped_duplicates = duplicate_rows.groupby('id')["country"].agg(lambda x: ';'.join(x)).reset_index() + duplicate_rows.drop_duplicates(subset="id", inplace=True) + duplicate_rows.drop(columns=["country"], inplace=True) + duplicate_rows = duplicate_rows.join(grouped_duplicates.set_index('id'), on='id', how='left') + + # Drop duplicates and update the df_lines dataframe with the cleaned data 
+ df_lines = df_lines[~df_lines["id"].isin(duplicate_rows["id"])] + df_lines = pd.concat([df_lines, duplicate_rows], axis="rows") + # Initiate boolean with False, only set to true if all cleaning steps are passed df_lines["cleaned"] = False df_lines["voltage"] = _clean_voltage(df_lines["voltage"]) @@ -589,6 +578,9 @@ def _any_substring_in_list(s, list_strings): df_lines["frequency"] = _clean_frequency(df_lines["frequency"]) df_lines["wires"] = _clean_wires(df_lines["wires"]) + df_lines["voltage_original"] = df_lines["voltage"] + df_lines["circuits_original"] = df_lines["circuits"] + df_lines = _split_cells(df_lines) bool_voltages = df_lines["voltage"].apply(_check_voltage, list_voltages=list_voltages) df_lines = df_lines[bool_voltages] @@ -604,27 +596,147 @@ def _any_substring_in_list(s, list_strings): df_lines.loc[bool_noinfo & bool_invalid_frequency, "frequency"] = "50" df_lines.loc[bool_noinfo, "cleaned"] = True - df_lines - - df_lines[bool_dc] - - df_lines["geometry"] = df_lines.apply(_create_linestring, axis=1) - gdf_lines = gpd.GeoDataFrame( - df_lines[["id", "power", "cables", "circuits", "voltage", "geometry"]], - geometry = "geometry", crs = "EPSG:4326" - ) + # Fill in all values where cables info exists and split_elements == 1 + bool_cables_ac = (df_lines["cables"] != "") & \ + (df_lines["split_elements"] == 1) & \ + (df_lines["cables"] != "0") & \ + (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) & \ + (df_lines["circuits"] == "") & \ + (df_lines["cleaned"] == False) & \ + bool_ac - gdf_lines.explore() - - ### Split into AC and DC - df_lines_ac = df_lines[df_lines["frequency"] != "0"].copy() - df_lines_dc = df_lines[df_lines["frequency"] == "0"].copy() + df_lines.loc[bool_cables_ac, "circuits"] = df_lines.loc[bool_cables_ac, "cables"] \ + .apply(lambda x: str(int(max(1, np.floor_divide(int(x),3))))) + + df_lines.loc[bool_cables_ac, "frequency"] = "50" + df_lines.loc[bool_cables_ac, "cleaned"] = True + + bool_cables_dc = (df_lines["cables"] 
!= "") & \ + (df_lines["split_elements"] == 1) & \ + (df_lines["cables"] != "0") & \ + (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) & \ + (df_lines["circuits"] == "") & \ + (df_lines["cleaned"] == False) & \ + bool_dc + + df_lines.loc[bool_cables_dc, "circuits"] = df_lines.loc[bool_cables_dc, "cables"] \ + .apply(lambda x: str(int(max(1, np.floor_divide(int(x),2))))) + + df_lines.loc[bool_cables_dc, "frequency"] = "0" + df_lines.loc[bool_cables_dc, "cleaned"] = True + + # Fill in all values where circuits info exists and split_elements == 1 + bool_lines = (df_lines["circuits"] != "") & \ + (df_lines["split_elements"] == 1) & \ + (df_lines["circuits"] != "0") & \ + (df_lines["circuits"].apply(lambda x: len(x.split(";")) == 1)) & \ + (df_lines["cleaned"] == False) + + df_lines.loc[bool_lines & bool_ac, "frequency"] = "50" + df_lines.loc[bool_lines & bool_dc, "frequency"] = "0" + df_lines.loc[bool_lines, "cleaned"] = True + + # Clean those values where number of voltages split by semicolon is larger than no cables or no circuits + bool_cables = (df_lines["voltage_original"].apply(lambda x: len(x.split(";")) > 1)) & \ + (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) & \ + (df_lines["circuits"].apply(lambda x: len(x.split(";")) == 1)) & \ + (df_lines["cleaned"] == False) + + df_lines.loc[bool_cables, "circuits"] = df_lines[bool_cables] \ + .apply(_distribute_to_circuits, axis=1) + df_lines.loc[bool_cables & bool_ac, "frequency"] = "50" + df_lines.loc[bool_cables & bool_dc, "frequency"] = "0" + df_lines.loc[bool_cables, "cleaned"] = True + + # Clean those values where multiple circuit values are present, divided by semicolon + bool_cables = (df_lines["circuits"].apply(lambda x: len(x.split(";")) > 1)) & \ + (df_lines.apply(lambda row: len(row["circuits"].split(";")) == row["split_elements"], axis=1)) & \ + (df_lines["cleaned"] == False) + + df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables] \ + .apply(lambda row: 
str(row["circuits"].split(";")[ + int(row["id"].split("_")[-1])-1 + ]), axis=1) + + df_lines.loc[bool_cables & bool_ac, "frequency"] = "50" + df_lines.loc[bool_cables & bool_dc, "frequency"] = "0" + df_lines.loc[bool_cables, "cleaned"] = True + + # Clean those values where multiple cables values are present, divided by semicolon + bool_cables = (df_lines["cables"].apply(lambda x: len(x.split(";")) > 1)) & \ + (df_lines.apply(lambda row: len(row["cables"].split(";")) == row["split_elements"], axis=1)) & \ + (df_lines["cleaned"] == False) + + df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables] \ + .apply(lambda row: + str(max(1, + np.floor_divide( + int(row["cables"].split(";")[int(row["id"].split("_")[-1])-1]), + 3 + ) + )), + axis=1) + + df_lines.loc[bool_cables & bool_ac, "frequency"] = "50" + df_lines.loc[bool_cables & bool_dc, "frequency"] = "0" + df_lines.loc[bool_cables, "cleaned"] = True + + # All remaining lines to circuits == 1 + bool_leftover = (df_lines["cleaned"] == False) + str_id = "; ".join(str(id) for id in df_lines.loc[bool_leftover, "id"]) + logger.info(f"Setting circuits of remaining {sum(bool_leftover)} lines to 1...") + logger.info(f"Lines affected: {str_id}") + df_lines.loc[bool_leftover, "circuits"] = "1" + df_lines.loc[bool_leftover & bool_ac, "frequency"] = "50" + df_lines.loc[bool_leftover & bool_dc, "frequency"] = "0" + df_lines.loc[bool_leftover, "cleaned"] = True - df_lines_dc["cleaned"] = False + # rename columns + df_lines.rename( + columns={ + "id": "line_id", + "power": "tag_type", + "frequency":"tag_frequency", + }, inplace=True) + df_lines["bus0"] = None + df_lines["bus1"] = None + df_lines["length"] = None + df_lines.loc[df_lines["tag_type"] == "line", "underground"] = False + df_lines.loc[df_lines["tag_type"] == "cable", "underground"] = True + df_lines["under_construction"] = False + df_lines.loc[df_lines["tag_frequency"] == "0", "dc"] = True + df_lines.loc[df_lines["tag_frequency"] == "50", "dc"] = False + + 
df_lines = df_lines[[ + "line_id", + "circuits", + "tag_type", + "voltage", + "tag_frequency", + "bus0", + "bus1", + "length", + "underground", + "under_construction", + "dc", + "country", + "geometry", + ]] + df_lines["geometry"] = df_lines.apply(_create_linestring, axis=1) + gdf_lines = gpd.GeoDataFrame(df_lines, geometry = "geometry", crs = "EPSG:4326") + filepath_lines = snakemake.output["lines"] + # save substations output + logger.info(f"Exporting clean lines to {filepath_lines}") + parentfolder_lines = os.path.dirname(filepath_lines) + if not os.path.exists(parentfolder_lines): + # Create the folder and its parent directories if they don't exist + os.makedirs(parentfolder_lines) + gdf_lines.to_file(filepath_lines, driver="GeoJSON") + ######## ######## @@ -642,71 +754,6 @@ def _any_substring_in_list(s, list_strings): m gdf_substations.explore() - df_lines.voltage.unique() - - np.set_printoptions(threshold=np.inf) - - - # duplicate_lines = df_lines[df_lines.duplicated(subset=['id'], keep=False)].copy() - - # grouped_duplicates = duplicate_rows.groupby('id').agg({'country': 'list'}) - - a = df_lines[(df_lines["cables"].apply(lambda x: len(x.split(";"))) == 1) & ((df_lines["voltage"].apply(lambda x: len(x.split(";"))) == 1)) & (df_lines["cables"] != "")] - # Drop duplicates - df_lines.drop_duplicates(subset="id", inplace=True) - - df_lines["voltage"] = _clean_voltage(df_lines["voltage"]) - # df_lines["frequency"] = _clean_frequency(df_lines["frequency"]) - df_lines["circuits"] = _clean_circuits(df_lines["circuits"]) - - list_voltages = df_lines["voltage"].str.split(";").explode().unique().astype(str) - list_voltages = list_voltages[np.vectorize(len)(list_voltages) >= 6] - list_voltages[~np.char.startswith(list_voltages, '1')] - - # df_lines_subset = df_lines[df_lines["voltage"].apply(_any_substring_in_list, list_voltages)] - - # drop voltage = "" - df_lines = _split_voltage(df_lines) - df_lines = df_lines[df_lines["voltage"] != ""] - df_lines["voltage"] = 
df_lines["voltage"].astype(int, errors="ignore") - - # Drop voltages below 220 kV - df_lines = df_lines[df_lines["voltage"] >= 200000] - - # set frequencies - df_lines["frequency"] = _set_frequency(df_lines["frequency"]) - df_lines["frequency"] = df_lines["frequency"].astype(int, errors="ignore") - - # Clean circuits - # Map correct circuits to lines that where split - - # Initiate new column for cleaned circuits with values that are already valid: - # Condition 1: Length of sub_id is 0, the line was not split - # Condition 2: Number of entries in circuits separated by semicolon is 1, value is unique - # Condition 3: Circuits is not an empty string - # Condition 4: Circuits is not "0" - bool_circuits_valid = (df_lines["sub_id_len"] == 0) & \ - (df_lines["circuits"].apply(lambda x: len(x.split(";"))) == 1) & \ - (df_lines["circuits"] != "") & \ - (df_lines["circuits"] != "0") - - df_lines.loc[bool_circuits_valid, "circuits_clean"] = df_lines.loc[bool_circuits_valid, "circuits"] - - # Boolean to check if sub_id_len is equal to the number of circuits - bool_equal = df_lines["sub_id_len"] == df_lines["circuits"] \ - .apply(lambda x: len(x.split(";"))) - op_equal = lambda row: row["circuits"].split(";")[int(row["sub_id"])-1] - - df_lines.loc[bool_equal, "circuits_clean"] = df_lines[bool_equal] \ - .apply(op_equal, axis=1) - - bool_larger = df_lines["sub_id_len"] > \ - df_lines["circuits"].apply(lambda x: len(x.split(";"))) - - pd.set_option('display.max_rows', None) - pd.set_option('display.max_columns', None) - df_lines.loc[bool_larger, ["id", "sub_id", "sub_id_len", "cables", "circuits", "circuits_clean", "frequency"]] - output = str(snakemake.output) From 49c1baffd92317cdc4af603d103e19948d96b1b8 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Mon, 6 May 2024 17:05:00 +0200 Subject: [PATCH 006/100] Added check whether line is a circle. If so, drop it. 
--- scripts/clean_osm_data.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 1ccef3644..63f27d7f1 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -725,6 +725,10 @@ def _any_substring_in_list(s, list_strings): ]] df_lines["geometry"] = df_lines.apply(_create_linestring, axis=1) + # Drop all rows where the geometry has equal start and end point + bool_circle = df_lines["geometry"].apply(lambda x: x.coords[0] == x.coords[-1]) + df_lines = df_lines[~bool_circle] + gdf_lines = gpd.GeoDataFrame(df_lines, geometry = "geometry", crs = "EPSG:4326") filepath_lines = snakemake.output["lines"] From 75cffe444bdfadd24e971f5cedebeb6223568ad9 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Mon, 6 May 2024 17:10:59 +0200 Subject: [PATCH 007/100] Extended build_electricity.smk by build_osm_network.py --- rules/build_electricity.smk | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 4d4495adc..e844fb818 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -624,7 +624,7 @@ if config["osm"].get("retrieve", True): script: "../scripts/retrieve_osm_data.py" -# FEATURES = ["cables_way", "lines_way", "substations_way", "substations_node", "transformers_way", "transformers_node"] + rule clean_osm_data: # params: # countries=config["countries"], @@ -650,10 +650,22 @@ rule clean_osm_data: "../scripts/clean_osm_data.py" -# { -# f"{country}": f"{ -# f"{feature}": f"data/osm/raw/{country}/{feature}.geojson" -# }" -# for feature in FEATURES -# for country in config["countries"] -# } \ No newline at end of file +rule build_osm_network: + input: + substations="data/osm/clean/substations.geojson", + lines="data/osm/clean/lines.geojson", + output: + lines="data/osm/lines.csv", + converters="data/osm/converters.csv", + transformers="data/osm/transformers.csv", + 
substations="data/osm/buses.csv", + lines_geojson="data/osm/lines.geojson", + converters_geojson="data/osm/converters.geojson", + transformers_geojson="data/osm/transformers.geojson", + substations_geojson="data/osm/buses.geojson", + log: + logs("build_osm_network.log"), + benchmark: + benchmarks("build_osm_network") + script: + "../scripts/build_osm_network.py" \ No newline at end of file From efb96118e85216c22a41f0042ade99471f4e5ea7 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Tue, 7 May 2024 13:28:43 +0200 Subject: [PATCH 008/100] Added build_osm_network --- scripts/build_osm_network.py | 1151 ++++++++++++++++++++++++++++++++++ scripts/clean_osm_data.py | 473 ++++++++------ 2 files changed, 1435 insertions(+), 189 deletions(-) create mode 100644 scripts/build_osm_network.py diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py new file mode 100644 index 000000000..bc0e46541 --- /dev/null +++ b/scripts/build_osm_network.py @@ -0,0 +1,1151 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +import logging +import os + +import geopandas as gpd +import numpy as np +import pandas as pd +from _helpers import ( + configure_logging, +) +from shapely.geometry import LineString, Point +from shapely.ops import linemerge, split +from shapely import wkt +from tqdm import tqdm +from _benchmark import memory_logger +import yaml + +logger = logging.getLogger(__name__) + +# list of recognised nan values (NA and na excluded as may be confused with Namibia 2-letter country code) +NA_VALUES = ["NULL", "", "N/A", "NAN", "NaN", "nan", "Nan", "n/a", "null"] + +def read_csv_nafix(file, **kwargs): + "Function to open a csv as pandas file and standardize the na value" + if "keep_default_na" not in kwargs: + kwargs["keep_default_na"] = False + if "na_values" not in kwargs: + kwargs["na_values"] = NA_VALUES + + if os.stat(file).st_size > 0: + return 
pd.read_csv(file, **kwargs) + else: + return pd.DataFrame() + + +def save_to_geojson(df, fn): + if os.path.exists(fn): + os.unlink(fn) # remove file if it exists + + # save file if the (Geo)DataFrame is non-empty + if df.empty: + # create empty file to avoid issues with snakemake + with open(fn, "w") as fp: + pass + else: + # save file + df.to_file(fn, driver="GeoJSON") + + +def read_geojson(fn, cols=[], dtype=None, crs="EPSG:4326"): + """ + Function to read a geojson file fn. When the file is empty, then an empty + GeoDataFrame is returned having columns cols, the specified crs and the + columns specified by the dtype dictionary it not none. + + Parameters: + ------------ + fn : str + Path to the file to read + cols : list + List of columns of the GeoDataFrame + dtype : dict + Dictionary of the type of the object by column + crs : str + CRS of the GeoDataFrame + """ + # if the file is non-zero, read the geodataframe and return it + if os.path.getsize(fn) > 0: + return gpd.read_file(fn) + else: + # else return an empty GeoDataFrame + df = gpd.GeoDataFrame(columns=cols, geometry=[], crs=crs) + if isinstance(dtype, dict): + for k, v in dtype.items(): + df[k] = df[k].astype(v) + return df + + +def to_csv_nafix(df, path, **kwargs): + if "na_rep" in kwargs: + del kwargs["na_rep"] + # if len(df) > 0: + if not df.empty or not df.columns.empty: + return df.to_csv(path, **kwargs, na_rep=NA_VALUES[0]) + else: + with open(path, "w") as fp: + pass + + +def line_endings_to_bus_conversion(lines): + # Assign to every line a start and end point + + lines["bounds"] = lines["geometry"].boundary # create start and end point + + lines["bus_0_coors"] = lines["bounds"].map(lambda p: p.geoms[0]) + lines["bus_1_coors"] = lines["bounds"].map(lambda p: p.geoms[-1]) + + # splits into coordinates + lines["bus0_lon"] = lines["bus_0_coors"].x + lines["bus0_lat"] = lines["bus_0_coors"].y + lines["bus1_lon"] = lines["bus_1_coors"].x + lines["bus1_lat"] = lines["bus_1_coors"].y + + return lines + 
+ +# tol in m +def set_substations_ids(buses, distance_crs, tol=5000): + """ + Function to set substations ids to buses, accounting for location + tolerance. + + The algorithm is as follows: + + 1. initialize all substation ids to -1 + 2. if the current substation has been already visited [substation_id < 0], then skip the calculation + 3. otherwise: + 1. identify the substations within the specified tolerance (tol) + 2. when all the substations in tolerance have substation_id < 0, then specify a new substation_id + 3. otherwise, if one of the substation in tolerance has a substation_id >= 0, then set that substation_id to all the others; + in case of multiple substations with substation_ids >= 0, the first value is picked for all + """ + + buses["station_id"] = -1 + + # create temporary series to execute distance calculations using m as reference distances + temp_bus_geom = buses.geometry.to_crs(distance_crs) + + # set tqdm options for substation ids + tqdm_kwargs_substation_ids = dict( + ascii=False, + unit=" buses", + total=buses.shape[0], + desc="Set substation ids ", + ) + + station_id = 0 + for i, row in tqdm(buses.iterrows(), **tqdm_kwargs_substation_ids): + if buses.loc[i, "station_id"] >= 0: + continue + + # get substations within tolerance + close_nodes = np.flatnonzero( + temp_bus_geom.distance(temp_bus_geom.loc[i]) <= tol + ) + + if len(close_nodes) == 1: + # if only one substation is in tolerance, then the substation is the current one + # Note that the node cannot be with substation_id >= 0, given the preliminary check + # at the beginning of the for loop + buses.loc[buses.index[i], "station_id"] = station_id + # update station id + station_id += 1 + else: + # several substations in tolerance + # get their ids + subset_substation_ids = buses.loc[buses.index[close_nodes], "station_id"] + # check if all substation_ids are negative (<0) + all_neg = subset_substation_ids.max() < 0 + # check if at least a substation_id is negative (<0) + some_neg = 
subset_substation_ids.min() < 0 + + if all_neg: + # when all substation_ids are negative, then this is a new substation id + # set the current station_id and increment the counter + buses.loc[buses.index[close_nodes], "station_id"] = station_id + station_id += 1 + elif some_neg: + # otherwise, when at least a substation_id is non-negative, then pick the first value + # and set it to all the other substations within tolerance + sub_id = -1 + for substation_id in subset_substation_ids: + if substation_id >= 0: + sub_id = substation_id + break + buses.loc[buses.index[close_nodes], "station_id"] = sub_id + + +def set_lines_ids(lines, buses, distance_crs): + """ + Function to set line buses ids to the closest bus in the list. + """ + # set tqdm options for set lines ids + tqdm_kwargs_line_ids = dict( + ascii=False, + unit=" lines", + total=lines.shape[0], + desc="Set line bus ids ", + ) + + # initialization + lines["bus0"] = -1 + lines["bus1"] = -1 + + busesepsg = buses.to_crs(distance_crs) + linesepsg = lines.to_crs(distance_crs) + + for i, row in tqdm(linesepsg.iterrows(), **tqdm_kwargs_line_ids): + # select buses having the voltage level of the current line + buses_sel = busesepsg[ + (buses["voltage"] == row["voltage"]) & (buses["dc"] == row["dc"]) + ] + + # find the closest node of the bus0 of the line + bus0_id = buses_sel.geometry.distance(row.geometry.boundary.geoms[0]).idxmin() + lines.loc[i, "bus0"] = buses.loc[bus0_id, "bus_id"] + + # check if the line starts exactly in the node, otherwise modify the linestring + distance_bus0 = busesepsg.geometry.loc[bus0_id].distance( + row.geometry.boundary.geoms[0] + ) + if distance_bus0 > 0.0: + # the line does not start in the node, thus modify the linestring + lines.loc[i, "geometry"] = linemerge( + [ + LineString( + [ + buses.geometry.loc[bus0_id], + lines.geometry.loc[i].boundary.geoms[0], + ] + ), + lines.geometry.loc[i], + ] + ) + + # find the closest node of the bus1 of the line + bus1_id = 
buses_sel.geometry.distance(row.geometry.boundary.geoms[1]).idxmin() + lines.loc[i, "bus1"] = buses.loc[bus1_id, "bus_id"] + + # check if the line ends exactly in the node, otherwise modify the linestring + distance_bus1 = busesepsg.geometry.loc[bus1_id].distance( + row.geometry.boundary.geoms[1] + ) + if distance_bus1 > 0.0: + # the line does not end in the node, thus modify the linestring + lines.loc[i, "geometry"] = linemerge( + [ + lines.geometry.loc[i], + LineString( + [ + lines.geometry.loc[i].boundary.geoms[1], + buses.geometry.loc[bus1_id], + ] + ), + ] + ) + + return lines, buses + + +def merge_stations_same_station_id( + buses, delta_lon=0.001, delta_lat=0.001, precision=4 +): + """ + Function to merge buses with same voltage and station_id. This function + iterates over all substation ids and creates a bus_id for every substation + and voltage level. + + Therefore, a substation with multiple voltage levels is represented + with different buses, one per voltage level + """ + # initialize list of cleaned buses + buses_clean = [] + + # initialize the number of buses + n_buses = 0 + + for g_name, g_value in buses.groupby(by="station_id"): + # average location of the buses having the same station_id + station_point_x = np.round(g_value.geometry.x.mean(), precision) + station_point_y = np.round(g_value.geometry.y.mean(), precision) + is_dclink_boundary_point = any(g_value["is_dclink_boundary_point"]) + + # loop for every voltage level in the bus + # The location of the buses is averaged; in the case of multiple voltage levels for the same station_id, + # each bus corresponding to a voltage level and each polarity is located at a distance regulated by delta_lon/delta_lat + v_it = 0 + for v_name, bus_row in g_value.groupby(by=["voltage", "dc"]): + lon_bus = np.round(station_point_x + v_it * delta_lon, precision) + lat_bus = np.round(station_point_y + v_it * delta_lat, precision) + + # add the bus + buses_clean.append( + [ + n_buses, # "bus_id" + g_name, # 
"station_id" + v_name[0], # "voltage" + bus_row["dc"].all(), # "dc" + "|".join(bus_row["symbol"].unique()), # "symbol" + bus_row["under_construction"].any(), # "under_construction" + "|".join(bus_row["tag_substation"].unique()), # "tag_substation" + bus_row["tag_area"].sum(), # "tag_area" + lon_bus, # "lon" + lat_bus, # "lat" + bus_row["country"].iloc[0], # "country", + is_dclink_boundary_point, # check if new bus was formed of at least one DC link boundary point + Point( + lon_bus, + lat_bus, + ), # "geometry" + ] + ) + + # increase counters + v_it += 1 + n_buses += 1 + + # names of the columns + buses_clean_columns = [ + "bus_id", + "station_id", + "voltage", + "dc", + "symbol", + "under_construction", + "tag_substation", + "tag_area", + "x", + "y", + "country", + "is_dclink_boundary_point", + "geometry", + ] + + gdf_buses_clean = gpd.GeoDataFrame(buses_clean, columns=buses_clean_columns).set_crs( + crs=buses.crs, inplace=True + ) + + return gdf_buses_clean + + +def get_ac_frequency(df, fr_col="tag_frequency"): + """ + # Function to define a default frequency value. + + Attempts to find the most usual non-zero frequency across the + dataframe; 50 Hz is assumed as a back-up value + """ + + # Initialize a default frequency value + ac_freq_default = 50 + + grid_freq_levels = df[fr_col].value_counts(sort=True, dropna=True) + if not grid_freq_levels.empty: + # AC lines frequency shouldn't be 0Hz + ac_freq_levels = grid_freq_levels.loc[ + grid_freq_levels.index.get_level_values(0) != "0" + ] + ac_freq_default = ac_freq_levels.index.get_level_values(0)[0] + + return ac_freq_default + + +def get_transformers(buses, lines): + """ + Function to create fake transformer lines that connect buses of the same + station_id at different voltage. + """ + + ac_freq = get_ac_frequency(lines) + df_transformers = [] + + # Transformers should be added between AC buses only + # TODO pypsa-eur: Fix this! 
instead of tilde use != + buses_ac = buses[buses["dc"] != True] + for g_name, g_value in buses_ac.sort_values("voltage", ascending=True).groupby( + by="station_id" + ): + # note: by construction there cannot be more than two buses with the same station_id and same voltage + n_voltages = len(g_value) + + if n_voltages > 1: + for id in range(0, n_voltages - 1): + # when g_value has more than one node, it means that there are multiple voltages for the same bus + geom_trans = LineString( + [g_value.geometry.iloc[id], g_value.geometry.iloc[id + 1]] + ) + + df_transformers.append( + [ + f"transf_{g_name}_{id}", # "line_id" + g_value["bus_id"].iloc[id], # "bus0" + g_value["bus_id"].iloc[id + 1], # "bus1" + g_value.voltage.iloc[id], # "voltage_bus0" + g_value.voltage.iloc[id + 1], # "voltage_bus1" + g_value.country.iloc[id], # "country" + geom_trans, # "geometry" + ] + ) + # TODO pypsa-eur: fix bug in pypsa-earth, where the id column is wrongly named "line_id" instead of "transformer_id" + # name of the columns + trasf_columns = [ + "transformer_id", + "bus0", + "bus1", + "voltage_bus0", + "voltage_bus1", + "country", + "geometry", + ] + + df_transformers = gpd.GeoDataFrame(df_transformers, columns=trasf_columns) + if not df_transformers.empty: + init_index = 0 if lines.empty else lines.index[-1] + 1 + df_transformers.set_index(init_index + df_transformers.index, inplace=True) + # update line endings + df_transformers = line_endings_to_bus_conversion(df_transformers) + + return df_transformers + + +def get_converters(buses, lines): + """ + Function to create fake converter lines that connect buses of the same + station_id of different polarities. 
+ """ + + df_converters = [] + + for g_name, g_value in buses.sort_values("voltage", ascending=True).groupby( + by="station_id" + ): + # note: by construction there cannot be more that two buses with the same station_id and same voltage + n_voltages = len(g_value) + + # A converter stations should have both AC and DC parts + if g_value["dc"].any() & ~g_value["dc"].all(): + dc_voltage = g_value[g_value.dc]["voltage"].values + + for u in dc_voltage: + id_0 = g_value[g_value["dc"] & g_value["voltage"].isin([u])].index[0] + + ac_voltages = g_value[~g_value.dc]["voltage"] + # A converter is added between a DC nodes and AC one with the closest voltage + id_1 = ac_voltages.sub(u).abs().idxmin() + + geom_conv = LineString( + [g_value.geometry.loc[id_0], g_value.geometry.loc[id_1]] + ) + + # check if bus is a dclink boundary point, only then add converter + if g_value["is_dclink_boundary_point"].loc[id_0]: + df_converters.append( + [ + f"convert_{g_name}_{id_0}", # "line_id" + g_value["bus_id"].loc[id_0], # "bus0" + g_value["bus_id"].loc[id_1], # "bus1" + False, # "underground" + False, # "under_construction" + g_value.country.loc[id_0], # "country" + geom_conv, # "geometry" + ] + ) + + # name of the columns + conv_columns = [ + "converter_id", + "bus0", + "bus1", + "underground", + "under_construction", + "country", + "geometry", + ] + + df_converters = gpd.GeoDataFrame(df_converters, columns=conv_columns).reset_index() + + return df_converters + + +def connect_stations_same_station_id(lines, buses): + """ + Function to create fake links between substations with the same + substation_id. 
+ """ + ac_freq = get_ac_frequency(lines) + station_id_list = buses.station_id.unique() + + add_lines = [] + from shapely.geometry import LineString + + for s_id in station_id_list: + buses_station_id = buses[buses.station_id == s_id] + + if len(buses_station_id) > 1: + for b_it in range(1, len(buses_station_id)): + add_lines.append( + [ + f"link{buses_station_id}_{b_it}", # "line_id" + buses_station_id.index[0], # "bus0" + buses_station_id.index[b_it], # "bus1" + 400000, # "voltage" + 1, # "circuits" + 0.0, # "length" + False, # "underground" + False, # "under_construction" + "transmission", # "tag_type" + ac_freq, # "tag_frequency" + buses_station_id.country.iloc[0], # "country" + LineString( + [ + buses_station_id.geometry.iloc[0], + buses_station_id.geometry.iloc[b_it], + ] + ), # "geometry" + LineString( + [ + buses_station_id.geometry.iloc[0], + buses_station_id.geometry.iloc[b_it], + ] + ).bounds, # "bounds" + buses_station_id.geometry.iloc[0], # "bus_0_coors" + buses_station_id.geometry.iloc[b_it], # "bus_1_coors" + buses_station_id.lon.iloc[0], # "bus0_lon" + buses_station_id.lat.iloc[0], # "bus0_lat" + buses_station_id.lon.iloc[b_it], # "bus1_lon" + buses_station_id.lat.iloc[b_it], # "bus1_lat" + ] + ) + + # name of the columns + add_lines_columns = [ + "line_id", + "bus0", + "bus1", + "voltage", + "circuits", + "length", + "underground", + "under_construction", + "tag_type", + "tag_frequency", + "country", + "geometry", + "bounds", + "bus_0_coors", + "bus_1_coors", + "bus0_lon", + "bus0_lat", + "bus1_lon", + "bus1_lat", + ] + + df_add_lines = gpd.GeoDataFrame(pd.concat(add_lines), columns=add_lines_columns) + lines = pd.concat([lines, df_add_lines], ignore_index=True) + + return lines + + +def set_lv_substations(buses): + """ + Function to set what nodes are lv, thereby setting substation_lv The + current methodology is to set lv nodes to buses where multiple voltage + level are found, hence when the station_id is duplicated. 
+ """ + # initialize column substation_lv to true + buses["substation_lv"] = True + + # For each station number with multiple buses make lowest voltage `substation_lv = TRUE` + bus_with_stations_duplicates = buses[ + buses.station_id.duplicated(keep=False) + ].sort_values(by=["station_id", "voltage"]) + lv_bus_at_station_duplicates = ( + buses[buses.station_id.duplicated(keep=False)] + .sort_values(by=["station_id", "voltage"]) + .drop_duplicates(subset=["station_id"]) + ) + # Set all buses with station duplicates "False" + buses.loc[bus_with_stations_duplicates.index, "substation_lv"] = False + # Set lv_buses with station duplicates "True" + buses.loc[lv_bus_at_station_duplicates.index, "substation_lv"] = True + + return buses + + +# Note tolerance = 0.01 means around 700m +# TODO: the current tolerance is high to avoid an issue in the Nigeria case where line 565939360-1 +# seems to be interconnected to both ends, but at the eastern one, the node is actually not connected +# another line seems to be exactly touching the node, but from the data point of view it only fly over it. +# There may be the need to split a line in several segments in the case the line is within tolerance with +# respect to a node + + +def merge_stations_lines_by_station_id_and_voltage( + lines, buses, geo_crs, distance_crs, tol=5000 +): + """ + Function to merge close stations and adapt the line datasets to adhere to + the merged dataset. 
+ """ + + logger.info( + "Stage 3a/4: Set substation ids with tolerance of %.2f km" % (tol / 1000) + ) + + # TODO pypsa-eur: Add this fix to pypsa-earth: Buses should not be clustered geographically if they are different + # bus types (AC != DC) + buses_ac = buses[buses["dc"] == False].reset_index() + buses_dc = buses[buses["dc"] == True].reset_index() + + # set substation ids + # set_substations_ids(buses, distance_crs, tol=tol) + set_substations_ids(buses_ac, distance_crs, tol=tol) + set_substations_ids(buses_dc, distance_crs, tol=tol) + + # Find boundary points of DC links + # lines_dc_shape = lines[lines["dc"] == True].unary_union + # lines_dc_bounds = lines_dc_shape.boundary + # lines_dc_points = [p for p in lines_dc_bounds.geoms] + lines_dc = lines[lines['dc'] == True].reset_index() + lines_dc["adj_idx"] = range(0, len(lines_dc)) + + # Initialize an empty adjacency matrix + dc_adj_matrix = np.zeros((len(lines_dc), len(lines_dc)), dtype=int) + + # Fill the adjacency matrix + for i in range(len(lines_dc)): + for j in range(len(lines_dc)): + if are_lines_connected(lines_dc.iloc[i], lines_dc.iloc[j]): + dc_adj_matrix[i, j] = 1 + + dc_paths = find_paths(dc_adj_matrix) + + all_dc_boundary_points = pd.Series() + + for path in dc_paths: + bus_0_coors = lines_dc.iloc[path]["bus_0_coors"] + bus_1_coors = lines_dc.iloc[path]["bus_1_coors"] + + # Create DataFrame containing all points within a path + dc_points = pd.concat([bus_0_coors, bus_1_coors], ignore_index = True) + + # Determine the value counts of individual points. 
If it occurs more than + # once, it cannot be an end-point of a path + bool_duplicates = dc_points.apply(lambda p: sum([are_almost_equal(p, s) for s in dc_points])) > 1 + + # Drop all duplicates + dc_boundary_points = dc_points[~bool_duplicates] + + if dc_boundary_points.empty: + all_dc_boundary_points = dc_boundary_points + else: + all_dc_boundary_points = pd.concat([all_dc_boundary_points, dc_boundary_points], ignore_index = True) + + + # TODO pypsa-eur: Add to pypsa-earth for all related entries on is_dclink_boundary_point + # check for each entry in buses_dc whether it is included in lines_dc_points + buses_ac["is_dclink_boundary_point"] = False + buses_dc["is_dclink_boundary_point"] = buses_dc.geometry.apply( + lambda p: any([p.within(l) for l in all_dc_boundary_points]) + ) + + logger.info("Stage 3b/4: Merge substations with the same id") + + # merge buses with same station id and voltage + if not buses.empty: + buses_ac = merge_stations_same_station_id(buses_ac) + buses_dc = merge_stations_same_station_id(buses_dc) + buses_dc["bus_id"] = buses_ac["bus_id"].max() + buses_dc["bus_id"] + 1 + buses = pd.concat([buses_ac, buses_dc], ignore_index=True) + set_substations_ids(buses, distance_crs, tol=tol) + + logger.info("Stage 3c/4: Specify the bus ids of the line endings") + + # set the bus ids to the line dataset + lines, buses = set_lines_ids(lines, buses, distance_crs) + + # drop lines starting and ending in the same node + lines.drop(lines[lines["bus0"] == lines["bus1"]].index, inplace=True) + # update line endings + lines = line_endings_to_bus_conversion(lines) + + # set substation_lv + set_lv_substations(buses) + + logger.info("Stage 3d/4: Add converters to lines") + + # append fake converters + # lines = pd.concat([lines, converters], ignore_index=True) + + # reset index + lines.reset_index(drop=True, inplace=True) + # if len(links) > 0: + # links.reset_index(drop=True, inplace=True) + + return lines, buses + + +def create_station_at_equal_bus_locations( + 
def create_station_at_equal_bus_locations(
    lines, buses, geo_crs, distance_crs, tol=5000
):
    """
    Assign station ids to buses at (nearly) co-located positions and update
    the line endings accordingly.

    Returns the adapted (lines, buses) tuple. ``geo_crs`` is not referenced
    in this function body.
    """
    # V1. Create station_id at same bus location
    # - We saw that buses are not connected exactly at one point, they are
    #   usually connected to a substation "area" (analysed on maps)
    # - Create station_id at exactly the same location might therefore be not
    #   always correct
    # - Though as you can see below, it might be still sometime the case.
    #   Examples are **station 4** (2 lines with the same voltage connect at the
    #   same point) and **station 23** (4 lines with two different voltages connect
    #   at the same point)
    # TODO: Filter out the generator lines - defined as going from generator to
    #       the next station which is connected to a load. Excluding generator
    #       lines make probably sense because they are not transmission expansion
    #       relevant. For now we simplify and include generator lines.

    # If same location/geometry make station
    # NOTE: bus_all is an ALIAS of buses (no copy); set_lv_substations(bus_all)
    # below therefore mutates buses as well.
    bus_all = buses

    # set substation ids
    set_substations_ids(buses, distance_crs, tol=tol)

    # set the bus ids to the line dataset
    lines, buses = set_lines_ids(lines, buses, distance_crs)

    # update line endings
    lines = line_endings_to_bus_conversion(lines)

    # For each station number with multiple buses make lowest voltage `substation_lv = TRUE`
    set_lv_substations(bus_all)

    # TRY: Keep only buses that are not duplicated & lv_substation = True
    # TODO: Check if this is necessary. What effect do duplicates have?
    bus_all = bus_all[bus_all["substation_lv"] == True]

    lines = connect_stations_same_station_id(lines, buses)

    return lines, buses
def _split_linestring_by_point(linestring, points):
    """
    Split a linestring geometry at multiple inner points.

    Parameters
    ----------
    linestring : LineString
        Linestring of the line to be split.
    points : list
        Points at which to split the linestring.

    Return
    ------
    list
        The resulting linestring segments.
    """
    # Start with the whole line and iteratively cut every current segment
    # at each point in turn.
    segments = [linestring]

    for point in points:
        next_segments = []
        for segment in segments:
            # split() returns a GeometryCollection; flatten its parts.
            next_segments.extend(split(segment, point).geoms)
        segments = next_segments

    return segments
def fix_overpassing_lines(lines, buses, distance_crs, tol=1):
    """
    Function to avoid buses overpassing lines with no connection when the bus
    is within a given tolerance from the line.

    Parameters
    ----------
    lines : GeoDataFrame
        Geodataframe of lines
    buses : GeoDataFrame
        Geodataframe of substations
    distance_crs : CRS-like
        Projected CRS used for metric distance computations
    tol : float
        Tolerance in meters of the distance between the substation and the line
        below which the line will be split

    Returns
    -------
    tuple(GeoDataFrame, GeoDataFrame)
        (lines, buses): overpassed lines are split at the nearby buses; buses
        are returned unchanged.
    """

    lines_to_add = []  # list of lines to be added
    lines_to_split = []  # list of lines that have been split

    # Work in the projected CRS so distances are in metres.
    lines_epsgmod = lines.to_crs(distance_crs)
    buses_epsgmod = buses.to_crs(distance_crs)

    # set tqdm options for substation ids
    tqdm_kwargs_substation_ids = dict(
        ascii=False,
        unit=" lines",
        total=lines.shape[0],
        desc="Verify lines overpassing nodes ",
    )

    for l in tqdm(lines.index, **tqdm_kwargs_substation_ids):
        # bus indices being within tolerance from the line
        bus_in_tol_epsg = buses_epsgmod[
            buses_epsgmod.geometry.distance(lines_epsgmod.geometry.loc[l]) <= tol
        ]

        # exclude endings of the lines: keep only buses farther than tol from
        # at least one of the two line boundary points
        bus_in_tol_epsg = bus_in_tol_epsg[
            (
                (
                    bus_in_tol_epsg.geometry.distance(
                        lines_epsgmod.geometry.loc[l].boundary.geoms[0]
                    )
                    > tol
                )
                | (
                    bus_in_tol_epsg.geometry.distance(
                        lines_epsgmod.geometry.loc[l].boundary.geoms[1]
                    )
                    > tol
                )
            )
        ]

        if not bus_in_tol_epsg.empty:
            # add index of line to split
            lines_to_split.append(l)

            # split in the ORIGINAL (geographic) CRS geometries
            buses_locs = buses.geometry.loc[bus_in_tol_epsg.index]

            # get new line geometries
            new_geometries = _split_linestring_by_point(lines.geometry[l], buses_locs)
            n_geoms = len(new_geometries)

            # create temporary copies of the line
            df_append = gpd.GeoDataFrame([lines.loc[l]] * n_geoms)
            # update geometries
            df_append["geometry"] = new_geometries
            # update name of the line (suffix _0.._n-1 per segment)
            # NOTE: `id` shadows the builtin inside this comprehension only
            df_append["line_id"] = [
                str(df_append["line_id"].iloc[0]) + f"_{id}" for id in range(n_geoms)
            ]

            lines_to_add.append(df_append)

    # nothing was split: return the inputs untouched
    if not lines_to_add:
        return lines, buses

    df_to_add = gpd.GeoDataFrame(pd.concat(lines_to_add, ignore_index=True))
    df_to_add.set_crs(lines.crs, inplace=True)
    # NOTE(review): offsetting by lines.index[-1] assumes a numeric index on
    # lines — confirm upstream guarantees this.
    df_to_add.set_index(lines.index[-1] + df_to_add.index, inplace=True)

    # update length (recomputed in the metric CRS)
    df_to_add["length"] = df_to_add.to_crs(distance_crs).geometry.length

    # update line endings
    df_to_add = line_endings_to_bus_conversion(df_to_add)

    # remove original lines
    lines.drop(lines_to_split, inplace=True)

    lines = gpd.GeoDataFrame(
        pd.concat([lines, df_to_add], ignore_index=True).reset_index(drop=True),
        crs=lines.crs,
    )

    return lines, buses
def add_buses_to_empty_countries(country_list, fp_country_shapes, buses):
    """
    Add a synthetic 220 kV bus at the centroid of every country without
    substation data.

    Parameters
    ----------
    country_list : list
        Country codes that should be represented in the bus dataset.
    fp_country_shapes : str or Path
        Path to a vector file of country shapes with "name" and "geometry".
    buses : GeoDataFrame
        Existing bus dataset; must contain a "country" column.

    Returns
    -------
    GeoDataFrame
        ``buses`` extended by one centroid bus per missing country.
    """
    country_shapes = gpd.read_file(fp_country_shapes).set_index("name")["geometry"]
    bus_country_list = buses["country"].unique().tolist()

    # it may happen that bus_country_list contains entries not relevant as a country name (e.g. "not found")
    # difference can't give negative values; the following will return only relevant country names
    no_data_countries = list(set(country_list).difference(set(bus_country_list)))

    if len(no_data_countries) > 0:
        logger.info(
            f"No buses for the following countries: {no_data_countries}. Adding a node for each of them."
        )
        no_data_countries_shape = (
            country_shapes[country_shapes.index.isin(no_data_countries)]
            .reset_index()
            # NOTE(review): geo_crs is read from module scope (set in __main__)
            # — confirm it is defined when this function is called elsewhere.
            .to_crs(geo_crs)
        )
        length = len(no_data_countries)
        df = gpd.GeoDataFrame(
            {
                "voltage": [220000] * length,
                "country": no_data_countries_shape["name"],
                "x": no_data_countries_shape["geometry"].centroid.x,
                "y": no_data_countries_shape["geometry"].centroid.y,
                "bus_id": np.arange(len(buses) + 1, len(buses) + (length + 1), 1),
                "station_id": [np.nan] * length,
                # All lines for the countries with NA bus data are assumed to be AC
                "dc": [False] * length,
                "under_construction": [False] * length,
                "tag_area": [0.0] * length,
                "symbol": ["substation"] * length,
                "tag_substation": ["transmission"] * length,
                "geometry": no_data_countries_shape["geometry"].centroid,
                "substation_lv": [True] * length,
            },
            crs=geo_crs,
        ).astype(
            buses.dtypes.to_dict()
        )  # keep the same dtypes as buses
        buses = gpd.GeoDataFrame(
            pd.concat([buses, df], ignore_index=True).reset_index(drop=True),
            crs=buses.crs,
        )

    # update country list by buses dataframe
    bus_country_list = buses["country"].unique().tolist()

    non_allocated_countries = list(
        set(country_list).symmetric_difference(set(bus_country_list))
    )

    if len(non_allocated_countries) > 0:
        # Fixed grammar of the error message ("There following countries ...")
        logger.error(
            f"The following countries could not be allocated properly: {non_allocated_countries}"
        )

    return buses
def build_network(
    inputs,
    outputs,
    build_osm_network_config,
    countries_config,
    geo_crs,
    distance_crs,
):
    """
    Assemble the OSM-based grid model and write it out.

    Reads cleaned substations and lines, optionally splits overpassing lines
    and merges close stations, derives transformers (buses of different
    voltage at one station) and converters, then writes CSV and GeoJSON
    outputs in a PyPSA-Eur-friendly format.

    Parameters
    ----------
    inputs, outputs : snakemake-like mappings
        File paths keyed by "substations", "lines", "converters",
        "transformers" and their *_geojson counterparts.
    build_osm_network_config : dict
        Options "split_overpassing_lines", "overpassing_lines_tolerance",
        "group_close_buses", "group_tolerance_buses".
    countries_config : list
        Country codes used to add placeholder buses for empty countries.
    geo_crs, distance_crs : CRS-like
        Geographic / projected CRS.

    Returns
    -------
    None
    """
    # Expected columns and dtypes of the cleaned OSM inputs.
    osm_clean_columns = {
        'substation': {
            'bus_id': 'object',
            'station_id': 'float',
            'voltage': 'float',
            'dc': 'bool',
            'symbol': 'object',
            'under_construction': 'bool',
            'tag_substation': 'str',
            'tag_area': 'str',
            'lon': 'float',
            'lat': 'float',
            'country': 'str',
            'geometry': 'object',
            'tag_source': 'str',
        },
        'line': {
            'line_id': 'object',
            'bus0': 'object',
            'bus1': 'object',
            'voltage': 'float',
            'circuits': 'float',
            'length': 'float',
            'underground': 'bool',
            'under_construction': 'bool',
            'tag_type': 'str',
            'tag_frequency': 'float',
            'dc': 'bool',
            'country': 'object',
            'geometry': 'object',
        }
    }

    logger.info("Stage 1/5: Read input data")
    buses = read_geojson(
        inputs["substations"],
        osm_clean_columns["substation"].keys(),
        dtype=osm_clean_columns["substation"],
    )

    lines = read_geojson(
        inputs["lines"],
        osm_clean_columns["line"].keys(),
        dtype=osm_clean_columns["line"],
    )

    lines = line_endings_to_bus_conversion(lines)

    logger.info("Stage 2/5: AC and DC network: enabled")

    # Address the overpassing line issue Step 3/5
    if build_osm_network_config.get("split_overpassing_lines", False):
        tol = build_osm_network_config.get("overpassing_lines_tolerance", 1)
        logger.info("Stage 3/5: Avoid nodes overpassing lines: enabled with tolerance")

        lines, buses = fix_overpassing_lines(lines, buses, distance_crs, tol=tol)
    else:
        logger.info("Stage 3/5: Avoid nodes overpassing lines: disabled")

    # Add bus to countries with no buses
    buses = add_buses_to_empty_countries(countries_config, inputs.country_shapes, buses)

    # METHOD to merge buses with same voltage and within tolerance Step 4/5
    if build_osm_network_config.get("group_close_buses", False):
        tol = build_osm_network_config.get("group_tolerance_buses", 5000)
        logger.info(
            f"Stage 4/5: Aggregate close substations: enabled with tolerance {tol} m"
        )
        lines, buses = merge_stations_lines_by_station_id_and_voltage(
            lines, buses, geo_crs, distance_crs, tol=tol
        )
    else:
        logger.info("Stage 4/5: Aggregate close substations: disabled")

    logger.info("Stage 5/5: Add augmented substation to country with no data")

    # Recalculate lengths of lines in a suitable projected (UTM) CRS
    utm = lines.estimate_utm_crs(datum_name = "WGS 84")
    lines["length"] = lines.to_crs(utm).length

    # get transformers: modelled as lines connecting buses with different voltage
    transformers = get_transformers(buses, lines)

    # get converters: currently modelled as links connecting buses with different polarity
    converters = get_converters(buses, lines)

    logger.info("Save outputs")

    # create clean directory if not already exist
    if not os.path.exists(outputs["lines"]):
        os.makedirs(os.path.dirname(outputs["lines"]), exist_ok=True)


    ### Convert output to pypsa-eur friendly format
    # Rename "substation" in buses["symbol"] to "Substation"
    buses["symbol"] = buses["symbol"].replace({"substation": "Substation"})

    # Drop unnecessary index column and set respective element ids as index
    lines.set_index("line_id", inplace=True)
    converters.set_index("converter_id", inplace=True)
    transformers.set_index("transformer_id", inplace=True)
    buses.set_index("bus_id", inplace=True)


    # Convert voltages from V to kV
    lines["voltage"] = lines["voltage"] / 1000
    transformers["voltage_bus0"], transformers["voltage_bus1"] = transformers["voltage_bus0"] / 1000, \
        transformers["voltage_bus1"] / 1000
    buses["voltage"] = buses["voltage"] / 1000

    # Convert 'true' and 'false' to 't' and 'f'
    lines = lines.replace({True: "t", False: "f"})
    converters = converters.replace({True: "t", False: "f"})
    buses = buses.replace({True: "t", False: "f"})

    # Change column orders
    cols_lines = ["bus0", "bus1", "voltage", "circuits", "length", "underground", "under_construction", "geometry",
        "tag_type", "tag_frequency", "country", "bounds",
        "bus_0_coors", "bus_1_coors", "bus0_lon", "bus0_lat", "bus1_lon", "bus1_lat"]

    lines = lines[cols_lines]
    # NOTE(review): cols_lines_csv appears unused below — confirm it can be removed.
    cols_lines_csv = ["bus_id", "station_id", "voltage", "dc", "symbol", "under_construction", "tags", "x","y"]

    to_csv_nafix(lines, outputs["lines"])  # Generate CSV
    to_csv_nafix(converters, outputs["converters"])  # Generate CSV
    to_csv_nafix(transformers, outputs["transformers"])  # Generate CSV

    # geometry helper columns not wanted in the GeoJSON exports
    colstodrop = ["bounds", "bus_0_coors", "bus_1_coors"]

    # Export to GeoJSON for quick validations
    save_to_geojson(gpd.GeoDataFrame(lines.drop(columns = colstodrop), geometry = "geometry", crs = geo_crs), outputs["lines_geojson"])
    save_to_geojson(gpd.GeoDataFrame(converters, geometry = "geometry", crs = geo_crs), outputs["converters_geojson"])
    save_to_geojson(gpd.GeoDataFrame(transformers.drop(columns = colstodrop), geometry = "geometry", crs = geo_crs), outputs["transformers_geojson"])

    # create clean directory if not already exist
    if not os.path.exists(outputs["substations"]):
        os.makedirs(os.path.dirname(outputs["substations"]), exist_ok=True)
    # Generate CSV
    to_csv_nafix(buses, outputs["substations"])
    save_to_geojson(gpd.GeoDataFrame(buses, geometry = "geometry", crs = geo_crs), outputs["substations_geojson"])

    return None
# Function to check if two lines are connected
def are_lines_connected(line1, line2):
    """
    Return True if ``line1`` and ``line2`` share (approximately) an endpoint.

    Endpoints are taken from the "bus_0_coors"/"bus_1_coors" entries and
    compared pairwise with are_almost_equal.
    """
    # return (line1['geometry'].touches(line2['geometry']))
    # BUGFIX: the original returned the 4-tuple itself; a non-empty tuple is
    # always truthy, so every pair of lines counted as connected. Reduce the
    # four endpoint comparisons with any() instead.
    return any(
        (
            are_almost_equal(line1["bus_0_coors"], line2["bus_0_coors"]),
            are_almost_equal(line1["bus_0_coors"], line2["bus_1_coors"]),
            are_almost_equal(line1["bus_1_coors"], line2["bus_0_coors"]),
            are_almost_equal(line1["bus_1_coors"], line2["bus_1_coors"]),
        )
    )


def _dfs(adj_matrix, visited, current_vertex, path):
    """
    Depth-first search: append every vertex reachable from current_vertex
    to path (marking it in visited) and return path.

    NOTE(review): recursion depth grows with the longest chain of connected
    lines — confirm this stays below the interpreter recursion limit.
    """
    visited[current_vertex] = True
    path.append(current_vertex)
    for neighbor in range(len(adj_matrix)):
        if adj_matrix[current_vertex][neighbor] == 1 and not visited[neighbor]:
            _dfs(adj_matrix, visited, neighbor, path)
    return path


# Returns all connected paths as a vector
def find_paths(adj_matrix):
    """
    Return the connected components of the graph given by adj_matrix as
    lists of vertex indices (in DFS visiting order).
    """
    visited = [False] * len(adj_matrix)
    paths = []
    for vertex in range(len(adj_matrix)):
        if not visited[vertex]:
            path = _dfs(adj_matrix, visited, vertex, [])
            if path:
                paths.append(path)
    return paths


def are_almost_equal(point1, point2, tolerance=1e-6):
    """
    Check if two Shapely points are almost equal with a given tolerance.

    Args:
        point1 (Point): First Shapely point.
        point2 (Point): Second Shapely point.
        tolerance (float): Tolerance for coordinate deviation.

    Returns:
        bool: True if the points are almost equal, False otherwise.
    """
    return abs(point1.x - point2.x) < tolerance and abs(point1.y - point2.y) < tolerance
if __name__ == "__main__":
    # Detect running outside of snakemake and mock snakemake for testing
    if "snakemake" not in globals():
        from _helpers import mock_snakemake

        snakemake = mock_snakemake("build_osm_network")

    configure_logging(snakemake)

    # load default crs from the snakemake config
    geo_crs = snakemake.config["crs"]["geo_crs"]
    distance_crs = snakemake.config["crs"]["distance_crs"]

    build_osm_network = snakemake.config["build_osm_network"]
    countries = snakemake.config["countries"]

    # Track peak memory while the network is being built; the optional
    # "memory" log target receives the samples.
    with memory_logger(
        filename=getattr(snakemake.log, "memory", None), interval=30.0
    ) as mem:
        build_network(
            snakemake.input,
            snakemake.output,
            build_osm_network,
            countries,
            geo_crs,
            distance_crs,
        )

    logger.info(f"Maximum memory usage: {mem.mem_usage}")
"snakemake" not in globals(): - from _helpers import mock_snakemake - - snakemake = mock_snakemake("clean_osm_data") - - configure_logging(snakemake) - logger.info("Dummy log: clean_osm_data()") +def add_line_endings_tosubstations(substations, lines): + if lines.empty: + return substations - ############# BUSES / SUBSTATIONS ###################### - input_path_substations = { - "substations_way": snakemake.input.substations_way, - "substations_relation": snakemake.input.substations_relation, - } - - cols_substations_way = ["id", "geometry", "country", "power", "substation", "voltage", "frequency"] - cols_substations_relation = ["id", "country", "power", "substation", "voltage", "frequency"] - df_substations_way = pd.DataFrame(columns = cols_substations_way) - df_substations_relation = pd.DataFrame(columns = cols_substations_relation) - - for key in input_path_substations: - logger.info(f"Processing {key}...") - for idx, ip in enumerate(input_path_substations[key]): - if os.path.exists(ip) and os.path.getsize(ip) > 400: # unpopulated OSM json is about 51 bytes - country = os.path.basename(os.path.dirname(input_path_substations[key][idx])) - logger.info(f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(input_path_substations[key])).zfill(2)}: {ip}") - with open(ip, "r") as f: - data = json.load(f) - - df = pd.DataFrame(data['elements']) - df["id"] = df["id"].astype(str) - df["country"] = country - - col_tags = ["power", "substation", "voltage", "frequency"] - - tags = pd.json_normalize(df["tags"]) \ - .map(lambda x: str(x) if pd.notnull(x) else x) - - for ct in col_tags: - if ct not in tags.columns: - tags[ct] = pd.NA - - tags = tags.loc[:, col_tags] - - df = pd.concat([df, tags], axis="columns") + # extract columns from substation df + bus_s = pd.DataFrame(columns=substations.columns) + bus_e = pd.DataFrame(columns=substations.columns) - if key == "substations_way": - df.drop(columns=["type", "tags", "bounds", "nodes"], inplace=True) - df_substations_way = 
pd.concat([df_substations_way, df], axis="rows") - elif key == "substations_relation": - df.drop(columns=["type", "tags", "bounds"], inplace=True) - df_substations_relation = pd.concat([df_substations_relation, df], axis="rows") - - else: - logger.info(f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(input_path_substations[key])).zfill(2)} (empty): {ip}") - continue - logger.info("---") - - df_substations_way.drop_duplicates(subset='id', keep='first', inplace=True) - df_substations_relation.drop_duplicates(subset='id', keep='first', inplace=True) - - df_substations_way["geometry"] = df_substations_way.apply(_create_polygon, axis=1) - - # Normalise the members column of df_substations_relation - cols_members = ["id", "type", "ref", "role", "geometry"] - df_substations_relation_members = pd.DataFrame(columns = cols_members) + # Read information from line.csv + bus_s[["voltage", "country"]] = lines[["voltage", "country"]].astype(str) + bus_s["geometry"] = lines.geometry.boundary.map( + lambda p: p.geoms[0] if len(p.geoms) >= 2 else None + ) + bus_s["lon"] = bus_s["geometry"].map(lambda p: p.x if p != None else None) + bus_s["lat"] = bus_s["geometry"].map(lambda p: p.y if p != None else None) + bus_s["bus_id"] = ( + (substations["bus_id"].max() if "bus_id" in substations else 0) + + 1 + + bus_s.index + ) + bus_s["dc"] = lines["dc"] - for index, row in df_substations_relation.iterrows(): - col_members = ["type", "ref", "role", "geometry"] - df = pd.json_normalize(row["members"]) - - for cm in col_members: - if cm not in df.columns: - df[cm] = pd.NA + bus_e[["voltage", "country"]] = lines[["voltage", "country"]].astype(str) + bus_e["geometry"] = lines.geometry.boundary.map( + lambda p: p.geoms[1] if len(p.geoms) >= 2 else None + ) + bus_e["lon"] = bus_e["geometry"].map(lambda p: p.x if p != None else None) + bus_e["lat"] = bus_e["geometry"].map(lambda p: p.y if p != None else None) + bus_e["bus_id"] = bus_s["bus_id"].max() + 1 + bus_e.index + bus_e["dc"] = lines["dc"] 
- df = df.loc[:, col_members] - df["id"] = str(row["id"]) - df["ref"] = df["ref"].astype(str) - df = df[df["type"] != "node"] - df = df.dropna(subset=["geometry"]) - df = df[~df["role"].isin(["", "incoming_line", "substation", "inner"])] - df_substations_relation_members = pd.concat([df_substations_relation_members, df], axis="rows") - - df_substations_relation_members.reset_index(inplace=True) - df_substations_relation_members["linestring"] = df_substations_relation_members.apply(_create_linestring, axis=1) - df_substations_relation_members_grouped = df_substations_relation_members.groupby('id')['linestring'] \ - .apply(lambda x: linemerge(x.tolist())).reset_index() - df_substations_relation_members_grouped["geometry"] = df_substations_relation_members_grouped["linestring"].apply(lambda x: x.convex_hull) - - df_substations_relation = df_substations_relation.join( - df_substations_relation_members_grouped.set_index('id'), - on='id', how='left' - ).drop(columns=["members", "linestring"]) \ - .dropna(subset=["geometry"]) - - # reorder columns and concatenate - df_substations_relation = df_substations_relation[cols_substations_way] - df_substations = pd.concat([df_substations_way, df_substations_relation], axis="rows") + bus_all = pd.concat([bus_s, bus_e], ignore_index=True) - # Create centroids from geometries - df_substations.loc[:, "geometry"] = df_substations["geometry"].apply(lambda x: x.centroid) - df_substations.loc[:, "lon"] = df_substations["geometry"].apply(lambda x: x.x) - df_substations.loc[:, "lat"] = df_substations["geometry"].apply(lambda x: x.y) + # Initialize default values + bus_all["station_id"] = np.nan + # Assuming substations completed for installed lines + bus_all["under_construction"] = False + bus_all["tag_area"] = 0.0 + bus_all["symbol"] = "substation" + # TODO: this tag may be improved, maybe depending on voltage levels + bus_all["tag_substation"] = "transmission" + bus_all["tag_source"] = "line_ending" - # Clean columns - 
df_substations["voltage"] = _clean_voltage(df_substations["voltage"]) - df_substations["frequency"] = _clean_frequency(df_substations["frequency"]) - df_substations["frequency"] = df_substations["frequency"].astype(str, errors="ignore") + buses = pd.concat([substations, bus_all], ignore_index=True) - list_voltages = df_substations["voltage"].str.split(";").explode().unique().astype(str) - list_voltages = list_voltages[np.vectorize(len)(list_voltages) >= 6] - list_voltages = list_voltages[~np.char.startswith(list_voltages, '1')] + # # Assign index to bus_id + buses["bus_id"] = buses.index - bool_voltages = df_substations["voltage"].apply(_check_voltage, list_voltages=list_voltages) - df_substations = df_substations[bool_voltages] + # TODO: pypsa-eur: change this later to improve country assignment + bool_multiple_countries = buses["country"].str.contains(";") + buses.loc[bool_multiple_countries, "country"] = buses.loc[bool_multiple_countries, "country"].str.split(";").str[0] - df_substations = _split_cells(df_substations) - bool_voltages = df_substations["voltage"].apply(_check_voltage, list_voltages=list_voltages) - df_substations = df_substations[bool_voltages] - df_substations["split_count"] = df_substations["id"].apply(lambda x: x.split("_")[1] if "_" in x else "0") - df_substations["split_count"] = df_substations["split_count"].astype(int) + return buses - bool_split = df_substations["split_elements"] > 1 - bool_frequency_len = df_substations["frequency"].apply(lambda x: len(x.split(";"))) == df_substations["split_elements"] - df_substations.loc[bool_frequency_len & bool_split, "frequency"] = df_substations.loc[bool_frequency_len & bool_split, "frequency"] \ - - op_freq = lambda row: row["frequency"].split(";")[row["split_count"]-1] - df_substations.loc[bool_frequency_len & bool_split, ["frequency"]] = df_substations.loc[bool_frequency_len & bool_split, ] \ - .apply(op_freq, axis=1) - - df_substations = _split_cells(df_substations, cols=["frequency"]) - 
bool_invalid_frequency = df_substations["frequency"].apply(lambda x: x not in ["50", "0"]) - df_substations.loc[bool_invalid_frequency, "frequency"] = "50" - df_substations["power"] = "substation" - df_substations["substation"] = "transmission" - df_substations["dc"] = False - df_substations.loc[df_substations["frequency"] == "0", "dc"] = True - df_substations["under_construction"] = False - df_substations["station_id"] = None - df_substations["tag_area"] = None +if __name__ == "__main__": + # Detect running outside of snakemake and mock snakemake for testing + if "snakemake" not in globals(): + from _helpers import mock_snakemake - # rename columns - df_substations.rename( - columns={ - "id": "bus_id", - "power": "symbol", - "substation":"tag_substation", - }, inplace=True) - - df_substations = df_substations[[ - "bus_id", - "symbol", - "tag_substation", - "voltage", - "lon", - "lat", - "dc", - "under_construction", - "station_id", - "tag_area", - "country", - "geometry", - ]] + snakemake = mock_snakemake("clean_osm_data") - gdf_substations = gpd.GeoDataFrame(df_substations, geometry = "geometry", crs = "EPSG:4326") - - filepath_substations = snakemake.output["substations"] - # save substations output - logger.info(f"Exporting clean substations to {filepath_substations}") - parentfolder_substations = os.path.dirname(filepath_substations) - if not os.path.exists(parentfolder_substations): - # Create the folder and its parent directories if they don't exist - os.makedirs(parentfolder_substations) - - gdf_substations.to_file(filepath_substations, driver="GeoJSON") + configure_logging(snakemake) + logger.info("Dummy log: clean_osm_data()") - ############# LINES AND CABLES ###################### + ############# LINES AND CABLES ###################### input_path_lines_cables = { "lines": snakemake.input.lines_way, @@ -655,7 +542,7 @@ def _any_substring_in_list(s, list_strings): df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables] \ .apply(lambda row: 
str(row["circuits"].split(";")[ - int(row["id"].split("_")[-1])-1 + int(row["id"].split("-")[-1])-1 ]), axis=1) df_lines.loc[bool_cables & bool_ac, "frequency"] = "50" @@ -671,7 +558,7 @@ def _any_substring_in_list(s, list_strings): .apply(lambda row: str(max(1, np.floor_divide( - int(row["cables"].split(";")[int(row["id"].split("_")[-1])-1]), + int(row["cables"].split(";")[int(row["id"].split("-")[-1])-1]), 3 ) )), @@ -683,9 +570,11 @@ def _any_substring_in_list(s, list_strings): # All remaining lines to circuits == 1 bool_leftover = (df_lines["cleaned"] == False) - str_id = "; ".join(str(id) for id in df_lines.loc[bool_leftover, "id"]) - logger.info(f"Setting circuits of remaining {sum(bool_leftover)} lines to 1...") - logger.info(f"Lines affected: {str_id}") + if sum(bool_leftover) > 0: + str_id = "; ".join(str(id) for id in df_lines.loc[bool_leftover, "id"]) + logger.info(f"Setting circuits of remaining {sum(bool_leftover)} lines to 1...") + logger.info(f"Lines affected: {str_id}") + df_lines.loc[bool_leftover, "circuits"] = "1" df_lines.loc[bool_leftover & bool_ac, "frequency"] = "50" df_lines.loc[bool_leftover & bool_dc, "frequency"] = "0" @@ -702,11 +591,13 @@ def _any_substring_in_list(s, list_strings): df_lines["bus0"] = None df_lines["bus1"] = None df_lines["length"] = None + df_lines["underground"] = False df_lines.loc[df_lines["tag_type"] == "line", "underground"] = False df_lines.loc[df_lines["tag_type"] == "cable", "underground"] = True df_lines["under_construction"] = False - df_lines.loc[df_lines["tag_frequency"] == "0", "dc"] = True + df_lines["dc"] = False df_lines.loc[df_lines["tag_frequency"] == "50", "dc"] = False + df_lines.loc[df_lines["tag_frequency"] == "0", "dc"] = True df_lines = df_lines[[ "line_id", @@ -728,9 +619,183 @@ def _any_substring_in_list(s, list_strings): # Drop all rows where the geometry has equal start and end point bool_circle = df_lines["geometry"].apply(lambda x: x.coords[0] == x.coords[-1]) df_lines = 
df_lines[~bool_circle] + + # TODO pypsa-eur: Temporary solution as one AC line between converters will create an error in simplify_network + # As this case is not considered there: + lines_to_drop = ["775580659"] + if lines_to_drop in df_lines["line_id"].values: + df_lines.drop(df_lines[df_lines["line_id"].isin(lines_to_drop)].index, inplace=True) gdf_lines = gpd.GeoDataFrame(df_lines, geometry = "geometry", crs = "EPSG:4326") + # Lines data types + gdf_lines["circuits"] = gdf_lines["circuits"].astype(int) + gdf_lines["voltage"] = gdf_lines["voltage"].astype(int) + gdf_lines["tag_frequency"] = gdf_lines["tag_frequency"].astype(int) + + + ############# BUSES / SUBSTATIONS ###################### + input_path_substations = { + "substations_way": snakemake.input.substations_way, + "substations_relation": snakemake.input.substations_relation, + } + + cols_substations_way = ["id", "geometry", "country", "power", "substation", "voltage", "frequency"] + cols_substations_relation = ["id", "country", "power", "substation", "voltage", "frequency"] + df_substations_way = pd.DataFrame(columns = cols_substations_way) + df_substations_relation = pd.DataFrame(columns = cols_substations_relation) + + for key in input_path_substations: + logger.info(f"Processing {key}...") + for idx, ip in enumerate(input_path_substations[key]): + if os.path.exists(ip) and os.path.getsize(ip) > 400: # unpopulated OSM json is about 51 bytes + country = os.path.basename(os.path.dirname(input_path_substations[key][idx])) + logger.info(f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(input_path_substations[key])).zfill(2)}: {ip}") + with open(ip, "r") as f: + data = json.load(f) + + df = pd.DataFrame(data['elements']) + df["id"] = df["id"].astype(str) + # new string that adds "way/" to id + df["id"] = df["id"].apply(lambda x: f"way/{x}" if key == "substations_way" else f"relation/{x}") + df["country"] = country + + col_tags = ["power", "substation", "voltage", "frequency"] + + tags = 
pd.json_normalize(df["tags"]) \ + .map(lambda x: str(x) if pd.notnull(x) else x) + + for ct in col_tags: + if ct not in tags.columns: + tags[ct] = pd.NA + + tags = tags.loc[:, col_tags] + + df = pd.concat([df, tags], axis="columns") + + if key == "substations_way": + df.drop(columns=["type", "tags", "bounds", "nodes"], inplace=True) + df_substations_way = pd.concat([df_substations_way, df], axis="rows") + elif key == "substations_relation": + df.drop(columns=["type", "tags", "bounds"], inplace=True) + df_substations_relation = pd.concat([df_substations_relation, df], axis="rows") + + else: + logger.info(f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(input_path_substations[key])).zfill(2)} (empty): {ip}") + continue + logger.info("---") + + df_substations_way.drop_duplicates(subset='id', keep='first', inplace=True) + df_substations_relation.drop_duplicates(subset='id', keep='first', inplace=True) + + df_substations_way["geometry"] = df_substations_way.apply(_create_polygon, axis=1) + + # Normalise the members column of df_substations_relation + cols_members = ["id", "type", "ref", "role", "geometry"] + df_substations_relation_members = pd.DataFrame(columns = cols_members) + + for index, row in df_substations_relation.iterrows(): + col_members = ["type", "ref", "role", "geometry"] + df = pd.json_normalize(row["members"]) + + for cm in col_members: + if cm not in df.columns: + df[cm] = pd.NA + + df = df.loc[:, col_members] + df["id"] = str(row["id"]) + df["ref"] = df["ref"].astype(str) + df = df[df["type"] != "node"] + df = df.dropna(subset=["geometry"]) + df = df[~df["role"].isin(["", "incoming_line", "substation", "inner"])] + df_substations_relation_members = pd.concat([df_substations_relation_members, df], axis="rows") + + df_substations_relation_members.reset_index(inplace=True) + df_substations_relation_members["linestring"] = df_substations_relation_members.apply(_create_linestring, axis=1) + df_substations_relation_members_grouped = 
df_substations_relation_members.groupby('id')['linestring'] \ + .apply(lambda x: linemerge(x.tolist())).reset_index() + df_substations_relation_members_grouped["geometry"] = df_substations_relation_members_grouped["linestring"].apply(lambda x: x.convex_hull) + + df_substations_relation = df_substations_relation.join( + df_substations_relation_members_grouped.set_index('id'), + on='id', how='left' + ).drop(columns=["members", "linestring"]) \ + .dropna(subset=["geometry"]) + + # reorder columns and concatenate + df_substations_relation = df_substations_relation[cols_substations_way] + df_substations = pd.concat([df_substations_way, df_substations_relation], axis="rows") + + # Create centroids from geometries + df_substations.loc[:, "polygon"] = df_substations["geometry"] + df_substations.loc[:, "geometry"] = df_substations["geometry"].apply(lambda x: x.centroid) + df_substations.loc[:, "lon"] = df_substations["geometry"].apply(lambda x: x.x) + df_substations.loc[:, "lat"] = df_substations["geometry"].apply(lambda x: x.y) + + # Clean columns + df_substations["voltage"] = _clean_voltage(df_substations["voltage"]) + df_substations["frequency"] = _clean_frequency(df_substations["frequency"]) + df_substations["frequency"] = df_substations["frequency"].astype(str, errors="ignore") + + list_voltages = df_substations["voltage"].str.split(";").explode().unique().astype(str) + list_voltages = list_voltages[np.vectorize(len)(list_voltages) >= 6] + list_voltages = list_voltages[~np.char.startswith(list_voltages, '1')] + + bool_voltages = df_substations["voltage"].apply(_check_voltage, list_voltages=list_voltages) + df_substations = df_substations[bool_voltages] + + df_substations = _split_cells(df_substations) + bool_voltages = df_substations["voltage"].apply(_check_voltage, list_voltages=list_voltages) + df_substations = df_substations[bool_voltages] + df_substations["split_count"] = df_substations["id"].apply(lambda x: x.split("-")[1] if "-" in x else "0") + 
df_substations["split_count"] = df_substations["split_count"].astype(int) + + bool_split = df_substations["split_elements"] > 1 + bool_frequency_len = df_substations["frequency"].apply(lambda x: len(x.split(";"))) == df_substations["split_elements"] + df_substations.loc[bool_frequency_len & bool_split, "frequency"] = df_substations.loc[bool_frequency_len & bool_split, "frequency"] \ + + op_freq = lambda row: row["frequency"].split(";")[row["split_count"]-1] + + df_substations.loc[bool_frequency_len & bool_split, ["frequency"]] = df_substations.loc[bool_frequency_len & bool_split, ] \ + .apply(op_freq, axis=1) + + df_substations = _split_cells(df_substations, cols=["frequency"]) + bool_invalid_frequency = df_substations["frequency"].apply(lambda x: x not in ["50", "0"]) + df_substations.loc[bool_invalid_frequency, "frequency"] = "50" + df_substations["power"] = "substation" + df_substations["substation"] = "transmission" + df_substations["dc"] = False + df_substations.loc[df_substations["frequency"] == "0", "dc"] = True + df_substations["under_construction"] = False + df_substations["station_id"] = None + df_substations["tag_area"] = None + df_substations["tag_source"] = df_substations["id"] + + + # Create an empty list to store the results + results = [] + + for index, row in tqdm(gdf_lines.iterrows(), total=len(gdf_lines), desc="Processing LineStrings"): + line = row['geometry'] + # Check if the LineString is within any Polygon in 'substations_df' + is_within_any_substation = any(line.within(substation_polygon) for substation_polygon in df_substations["polygon"]) + results.append(is_within_any_substation) + + # Add the results to 'gdf_lines' + gdf_lines['within_substation'] = results + + # gdf_sub = gpd.GeoDataFrame(df_substations[["id", "polygon"]], geometry = "polygon", crs = "EPSG:4326") + # fig = Figure(width = "70%", height = 600) + + # m = gdf_sub.explore(name = "Subs", color = "red") + # m = gdf_lines.explore(m = m, name = "lines") + + # 
folium.LayerControl(collapsed = False).add_to(m) + + # fig.add_child(m) + # m + gdf_lines = gdf_lines[~gdf_lines["within_substation"]] + filepath_lines = snakemake.output["lines"] # save substations output logger.info(f"Exporting clean lines to {filepath_lines}") @@ -740,25 +805,55 @@ def _any_substring_in_list(s, list_strings): os.makedirs(parentfolder_lines) gdf_lines.to_file(filepath_lines, driver="GeoJSON") - - - ######## - ######## - ######## - - - fig = Figure(width = "50%", height = 600) - m = gdf_substations.explore(name = "Buses", color = "red") - m = gdf_lines.explore(m = m, name = "Lines") - folium.LayerControl(collapsed = False).add_to(m) + # rename columns + df_substations.rename( + columns={ + "id": "bus_id", + "power": "symbol", + "substation":"tag_substation", + }, inplace=True) + + df_substations = df_substations[[ + "bus_id", + "symbol", + "tag_substation", + "voltage", + "lon", + "lat", + "dc", + "under_construction", + "station_id", + "tag_area", + "country", + "geometry", + "tag_source", + ]] + + df_substations["bus_id"] = df_substations.index - fig.add_child(m) - m + df_substations = add_line_endings_tosubstations( + df_substations, gdf_lines + ) + + #group gdf_substations by voltage and and geometry (dropping duplicates) + df_substations = df_substations.groupby(["voltage", "lon", "lat", "tag_source"]).first().reset_index() + df_substations["bus_id"] = df_substations.index + + gdf_substations = gpd.GeoDataFrame(df_substations, geometry = "geometry", crs = "EPSG:4326") - gdf_substations.explore() + # Substation data types + gdf_substations["bus_id"] = gdf_substations["bus_id"].astype(int) + gdf_substations["voltage"] = gdf_substations["voltage"].astype(int) + filepath_substations = snakemake.output["substations"] + # save substations output + logger.info(f"Exporting clean substations to {filepath_substations}") + parentfolder_substations = os.path.dirname(filepath_substations) + if not os.path.exists(parentfolder_substations): + # Create the 
folder and its parent directories if they don't exist + os.makedirs(parentfolder_substations) - output = str(snakemake.output) - clean_osm_data(output) \ No newline at end of file + gdf_substations.to_file(filepath_substations, driver="GeoJSON") + \ No newline at end of file From 266a8d0ca69964b14b88e570a1ba288524be0046 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Tue, 7 May 2024 22:32:17 +0200 Subject: [PATCH 009/100] Working osm-network-fast --- rules/build_electricity.smk | 121 ++-- scripts/base_network_osm.py | 1133 ++++++++++++++++++++++++++++++++++ scripts/build_osm_network.py | 13 +- scripts/clean_osm_data.py | 13 +- scripts/simplify_network.py | 7 +- 5 files changed, 1228 insertions(+), 59 deletions(-) create mode 100644 scripts/base_network_osm.py diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 5e7b362de..630d1b46d 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -64,42 +64,80 @@ rule build_powerplants: script: "../scripts/build_powerplants.py" +if config["base_network"] == "eegk": + rule base_network: + params: + countries=config_provider("countries"), + snapshots=config_provider("snapshots"), + drop_leap_day=config_provider("enable", "drop_leap_day"), + lines=config_provider("lines"), + links=config_provider("links"), + transformers=config_provider("transformers"), + input: + eg_buses="data/entsoegridkit/buses.csv", + eg_lines="data/entsoegridkit/lines.csv", + eg_links="data/entsoegridkit/links.csv", + eg_converters="data/entsoegridkit/converters.csv", + eg_transformers="data/entsoegridkit/transformers.csv", + parameter_corrections="data/parameter_corrections.yaml", + links_p_nom="data/links_p_nom.csv", + links_tyndp="data/links_tyndp.csv", + country_shapes=resources("country_shapes.geojson"), + offshore_shapes=resources("offshore_shapes.geojson"), + europe_shape=resources("europe_shape.geojson"), + output: + base_network=resources("networks/base.nc"), + 
regions_onshore=resources("regions_onshore.geojson"), + regions_offshore=resources("regions_offshore.geojson"), + log: + logs("base_network.log"), + benchmark: + benchmarks("base_network") + threads: 1 + resources: + mem_mb=1500, + conda: + "../envs/environment.yaml" + script: + "../scripts/base_network.py" -rule base_network: - params: - countries=config_provider("countries"), - snapshots=config_provider("snapshots"), - drop_leap_day=config_provider("enable", "drop_leap_day"), - lines=config_provider("lines"), - links=config_provider("links"), - transformers=config_provider("transformers"), - input: - eg_buses="data/entsoegridkit/buses.csv", - eg_lines="data/entsoegridkit/lines.csv", - eg_links="data/entsoegridkit/links.csv", - eg_converters="data/entsoegridkit/converters.csv", - eg_transformers="data/entsoegridkit/transformers.csv", - parameter_corrections="data/parameter_corrections.yaml", - links_p_nom="data/links_p_nom.csv", - links_tyndp="data/links_tyndp.csv", - country_shapes=resources("country_shapes.geojson"), - offshore_shapes=resources("offshore_shapes.geojson"), - europe_shape=resources("europe_shape.geojson"), - output: - base_network=resources("networks/base.nc"), - regions_onshore=resources("regions_onshore.geojson"), - regions_offshore=resources("regions_offshore.geojson"), - log: - logs("base_network.log"), - benchmark: - benchmarks("base_network") - threads: 1 - resources: - mem_mb=1500, - conda: - "../envs/environment.yaml" - script: - "../scripts/base_network.py" + +if config["base_network"] == "osm": + rule base_network: + params: + countries=config_provider("countries"), + snapshots=config_provider("snapshots"), + drop_leap_day=config_provider("enable", "drop_leap_day"), + lines=config_provider("lines"), + links=config_provider("links"), + transformers=config_provider("transformers"), + input: + eg_buses="data/osm/buses.csv", + eg_lines="data/osm/lines.csv", + # eg_links="data/entsoegridkit/links.csv", + 
eg_converters="data/osm/converters.csv", + eg_transformers="data/osm/transformers.csv", + # parameter_corrections="data/parameter_corrections.yaml", + links_p_nom="data/links_p_nom.csv", + links_tyndp="data/links_tyndp_osm.csv", + country_shapes=resources("country_shapes.geojson"), + offshore_shapes=resources("offshore_shapes.geojson"), + europe_shape=resources("europe_shape.geojson"), + output: + base_network=resources("networks/base.nc"), + regions_onshore=resources("regions_onshore.geojson"), + regions_offshore=resources("regions_offshore.geojson"), + log: + logs("base_network.log"), + benchmark: + benchmarks("base_network") + threads: 1 + resources: + mem_mb=1500, + conda: + "../envs/environment.yaml" + script: + "../scripts/base_network_osm.py" rule build_shapes: @@ -597,9 +635,6 @@ if config["osm"].get("retrieve", True): substations_way="data/osm/raw/{country}/substations_way_raw.json", substations_node="data/osm/raw/{country}/substations_node_raw.json", substations_relation="data/osm/raw/{country}/substations_relation_raw.json", - # transformers_way="data/osm/raw/{country}/transformers_way_raw.json", - # transformers_node="data/osm/raw/{country}/transformers_node_raw.json", - # route_relations="data/osm/raw/{country}/route_relations_raw.json", log: logs("retrieve_osm_data_{country}.log"), script: @@ -607,21 +642,12 @@ if config["osm"].get("retrieve", True): rule clean_osm_data: - # params: - # countries=config["countries"], input: - # **{ - # f"{country}": [f"data/osm/raw/{country}/{feature}.geojson" for feature in FEATURES] - # for country in config["countries"] - # }, cables_way=[f"data/osm/raw/{country}/cables_way_raw.json" for country in config["countries"]], lines_way=[f"data/osm/raw/{country}/lines_way_raw.json" for country in config["countries"]], substations_way=[f"data/osm/raw/{country}/substations_way_raw.json" for country in config["countries"]], substations_node=[f"data/osm/raw/{country}/substations_node_raw.json" for country in 
config["countries"]], substations_relation=[f"data/osm/raw/{country}/substations_relation_raw.json" for country in config["countries"]], - # transformers_way=[f"data/osm/raw/{country}/transformers_way_raw.json" for country in config["countries"]], - # transformers_node=[f"data/osm/raw/{country}/transformers_node_raw.json" for country in config["countries"]], - # route_relations=[f"data/osm/raw/{country}/route_relations_raw.json" for country in config["countries"]], output: substations="data/osm/clean/substations.geojson", lines="data/osm/clean/lines.geojson", @@ -635,6 +661,7 @@ rule build_osm_network: input: substations="data/osm/clean/substations.geojson", lines="data/osm/clean/lines.geojson", + country_shapes=resources("country_shapes.geojson"), output: lines="data/osm/lines.csv", converters="data/osm/converters.csv", diff --git a/scripts/base_network_osm.py b/scripts/base_network_osm.py new file mode 100644 index 000000000..874c778fe --- /dev/null +++ b/scripts/base_network_osm.py @@ -0,0 +1,1133 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2017-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT + +# coding: utf-8 +""" +Creates the network topology from a `ENTSO-E map extract. + +`_ (March 2022) as a PyPSA +network. + +Relevant Settings +----------------- + +.. code:: yaml + + countries: + + electricity: + voltages: + + lines: + types: + s_max_pu: + under_construction: + + links: + p_max_pu: + under_construction: + include_tyndp: + + transformers: + x: + s_nom: + type: + +.. seealso:: + Documentation of the configuration file ``config/config.yaml`` at + :ref:`snapshots_cf`, :ref:`toplevel_cf`, :ref:`electricity_cf`, :ref:`load_cf`, + :ref:`lines_cf`, :ref:`links_cf`, :ref:`transformers_cf` + +Inputs +------ + +- ``data/entsoegridkit``: Extract from the geographical vector data of the online `ENTSO-E Interactive Map `_ by the `GridKit `_ toolkit dating back to March 2022. 
+- ``data/parameter_corrections.yaml``: Corrections for ``data/entsoegridkit`` +- ``data/links_p_nom.csv``: confer :ref:`links` +- ``data/links_tyndp.csv``: List of projects in the `TYNDP 2018 `_ that are at least *in permitting* with fields for start- and endpoint (names and coordinates), length, capacity, construction status, and project reference ID. +- ``resources/country_shapes.geojson``: confer :ref:`shapes` +- ``resources/offshore_shapes.geojson``: confer :ref:`shapes` +- ``resources/europe_shape.geojson``: confer :ref:`shapes` + +Outputs +------- + +- ``networks/base.nc`` + + .. image:: img/base.png + :scale: 33 % + +- ``resources/regions_onshore.geojson``: + + .. image:: img/regions_onshore.png + :scale: 33 % + +- ``resources/regions_offshore.geojson``: + + .. image:: img/regions_offshore.png + :scale: 33 % + +Description +----------- +Creates the network topology from an ENTSO-E map extract, and create Voronoi shapes for each bus representing both onshore and offshore regions. 
+""" + +import logging +from itertools import product + +import geopandas as gpd +import networkx as nx +import numpy as np +import pandas as pd +import pypsa +import shapely +import shapely.prepared +import shapely.wkt +import yaml +from _helpers import REGION_COLS, configure_logging, get_snapshots, set_scenario_config +from packaging.version import Version, parse +from scipy import spatial +from scipy.sparse import csgraph +from shapely.geometry import LineString, Point, Polygon + +PD_GE_2_2 = parse(pd.__version__) >= Version("2.2") + +logger = logging.getLogger(__name__) + + +def _get_oid(df): + if "tags" in df.columns: + return df.tags.str.extract('"oid"=>"(\d+)"', expand=False) + else: + return pd.Series(np.nan, df.index) + + +def _get_country(df): + if "tags" in df.columns: + return df.tags.str.extract('"country"=>"([A-Z]{2})"', expand=False) + else: + return pd.Series(np.nan, df.index) + + +def _find_closest_links(links, new_links, distance_upper_bound=1.5): + treecoords = np.asarray( + [ + np.asarray(shapely.wkt.loads(s).coords)[[0, -1]].flatten() + for s in links.geometry + ] + ) + querycoords = np.vstack( + [new_links[["x1", "y1", "x2", "y2"]], new_links[["x2", "y2", "x1", "y1"]]] + ) + tree = spatial.KDTree(treecoords) + dist, ind = tree.query(querycoords, distance_upper_bound=distance_upper_bound) + found_b = ind < len(links) + found_i = np.arange(len(new_links) * 2)[found_b] % len(new_links) + return ( + pd.DataFrame( + dict(D=dist[found_b], i=links.index[ind[found_b] % len(links)]), + index=new_links.index[found_i], + ) + .sort_values(by="D")[lambda ds: ~ds.index.duplicated(keep="first")] + .sort_index()["i"] + ) + + +def _load_buses_from_eg(eg_buses, europe_shape, config_elec): + buses = ( + pd.read_csv( + eg_buses, + quotechar="'", + true_values=["t"], + false_values=["f"], + dtype=dict(bus_id="str"), + ) + .set_index("bus_id") + .drop(["station_id"], axis=1) + .rename(columns=dict(voltage="v_nom")) + ) + + buses["carrier"] = 
buses.pop("dc").map({True: "DC", False: "AC"}) + buses["under_construction"] = buses.under_construction.where( + lambda s: s.notnull(), False + ).astype(bool) + + # remove all buses outside of all countries including exclusive economic zones (offshore) + europe_shape = gpd.read_file(europe_shape).loc[0, "geometry"] + # TODO pypsa-eur: Temporary fix: Convex hull, this is important when nodes are between countries + europe_shape = europe_shape.convex_hull + + europe_shape_prepped = shapely.prepared.prep(europe_shape) + buses_in_europe_b = buses[["x", "y"]].apply( + lambda p: europe_shape_prepped.contains(Point(p)), axis=1 + ) + + + # TODO pypsa-eur: Find a long-term solution + # buses_with_v_nom_to_keep_b = ( + # buses.v_nom.isin(config_elec["voltages"]) | buses.v_nom.isnull() + # ) + + # Quick fix: + buses_with_v_nom_to_keep_b = (min(config_elec["voltages"]) <= buses.v_nom) & (buses.v_nom <= max(config_elec["voltages"])) + + logger.info( + f'Removing buses with voltages {pd.Index(buses.v_nom.unique()).dropna().difference(config_elec["voltages"])}' + ) + return pd.DataFrame(buses.loc[buses_in_europe_b & buses_with_v_nom_to_keep_b]) + + +def _load_transformers_from_eg(buses, eg_transformers): + transformers = pd.read_csv( + eg_transformers, + quotechar="'", + true_values=["t"], + false_values=["f"], + dtype=dict(transformer_id="str", bus0="str", bus1="str"), + ).set_index("transformer_id") + + transformers = _remove_dangling_branches(transformers, buses) + + return transformers + + +def _load_converters_from_eg(buses, eg_converters): + converters = pd.read_csv( + eg_converters, + quotechar="'", + true_values=["t"], + false_values=["f"], + dtype=dict(converter_id="str", bus0="str", bus1="str"), + ).set_index("converter_id") + + converters = _remove_dangling_branches(converters, buses) + + converters["carrier"] = "B2B" + + return converters + + +def _load_links_from_eg(buses, eg_links): + links = pd.read_csv( + eg_links, + quotechar="'", + true_values=["t"], + 
false_values=["f"], + dtype=dict(link_id="str", bus0="str", bus1="str", under_construction="bool"), + ).set_index("link_id") + + links["length"] /= 1e3 + + # Skagerrak Link is connected to 132kV bus which is removed in _load_buses_from_eg. + # Connect to neighboring 380kV bus + links.loc[links.bus1 == "6396", "bus1"] = "6398" + + links = _remove_dangling_branches(links, buses) + + # Add DC line parameters + links["carrier"] = "DC" + + return links + + +def _add_links_from_tyndp(buses, links, links_tyndp, europe_shape): + links_tyndp = pd.read_csv(links_tyndp) + + # remove all links from list which lie outside all of the desired countries + europe_shape = gpd.read_file(europe_shape).loc[0, "geometry"] + europe_shape_prepped = shapely.prepared.prep(europe_shape) + x1y1_in_europe_b = links_tyndp[["x1", "y1"]].apply( + lambda p: europe_shape_prepped.contains(Point(p)), axis=1 + ) + x2y2_in_europe_b = links_tyndp[["x2", "y2"]].apply( + lambda p: europe_shape_prepped.contains(Point(p)), axis=1 + ) + is_within_covered_countries_b = x1y1_in_europe_b & x2y2_in_europe_b + + if not is_within_covered_countries_b.all(): + logger.info( + "TYNDP links outside of the covered area (skipping): " + + ", ".join(links_tyndp.loc[~is_within_covered_countries_b, "Name"]) + ) + + links_tyndp = links_tyndp.loc[is_within_covered_countries_b] + if links_tyndp.empty: + return buses, links + + has_replaces_b = links_tyndp.replaces.notnull() + oids = dict(Bus=_get_oid(buses), Link=_get_oid(links)) + keep_b = dict( + Bus=pd.Series(True, index=buses.index), Link=pd.Series(True, index=links.index) + ) + for reps in links_tyndp.loc[has_replaces_b, "replaces"]: + for comps in reps.split(":"): + oids_to_remove = comps.split(".") + c = oids_to_remove.pop(0) + keep_b[c] &= ~oids[c].isin(oids_to_remove) + buses = buses.loc[keep_b["Bus"]] + links = links.loc[keep_b["Link"]] + + links_tyndp["j"] = _find_closest_links( + links, links_tyndp, distance_upper_bound=0.20 + ) + # Corresponds approximately to 20km 
tolerances + + if links_tyndp["j"].notnull().any(): + logger.info( + "TYNDP links already in the dataset (skipping): " + + ", ".join(links_tyndp.loc[links_tyndp["j"].notnull(), "Name"]) + ) + links_tyndp = links_tyndp.loc[links_tyndp["j"].isnull()] + if links_tyndp.empty: + return buses, links + + tree = spatial.KDTree(buses[["x", "y"]]) + _, ind0 = tree.query(links_tyndp[["x1", "y1"]]) + ind0_b = ind0 < len(buses) + links_tyndp.loc[ind0_b, "bus0"] = buses.index[ind0[ind0_b]] + + _, ind1 = tree.query(links_tyndp[["x2", "y2"]]) + ind1_b = ind1 < len(buses) + links_tyndp.loc[ind1_b, "bus1"] = buses.index[ind1[ind1_b]] + + links_tyndp_located_b = ( + links_tyndp["bus0"].notnull() & links_tyndp["bus1"].notnull() + ) + if not links_tyndp_located_b.all(): + logger.warning( + "Did not find connected buses for TYNDP links (skipping): " + + ", ".join(links_tyndp.loc[~links_tyndp_located_b, "Name"]) + ) + links_tyndp = links_tyndp.loc[links_tyndp_located_b] + + logger.info("Adding the following TYNDP links: " + ", ".join(links_tyndp["Name"])) + + links_tyndp = links_tyndp[["bus0", "bus1"]].assign( + carrier="DC", + p_nom=links_tyndp["Power (MW)"], + length=links_tyndp["Length (given) (km)"].fillna( + links_tyndp["Length (distance*1.2) (km)"] + ), + under_construction=True, + underground=False, + geometry=( + links_tyndp[["x1", "y1", "x2", "y2"]].apply( + lambda s: str(LineString([[s.x1, s.y1], [s.x2, s.y2]])), axis=1 + ) + ), + tags=( + '"name"=>"' + + links_tyndp["Name"] + + '", ' + + '"ref"=>"' + + links_tyndp["Ref"] + + '", ' + + '"status"=>"' + + links_tyndp["status"] + + '"' + ), + ) + + links_tyndp.index = "T" + links_tyndp.index.astype(str) + + links = pd.concat([links, links_tyndp], sort=True) + + return buses, links + + +def _load_lines_from_eg(buses, eg_lines): + lines = ( + pd.read_csv( + eg_lines, + quotechar="'", + true_values=["t"], + false_values=["f"], + dtype=dict( + line_id="str", + bus0="str", + bus1="str", + underground="bool", + 
under_construction="bool", + ), + ) + .set_index("line_id") + .rename(columns=dict(voltage="v_nom", circuits="num_parallel")) + ) + + lines["length"] /= 1e3 + + # lines["carrier"] = "AC" #TODO pypsa-eur clean/remove this + lines = _remove_dangling_branches(lines, buses) + + return lines + + +def _apply_parameter_corrections(n, parameter_corrections): + with open(parameter_corrections) as f: + corrections = yaml.safe_load(f) + + if corrections is None: + return + + for component, attrs in corrections.items(): + df = n.df(component) + oid = _get_oid(df) + if attrs is None: + continue + + for attr, repls in attrs.items(): + for i, r in repls.items(): + if i == "oid": + r = oid.map(repls["oid"]).dropna() + elif i == "index": + r = pd.Series(repls["index"]) + else: + raise NotImplementedError() + inds = r.index.intersection(df.index) + df.loc[inds, attr] = r[inds].astype(df[attr].dtype) + + +def _reconnect_crimea(lines): + logger.info("Reconnecting Crimea to the Ukrainian grid.") + lines_to_crimea = pd.DataFrame( + { + "bus0": ["3065", "3181", "3181"], + "bus1": ["3057", "3055", "3057"], + "v_nom": [300, 300, 300], + "num_parallel": [1, 1, 1], + "length": [140, 120, 140], + "carrier": ["AC", "AC", "AC"], + "underground": [False, False, False], + "under_construction": [False, False, False], + }, + index=["Melitopol", "Liubymivka left", "Luibymivka right"], + ) + + return pd.concat([lines, lines_to_crimea]) + + +# def _set_electrical_parameters_lines(lines, config): +# v_noms = config["electricity"]["voltages"] +# linetypes = config["lines"]["types"] + +# for v_nom in v_noms: +# lines.loc[lines["v_nom"] == v_nom, "type"] = linetypes[v_nom] + +def _set_electrical_parameters_lines(lines_config, voltages, lines): + if lines.empty: + lines["type"] = [] + return lines + + linetypes = _get_linetypes_config(lines_config["types"], voltages) + + lines["carrier"] = "AC" + lines["dc"] = False + + lines.loc[:, "type"] = lines.v_nom.apply( + lambda x: _get_linetype_by_voltage(x, 
linetypes) + ) + + lines["s_max_pu"] = lines_config["s_max_pu"] + + return lines + + +def _set_lines_s_nom_from_linetypes(n): + n.lines["s_nom"] = ( + np.sqrt(3) + * n.lines["type"].map(n.line_types.i_nom) + * n.lines["v_nom"] + * n.lines["num_parallel"] + ) + # Re-define s_nom for DC lines + n.lines.loc[n.lines["carrier"] == "DC", "s_nom"] = n.lines["type"].map( + n.line_types.i_nom + ) * n.lines.eval("v_nom * num_parallel") + + +def _set_electrical_parameters_dc_lines(lines_config, voltages, lines): + if lines.empty: + lines["type"] = [] + return lines + + linetypes = _get_linetypes_config(lines_config["dc_types"], voltages) + + lines["carrier"] = "DC" + lines["dc"] = True + lines.loc[:, "type"] = lines.v_nom.apply( + lambda x: _get_linetype_by_voltage(x, linetypes) + ) + + lines["s_max_pu"] = lines_config["s_max_pu"] + + return lines + +# TODO pypsa-eur: Clean/fix this, update list p_noms +def _set_electrical_parameters_links(links, config, links_p_nom): + if links.empty: + return links + + p_max_pu = config["links"].get("p_max_pu", 1.0) + links["p_max_pu"] = p_max_pu + links["p_min_pu"] = -p_max_pu + + links_p_nom = pd.read_csv(links_p_nom) + + # filter links that are not in operation anymore + removed_b = links_p_nom.Remarks.str.contains("Shut down|Replaced", na=False) + links_p_nom = links_p_nom[~removed_b] + + # find closest link for all links in links_p_nom + links_p_nom["j"] = _find_closest_links(links, links_p_nom) + + links_p_nom = links_p_nom.groupby(["j"], as_index=False).agg({"Power (MW)": "sum"}) + + p_nom = links_p_nom.dropna(subset=["j"]).set_index("j")["Power (MW)"] + + # Don't update p_nom if it's already set + p_nom_unset = ( + p_nom.drop(links.index[links.p_nom.notnull()], errors="ignore") + if "p_nom" in links + else p_nom + ) + links.loc[p_nom_unset.index, "p_nom"] = p_nom_unset + + links["carrier"] = "DC" + links["dc"] = True + + return links + + +def _set_electrical_parameters_converters(converters, config): + p_max_pu = 
config["links"].get("p_max_pu", 1.0) + converters["p_max_pu"] = p_max_pu + converters["p_min_pu"] = -p_max_pu + + converters["p_nom"] = 2000 + + # Converters are combined with links + converters["under_construction"] = False + converters["underground"] = False + + return converters + + +def _set_electrical_parameters_transformers(transformers, config): + config = config["transformers"] + + ## Add transformer parameters + transformers["x"] = config.get("x", 0.1) + transformers["s_nom"] = config.get("s_nom", 2000) + transformers["type"] = config.get("type", "") + + return transformers + + +def _remove_dangling_branches(branches, buses): + return pd.DataFrame( + branches.loc[branches.bus0.isin(buses.index) & branches.bus1.isin(buses.index)] + ) + + +def _remove_unconnected_components(network, threshold=6): + _, labels = csgraph.connected_components(network.adjacency_matrix(), directed=False) + component = pd.Series(labels, index=network.buses.index) + + component_sizes = component.value_counts() + components_to_remove = component_sizes.loc[component_sizes < threshold] + + logger.info( + f"Removing {len(components_to_remove)} unconnected network components with less than {components_to_remove.max()} buses. In total {components_to_remove.sum()} buses." 
+ ) + + return network[component == component_sizes.index[0]] + + +def _set_countries_and_substations(n, config, country_shapes, offshore_shapes): + buses = n.buses + + def buses_in_shape(shape): + shape = shapely.prepared.prep(shape) + return pd.Series( + np.fromiter( + ( + shape.contains(Point(x, y)) + for x, y in buses.loc[:, ["x", "y"]].values + ), + dtype=bool, + count=len(buses), + ), + index=buses.index, + ) + + countries = config["countries"] + country_shapes = gpd.read_file(country_shapes).set_index("name")["geometry"] + # reindexing necessary for supporting empty geo-dataframes + offshore_shapes = gpd.read_file(offshore_shapes) + offshore_shapes = offshore_shapes.reindex(columns=["name", "geometry"]).set_index( + "name" + )["geometry"] + substation_b = buses["symbol"].str.contains( + "substation|converter station", case=False + ) + + def prefer_voltage(x, which): + index = x.index + if len(index) == 1: + return pd.Series(index, index) + key = ( + x.index[0] + if x["v_nom"].isnull().all() + else getattr(x["v_nom"], "idx" + which)() + ) + return pd.Series(key, index) + + compat_kws = dict(include_groups=False) if PD_GE_2_2 else {} + gb = buses.loc[substation_b].groupby( + ["x", "y"], as_index=False, group_keys=False, sort=False + ) + bus_map_low = gb.apply(prefer_voltage, "min", **compat_kws) + lv_b = (bus_map_low == bus_map_low.index).reindex(buses.index, fill_value=False) + bus_map_high = gb.apply(prefer_voltage, "max", **compat_kws) + hv_b = (bus_map_high == bus_map_high.index).reindex(buses.index, fill_value=False) + + onshore_b = pd.Series(False, buses.index) + offshore_b = pd.Series(False, buses.index) + + for country in countries: + onshore_shape = country_shapes[country] + onshore_country_b = buses_in_shape(onshore_shape) + onshore_b |= onshore_country_b + + buses.loc[onshore_country_b, "country"] = country + + if country not in offshore_shapes.index: + continue + offshore_country_b = buses_in_shape(offshore_shapes[country]) + offshore_b |= 
offshore_country_b + + buses.loc[offshore_country_b, "country"] = country + + # Only accept buses as low-voltage substations (where load is attached), if + # they have at least one connection which is not under_construction + has_connections_b = pd.Series(False, index=buses.index) + for b, df in product(("bus0", "bus1"), (n.lines, n.links)): + has_connections_b |= ~df.groupby(b).under_construction.min() + + buses["onshore_bus"] = onshore_b + buses["substation_lv"] = ( + lv_b & onshore_b & (~buses["under_construction"]) & has_connections_b + ) + + # TODO: fix this in pypsa-eur master branch + # buses["substation_off"] = offshore_b & ( + # ~buses["under_construction"] + # ) + + buses["substation_off"] = (offshore_b | (hv_b & onshore_b)) & ( + ~buses["under_construction"] + ) + + c_nan_b = buses.country.fillna("na") == "na" + if c_nan_b.sum() > 0: + c_tag = _get_country(buses.loc[c_nan_b]) + c_tag.loc[~c_tag.isin(countries)] = np.nan + n.buses.loc[c_nan_b, "country"] = c_tag + + c_tag_nan_b = n.buses.country.isnull() + + # Nearest country in path length defines country of still homeless buses + # Work-around until commit 705119 lands in pypsa release + n.transformers["length"] = 0.0 + graph = n.graph(weight="length") + n.transformers.drop("length", axis=1, inplace=True) + + for b in n.buses.index[c_tag_nan_b]: + df = ( + pd.DataFrame( + dict( + pathlength=nx.single_source_dijkstra_path_length( + graph, b, cutoff=200 + ) + ) + ) + .join(n.buses.country) + .dropna() + ) + assert ( + not df.empty + ), "No buses with defined country within 200km of bus `{}`".format(b) + n.buses.at[b, "country"] = df.loc[df.pathlength.idxmin(), "country"] + + logger.warning( + "{} buses are not in any country or offshore shape," + " {} have been assigned from the tag of the entsoe map," + " the rest from the next bus in terms of pathlength.".format( + c_nan_b.sum(), c_nan_b.sum() - c_tag_nan_b.sum() + ) + ) + + return buses + + +def _replace_b2b_converter_at_country_border_by_link(n): + # 
Affects only the B2B converter in Lithuania at the Polish border at the moment + buscntry = n.buses.country + linkcntry = n.links.bus0.map(buscntry) + converters_i = n.links.index[ + (n.links.carrier == "B2B") & (linkcntry == n.links.bus1.map(buscntry)) + ] + + def findforeignbus(G, i): + cntry = linkcntry.at[i] + for busattr in ("bus0", "bus1"): + b0 = n.links.at[i, busattr] + for b1 in G[b0]: + if buscntry[b1] != cntry: + return busattr, b0, b1 + return None, None, None + + for i in converters_i: + G = n.graph() + busattr, b0, b1 = findforeignbus(G, i) + if busattr is not None: + comp, line = next(iter(G[b0][b1])) + if comp != "Line": + logger.warning( + "Unable to replace B2B `{}` expected a Line, but found a {}".format( + i, comp + ) + ) + continue + + n.links.at[i, busattr] = b1 + n.links.at[i, "p_nom"] = min( + n.links.at[i, "p_nom"], n.lines.at[line, "s_nom"] + ) + n.links.at[i, "carrier"] = "DC" + n.links.at[i, "underwater_fraction"] = 0.0 + n.links.at[i, "length"] = n.lines.at[line, "length"] + + n.remove("Line", line) + n.remove("Bus", b0) + + logger.info( + "Replacing B2B converter `{}` together with bus `{}` and line `{}` by an HVDC tie-line {}-{}".format( + i, b0, line, linkcntry.at[i], buscntry.at[b1] + ) + ) + + +def _set_links_underwater_fraction(n, offshore_shapes): + if n.links.empty: + return + + if not hasattr(n.links, "geometry"): + n.links["underwater_fraction"] = 0.0 + else: + offshore_shape = gpd.read_file(offshore_shapes).unary_union + links = gpd.GeoSeries(n.links.geometry.dropna().map(shapely.wkt.loads)) + n.links["underwater_fraction"] = ( + links.intersection(offshore_shape).length / links.length + ) + + +def _adjust_capacities_of_under_construction_branches(n, config): + lines_mode = config["lines"].get("under_construction", "undef") + if lines_mode == "zero": + n.lines.loc[n.lines.under_construction, "num_parallel"] = 0.0 + n.lines.loc[n.lines.under_construction, "s_nom"] = 0.0 + elif lines_mode == "remove": + n.mremove("Line", 
n.lines.index[n.lines.under_construction]) + elif lines_mode != "keep": + logger.warning( + "Unrecognized configuration for `lines: under_construction` = `{}`. Keeping under construction lines." + ) + + links_mode = config["links"].get("under_construction", "undef") + if links_mode == "zero": + n.links.loc[n.links.under_construction, "p_nom"] = 0.0 + elif links_mode == "remove": + n.mremove("Link", n.links.index[n.links.under_construction]) + elif links_mode != "keep": + logger.warning( + "Unrecognized configuration for `links: under_construction` = `{}`. Keeping under construction links." + ) + + if lines_mode == "remove" or links_mode == "remove": + # We might need to remove further unconnected components + n = _remove_unconnected_components(n) + + return n + + +def _set_shapes(n, country_shapes, offshore_shapes): + # Write the geodataframes country_shapes and offshore_shapes to the network.shapes component + country_shapes = gpd.read_file(country_shapes).rename(columns={"name": "idx"}) + country_shapes["type"] = "country" + offshore_shapes = gpd.read_file(offshore_shapes).rename(columns={"name": "idx"}) + offshore_shapes["type"] = "offshore" + all_shapes = pd.concat([country_shapes, offshore_shapes], ignore_index=True) + n.madd( + "Shape", + all_shapes.index, + geometry=all_shapes.geometry, + idx=all_shapes.idx, + type=all_shapes["type"], + ) + + +def base_network_osm( + eg_buses, + eg_converters, + eg_transformers, + eg_lines, + links_p_nom, + europe_shape, + country_shapes, + offshore_shapes, + config, +): + buses = _load_buses_from_eg(eg_buses, europe_shape, config["electricity"]) + + + + #TODO pypsa-eur add this + # links = _load_links_from_eg(buses, eg_links) + # if config["links"].get("include_tyndp"): + # buses, links = _add_links_from_tyndp(buses, links, links_tyndp, europe_shape) + + converters = _load_converters_from_eg(buses, eg_converters) + + lines = _load_lines_from_eg(buses, eg_lines) + transformers = _load_transformers_from_eg(buses, 
eg_transformers) + + if config["lines"].get("reconnect_crimea", True) and "UA" in config["countries"]: + lines = _reconnect_crimea(lines) + + lines_ac = lines[lines.tag_frequency.astype(float) != 0].copy() + lines_dc = lines[lines.tag_frequency.astype(float) == 0].copy() + + lines_ac = _set_electrical_parameters_lines( + config["lines"], + config["electricity"]["voltages"], + lines_ac + ) + + lines_dc = _set_electrical_parameters_dc_lines( + config["lines"], + config["electricity"]["voltages"], + lines_dc + ) + + # lines = _set_electrical_parameters_lines(lines, config) + transformers = _set_electrical_parameters_transformers(transformers, config) + # links = _set_electrical_parameters_links(links, config, links_p_nom) + converters = _set_electrical_parameters_converters(converters, config) + + n = pypsa.Network() + n.name = "PyPSA-Eur (OSM)" + + time = get_snapshots(snakemake.params.snapshots, snakemake.params.drop_leap_day) + n.set_snapshots(time) + n.madd("Carrier", ["AC", "DC"]) # TODO: fix hard code and check if AC/DC truly exist + + n.import_components_from_dataframe(buses, "Bus") + + if config["base_network_osm"]["hvdc_as_lines"]: + lines = pd.concat([lines_ac, lines_dc]) + n.import_components_from_dataframe(lines, "Line") + else: + lines_dc = _set_electrical_parameters_links(lines_dc, config, links_p_nom) + # parse line information into p_nom required for converters + lines_dc["p_nom"] = lines_dc.apply( + lambda x: x["v_nom"] * n.line_types.i_nom[x["type"]], + axis=1, + result_type="reduce", + ) + n.import_components_from_dataframe(lines_ac, "Line") + # The columns which names starts with "bus" are mixed up with the third-bus specification + # when executing additional_linkports() + # lines_dc.drop( + # labels=[ + # "bus0_lon", + # "bus0_lat", + # "bus1_lon", + # "bus1_lat", + # "bus_0_coors", + # "bus_1_coors", + # ], + # axis=1, + # inplace=True, + # ) + n.import_components_from_dataframe(lines_dc, "Link") + + # n.import_components_from_dataframe(lines, 
"Line") + n.import_components_from_dataframe(transformers, "Transformer") + # n.import_components_from_dataframe(links, "Link") + n.import_components_from_dataframe(converters, "Link") + + _set_lines_s_nom_from_linetypes(n) + + #TODO pypsa-eur add this + # _apply_parameter_corrections(n, parameter_corrections) + + # TODO: what about this? + n = _remove_unconnected_components(n) + + _set_countries_and_substations(n, config, country_shapes, offshore_shapes) + + #TODO pypsa-eur add this + _set_links_underwater_fraction(n, offshore_shapes) + + _replace_b2b_converter_at_country_border_by_link(n) + + n = _adjust_capacities_of_under_construction_branches(n, config) + + _set_shapes(n, country_shapes, offshore_shapes) + + return n + +def _get_linetypes_config(line_types, voltages): + """ + Return the dictionary of linetypes for selected voltages. The dictionary is + a subset of the dictionary line_types, whose keys match the selected + voltages. + + Parameters + ---------- + line_types : dict + Dictionary of linetypes: keys are nominal voltages and values are linetypes. + voltages : list + List of selected voltages. + + Returns + ------- + Dictionary of linetypes for selected voltages. + """ + # get voltages value that are not availabile in the line types + vnoms_diff = set(voltages).symmetric_difference(set(line_types.keys())) + if vnoms_diff: + logger.warning( + f"Voltages {vnoms_diff} not in the {line_types} or {voltages} list." + ) + return {k: v for k, v in line_types.items() if k in voltages} + +def _get_linetype_by_voltage(v_nom, d_linetypes): + """ + Return the linetype of a specific line based on its voltage v_nom. + + Parameters + ---------- + v_nom : float + The voltage of the line. + d_linetypes : dict + Dictionary of linetypes: keys are nominal voltages and values are linetypes. + + Returns + ------- + The linetype of the line whose nominal voltage is closest to the line voltage. 
+ """ + v_nom_min, line_type_min = min( + d_linetypes.items(), + key=lambda x: abs(x[0] - v_nom), + ) + return line_type_min + + +def voronoi_partition_pts(points, outline): + """ + Compute the polygons of a voronoi partition of `points` within the polygon + `outline`. Taken from + https://github.com/FRESNA/vresutils/blob/master/vresutils/graph.py. + + Attributes + ---------- + points : Nx2 - ndarray[dtype=float] + outline : Polygon + Returns + ------- + polygons : N - ndarray[dtype=Polygon|MultiPolygon] + """ + points = np.asarray(points) + + if len(points) == 1: + polygons = [outline] + else: + xmin, ymin = np.amin(points, axis=0) + xmax, ymax = np.amax(points, axis=0) + xspan = xmax - xmin + yspan = ymax - ymin + + # to avoid any network positions outside all Voronoi cells, append + # the corners of a rectangle framing these points + vor = spatial.Voronoi( + np.vstack( + ( + points, + [ + [xmin - 3.0 * xspan, ymin - 3.0 * yspan], + [xmin - 3.0 * xspan, ymax + 3.0 * yspan], + [xmax + 3.0 * xspan, ymin - 3.0 * yspan], + [xmax + 3.0 * xspan, ymax + 3.0 * yspan], + ], + ) + ) + ) + + polygons = [] + for i in range(len(points)): + poly = Polygon(vor.vertices[vor.regions[vor.point_region[i]]]) + + if not poly.is_valid: + poly = poly.buffer(0) + + with np.errstate(invalid="ignore"): + poly = poly.intersection(outline) + + polygons.append(poly) + + return polygons + + +def build_bus_shapes(n, country_shapes, offshore_shapes, countries): + country_shapes = gpd.read_file(country_shapes).set_index("name")["geometry"] + offshore_shapes = gpd.read_file(offshore_shapes) + offshore_shapes = offshore_shapes.reindex(columns=REGION_COLS).set_index("name")[ + "geometry" + ] + + onshore_regions = [] + offshore_regions = [] + + for country in countries: + c_b = n.buses.country == country + + onshore_shape = country_shapes[country] + onshore_locs = ( + n.buses.loc[c_b & n.buses.onshore_bus] + .sort_values( + by="substation_lv", ascending=False + ) # preference for substations + 
.drop_duplicates(subset=["x", "y"], keep="first")[["x", "y"]] + ) + onshore_regions.append( + gpd.GeoDataFrame( + { + "name": onshore_locs.index, + "x": onshore_locs["x"], + "y": onshore_locs["y"], + "geometry": voronoi_partition_pts( + onshore_locs.values, onshore_shape + ), + "country": country, + } + ) + ) + + if country not in offshore_shapes.index: + continue + offshore_shape = offshore_shapes[country] + offshore_locs = n.buses.loc[c_b & n.buses.substation_off, ["x", "y"]] + offshore_regions_c = gpd.GeoDataFrame( + { + "name": offshore_locs.index, + "x": offshore_locs["x"], + "y": offshore_locs["y"], + "geometry": voronoi_partition_pts(offshore_locs.values, offshore_shape), + "country": country, + } + ) + offshore_regions_c = offshore_regions_c.loc[offshore_regions_c.area > 1e-2] + offshore_regions.append(offshore_regions_c) + + shapes = pd.concat(onshore_regions, ignore_index=True) + + return onshore_regions, offshore_regions, shapes + + +def append_bus_shapes(n, shapes, type): + """ + Append shapes to the network. If shapes with the same component and type + already exist, they will be removed. + + Parameters: + n (pypsa.Network): The network to which the shapes will be appended. + shapes (geopandas.GeoDataFrame): The shapes to be appended. + **kwargs: Additional keyword arguments used in `n.madd`. 
+ + Returns: + None + """ + remove = n.shapes.query("component == 'Bus' and type == @type").index + n.mremove("Shape", remove) + + offset = n.shapes.index.astype(int).max() + 1 if not n.shapes.empty else 0 + shapes = shapes.rename(lambda x: int(x) + offset) + n.madd( + "Shape", + shapes.index, + geometry=shapes.geometry, + idx=shapes.name, + component="Bus", + type=type, + ) + + +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("base_network") + configure_logging(snakemake) + set_scenario_config(snakemake) + + #TODO pypsa-eur add this + # n = base_network( + # snakemake.input.eg_buses, + # snakemake.input.eg_converters, + # snakemake.input.eg_transformers, + # snakemake.input.eg_lines, + # snakemake.input.eg_links, + # snakemake.input.links_p_nom, + # snakemake.input.links_tyndp, + # snakemake.input.europe_shape, + # snakemake.input.country_shapes, + # snakemake.input.offshore_shapes, + # snakemake.input.parameter_corrections, + # snakemake.config, + # ) + + n = base_network_osm( + snakemake.input.eg_buses, + snakemake.input.eg_converters, + snakemake.input.eg_transformers, + snakemake.input.eg_lines, + snakemake.input.links_p_nom, + snakemake.input.europe_shape, + snakemake.input.country_shapes, + snakemake.input.offshore_shapes, + snakemake.config, + ) + + onshore_regions, offshore_regions, shapes = build_bus_shapes( + n, + snakemake.input.country_shapes, + snakemake.input.offshore_shapes, + snakemake.params.countries, + ) + + shapes.to_file(snakemake.output.regions_onshore) + append_bus_shapes(n, shapes, "onshore") + + if offshore_regions: + shapes = pd.concat(offshore_regions, ignore_index=True) + shapes.to_file(snakemake.output.regions_offshore) + append_bus_shapes(n, shapes, "offshore") + else: + offshore_shapes.to_frame().to_file(snakemake.output.regions_offshore) + + n.meta = snakemake.config + n.export_to_netcdf(snakemake.output.base_network) \ No newline at end of file diff 
--git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index bc0e46541..c64a23706 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -1050,12 +1050,15 @@ def build_network( "tag_type", "tag_frequency", "country", "bounds", "bus_0_coors", "bus_1_coors", "bus0_lon", "bus0_lat", "bus1_lon", "bus1_lat"] + cols_lines_csv = ["bus0", "bus1", "voltage", "circuits", "tag_frequency", "length", "underground", "under_construction", "geometry"] + lines_csv = lines[cols_lines_csv] lines = lines[cols_lines] - cols_lines_csv = ["bus_id", "station_id", "voltage", "dc", "symbol", "under_construction", "tags", "x","y"] + + - to_csv_nafix(lines, outputs["lines"]) # Generate CSV - to_csv_nafix(converters, outputs["converters"]) # Generate CSV - to_csv_nafix(transformers, outputs["transformers"]) # Generate CSV + to_csv_nafix(lines_csv, outputs["lines"], quotechar="'") # Generate CSV + to_csv_nafix(converters, outputs["converters"], quotechar="'") # Generate CSV + to_csv_nafix(transformers, outputs["transformers"], quotechar="'") # Generate CSV colstodrop = ["bounds", "bus_0_coors", "bus_1_coors"] @@ -1068,7 +1071,7 @@ def build_network( if not os.path.exists(outputs["substations"]): os.makedirs(os.path.dirname(outputs["substations"]), exist_ok=True) # Generate CSV - to_csv_nafix(buses, outputs["substations"]) + to_csv_nafix(buses, outputs["substations"], quotechar="'") save_to_geojson(gpd.GeoDataFrame(buses, geometry = "geometry", crs = geo_crs), outputs["substations_geojson"]) return None diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index a87e30823..c7fe13f46 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -386,7 +386,6 @@ def add_line_endings_tosubstations(substations, lines): snakemake = mock_snakemake("clean_osm_data") configure_logging(snakemake) - logger.info("Dummy log: clean_osm_data()") ############# LINES AND CABLES ###################### @@ -401,6 +400,7 @@ def 
add_line_endings_tosubstations(substations, lines): # using tqdm loop over input path + logger.info("Importing lines and cables") for key in input_path_lines_cables: logger.info(f"Processing {key}...") for idx, ip in enumerate(input_path_lines_cables[key]): @@ -436,6 +436,7 @@ def add_line_endings_tosubstations(substations, lines): continue logger.info("---") + logger.info("Cleaning lines and cables") # Find duplicates based on id column duplicate_rows = df_lines[df_lines.duplicated(subset=['id'], keep=False)].copy() # group rows by id and aggregate the country column to a string split by semicolon @@ -645,6 +646,7 @@ def add_line_endings_tosubstations(substations, lines): df_substations_way = pd.DataFrame(columns = cols_substations_way) df_substations_relation = pd.DataFrame(columns = cols_substations_relation) + logger.info("Importing substations") for key in input_path_substations: logger.info(f"Processing {key}...") for idx, ip in enumerate(input_path_substations[key]): @@ -732,6 +734,7 @@ def add_line_endings_tosubstations(substations, lines): df_substations.loc[:, "lon"] = df_substations["geometry"].apply(lambda x: x.x) df_substations.loc[:, "lat"] = df_substations["geometry"].apply(lambda x: x.y) + logger.info("Cleaning substations") # Clean columns df_substations["voltage"] = _clean_voltage(df_substations["voltage"]) df_substations["frequency"] = _clean_frequency(df_substations["frequency"]) @@ -771,11 +774,11 @@ def add_line_endings_tosubstations(substations, lines): df_substations["tag_area"] = None df_substations["tag_source"] = df_substations["id"] - # Create an empty list to store the results results = [] - for index, row in tqdm(gdf_lines.iterrows(), total=len(gdf_lines), desc="Processing LineStrings"): + logger.info("Removing linestrings within substation polygons...") + for index, row in tqdm(gdf_lines.iterrows(), total=len(gdf_lines)): line = row['geometry'] # Check if the LineString is within any Polygon in 'substations_df' 
is_within_any_substation = any(line.within(substation_polygon) for substation_polygon in df_substations["polygon"]) @@ -804,9 +807,9 @@ def add_line_endings_tosubstations(substations, lines): # Create the folder and its parent directories if they don't exist os.makedirs(parentfolder_lines) + logger.info(f"Exporting clean lines to {filepath_lines}") gdf_lines.to_file(filepath_lines, driver="GeoJSON") - # rename columns df_substations.rename( columns={ @@ -833,6 +836,7 @@ def add_line_endings_tosubstations(substations, lines): df_substations["bus_id"] = df_substations.index + logger.info("Adding line endings to substations") df_substations = add_line_endings_tosubstations( df_substations, gdf_lines ) @@ -855,5 +859,6 @@ def add_line_endings_tosubstations(substations, lines): # Create the folder and its parent directories if they don't exist os.makedirs(parentfolder_substations) + logger.info(f"Exporting clean substations to {filepath_substations}") gdf_substations.to_file(filepath_substations, driver="GeoJSON") \ No newline at end of file diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 558e4cf28..ceefb3dda 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -108,7 +108,7 @@ logger = logging.getLogger(__name__) -def simplify_network_to_380(n): +def simplify_network_to_380(n, linetype_380): """ Fix all lines to a voltage level of 380 kV and remove all transformers. 
@@ -124,7 +124,7 @@ def simplify_network_to_380(n): n.buses["v_nom"] = 380.0 - (linetype_380,) = n.lines.loc[n.lines.v_nom == 380.0, "type"].unique() + # TODO pypsa-eur: In the future, make this even more generic (voltage level) n.lines["type"] = linetype_380 n.lines["v_nom"] = 380 n.lines["i_nom"] = n.line_types.i_nom[linetype_380] @@ -536,7 +536,8 @@ def cluster( # remove integer outputs for compatibility with PyPSA v0.26.0 n.generators.drop("n_mod", axis=1, inplace=True, errors="ignore") - n, trafo_map = simplify_network_to_380(n) + linetype_380 = snakemake.config["lines"]["types"][380] + n, trafo_map = simplify_network_to_380(n, linetype_380) technology_costs = load_costs( snakemake.input.tech_costs, From 6264bf8ae8f7395ce2d9dfae75414611fad36d22 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Tue, 7 May 2024 22:44:34 +0200 Subject: [PATCH 010/100] Bug fixes. --- scripts/base_network_osm.py | 2 +- scripts/clean_osm_data.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/base_network_osm.py b/scripts/base_network_osm.py index 874c778fe..64e059b59 100644 --- a/scripts/base_network_osm.py +++ b/scripts/base_network_osm.py @@ -157,7 +157,7 @@ def _load_buses_from_eg(eg_buses, europe_shape, config_elec): # remove all buses outside of all countries including exclusive economic zones (offshore) europe_shape = gpd.read_file(europe_shape).loc[0, "geometry"] # TODO pypsa-eur: Temporary fix: Convex hull, this is important when nodes are between countries - europe_shape = europe_shape.convex_hull + # europe_shape = europe_shape.convex_hull europe_shape_prepped = shapely.prepared.prep(europe_shape) buses_in_europe_b = buses[["x", "y"]].apply( diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index c7fe13f46..ae51cfe19 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -777,7 +777,7 @@ def add_line_endings_tosubstations(substations, lines): # Create an empty list to store the results results = [] - 
logger.info("Removing linestrings within substation polygons...") + logger.info("Identifying and removing lines within substation polygons...") for index, row in tqdm(gdf_lines.iterrows(), total=len(gdf_lines)): line = row['geometry'] # Check if the LineString is within any Polygon in 'substations_df' @@ -798,6 +798,7 @@ def add_line_endings_tosubstations(substations, lines): # fig.add_child(m) # m gdf_lines = gdf_lines[~gdf_lines["within_substation"]] + logger.info(f"Removed {sum(results)} lines within substations.") filepath_lines = snakemake.output["lines"] # save substations output From 027fbdee6d793b27a1f64878b4b9d0abd8278797 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Wed, 15 May 2024 14:20:40 +0200 Subject: [PATCH 011/100] Finalised and cleaned including docstrings. --- scripts/retrieve_osm_data.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py index 9a4526a5f..91bf1782e 100644 --- a/scripts/retrieve_osm_data.py +++ b/scripts/retrieve_osm_data.py @@ -18,6 +18,20 @@ def _get_overpass_areas(countries): + """ + Retrieve the OSM area codes for the specified country codes. + + Parameters + ---------- + countries : str or list + A single country code or a list of country codes for which the OSM area codes should be retrieved. + + Returns + ------- + dict + A dictionary mapping country codes to their corresponding OSM area codes. + """ + # If a single country code is provided, convert it to a list if not isinstance(countries, list): countries = [countries] @@ -67,10 +81,26 @@ def retrieve_osm_data( "substations_way", "substations_node", "substations_relation", - # "transformers_way", - # "transformers_node", - # "route_relations", ]): + """ + Retrieve OSM data for the specified country and save it to the specified output files. + + Parameters + ---------- + country : str + The country code for which the OSM data should be retrieved. 
+ output : dict + A dictionary mapping feature names to the corresponding output file paths. Saving the OSM data to .json files. + features : list, optional + A list of OSM features to retrieve. The default is [ + "cables_way", + "lines_way", + "substations_way", + "substations_node", + "substations_relation", + ]. + """ + op_area = _get_overpass_areas(country) From dc829d2ec8ff04e9edd29e8417794837c0925e9d Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Wed, 15 May 2024 18:59:32 +0200 Subject: [PATCH 012/100] Added try catch to retrieve_osm_data. Allows for parallelisation of downloads. --- rules/build_electricity.smk | 95 +++++++++++----------- scripts/base_network_osm.py | 68 ++++++---------- scripts/build_osm_network.py | 21 +++-- scripts/clean_osm_data.py | 25 ++---- scripts/retrieve_osm_data.py | 150 ++++++++++++++++++++++------------- 5 files changed, 185 insertions(+), 174 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index ec47e1cbe..b4b53b621 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -64,7 +64,8 @@ rule build_powerplants: script: "../scripts/build_powerplants.py" -if config["base_network"] == "eegk": + +if config["electricity_network"]["base_network"] == "gridkit": rule base_network: params: countries=config_provider("countries"), @@ -102,7 +103,7 @@ if config["base_network"] == "eegk": "../scripts/base_network.py" -if config["base_network"] == "osm": +if config["electricity_network"]["base_network"] == "osm": rule base_network: params: countries=config_provider("countries"), @@ -112,12 +113,11 @@ if config["base_network"] == "osm": links=config_provider("links"), transformers=config_provider("transformers"), input: - eg_buses="data/osm/buses.csv", - eg_lines="data/osm/lines.csv", + eg_buses=resources("osm/buses.csv"), + eg_lines=resources("osm/lines.csv"), # eg_links="data/entsoegridkit/links.csv", - eg_converters="data/osm/converters.csv", - 
eg_transformers="data/osm/transformers.csv", - # parameter_corrections="data/parameter_corrections.yaml", + eg_converters=resources("osm/converters.csv"), + eg_transformers=resources("osm/transformers.csv"), links_p_nom="data/links_p_nom.csv", links_tyndp="data/links_tyndp_osm.csv", country_shapes=resources("country_shapes.geojson"), @@ -609,53 +609,56 @@ rule prepare_network: "../scripts/prepare_network.py" -if config["osm"].get("retrieve", True): - rule retrieve_osm_data: - output: - cables_way="data/osm/raw/{country}/cables_way_raw.json", - lines_way="data/osm/raw/{country}/lines_way_raw.json", - substations_way="data/osm/raw/{country}/substations_way_raw.json", - substations_node="data/osm/raw/{country}/substations_node_raw.json", - substations_relation="data/osm/raw/{country}/substations_relation_raw.json", - log: - logs("retrieve_osm_data_{country}.log"), - script: - "../scripts/retrieve_osm_data.py" +rule retrieve_osm_data: + output: + cables_way="data/osm/raw/{country}/cables_way.json", + lines_way="data/osm/raw/{country}/lines_way.json", + substations_way="data/osm/raw/{country}/substations_way.json", + substations_node="data/osm/raw/{country}/substations_node.json", + substations_relation="data/osm/raw/{country}/substations_relation.json", + log: + logs("retrieve_osm_data_{country}.log"), + script: + "../scripts/retrieve_osm_data.py" rule clean_osm_data: input: - cables_way=[f"data/osm/raw/{country}/cables_way_raw.json" for country in config["countries"]], - lines_way=[f"data/osm/raw/{country}/lines_way_raw.json" for country in config["countries"]], - substations_way=[f"data/osm/raw/{country}/substations_way_raw.json" for country in config["countries"]], - substations_node=[f"data/osm/raw/{country}/substations_node_raw.json" for country in config["countries"]], - substations_relation=[f"data/osm/raw/{country}/substations_relation_raw.json" for country in config["countries"]], + cables_way=[f"data/osm/raw/{country}/cables_way.json" for country in 
config["countries"]], + lines_way=[f"data/osm/raw/{country}/lines_way.json" for country in config["countries"]], + substations_way=[f"data/osm/raw/{country}/substations_way.json" for country in config["countries"]], + substations_node=[f"data/osm/raw/{country}/substations_node.json" for country in config["countries"]], + substations_relation=[f"data/osm/raw/{country}/substations_relation.json" for country in config["countries"]], output: - substations="data/osm/clean/substations.geojson", - lines="data/osm/clean/lines.geojson", + substations=resources("osm/clean/substations.geojson"), + lines=resources("osm/clean/lines.geojson"), log: logs("clean_osm_data.log"), script: "../scripts/clean_osm_data.py" -rule build_osm_network: - input: - substations="data/osm/clean/substations.geojson", - lines="data/osm/clean/lines.geojson", - country_shapes=resources("country_shapes.geojson"), - output: - lines="data/osm/lines.csv", - converters="data/osm/converters.csv", - transformers="data/osm/transformers.csv", - substations="data/osm/buses.csv", - lines_geojson="data/osm/lines.geojson", - converters_geojson="data/osm/converters.geojson", - transformers_geojson="data/osm/transformers.geojson", - substations_geojson="data/osm/buses.geojson", - log: - logs("build_osm_network.log"), - benchmark: - benchmarks("build_osm_network") - script: - "../scripts/build_osm_network.py" \ No newline at end of file +if config["electricity_network"]["build_osm_network"] == True: + rule build_osm_network: + input: + substations=resources("osm/clean/substations.geojson"), + lines=resources("osm/clean/lines.geojson"), + country_shapes=resources("country_shapes.geojson"), + output: + lines=resources("osm/lines.csv"), + converters=resources("osm/converters.csv"), + transformers=resources("osm/transformers.csv"), + substations=resources("osm/buses.csv"), + lines_geojson=resources("osm/lines.geojson"), + converters_geojson=resources("osm/converters.geojson"), + 
transformers_geojson=resources("osm/transformers.geojson"), + substations_geojson=resources("osm/buses.geojson"), + log: + logs("build_osm_network.log"), + benchmark: + benchmarks("build_osm_network") + script: + "../scripts/build_osm_network.py" + +if config["electricity_network"]["build_osm_network"] == False: + print("Use prebuilt.") \ No newline at end of file diff --git a/scripts/base_network_osm.py b/scripts/base_network_osm.py index 64e059b59..44b2636d6 100644 --- a/scripts/base_network_osm.py +++ b/scripts/base_network_osm.py @@ -836,33 +836,29 @@ def base_network_osm( n.import_components_from_dataframe(buses, "Bus") - if config["base_network_osm"]["hvdc_as_lines"]: - lines = pd.concat([lines_ac, lines_dc]) - n.import_components_from_dataframe(lines, "Line") - else: - lines_dc = _set_electrical_parameters_links(lines_dc, config, links_p_nom) - # parse line information into p_nom required for converters - lines_dc["p_nom"] = lines_dc.apply( - lambda x: x["v_nom"] * n.line_types.i_nom[x["type"]], - axis=1, - result_type="reduce", - ) - n.import_components_from_dataframe(lines_ac, "Line") - # The columns which names starts with "bus" are mixed up with the third-bus specification - # when executing additional_linkports() - # lines_dc.drop( - # labels=[ - # "bus0_lon", - # "bus0_lat", - # "bus1_lon", - # "bus1_lat", - # "bus_0_coors", - # "bus_1_coors", - # ], - # axis=1, - # inplace=True, - # ) - n.import_components_from_dataframe(lines_dc, "Link") + lines_dc = _set_electrical_parameters_links(lines_dc, config, links_p_nom) + # parse line information into p_nom required for converters + lines_dc["p_nom"] = lines_dc.apply( + lambda x: x["v_nom"] * n.line_types.i_nom[x["type"]], + axis=1, + result_type="reduce", + ) + n.import_components_from_dataframe(lines_ac, "Line") + # The columns which names starts with "bus" are mixed up with the third-bus specification + # when executing additional_linkports() + # lines_dc.drop( + # labels=[ + # "bus0_lon", + # 
"bus0_lat", + # "bus1_lon", + # "bus1_lat", + # "bus_0_coors", + # "bus_1_coors", + # ], + # axis=1, + # inplace=True, + # ) + n.import_components_from_dataframe(lines_dc, "Link") # n.import_components_from_dataframe(lines, "Line") n.import_components_from_dataframe(transformers, "Transformer") @@ -1084,22 +1080,6 @@ def append_bus_shapes(n, shapes, type): configure_logging(snakemake) set_scenario_config(snakemake) - #TODO pypsa-eur add this - # n = base_network( - # snakemake.input.eg_buses, - # snakemake.input.eg_converters, - # snakemake.input.eg_transformers, - # snakemake.input.eg_lines, - # snakemake.input.eg_links, - # snakemake.input.links_p_nom, - # snakemake.input.links_tyndp, - # snakemake.input.europe_shape, - # snakemake.input.country_shapes, - # snakemake.input.offshore_shapes, - # snakemake.input.parameter_corrections, - # snakemake.config, - # ) - n = base_network_osm( snakemake.input.eg_buses, snakemake.input.eg_converters, @@ -1112,6 +1092,8 @@ def append_bus_shapes(n, shapes, type): snakemake.config, ) + logger.info("Base network created using OSM.") + onshore_regions, offshore_regions, shapes = build_bus_shapes( n, snakemake.input.country_shapes, diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index c64a23706..5eccfad4e 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -993,16 +993,13 @@ def build_network( buses = add_buses_to_empty_countries(countries_config, inputs.country_shapes, buses) # METHOD to merge buses with same voltage and within tolerance Step 4/5 - if build_osm_network_config.get("group_close_buses", False): - tol = build_osm_network_config.get("group_tolerance_buses", 5000) - logger.info( - f"Stage 4/5: Aggregate close substations: enabled with tolerance {tol} m" - ) - lines, buses = merge_stations_lines_by_station_id_and_voltage( - lines, buses, geo_crs, distance_crs, tol=tol - ) - else: - logger.info("Stage 4/5: Aggregate close substations: disabled") + tol = 
build_osm_network_config.get("group_tolerance_buses", 5000) + logger.info( + f"Stage 4/5: Aggregate close substations: enabled with tolerance {tol} m" + ) + lines, buses = merge_stations_lines_by_station_id_and_voltage( + lines, buses, geo_crs, distance_crs, tol=tol + ) logger.info("Stage 5/5: Add augmented substation to country with no data") @@ -1133,8 +1130,8 @@ def are_almost_equal(point1, point2, tolerance=1e-6): configure_logging(snakemake) # load default crs - geo_crs = snakemake.config["crs"]["geo_crs"] - distance_crs = snakemake.config["crs"]["distance_crs"] + geo_crs = "EPSG:4326" + distance_crs = "EPSG:3035" build_osm_network = snakemake.config["build_osm_network"] countries = snakemake.config["countries"] diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index ae51cfe19..eb03a8e28 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -6,8 +6,6 @@ TODO To fill later """ -from branca.element import Figure -import folium import geopandas as gpd import json import logging @@ -15,7 +13,7 @@ import numpy as np import pandas as pd import re -from shapely.geometry import LineString, Point, Polygon +from shapely.geometry import LineString, Polygon from shapely.ops import linemerge from tqdm import tqdm @@ -37,7 +35,8 @@ def _create_polygon(row): Create a Shapely Polygon from a list of coordinate dictionaries. Parameters: - coords (list): List of dictionaries with 'lat' and 'lon' keys representing coordinates. + coords (list): List of dictionaries with 'lat' and 'lon' keys + representing coordinates. Returns: shapely.geometry.Polygon: The constructed polygon object. 
@@ -108,7 +107,8 @@ def _clean_voltage(column): def _clean_circuits(column): """ - Function to clean the raw circuits column: manual fixing and drop nan values + Function to clean the raw circuits column: manual fixing and drop nan + values Args: - column: pandas Series, the column to be cleaned @@ -123,7 +123,7 @@ def _clean_circuits(column): .str.replace("partial", "") .str.replace("1operator=RTE operator:wikidata=Q2178795", "") .str.lower() - .str.replace("1,5", "3") # (way 998005838, should be corrected in OSM soon) + .str.replace("1,5", "3") .str.replace("1/3", "1") .str.replace("", "") .str.replace("nan", "") @@ -221,7 +221,8 @@ def _check_voltage(voltage, list_voltages): def _clean_frequency(column): column = column.copy() """ - Function to clean the raw frequency column: manual fixing and drop nan values + Function to clean the raw frequency column: manual fixing and drop nan + values Args: - column: pandas Series, the column to be cleaned @@ -787,16 +788,6 @@ def add_line_endings_tosubstations(substations, lines): # Add the results to 'gdf_lines' gdf_lines['within_substation'] = results - # gdf_sub = gpd.GeoDataFrame(df_substations[["id", "polygon"]], geometry = "polygon", crs = "EPSG:4326") - # fig = Figure(width = "70%", height = 600) - - # m = gdf_sub.explore(name = "Subs", color = "red") - # m = gdf_lines.explore(m = m, name = "lines") - - # folium.LayerControl(collapsed = False).add_to(m) - - # fig.add_child(m) - # m gdf_lines = gdf_lines[~gdf_lines["within_substation"]] logger.info(f"Removed {sum(results)} lines within substations.") diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py index 91bf1782e..77cc398c5 100644 --- a/scripts/retrieve_osm_data.py +++ b/scripts/retrieve_osm_data.py @@ -2,13 +2,16 @@ # SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors # # SPDX-License-Identifier: MIT + """ -TODO To fill later +Retrieve OSM data for the specified country using the overpass API and save it +to the specified output 
files. Note that overpass requests are based on a fair +use policy. `retrieve_osm_data` is meant to be used in a way that respects this +policy by fetching the needed data once, only. """ import json import logging -# import overpass as op import os import requests import time @@ -17,6 +20,8 @@ logger = logging.getLogger(__name__) +# Function currently not needed - Kept for backup purposes to retrieve the OSM +# area code if needed in the future def _get_overpass_areas(countries): """ Retrieve the OSM area codes for the specified country codes. @@ -24,12 +29,14 @@ def _get_overpass_areas(countries): Parameters ---------- countries : str or list - A single country code or a list of country codes for which the OSM area codes should be retrieved. + A single country code or a list of country codes for which the OSM area + codes should be retrieved. Returns ------- dict - A dictionary mapping country codes to their corresponding OSM area codes. + A dictionary mapping country codes to their corresponding OSM area + codes. 
""" # If a single country code is provided, convert it to a list @@ -51,22 +58,28 @@ def _get_overpass_areas(countries): # Send the request to Overpass API response = requests.post(overpass_url, data=overpass_query) - # Parse the response - data = response.json() - - # Check if the response contains any results - if "elements" in data and len(data["elements"]) > 0: - # Extract the area ID from the relation - if c == "FR": # take second one for France - osm_area_id = data["elements"][1]["id"] + try: + # Parse the response + data = response.json() + + # Check if the response contains any results + if "elements" in data and len(data["elements"]) > 0: + # Extract the area ID from the relation + if c == "FR": # take second one for France + osm_area_id = data["elements"][1]["id"] + else: + osm_area_id = data["elements"][0]["id"] + osm_areas.append(f"area({osm_area_id})") else: - osm_area_id = data["elements"][0]["id"] - osm_areas.append(f"area({osm_area_id})") - else: - # Print a warning if no results are found for the country code - logger.info(f"No area code found for the specified country code: {c}. Ommitted from the list.") + # Print a warning if no results are found for the country code + logger.info(f"No area code found for the specified country " + f"code: {c}. Omitted from the list.") + except json.JSONDecodeError as e: + logger.error(f"JSON decode error for country {c}: {e}") + logger.debug(f"Response text: {response.text}") - # Create a dictionary mapping country codes to their corresponding OSM area codes + # Create a dictionary mapping country codes to their corresponding OSM area + # codes op_areas_dict = dict(zip(countries, osm_areas)) return op_areas_dict @@ -83,14 +96,16 @@ def retrieve_osm_data( "substations_relation", ]): """ - Retrieve OSM data for the specified country and save it to the specified output files. + Retrieve OSM data for the specified country and save it to the specified + output files. 
Parameters ---------- country : str The country code for which the OSM data should be retrieved. output : dict - A dictionary mapping feature names to the corresponding output file paths. Saving the OSM data to .json files. + A dictionary mapping feature names to the corresponding output file + paths. Saving the OSM data to .json files. features : list, optional A list of OSM features to retrieve. The default is [ "cables_way", @@ -100,13 +115,13 @@ def retrieve_osm_data( "substations_relation", ]. """ - - - op_area = _get_overpass_areas(country) - # Overpass API endpoint URL overpass_url = "https://overpass-api.de/api/interpreter" + # More features can in theory be retrieved that are currently not needed + # to build a functioning network. The following power-related + # features are supported: + # features_dict= { # 'cables_way': 'way["power"="cable"]', # 'lines_way': 'way["power"="line"]', @@ -125,41 +140,68 @@ def retrieve_osm_data( 'substations_relation': 'relation["power"="substation"]', } + wait_time = 5 + for f in features: if f not in features_dict: - raise ValueError(f"Invalid feature: {f}. Supported features: {list(features_dict.keys())}") logger.info(f"Invalid feature: {f}. Supported features: {list(features_dict.keys())}") + raise ValueError(f"Invalid feature: {f}. Supported features: {list(features_dict.keys())}") - logger.info(f" - Fetching OSM data for feature '{f}' in {country}...") - # Build the overpass query - op_query = f''' - [out:json]; - {op_area[country]}->.searchArea; - ( - {features_dict[f]}(area.searchArea); - ); - out body geom; - ''' - - # Send the request - response = requests.post(overpass_url, data = op_query) - # response = op.API(timeout=300).get(op_query) # returns data in geojson format. Timeout (max.) 
set to 300s - - filepath = output[f] - parentfolder = os.path.dirname(filepath) - if not os.path.exists(parentfolder): - # Create the folder and its parent directories if they don't exist - os.makedirs(parentfolder) - - with open(filepath, mode = "w") as f: - # geojson.dump(response,f,indent=2) - json.dump(response.json(),f,indent=2) - logger.info(" - Done.") - # time.sleep(5) + retries = 3 + for attempt in range(retries): + logger.info(f" - Fetching OSM data for feature '{f}' in {country} (Attempt {attempt+1})...") + + # Build the overpass query + op_area = f'area["ISO3166-1"="{country}"]' + op_query = f''' + [out:json]; + {op_area}->.searchArea; + ( + {features_dict[f]}(area.searchArea); + ); + out body geom; + ''' + try: + # Send the request + response = requests.post(overpass_url, data = op_query) + response.raise_for_status() # Raise HTTPError for bad responses + data = response.json() + + filepath = output[f] + parentfolder = os.path.dirname(filepath) + if not os.path.exists(parentfolder): + os.makedirs(parentfolder) + + with open(filepath, mode = "w") as f: + json.dump(response.json(),f,indent=2) + logger.info(" - Done.") + break # Exit the retry loop on success + except (json.JSONDecodeError, requests.exceptions.RequestException) as e: + logger.error(f"Error for feature '{f}' in country {country}: {e}") + logger.debug(f"Response text: {response.text if response else 'No response'}") + if attempt < retries - 1: + wait_time += 10 + logger.info(f"Waiting {wait_time} seconds before retrying...") + time.sleep(wait_time) + else: + logger.error( + f"Failed to retrieve data for feature '{f}' in country {country} after {retries} attempts." + ) + except Exception as e: + # For now, catch any other exceptions and log them. Treat this + # the same as a RequestException and try to run again two times. 
+ logger.error(f"Unexpected error for feature '{f}' in country {country}: {e}") + if attempt < retries - 1: + wait_time += 10 + logger.info(f"Waiting {wait_time} seconds before retrying...") + time.sleep(wait_time) + else: + logger.error( + f"Failed to retrieve data for feature '{f}' in country {country} after {retries} attempts." + ) if __name__ == "__main__": - # Detect running outside of snakemake and mock snakemake for testing if "snakemake" not in globals(): from _helpers import mock_snakemake @@ -171,8 +213,4 @@ def retrieve_osm_data( country = snakemake.wildcards.country output = snakemake.output - # Wait 5 seconds before fetching the OSM data to prevent too many requests error - # TODO pypsa-eur: Add try catch to implement this only when needed - logger.info(f"Waiting 5 seconds... Retrieving OSM data for {country}:") - time.sleep(5) retrieve_osm_data(country, output) \ No newline at end of file From 7bb153bbd8e08d32f23f330be4ddaf7f64d220eb Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Wed, 15 May 2024 22:26:56 +0200 Subject: [PATCH 013/100] Updated cleaning process. 
--- rules/build_electricity.smk | 1 + scripts/clean_osm_data.py | 54 +++++++++++++++++++++++++++++-------- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index b4b53b621..a5f767b09 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -631,6 +631,7 @@ rule clean_osm_data: substations_relation=[f"data/osm/raw/{country}/substations_relation.json" for country in config["countries"]], output: substations=resources("osm/clean/substations.geojson"), + substations_polygon=resources("osm/clean/substations_polygon.geojson"), lines=resources("osm/clean/lines.geojson"), log: logs("clean_osm_data.log"), diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index eb03a8e28..71bad62cb 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -775,21 +775,53 @@ def add_line_endings_tosubstations(substations, lines): df_substations["tag_area"] = None df_substations["tag_source"] = df_substations["id"] - # Create an empty list to store the results - results = [] + gdf_substations_polygon = gpd.GeoDataFrame( + df_substations[["id", "polygon"]], + geometry = "polygon", + crs = "EPSG:4326" + ) + + filepath_substations_polygon = snakemake.output["substations_polygon"] + # save substations output + logger.info(f"Exporting clean substations with polygon shapes to {filepath_substations_polygon}") + parentfolder_substations_polygon = os.path.dirname(filepath_substations_polygon) + if not os.path.exists(parentfolder_substations_polygon): + # Create the folder and its parent directories if they don't exist + os.makedirs(parentfolder_substations_polygon) + + logger.info(f"Exporting clean substations to {filepath_substations_polygon}") + gdf_substations_polygon.to_file(filepath_substations_polygon, driver="GeoJSON") + logger.info("Identifying and removing lines within substation polygons...") - for index, row in tqdm(gdf_lines.iterrows(), total=len(gdf_lines)): - line 
= row['geometry'] - # Check if the LineString is within any Polygon in 'substations_df' - is_within_any_substation = any(line.within(substation_polygon) for substation_polygon in df_substations["polygon"]) - results.append(is_within_any_substation) + lines_within_substations = gpd.sjoin( + gdf_lines[["line_id", "geometry"]], + gdf_substations_polygon, + how = "inner", + predicate = "within" + )["line_id"] + + logger.info(f"Removed {len(lines_within_substations)}/{len(gdf_lines)} lines within substations.") + gdf_lines = gdf_lines[~gdf_lines["line_id"].isin(lines_within_substations)] + + # # Create an empty list to store the results + # results = [] + + # subset a to find only country equal to "BE" + # a[a["country"] == "BE"] + + # logger.info("Identifying and removing lines within substation polygons...") + # for index, row in tqdm(gdf_lines.iterrows(), total=len(gdf_lines)): + # line = row['geometry'] + # # Check if the LineString is within any Polygon in 'substations_df' + # is_within_any_substation = any(line.within(substation_polygon) for substation_polygon in df_substations["polygon"]) + # results.append(is_within_any_substation) - # Add the results to 'gdf_lines' - gdf_lines['within_substation'] = results + # # Add the results to 'gdf_lines' + # gdf_lines['within_substation'] = results - gdf_lines = gdf_lines[~gdf_lines["within_substation"]] - logger.info(f"Removed {sum(results)} lines within substations.") + # gdf_lines = gdf_lines[~gdf_lines["within_substation"]] + # logger.info(f"Removed {sum(results)} lines within substations.") filepath_lines = snakemake.output["lines"] # save substations output From 5ef4d71c9395178e26ad79558ec2f3e321ff421c Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Wed, 15 May 2024 23:02:02 +0200 Subject: [PATCH 014/100] Set maximum number of threads for retrieving to 4, wrt. fair usage policy and potential request errors. 
--- rules/build_electricity.smk | 1 + 1 file changed, 1 insertion(+) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index a5f767b09..dce396119 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -618,6 +618,7 @@ rule retrieve_osm_data: substations_relation="data/osm/raw/{country}/substations_relation.json", log: logs("retrieve_osm_data_{country}.log"), + threads: 4 script: "../scripts/retrieve_osm_data.py" From f961ab8ed5932dd8063c7debfc287518dc29a26f Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Thu, 16 May 2024 14:21:32 +0200 Subject: [PATCH 015/100] Intermediate update on clean_osm_data.py. Added docstrings. --- rules/build_electricity.smk | 3 +- scripts/clean_osm_data.py | 345 +++++++++++++++++++++++++++-------- scripts/retrieve_osm_data.py | 2 +- 3 files changed, 272 insertions(+), 78 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index dce396119..249c9d843 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -618,7 +618,8 @@ rule retrieve_osm_data: substations_relation="data/osm/raw/{country}/substations_relation.json", log: logs("retrieve_osm_data_{country}.log"), - threads: 4 + resources: + cores = 2, threads: 1 script: "../scripts/retrieve_osm_data.py" diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 71bad62cb..f3687995f 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -3,7 +3,26 @@ # # SPDX-License-Identifier: MIT """ -TODO To fill later +This script is used to clean OpenStreetMap (OSM) data for the PyPSA-Eur +project. + +The script performs various cleaning operations on the OSM data, including: +- Cleaning voltage, circuits, cables, wires, and frequency columns +- Splitting semicolon-separated cells into new rows +- Distributing values to circuits based on the number of splits +- Adding line endings to substations based on line data + +The cleaned data is then written to an output file. 
+ +Usage: + python clean_osm_data.py + +Arguments: + output_file (str): The path to the output file where the cleaned data will + be written. + +Example: + python clean_osm_data.py cleaned_data.csv """ import geopandas as gpd @@ -15,17 +34,22 @@ import re from shapely.geometry import LineString, Polygon from shapely.ops import linemerge -from tqdm import tqdm from _helpers import configure_logging logger = logging.getLogger(__name__) -def clean_osm_data(output): - with open(output, "w") as file: - file.write("Hello, world!\n") - def _create_linestring(row): + """ + Create a LineString object from the given row. + + Args: + row (dict): A dictionary containing the row data. + + Returns: + LineString: A LineString object representing the geometry. + + """ coords = [(coord['lon'], coord['lat']) for coord in row["geometry"]] return LineString(coords) @@ -64,6 +88,7 @@ def _clean_voltage(column): Returns: - column: pandas Series, the cleaned column """ + logger.info("Cleaning voltages.") column = column.copy() column = ( @@ -116,6 +141,7 @@ def _clean_circuits(column): Returns: - column: pandas Series, the cleaned column """ + logger.info("Cleaning circuits.") column = column.copy() column = ( column @@ -146,6 +172,7 @@ def _clean_cables(column): Returns: - column: pandas Series, the cleaned column """ + logger.info("Cleaning cables.") column = column.copy() column = ( column @@ -174,6 +201,7 @@ def _clean_wires(column): Returns: - column: pandas Series, the cleaned column """ + logger.info("Cleaning wires.") column = column.copy() column = ( column @@ -202,15 +230,18 @@ def _clean_wires(column): return column.astype(str) -def _set_frequency(column): - column = column.copy() - to_fifty = column.astype(str) != "0" - column[to_fifty] = "50" - - return column +def _check_voltage(voltage, list_voltages): + """ + Check if the given voltage is present in the list of allowed voltages. + Parameters: + voltage (str): The voltage to check. 
+ list_voltages (list): A list of allowed voltages. -def _check_voltage(voltage, list_voltages): + Returns: + bool: True if the voltage is present in the list of allowed voltages, + False otherwise. + """ voltages = voltage.split(';') for v in voltages: if v in list_voltages: @@ -219,7 +250,6 @@ def _check_voltage(voltage, list_voltages): def _clean_frequency(column): - column = column.copy() """ Function to clean the raw frequency column: manual fixing and drop nan values @@ -230,6 +260,7 @@ def _clean_frequency(column): Returns: - column: pandas Series, the cleaned column """ + logger.info("Cleaning frequencies.") column = column.copy() column = ( column @@ -277,7 +308,8 @@ def _split_cells(df, cols=["voltage"]): # Create a dictionary to store the suffix count for each original ID suffix_counts = {} - # Create a dictionary to store the number of splits associated with each original ID + # Create a dictionary to store the number of splits associated with each + # original ID num_splits = {} # Split cells and create new rows @@ -290,7 +322,8 @@ def _split_cells(df, cols=["voltage"]): # Update the 'split_elements' column x["split_elements"] = x["id"].map(num_splits) - # Function to generate the new ID with suffix and update the number of splits + # Function to generate the new ID with suffix and update the number of + # splits def generate_new_id(row): original_id = row["id"] if row["split_elements"] == 1: @@ -306,6 +339,19 @@ def generate_new_id(row): def _distribute_to_circuits(row): + """ + Distributes the number of circuits or cables to individual circuits based + on the given row data. + + Parameters: + - row: A dictionary representing a row of data containing information about + circuits and cables. + + Returns: + - single_circuit: The number of circuits to be assigned to each individual + circuit. 
+ + """ if row["circuits"] != "": circuits = int(row["circuits"]) else: @@ -318,13 +364,24 @@ def _distribute_to_circuits(row): return single_circuit -# Function to check if any substring is in valid_strings -def _any_substring_in_list(s, list_strings): - substrings = s.split(';') - return any(sub in list_strings for sub in substrings) +def add_line_endings_tosubstations(substations, lines): + """ + Add line endings to substations. + This function takes two pandas DataFrames, `substations` and `lines`, and + adds line endings to the substations based on the information from the + lines DataFrame. -def add_line_endings_tosubstations(substations, lines): + Parameters: + - substations (pandas DataFrame): DataFrame containing information about + substations. + - lines (pandas DataFrame): DataFrame containing information about lines. + + Returns: + - buses (pandas DataFrame): DataFrame containing the updated information + about substations with line endings. + + """ if lines.empty: return substations @@ -379,27 +436,19 @@ def add_line_endings_tosubstations(substations, lines): return buses -if __name__ == "__main__": - # Detect running outside of snakemake and mock snakemake for testing - if "snakemake" not in globals(): - from _helpers import mock_snakemake - - snakemake = mock_snakemake("clean_osm_data") - - configure_logging(snakemake) +def _import_lines_and_cables(input_path_lines_cables): + """ + Import lines and cables from the given input paths. - ############# LINES AND CABLES ###################### + Parameters: + - input_path_lines_cables (dict): A dictionary containing the input paths for lines and cables data. - input_path_lines_cables = { - "lines": snakemake.input.lines_way, - "cables": snakemake.input.cables_way, - } + Returns: + - df_lines (DataFrame): A DataFrame containing the imported lines and cables data. 
+ """ columns = ["id", "bounds", "nodes", "geometry", "country", "power", "cables", "circuits", "frequency", "voltage", "wires"] df_lines = pd.DataFrame(columns=columns) - crs = "EPSG:4326" - - # using tqdm loop over input path logger.info("Importing lines and cables") for key in input_path_lines_cables: @@ -436,11 +485,32 @@ def add_line_endings_tosubstations(substations, lines): logger.info(f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(input_path_lines_cables[key])).zfill(2)} (empty): {ip}") continue logger.info("---") + + return df_lines + + +def _drop_duplicate_lines(df_lines): + """ + Drop duplicate lines from the given dataframe. Duplicates are usually lines + cross-border lines or slightly outside the country border of focus. + + Parameters: + - df_lines (pandas.DataFrame): The dataframe containing lines data. - logger.info("Cleaning lines and cables") - # Find duplicates based on id column + Returns: + - df_lines (pandas.DataFrame): The dataframe with duplicate lines removed + and cleaned data. + + This function drops duplicate lines from the given dataframe based on the + 'id' column. It groups the duplicate rows by 'id' and aggregates the + 'country' column to a string split by semicolon, as they appear in multiple + country datasets. One example of the duplicates is kept, accordingly. + Finally, the updated dataframe without multiple duplicates is returned. 
+ """ + logger.info("Dropping duplicate lines.") duplicate_rows = df_lines[df_lines.duplicated(subset=['id'], keep=False)].copy() - # group rows by id and aggregate the country column to a string split by semicolon + + # Group rows by id and aggregate the country column to a string split by semicolon grouped_duplicates = duplicate_rows.groupby('id')["country"].agg(lambda x: ';'.join(x)).reset_index() duplicate_rows.drop_duplicates(subset="id", inplace=True) duplicate_rows.drop(columns=["country"], inplace=True) @@ -450,22 +520,78 @@ def add_line_endings_tosubstations(substations, lines): df_lines = df_lines[~df_lines["id"].isin(duplicate_rows["id"])] df_lines = pd.concat([df_lines, duplicate_rows], axis="rows") - # Initiate boolean with False, only set to true if all cleaning steps are passed - df_lines["cleaned"] = False - df_lines["voltage"] = _clean_voltage(df_lines["voltage"]) + return df_lines + + +def _filter_lines_by_voltage(df_lines, voltage_min=200000): + """ + Filter lines in the DataFrame `df_lines` based on the voltage in V. + Parameters: + - df_lines (pandas.DataFrame): The DataFrame containing the lines data. + - voltage_min (int, optional): The minimum voltage value to filter the + lines. Defaults to 200000 [unit: V]. + + Returns: + - filtered df_lines (pandas.DataFrame): The filtered DataFrame containing + the lines data above voltage_min. + - list_voltages (list): A list of unique voltage values above voltage_min. + The type of the list elements is string. + """ + logger.info(f"Filtering lines by voltage. 
Only keeping lines above and including {voltage_min} V.") list_voltages = df_lines["voltage"].str.split(";").explode().unique().astype(str) - list_voltages = list_voltages[np.vectorize(len)(list_voltages) >= 6] - list_voltages = list_voltages[~np.char.startswith(list_voltages, '1')] + # Keep numeric strings + list_voltages = list_voltages[np.vectorize(str.isnumeric)(list_voltages)] + list_voltages = list_voltages.astype(int) + list_voltages = list_voltages[list_voltages >= int(voltage_min)] + list_voltages = list_voltages.astype(str) bool_voltages = df_lines["voltage"].apply(_check_voltage, list_voltages=list_voltages) df_lines = df_lines[bool_voltages] - # Additional cleaning - df_lines["circuits"] = _clean_circuits(df_lines["circuits"]) - df_lines["cables"] = _clean_cables(df_lines["cables"]) - df_lines["frequency"] = _clean_frequency(df_lines["frequency"]) - df_lines["wires"] = _clean_wires(df_lines["wires"]) + return df_lines, list_voltages + + +def _clean_lines(df_lines): + """ + Cleans and processes the `df_lines` DataFrame heuristically based on the + information available per respective line and cable. + Further checks to ensure data consistency and completeness. + + Parameters + ---------- + df_lines : pandas.DataFrame + The input DataFrame containing line information with columns such as + 'voltage', 'circuits', 'frequency', 'cables', 'split_elements', 'id', + etc. + + Returns + ------- + df_lines : pandas.DataFrame + The cleaned DataFrame with updated columns 'circuits', 'frequency', and + 'cleaned' to reflect the applied transformations. + + Description + ----------- + This function performs the following operations: + + - Initializes a 'cleaned' column with False, step-wise updates to True + following the respective cleaning step. + - Splits the voltage cells in the DataFrame at semicolons using a helper + function `_split_cells`. + - Filters the DataFrame to only include rows with valid voltages. 
+ - Sets circuits of remaining lines without any applicable heuristic equal + to 1. + + The function ensures that the resulting DataFrame has consistent and + complete information for further processing or analysis while maintaining + the data of the original OSM data set wherever possible. + """ + logger.info("Cleaning lines and determining circuits.") + # Initiate boolean with False, only set to true if all cleaning steps are + # passed + df_lines = df_lines.copy() + df_lines["cleaned"] = False df_lines["voltage_original"] = df_lines["voltage"] df_lines["circuits_original"] = df_lines["circuits"] @@ -476,10 +602,10 @@ def add_line_endings_tosubstations(substations, lines): bool_ac = df_lines["frequency"] != "0" bool_dc = ~bool_ac - bool_noinfo = (df_lines["cables"] == "") & (df_lines["circuits"] == "") valid_frequency = ["50", "0"] bool_invalid_frequency = df_lines["frequency"].apply(lambda x: x not in valid_frequency) + bool_noinfo = (df_lines["cables"] == "") & (df_lines["circuits"] == "") # Fill in all values where cables info and circuits does not exist. 
Assuming 1 circuit df_lines.loc[bool_noinfo, "circuits"] = "1" df_lines.loc[bool_noinfo & bool_invalid_frequency, "frequency"] = "50" @@ -582,7 +708,12 @@ def add_line_endings_tosubstations(substations, lines): df_lines.loc[bool_leftover & bool_dc, "frequency"] = "0" df_lines.loc[bool_leftover, "cleaned"] = True - # rename columns + return df_lines + + +def _finalise_lines(df_lines): + logger.info("Finalising lines column types.") + # Rename columns df_lines.rename( columns={ "id": "line_id", @@ -590,17 +721,19 @@ def add_line_endings_tosubstations(substations, lines): "frequency":"tag_frequency", }, inplace=True) - df_lines["bus0"] = None - df_lines["bus1"] = None - df_lines["length"] = None - df_lines["underground"] = False + # Initiate new columns for subsequent build_osm_network step + df_lines.loc[:, "bus0"] = None + df_lines.loc[:, "bus1"] = None + df_lines.loc[:, "length"] = None + df_lines.loc[:, "underground"] = False df_lines.loc[df_lines["tag_type"] == "line", "underground"] = False df_lines.loc[df_lines["tag_type"] == "cable", "underground"] = True - df_lines["under_construction"] = False - df_lines["dc"] = False + df_lines.loc[:, "under_construction"] = False + df_lines.loc[:, "dc"] = False df_lines.loc[df_lines["tag_frequency"] == "50", "dc"] = False df_lines.loc[df_lines["tag_frequency"] == "0", "dc"] = True + # Only include needed columns df_lines = df_lines[[ "line_id", "circuits", @@ -617,31 +750,37 @@ def add_line_endings_tosubstations(substations, lines): "geometry", ]] - df_lines["geometry"] = df_lines.apply(_create_linestring, axis=1) + # Set lines data types + df_lines.loc[:, "circuits"] = df_lines["circuits"].astype(int) + df_lines.loc[:, "voltage"] = df_lines["voltage"].astype(int) + df_lines.loc[:, "tag_frequency"] = df_lines["tag_frequency"].astype(int) + + # Create shapely linestrings from geometries + df_lines.loc[:, "geometry"] = df_lines.apply(_create_linestring, axis=1) + # Drop all rows where the geometry has equal start and end 
point + # These are usually not lines, but outlines of areas. bool_circle = df_lines["geometry"].apply(lambda x: x.coords[0] == x.coords[-1]) - df_lines = df_lines[~bool_circle] + df_lines = df_lines[~bool_circle] - # TODO pypsa-eur: Temporary solution as one AC line between converters will create an error in simplify_network - # As this case is not considered there: - lines_to_drop = ["775580659"] - if lines_to_drop in df_lines["line_id"].values: - df_lines.drop(df_lines[df_lines["line_id"].isin(lines_to_drop)].index, inplace=True) - - gdf_lines = gpd.GeoDataFrame(df_lines, geometry = "geometry", crs = "EPSG:4326") + return df_lines - # Lines data types - gdf_lines["circuits"] = gdf_lines["circuits"].astype(int) - gdf_lines["voltage"] = gdf_lines["voltage"].astype(int) - gdf_lines["tag_frequency"] = gdf_lines["tag_frequency"].astype(int) +def _import_substations(input_path_substations): + """ + Import substations from the given input paths. This function imports both + substations from OSM ways as well as relations that contain nested + information on the substations shape and electrical parameters. Ways and + relations are subsequently concatenated to form a single DataFrame + containing unique bus ids. - ############# BUSES / SUBSTATIONS ###################### - input_path_substations = { - "substations_way": snakemake.input.substations_way, - "substations_relation": snakemake.input.substations_relation, - } + Args: + input_path_substations (dict): A dictionary containing input paths for + substations. + Returns: + pd.DataFrame: A DataFrame containing the imported substations data. 
+ """ cols_substations_way = ["id", "geometry", "country", "power", "substation", "voltage", "frequency"] cols_substations_relation = ["id", "country", "power", "substation", "voltage", "frequency"] df_substations_way = pd.DataFrame(columns = cols_substations_way) @@ -729,6 +868,60 @@ def add_line_endings_tosubstations(substations, lines): df_substations_relation = df_substations_relation[cols_substations_way] df_substations = pd.concat([df_substations_way, df_substations_relation], axis="rows") + return df_substations + +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("clean_osm_data") + + configure_logging(snakemake) + + # Parameters + crs = "EPSG:4326" # Correct crs for OSM data + voltage_min = 200000 # [unit: V] Minimum voltage value to filter lines. + + # TODO pypsa-eur: Temporary solution as one AC line between converters will + # create an error in simplify_network: + lines_to_drop = ["775580659"] + + # Input + input_path_substations = { + "substations_way": snakemake.input.substations_way, + "substations_relation": snakemake.input.substations_relation, + } + + input_path_lines_cables = { + "lines": snakemake.input.lines_way, + "cables": snakemake.input.cables_way, + } + + # Cleaning process + df_lines = _import_lines_and_cables(input_path_lines_cables) + df_lines = _drop_duplicate_lines(df_lines) + df_lines.loc[:, "voltage"] = _clean_voltage(df_lines["voltage"]) + df_lines, list_voltages = _filter_lines_by_voltage(df_lines, voltage_min=voltage_min) + + df_lines.loc[:, "circuits"] = _clean_circuits(df_lines["circuits"]) + df_lines.loc[:, "cables"] = _clean_cables(df_lines["cables"]) + df_lines.loc[:, "frequency"] = _clean_frequency(df_lines["frequency"]) + df_lines.loc[:, "wires"] = _clean_wires(df_lines["wires"]) + + df_lines = _clean_lines(df_lines) + df_lines = _finalise_lines(df_lines) + + # Dropping specific lines, manually + if lines_to_drop in 
df_lines["line_id"].values: + df_lines.drop(df_lines[df_lines["line_id"].isin(lines_to_drop)].index, inplace=True) + + # Create GeoDataFrame + gdf_lines = gpd.GeoDataFrame(df_lines, geometry = "geometry", crs = crs) + + ############# BUSES / SUBSTATIONS ###################### + df_substations = _import_substations(input_path_substations) + + # Create centroids from geometries df_substations.loc[:, "polygon"] = df_substations["geometry"] df_substations.loc[:, "geometry"] = df_substations["geometry"].apply(lambda x: x.centroid) diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py index 77cc398c5..bab645a48 100644 --- a/scripts/retrieve_osm_data.py +++ b/scripts/retrieve_osm_data.py @@ -180,7 +180,7 @@ def retrieve_osm_data( logger.error(f"Error for feature '{f}' in country {country}: {e}") logger.debug(f"Response text: {response.text if response else 'No response'}") if attempt < retries - 1: - wait_time += 10 + wait_time += 15 logger.info(f"Waiting {wait_time} seconds before retrying...") time.sleep(wait_time) else: From 3b9076571bdc2786467ba2ac2071ed7125562959 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Thu, 16 May 2024 14:22:41 +0200 Subject: [PATCH 016/100] Bug fix. --- rules/build_electricity.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 249c9d843..2c5364066 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -619,7 +619,7 @@ rule retrieve_osm_data: log: logs("retrieve_osm_data_{country}.log"), resources: - cores = 2, threads: 1 + cores = 2, threads= 1 script: "../scripts/retrieve_osm_data.py" From 93f09a508dd0940ed6f9192cfd61531e15445ee9 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Thu, 16 May 2024 14:23:09 +0200 Subject: [PATCH 017/100] Bug fix. 
--- rules/build_electricity.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 2c5364066..5c2346b1c 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -619,7 +619,7 @@ rule retrieve_osm_data: log: logs("retrieve_osm_data_{country}.log"), resources: - cores = 2, threads= 1 + cores = 2, threads= 1, script: "../scripts/retrieve_osm_data.py" From 98f50acf17a23f93da89544e9f2fccb870c26a55 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Thu, 16 May 2024 19:08:57 +0200 Subject: [PATCH 018/100] Bug fixes in data types out of clean_osm_data --- scripts/build_osm_network.py | 6 +++--- scripts/clean_osm_data.py | 36 +++++++++++++++++++++++++++++------- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 5eccfad4e..3cecf50c5 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -596,7 +596,7 @@ def merge_stations_lines_by_station_id_and_voltage( """ logger.info( - "Stage 3a/4: Set substation ids with tolerance of %.2f km" % (tol / 1000) + "Stage 4a/5: Set substation ids with tolerance of %.2f km" % (tol / 1000) ) # TODO pypsa-eur: Add this fix to pypsa-earth: Buses should not be clustered geographically if they are different @@ -656,7 +656,7 @@ def merge_stations_lines_by_station_id_and_voltage( lambda p: any([p.within(l) for l in all_dc_boundary_points]) ) - logger.info("Stage 3b/4: Merge substations with the same id") + logger.info("Stage 4b/5: Merge substations with the same id") # merge buses with same station id and voltage if not buses.empty: @@ -666,7 +666,7 @@ def merge_stations_lines_by_station_id_and_voltage( buses = pd.concat([buses_ac, buses_dc], ignore_index=True) set_substations_ids(buses, distance_crs, tol=tol) - logger.info("Stage 3c/4: Specify the bus ids of the line endings") + logger.info("Stage 4c/5: Specify the bus ids of the line endings") # set 
the bus ids to the line dataset lines, buses = set_lines_ids(lines, buses, distance_crs) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index f3687995f..915f3c770 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -712,7 +712,19 @@ def _clean_lines(df_lines): def _finalise_lines(df_lines): + """ + Finalises the lines column types and creates geometries. + + Args: + df_lines (pandas.DataFrame): The input DataFrame containing lines data. + + Returns: + df_lines (pandas.DataFrame(): The DataFrame with finalised column types + and transformed data. + + """ logger.info("Finalising lines column types.") + df_lines = df_lines.copy() # Rename columns df_lines.rename( columns={ @@ -750,18 +762,28 @@ def _finalise_lines(df_lines): "geometry", ]] - # Set lines data types - df_lines.loc[:, "circuits"] = df_lines["circuits"].astype(int) - df_lines.loc[:, "voltage"] = df_lines["voltage"].astype(int) - df_lines.loc[:, "tag_frequency"] = df_lines["tag_frequency"].astype(int) - + # Set lines data types df.apply(pd.to_numeric, args=('coerce',)) + # This workaround is needed as otherwise the column dtypes remain "objects" + df_lines.loc[:, "circuits_num"] = df_lines["circuits"].astype(int) + df_lines.loc[:, "voltage_num"] = df_lines["voltage"].astype(int) + df_lines.loc[:, "tag_frequency_num"] = df_lines["tag_frequency"].astype(int) + df_lines.drop(columns=["circuits", "voltage", "tag_frequency"], inplace=True) + + col_rename_dict = { + "circuits_num": "circuits", + "voltage_num": "voltage", + "tag_frequency_num": "tag_frequency" + } + + df_lines.rename(columns=col_rename_dict, inplace=True) + # Create shapely linestrings from geometries df_lines.loc[:, "geometry"] = df_lines.apply(_create_linestring, axis=1) # Drop all rows where the geometry has equal start and end point # These are usually not lines, but outlines of areas. 
bool_circle = df_lines["geometry"].apply(lambda x: x.coords[0] == x.coords[-1]) - df_lines = df_lines[~bool_circle] + df_lines = df_lines[~bool_circle] return df_lines @@ -1059,7 +1081,7 @@ def _import_substations(input_path_substations): ) #group gdf_substations by voltage and and geometry (dropping duplicates) - df_substations = df_substations.groupby(["voltage", "lon", "lat", "tag_source"]).first().reset_index() + df_substations = df_substations.groupby(["voltage", "lon", "lat", "dc", "tag_source"]).first().reset_index() df_substations["bus_id"] = df_substations.index gdf_substations = gpd.GeoDataFrame(df_substations, geometry = "geometry", crs = "EPSG:4326") From 4f2308d4b888e3efff225e3aebf50c4f5a38e747 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Fri, 17 May 2024 13:47:47 +0200 Subject: [PATCH 019/100] Significant improvements to retrieve_osm_data, clean_osm_data. Cleaned code. Speed improvements --- config/config.default.yaml | 146 +++++---- rules/build_electricity.smk | 8 +- scripts/build_osm_network.py | 6 +- scripts/clean_osm_data.py | 605 ++++++++++++++++++++--------------- scripts/retrieve_osm_data.py | 3 - 5 files changed, 439 insertions(+), 329 deletions(-) diff --git a/config/config.default.yaml b/config/config.default.yaml index 092c9c9ce..62c3cfdfa 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -15,13 +15,13 @@ private: entsoe_api: remote: - ssh: "" - path: "" + ssh: "z1" + path: "~/scratch/projects/pypsa-eur" # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#run run: prefix: "" - name: "" + name: "europe-osm-update-hydro" scenarios: enable: false file: config/scenarios.yaml @@ -40,13 +40,15 @@ scenario: simpl: - '' ll: - - vopt + - v1.0 # TODO mit und ohne Netzausbau v1.0 clusters: - - 37 + - 50 - 128 - 256 + - 512 + # - 1024 opts: - - '' + - 'Co2L0-25H' sector_opts: - '' planning_horizons: @@ -56,7 +58,20 @@ scenario: - 2050 # docs in 
https://pypsa-eur.readthedocs.io/en/latest/configuration.html#countries
+# countries: ["NO"]
 countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK']
+# countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MD', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK', 'UA']
+
+# Settings related to the high-voltage electricity grid
+electricity_network:
+  base_network: "osm" # "osm" or "gridkit"
+  build_osm_network: true # If 'true', the network will be built from scratch (retrieving OSM data, cleaning, and building) and stored under resources, 'false' will use snapshots in data/osm
+
+build_osm_network: # Options of the build_osm_network script; osm = OpenStreetMap
+  group_tolerance_buses: 5000 # [m] (default 5000) Tolerance in meters of the close buses to merge
+  split_overpassing_lines: false # When True, lines overpassing buses are split and connected to the buses
+  overpassing_lines_tolerance: 1 # [m] (default 1) Tolerance to identify lines overpassing buses
+  force_ac: false # When true, it forces all components (lines and substation) to be AC-only. To be used if DC assets create problems.
# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#snapshots snapshots: @@ -64,18 +79,18 @@ snapshots: end: "2014-01-01" inclusive: 'left' -osm: - retrieve: true - use-prebuilt: false - # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#enable enable: - retrieve: auto + retrieve: true prepare_links_p_nom: false retrieve_databundle: true + retrieve_sector_databundle: true retrieve_cost_data: true build_cutout: false + retrieve_irena: false retrieve_cutout: true + build_natura_raster: false + retrieve_natura_raster: true custom_busmap: false drop_leap_day: true @@ -91,7 +106,7 @@ co2_budget: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity electricity: - voltages: [220., 300., 380., 500., 750.] + voltages: [200., 220., 300., 380., 400., 500., 750.] gaslimit_enable: false gaslimit: false co2limit_enable: false @@ -110,7 +125,7 @@ electricity: H2: 168 extendable_carriers: - Generator: [solar, onwind, offwind-ac, offwind-dc, offwind-float, OCGT] + Generator: [solar, onwind, offwind-ac, offwind-dc, OCGT] StorageUnit: [] # battery, H2 Store: [battery, H2] Link: [] # H2 pipeline @@ -120,7 +135,7 @@ electricity: everywhere_powerplants: [nuclear, oil, OCGT, CCGT, coal, lignite, geothermal, biomass] conventional_carriers: [nuclear, oil, OCGT, CCGT, coal, lignite, geothermal, biomass] - renewable_carriers: [solar, onwind, offwind-ac, offwind-dc, offwind-float, hydro] + renewable_carriers: [solar, onwind, offwind-ac, offwind-dc, hydro] # hydro removed estimate_renewable_capacities: enable: true @@ -128,7 +143,7 @@ electricity: year: 2020 expansion_limit: false technology_mapping: - Offshore: [offwind-ac, offwind-dc, offwind-float] + Offshore: [offwind-ac, offwind-dc] Onshore: [onwind] PV: [solar] @@ -196,7 +211,7 @@ renewable: luisa: false # [0, 5230] natura: true ship_threshold: 400 - max_depth: 60 + max_depth: 50 max_shore_distance: 30000 excluder_resolution: 200 clip_p_max_pu: 1.e-2 @@ -212,28 
+227,10 @@ renewable: luisa: false # [0, 5230] natura: true ship_threshold: 400 - max_depth: 60 + max_depth: 50 min_shore_distance: 30000 excluder_resolution: 200 clip_p_max_pu: 1.e-2 - offwind-float: - cutout: europe-2013-era5 - resource: - method: wind - turbine: NREL_ReferenceTurbine_5MW_offshore - # ScholzPhd Tab 4.3.1: 10MW/km^2 - capacity_per_sqkm: 2 - correction_factor: 0.8855 - # proxy for wake losses - # from 10.1016/j.energy.2018.08.153 - # until done more rigorously in #153 - corine: [44, 255] - natura: true - ship_threshold: 400 - excluder_resolution: 200 - min_depth: 60 - max_depth: 1000 - clip_p_max_pu: 1.e-2 solar: cutout: europe-2013-sarah resource: @@ -271,17 +268,27 @@ conventional: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#lines lines: types: + 200.: "Al/St 240/40 2-bundle 220.0" 220.: "Al/St 240/40 2-bundle 220.0" 300.: "Al/St 240/40 3-bundle 300.0" 380.: "Al/St 240/40 4-bundle 380.0" + 400.: "Al/St 240/40 4-bundle 380.0" 500.: "Al/St 240/40 4-bundle 380.0" 750.: "Al/St 560/50 4-bundle 750.0" + dc_types: # setting only for osm + 200.: "HVDC XLPE 1000" + 220.: "HVDC XLPE 1000" + 300.: "HVDC XLPE 1000" + 750.: "HVDC XLPE 1000" + 380.: "HVDC XLPE 1000" + 400.: "HVDC XLPE 1000" + 500.: "HVDC XLPE 1000" s_max_pu: 0.7 s_nom_max: .inf max_extension: 20000 #MW length_factor: 1.25 reconnect_crimea: true - under_construction: 'keep' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity + under_construction: 'zero' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity dynamic_line_rating: activate: false cutout: europe-2013-era5 @@ -294,7 +301,7 @@ links: p_max_pu: 1.0 p_nom_max: .inf max_extension: 30000 #MW - include_tyndp: true + include_tyndp: false under_construction: 'zero' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#transformers @@ -327,7 +334,6 @@ pypsa_eur: - onwind - 
offwind-ac - offwind-dc - - offwind-float - solar - ror - nuclear @@ -556,7 +562,7 @@ sector: - nearshore # within 50 km of sea # - offshore ammonia: false - min_part_load_fischer_tropsch: 0.5 + min_part_load_fischer_tropsch: 0.7 min_part_load_methanolisation: 0.3 min_part_load_methanation: 0.3 use_fischer_tropsch_waste_heat: true @@ -672,9 +678,6 @@ industry: 2040: 0.12 2045: 0.16 2050: 0.20 - HVC_environment_sequestration_fraction: 0. - waste_to_energy: false - waste_to_energy_cc: false sector_ratios_fraction_future: 2020: 0.0 2025: 0.1 @@ -700,7 +703,7 @@ industry: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#costs costs: year: 2030 - version: v0.9.0 + version: v0.8.1 rooftop_share: 0.14 # based on the potentials, assuming (0.1 kW/m2 and 10 m2/person) social_discountrate: 0.02 fill_values: @@ -791,7 +794,7 @@ solving: solver_options: highs-default: - # refer to https://ergo-code.github.io/HiGHS/dev/options/definitions/ + # refer to https://ergo-code.github.io/HiGHS/options/definitions.html#solver threads: 4 solver: "ipm" run_crossover: "off" @@ -844,17 +847,23 @@ solving: cbc-default: {} # Used in CI glpk-default: {} # Used in CI - mem_mb: 30000 #memory in MB; 20 GB enough for 50+B+I+H2; 100 GB for 181+B+I+H2 - runtime: 6h #runtime in humanfriendly style https://humanfriendly.readthedocs.io/en/latest/ + mem_mb: 100000 #memory in MB; 20 GB enough for 50+B+I+H2; 100 GB for 181+B+I+H2 + runtime: 12h #runtime in humanfriendly style https://humanfriendly.readthedocs.io/en/latest/ # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#plotting + plotting: map: - boundaries: [-11, 30, 34, 71] - color_geomap: - ocean: white - land: white + boundaries: + eu_node_location: + x: -5.5 + y: 46. 
+ # costs_max: 1000 + # costs_threshold: 0.0000001 + # energy_max: + # energy_min: + # energy_threshold: 0.000001 projection: name: "EqualEarth" # See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html for alternatives, for example: @@ -862,21 +871,34 @@ plotting: # central_longitude: 10. # central_latitude: 50. # standard_parallels: [35, 65] - eu_node_location: - x: -5.5 - y: 46. - costs_max: 1000 - costs_threshold: 1 - energy_max: 20000 - energy_min: -20000 - energy_threshold: 50. + +# plotting: +# map: +# boundaries: [-11, 30, 34, 71] +# color_geomap: +# ocean: white +# land: white +# projection: +# name: "EqualEarth" +# # See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html for alternatives, for example: +# # name: "LambertConformal" +# # central_longitude: 10. +# # central_latitude: 50. +# # standard_parallels: [35, 65] +# eu_node_location: +# x: -5.5 +# y: 46. +# costs_max: 1000 +# costs_threshold: 1 +# energy_max: 20000 +# energy_min: -20000 +# energy_threshold: 50. 
nice_names: OCGT: "Open-Cycle Gas" CCGT: "Combined-Cycle Gas" offwind-ac: "Offshore Wind (AC)" offwind-dc: "Offshore Wind (DC)" - offwind-float: "Offshore Wind (Floating)" onwind: "Onshore Wind" solar: "Solar" PHS: "Pumped Hydro Storage" @@ -901,9 +923,6 @@ plotting: offwind-dc: "#74c6f2" offshore wind (DC): "#74c6f2" offshore wind dc: "#74c6f2" - offwind-float: "#b5e2fa" - offshore wind (Float): "#b5e2fa" - offshore wind float: "#b5e2fa" # water hydro: '#298c81' hydro reservoir: '#298c81' @@ -1159,6 +1178,3 @@ plotting: DC-DC: "#8a1caf" DC link: "#8a1caf" load: "#dd2e23" - waste CHP: '#e3d37d' - waste CHP CC: '#e3d3ff' - HVC to air: 'k' diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 5c2346b1c..e4b5711f7 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -494,7 +494,7 @@ rule simplify_network: benchmarks("simplify_network/elec_s{simpl}") threads: 1 resources: - mem_mb=12000, + mem_mb=40000, conda: "../envs/environment.yaml" script: @@ -541,7 +541,7 @@ rule cluster_network: benchmarks("cluster_network/elec_s{simpl}_{clusters}") threads: 1 resources: - mem_mb=10000, + mem_mb=40000, conda: "../envs/environment.yaml" script: @@ -614,7 +614,6 @@ rule retrieve_osm_data: cables_way="data/osm/raw/{country}/cables_way.json", lines_way="data/osm/raw/{country}/lines_way.json", substations_way="data/osm/raw/{country}/substations_way.json", - substations_node="data/osm/raw/{country}/substations_node.json", substations_relation="data/osm/raw/{country}/substations_relation.json", log: logs("retrieve_osm_data_{country}.log"), @@ -629,8 +628,9 @@ rule clean_osm_data: cables_way=[f"data/osm/raw/{country}/cables_way.json" for country in config["countries"]], lines_way=[f"data/osm/raw/{country}/lines_way.json" for country in config["countries"]], substations_way=[f"data/osm/raw/{country}/substations_way.json" for country in config["countries"]], - substations_node=[f"data/osm/raw/{country}/substations_node.json" for country in 
config["countries"]], substations_relation=[f"data/osm/raw/{country}/substations_relation.json" for country in config["countries"]], + offshore_shapes=resources("offshore_shapes.geojson"), + country_shapes=resources("country_shapes.geojson"), output: substations=resources("osm/clean/substations.geojson"), substations_polygon=resources("osm/clean/substations_polygon.geojson"), diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 3cecf50c5..4cd5dd315 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -13,13 +13,12 @@ import pandas as pd from _helpers import ( configure_logging, + set_scenario_config, ) from shapely.geometry import LineString, Point from shapely.ops import linemerge, split -from shapely import wkt from tqdm import tqdm -from _benchmark import memory_logger -import yaml +from _benchmark import memory_logger logger = logging.getLogger(__name__) @@ -1128,6 +1127,7 @@ def are_almost_equal(point1, point2, tolerance=1e-6): snakemake = mock_snakemake("build_osm_network") configure_logging(snakemake) + set_scenario_config(snakemake) # load default crs geo_crs = "EPSG:4326" diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 915f3c770..c4f99858c 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -35,7 +35,7 @@ from shapely.geometry import LineString, Polygon from shapely.ops import linemerge -from _helpers import configure_logging +from _helpers import configure_logging, set_scenario_config logger = logging.getLogger(__name__) @@ -98,6 +98,7 @@ def _clean_voltage(column): .str.replace("400/220/110 kV'", "400000;220000;110000") .str.replace("400/220/110/20_kv", "400000;220000;110000;20000") .str.replace("2x25000", "25000;25000") + .str.replace("Ă©", ";") ) column = ( @@ -364,7 +365,12 @@ def _distribute_to_circuits(row): return single_circuit -def add_line_endings_tosubstations(substations, lines): +def _add_line_endings_to_substations( + df_substations, + 
gdf_lines, + path_country_shapes, + path_offshore_shapes, + ): """ Add line endings to substations. @@ -382,82 +388,106 @@ def add_line_endings_tosubstations(substations, lines): about substations with line endings. """ - if lines.empty: - return substations - - # extract columns from substation df - bus_s = pd.DataFrame(columns=substations.columns) - bus_e = pd.DataFrame(columns=substations.columns) - - # Read information from line.csv - bus_s[["voltage", "country"]] = lines[["voltage", "country"]].astype(str) - bus_s["geometry"] = lines.geometry.boundary.map( + if gdf_lines.empty: + return df_substations + + logger.info("Adding line endings to substations") + # extract columns from df_substations + bus_s = pd.DataFrame(columns=df_substations.columns) + bus_e = pd.DataFrame(columns=df_substations.columns) + + # TODO pypsa-eur: fix country code to contain single country code + # Read information from gdf_lines + bus_s[["voltage", "country"]] = gdf_lines[["voltage", "country"]] + bus_s.loc[:, "geometry"] = gdf_lines.geometry.boundary.map( lambda p: p.geoms[0] if len(p.geoms) >= 2 else None ) - bus_s["lon"] = bus_s["geometry"].map(lambda p: p.x if p != None else None) - bus_s["lat"] = bus_s["geometry"].map(lambda p: p.y if p != None else None) - bus_s["bus_id"] = ( - (substations["bus_id"].max() if "bus_id" in substations else 0) - + 1 - + bus_s.index - ) - bus_s["dc"] = lines["dc"] + bus_s.loc[:, "lon"] = bus_s["geometry"].map(lambda p: p.x if p != None else None) + bus_s.loc[:, "lat"] = bus_s["geometry"].map(lambda p: p.y if p != None else None) + bus_s.loc[:, "dc"] = gdf_lines["dc"] - bus_e[["voltage", "country"]] = lines[["voltage", "country"]].astype(str) - bus_e["geometry"] = lines.geometry.boundary.map( + bus_e[["voltage", "country"]] = gdf_lines[["voltage", "country"]] + bus_e.loc[:, "geometry"] = gdf_lines.geometry.boundary.map( lambda p: p.geoms[1] if len(p.geoms) >= 2 else None ) - bus_e["lon"] = bus_e["geometry"].map(lambda p: p.x if p != None else None) 
- bus_e["lat"] = bus_e["geometry"].map(lambda p: p.y if p != None else None) - bus_e["bus_id"] = bus_s["bus_id"].max() + 1 + bus_e.index - bus_e["dc"] = lines["dc"] + bus_e.loc[:, "lon"] = bus_e["geometry"].map(lambda p: p.x if p != None else None) + bus_e.loc[:, "lat"] = bus_e["geometry"].map(lambda p: p.y if p != None else None) + bus_e.loc[:, "dc"] = gdf_lines["dc"] bus_all = pd.concat([bus_s, bus_e], ignore_index=True) + # Group gdf_substations by voltage and and geometry (dropping duplicates) + bus_all = bus_all.groupby(["voltage", "lon", "lat", "dc"]).first().reset_index() + bus_all = bus_all[df_substations.columns] + bus_all.loc[:, "bus_id"] = bus_all.apply(lambda row: f"line-end/{row.name + 1}", axis=1) + # Initialize default values bus_all["station_id"] = np.nan # Assuming substations completed for installed lines bus_all["under_construction"] = False - bus_all["tag_area"] = 0.0 + bus_all["tag_area"] = None bus_all["symbol"] = "substation" # TODO: this tag may be improved, maybe depending on voltage levels bus_all["tag_substation"] = "transmission" - bus_all["tag_source"] = "line_ending" - - buses = pd.concat([substations, bus_all], ignore_index=True) + bus_all["tag_source"] = "line-end" - # # Assign index to bus_id - buses["bus_id"] = buses.index + buses = pd.concat([df_substations, bus_all], ignore_index=True) + buses.set_index("bus_id", inplace=True) - # TODO: pypsa-eur: change this later to improve country assignment + # Fix country codes + # TODO pypsa-eur: Temporary solution as long as the shapes have a low, + # incomplete resolution (cf. 
2500 meters for buffering) bool_multiple_countries = buses["country"].str.contains(";") - buses.loc[bool_multiple_countries, "country"] = buses.loc[bool_multiple_countries, "country"].str.split(";").str[0] + gdf_offshore = gpd.read_file(path_offshore_shapes).set_index("name")["geometry"] + gdf_offshore = gpd.GeoDataFrame(gdf_offshore, geometry=gdf_offshore, crs = gdf_offshore.crs) + gdf_countries = gpd.read_file(path_country_shapes).set_index("name")["geometry"] + # reproject to enable buffer + gdf_countries = gpd.GeoDataFrame(geometry=gdf_countries, crs = gdf_countries.crs) + gdf_union = gdf_countries.merge(gdf_offshore, how="outer", left_index=True, right_index=True) + gdf_union["geometry"] = gdf_union.apply(lambda row: gpd.GeoSeries([row["geometry_x"], row["geometry_y"]]) \ + .unary_union, axis=1) + gdf_union = gpd.GeoDataFrame(geometry=gdf_union["geometry"], crs = crs) + utm = gdf_union.estimate_utm_crs(datum_name = "WGS 84") + gdf_union = gdf_union.to_crs(utm) + gdf_union = gdf_union.buffer(2500) # meters + gdf_union = gdf_union.to_crs(crs) + gdf_union = gpd.GeoDataFrame(geometry=gdf_union, crs = crs) + gdf_buses_tofix = gpd.GeoDataFrame(buses[bool_multiple_countries], geometry="geometry", crs = crs) + joined = gpd.sjoin(gdf_buses_tofix, gdf_union, how="left", predicate="within") + joined.reset_index(inplace=True) + joined = joined.drop_duplicates(subset="bus_id") + joined.set_index("bus_id", inplace=True) + + buses.loc[bool_multiple_countries, "country"] = joined.loc[bool_multiple_countries, "index_right"] return buses -def _import_lines_and_cables(input_path_lines_cables): +def _import_lines_and_cables(path_lines): """ Import lines and cables from the given input paths. Parameters: - - input_path_lines_cables (dict): A dictionary containing the input paths for lines and cables data. + - path_lines (dict): A dictionary containing the input paths for lines and cables data. 
Returns: - df_lines (DataFrame): A DataFrame containing the imported lines and cables data. """ - columns = ["id", "bounds", "nodes", "geometry", "country", "power", "cables", "circuits", "frequency", "voltage", "wires"] + columns = ["id", "bounds", "nodes", "geometry", "country", "power", "cables", "circuits", "frequency", "voltage", + "wires"] df_lines = pd.DataFrame(columns=columns) logger.info("Importing lines and cables") - for key in input_path_lines_cables: + for key in path_lines: logger.info(f"Processing {key}...") - for idx, ip in enumerate(input_path_lines_cables[key]): + for idx, ip in enumerate(path_lines[key]): if os.path.exists(ip) and os.path.getsize(ip) > 400: # unpopulated OSM json is about 51 bytes - country = os.path.basename(os.path.dirname(input_path_lines_cables[key][idx])) + country = os.path.basename(os.path.dirname(path_lines[key][idx])) - logger.info(f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(input_path_lines_cables[key])).zfill(2)}: {ip}") + logger.info( + f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(path_lines[key])).zfill(2)}: {ip}" + ) with open(ip, "r") as f: data = json.load(f) @@ -482,7 +512,9 @@ def _import_lines_and_cables(input_path_lines_cables): df_lines = pd.concat([df_lines, df], axis="rows") else: - logger.info(f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(input_path_lines_cables[key])).zfill(2)} (empty): {ip}") + logger.info( + f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(path_lines[key])).zfill(2)} (empty): {ip}" + ) continue logger.info("---") @@ -523,36 +555,80 @@ def _drop_duplicate_lines(df_lines): return df_lines -def _filter_lines_by_voltage(df_lines, voltage_min=200000): +def _filter_by_voltage(df, voltage_min=200000): """ - Filter lines in the DataFrame `df_lines` based on the voltage in V. + Filter rows in the DataFrame based on the voltage in V. Parameters: - - df_lines (pandas.DataFrame): The DataFrame containing the lines data. 
+ - df (pandas.DataFrame): The DataFrame containing the substations or lines data. - voltage_min (int, optional): The minimum voltage value to filter the - lines. Defaults to 200000 [unit: V]. + rows. Defaults to 200000 [unit: V]. Returns: - - filtered df_lines (pandas.DataFrame): The filtered DataFrame containing - the lines data above voltage_min. + - filtered df (pandas.DataFrame): The filtered DataFrame containing + the lines or substations above voltage_min. - list_voltages (list): A list of unique voltage values above voltage_min. The type of the list elements is string. """ - logger.info(f"Filtering lines by voltage. Only keeping lines above and including {voltage_min} V.") - list_voltages = df_lines["voltage"].str.split(";").explode().unique().astype(str) + logger.info(f"Filtering dataframe by voltage. Only keeping rows above and including {voltage_min} V.") + list_voltages = df["voltage"].str.split(";").explode().unique().astype(str) # Keep numeric strings list_voltages = list_voltages[np.vectorize(str.isnumeric)(list_voltages)] list_voltages = list_voltages.astype(int) list_voltages = list_voltages[list_voltages >= int(voltage_min)] list_voltages = list_voltages.astype(str) - bool_voltages = df_lines["voltage"].apply(_check_voltage, list_voltages=list_voltages) - df_lines = df_lines[bool_voltages] + bool_voltages = df["voltage"].apply(_check_voltage, list_voltages=list_voltages) + df = df[bool_voltages] + + return df, list_voltages + + +def _clean_substations(df_substations, list_voltages): + """ + Clean the substation data by performing the following steps: + - Split cells in the dataframe. + - Filter substation data based on specified voltages. + - Update the frequency values based on the split count. + - Split cells in the 'frequency' column. + - Set remaining invalid frequency values that are not in ['0', '50'] + to '50'. + + Parameters: + - df_substations (pandas.DataFrame): The input dataframe containing + substation data. 
+ - list_voltages (list): A list of voltages above voltage_min to filter the + substation data. + + Returns: + - df_substations (pandas.DataFrame): The cleaned substation dataframe. + """ + df_substations = df_substations.copy() + + df_substations = _split_cells(df_substations) + + bool_voltages = df_substations["voltage"].apply(_check_voltage, list_voltages=list_voltages) + df_substations = df_substations[bool_voltages] + df_substations.loc[:, "split_count"] = df_substations["id"].apply(lambda x: x.split("-")[1] if "-" in x else "0") + df_substations.loc[:, "split_count"] = df_substations["split_count"].astype(int) + + bool_split = df_substations["split_elements"] > 1 + bool_frequency_len = df_substations["frequency"] \ + .apply(lambda x: len(x.split(";"))) == df_substations["split_elements"] + + op_freq = lambda row: row["frequency"].split(";")[row["split_count"]-1] + + df_substations.loc[bool_frequency_len & bool_split, "frequency"] = df_substations \ + .loc[bool_frequency_len & bool_split, ].apply(op_freq, axis=1) + + df_substations = _split_cells(df_substations, cols=["frequency"]) + bool_invalid_frequency = df_substations["frequency"].apply(lambda x: x not in ["50", "0"]) + df_substations.loc[bool_invalid_frequency, "frequency"] = "50" - return df_lines, list_voltages + return df_substations -def _clean_lines(df_lines): +def _clean_lines(df_lines, list_voltages): """ Cleans and processes the `df_lines` DataFrame heuristically based on the information available per respective line and cable. @@ -564,6 +640,8 @@ def _clean_lines(df_lines): The input DataFrame containing line information with columns such as 'voltage', 'circuits', 'frequency', 'cables', 'split_elements', 'id', etc. + list_voltages : list + A list of unique voltage values above a certain threshold. 
(type: str) Returns ------- @@ -651,7 +729,8 @@ def _clean_lines(df_lines): df_lines.loc[bool_lines & bool_dc, "frequency"] = "0" df_lines.loc[bool_lines, "cleaned"] = True - # Clean those values where number of voltages split by semicolon is larger than no cables or no circuits + # Clean those values where number of voltages split by semicolon is larger + # than no cables or no circuits bool_cables = (df_lines["voltage_original"].apply(lambda x: len(x.split(";")) > 1)) & \ (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) & \ (df_lines["circuits"].apply(lambda x: len(x.split(";")) == 1)) & \ @@ -663,7 +742,8 @@ def _clean_lines(df_lines): df_lines.loc[bool_cables & bool_dc, "frequency"] = "0" df_lines.loc[bool_cables, "cleaned"] = True - # Clean those values where multiple circuit values are present, divided by semicolon + # Clean those values where multiple circuit values are present, divided by + # semicolon bool_cables = (df_lines["circuits"].apply(lambda x: len(x.split(";")) > 1)) & \ (df_lines.apply(lambda row: len(row["circuits"].split(";")) == row["split_elements"], axis=1)) & \ (df_lines["cleaned"] == False) @@ -677,7 +757,8 @@ def _clean_lines(df_lines): df_lines.loc[bool_cables & bool_dc, "frequency"] = "0" df_lines.loc[bool_cables, "cleaned"] = True - # Clean those values where multiple cables values are present, divided by semicolon + # Clean those values where multiple cables values are present, divided by + # semicolon bool_cables = (df_lines["cables"].apply(lambda x: len(x.split(";")) > 1)) & \ (df_lines.apply(lambda row: len(row["cables"].split(";")) == row["split_elements"], axis=1)) & \ (df_lines["cleaned"] == False) @@ -711,9 +792,117 @@ def _clean_lines(df_lines): return df_lines +def _create_substations_geometry(df_substations): + """ + Creates centroids from geometries and keeps the original polygons. + + Parameters: + df_substations (DataFrame): The input DataFrame containing the substations + data. 
+
+    Returns:
+    df_substations (DataFrame): A new DataFrame with the centroids ["geometry"]
+    and polygons ["polygon"] of the substations geometries.
+
+    """
+    logger.info("Creating substations geometry.")
+    df_substations = df_substations.copy()
+
+    # Create centroids from geometries and keep the original polygons
+    df_substations.loc[:, "polygon"] = df_substations["geometry"]
+    df_substations.loc[:, "geometry"] = df_substations["geometry"].apply(lambda x: x.centroid)
+    df_substations.loc[:, "lon"] = df_substations["geometry"].apply(lambda x: x.x)
+    df_substations.loc[:, "lat"] = df_substations["geometry"].apply(lambda x: x.y)
+
+    return df_substations
+
+
+def _create_lines_geometry(df_lines):
+    """
+    Create line geometry for the given DataFrame of lines.
+
+    Parameters:
+    - df_lines (pandas.DataFrame): DataFrame containing lines data.
+
+    Returns:
+    - df_lines (pandas.DataFrame): DataFrame with transformed 'geometry'
+      column (type: shapely LineString).
+
+    Notes:
+    - This function transforms 'geometry' column in the input DataFrame by
+      applying the '_create_linestring' function to each row.
+    - It then drops rows where the geometry has equal start and end points,
+      as these are usually not lines but outlines of areas.
+    """
+    logger.info("Creating lines geometry.")
+    df_lines = df_lines.copy()
+    df_lines.loc[:, "geometry"] = df_lines.apply(_create_linestring, axis=1)
+
+    bool_circle = df_lines["geometry"].apply(lambda x: x.coords[0] == x.coords[-1])
+    df_lines = df_lines[~bool_circle]
+
+    return df_lines
+
+
+def _finalise_substations(df_substations):
+    """
+    Finalises the substations column types.
+
+    Args:
+    df_substations (pandas.DataFrame): The input DataFrame
+    containing substations data.
+
+    Returns:
+    df_substations (pandas.DataFrame): The DataFrame with finalised column
+    types and transformed data.
+    """
+    logger.info("Finalising substations column types.")
+    df_substations = df_substations.copy()
+    # rename columns
+    df_substations.rename(
+        columns={
+            "id": "bus_id",
+            "power": "symbol",
+            "substation":"tag_substation",
+        }, inplace=True)
+
+    # Initiate new columns for subsequent build_osm_network step
+    df_substations.loc[:, "symbol"] = "substation"
+    df_substations.loc[:, "tag_substation"] = "transmission"
+    df_substations.loc[:, "dc"] = False
+    df_substations.loc[df_substations["frequency"] == "0", "dc"] = True
+    df_substations.loc[:, "under_construction"] = False
+    df_substations.loc[:, "station_id"] = None
+    df_substations.loc[:, "tag_area"] = None
+    df_substations.loc[:, "tag_source"] = df_substations["bus_id"]
+
+    # Only include needed columns
+    df_substations = df_substations[[
+        "bus_id",
+        "symbol",
+        "tag_substation",
+        "voltage",
+        "lon",
+        "lat",
+        "dc",
+        "under_construction",
+        "station_id",
+        "tag_area",
+        "country",
+        "geometry",
+        "polygon",
+        "tag_source",
+    ]]
+
+    # Substation data types
+    df_substations["voltage"] = df_substations["voltage"].astype(int)
+
+    return df_substations
+
+
 def _finalise_lines(df_lines):
     """
-    Finalises the lines column types and creates geometries.
+    Finalises the lines column types.
 
     Args:
     df_lines (pandas.DataFrame): The input DataFrame containing lines data.
@@ -721,7 +910,6 @@ def _finalise_lines(df_lines):
     Returns:
     df_lines (pandas.DataFrame(): The DataFrame with finalised column
     types and transformed data.
- """ logger.info("Finalising lines column types.") df_lines = df_lines.copy() @@ -764,31 +952,14 @@ def _finalise_lines(df_lines): # Set lines data types df.apply(pd.to_numeric, args=('coerce',)) # This workaround is needed as otherwise the column dtypes remain "objects" - df_lines.loc[:, "circuits_num"] = df_lines["circuits"].astype(int) - df_lines.loc[:, "voltage_num"] = df_lines["voltage"].astype(int) - df_lines.loc[:, "tag_frequency_num"] = df_lines["tag_frequency"].astype(int) - df_lines.drop(columns=["circuits", "voltage", "tag_frequency"], inplace=True) - - col_rename_dict = { - "circuits_num": "circuits", - "voltage_num": "voltage", - "tag_frequency_num": "tag_frequency" - } - - df_lines.rename(columns=col_rename_dict, inplace=True) - - # Create shapely linestrings from geometries - df_lines.loc[:, "geometry"] = df_lines.apply(_create_linestring, axis=1) - - # Drop all rows where the geometry has equal start and end point - # These are usually not lines, but outlines of areas. - bool_circle = df_lines["geometry"].apply(lambda x: x.coords[0] == x.coords[-1]) - df_lines = df_lines[~bool_circle] + df_lines["circuits"] = df_lines["circuits"].astype(int) + df_lines["voltage"] = df_lines["voltage"].astype(int) + df_lines["tag_frequency"] = df_lines["tag_frequency"].astype(int) return df_lines -def _import_substations(input_path_substations): +def _import_substations(path_substations): """ Import substations from the given input paths. This function imports both substations from OSM ways as well as relations that contain nested @@ -797,7 +968,7 @@ def _import_substations(input_path_substations): containing unique bus ids. Args: - input_path_substations (dict): A dictionary containing input paths for + path_substations (dict): A dictionary containing input paths for substations. 
Returns: @@ -809,12 +980,14 @@ def _import_substations(input_path_substations): df_substations_relation = pd.DataFrame(columns = cols_substations_relation) logger.info("Importing substations") - for key in input_path_substations: + for key in path_substations: logger.info(f"Processing {key}...") - for idx, ip in enumerate(input_path_substations[key]): + for idx, ip in enumerate(path_substations[key]): if os.path.exists(ip) and os.path.getsize(ip) > 400: # unpopulated OSM json is about 51 bytes - country = os.path.basename(os.path.dirname(input_path_substations[key][idx])) - logger.info(f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(input_path_substations[key])).zfill(2)}: {ip}") + country = os.path.basename(os.path.dirname(path_substations[key][idx])) + logger.info( + f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(path_substations[key])).zfill(2)}: {ip}" + ) with open(ip, "r") as f: data = json.load(f) @@ -845,7 +1018,9 @@ def _import_substations(input_path_substations): df_substations_relation = pd.concat([df_substations_relation, df], axis="rows") else: - logger.info(f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(input_path_substations[key])).zfill(2)} (empty): {ip}") + logger.info( + f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(path_substations[key])).zfill(2)} (empty): {ip}" + ) continue logger.info("---") @@ -878,7 +1053,8 @@ def _import_substations(input_path_substations): df_substations_relation_members["linestring"] = df_substations_relation_members.apply(_create_linestring, axis=1) df_substations_relation_members_grouped = df_substations_relation_members.groupby('id')['linestring'] \ .apply(lambda x: linemerge(x.tolist())).reset_index() - df_substations_relation_members_grouped["geometry"] = df_substations_relation_members_grouped["linestring"].apply(lambda x: x.convex_hull) + df_substations_relation_members_grouped["geometry"] = df_substations_relation_members_grouped["linestring"] \ + .apply(lambda x: x.convex_hull) 
df_substations_relation = df_substations_relation.join( df_substations_relation_members_grouped.set_index('id'), @@ -892,6 +1068,36 @@ def _import_substations(input_path_substations): return df_substations + +def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): + """ + Removes lines that are within substation polygons from the given + GeoDataFrame of lines. These are not needed to create network (e.g. bus + bars, switchgear, etc.) + + Parameters: + - gdf_lines (GeoDataFrame): A GeoDataFrame containing lines with 'line_id' + and 'geometry' columns. + - gdf_substations_polygon (GeoDataFrame): A GeoDataFrame containing substation + polygons. + + Returns: + GeoDataFrame: A new GeoDataFrame without lines within substation polygons. + """ + logger.info("Identifying and removing lines within substation polygons...") + gdf = gpd.sjoin( + gdf_lines[["line_id", "geometry"]], + gdf_substations_polygon, + how="inner", + predicate="within" + )["line_id"] + + logger.info(f"Removed {len(gdf)} lines within substations of original {len(gdf_lines)} lines.") + gdf_lines = gdf_lines[~gdf_lines["line_id"].isin(gdf)] + + return gdf_lines + + if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake @@ -899,38 +1105,58 @@ def _import_substations(input_path_substations): snakemake = mock_snakemake("clean_osm_data") configure_logging(snakemake) + set_scenario_config(snakemake) # Parameters crs = "EPSG:4326" # Correct crs for OSM data voltage_min = 200000 # [unit: V] Minimum voltage value to filter lines. 
- + # TODO pypsa-eur: Temporary solution as one AC line between converters will # create an error in simplify_network: lines_to_drop = ["775580659"] + logger.info("---") + logger.info("SUBSTATIONS") # Input - input_path_substations = { + path_substations = { "substations_way": snakemake.input.substations_way, "substations_relation": snakemake.input.substations_relation, } - input_path_lines_cables = { + # Cleaning process + df_substations = _import_substations(path_substations) + df_substations["voltage"] = _clean_voltage(df_substations["voltage"]) + df_substations, list_voltages = _filter_by_voltage(df_substations, voltage_min=voltage_min) + df_substations["frequency"] = _clean_frequency(df_substations["frequency"]) + df_substations = _clean_substations(df_substations, list_voltages) + df_substations = _create_substations_geometry(df_substations) + df_substations = _finalise_substations(df_substations) + + # Create polygon GeoDataFrame to remove lines within substations + gdf_substations_polygon = gpd.GeoDataFrame( + df_substations[["bus_id", "polygon", "voltage"]], + geometry = "polygon", + crs = crs, + ) + + logger.info("---") + logger.info("LINES AND CABLES") + path_lines = { "lines": snakemake.input.lines_way, "cables": snakemake.input.cables_way, } # Cleaning process - df_lines = _import_lines_and_cables(input_path_lines_cables) + df_lines = _import_lines_and_cables(path_lines) df_lines = _drop_duplicate_lines(df_lines) df_lines.loc[:, "voltage"] = _clean_voltage(df_lines["voltage"]) - df_lines, list_voltages = _filter_lines_by_voltage(df_lines, voltage_min=voltage_min) - + df_lines, list_voltages = _filter_by_voltage(df_lines, voltage_min=voltage_min) df_lines.loc[:, "circuits"] = _clean_circuits(df_lines["circuits"]) df_lines.loc[:, "cables"] = _clean_cables(df_lines["cables"]) df_lines.loc[:, "frequency"] = _clean_frequency(df_lines["frequency"]) df_lines.loc[:, "wires"] = _clean_wires(df_lines["wires"]) - - df_lines = _clean_lines(df_lines) + df_lines = 
_clean_lines(df_lines, list_voltages) + df_lines = _create_lines_geometry(df_lines) df_lines = _finalise_lines(df_lines) # Dropping specific lines, manually @@ -939,165 +1165,36 @@ def _import_substations(input_path_substations): # Create GeoDataFrame gdf_lines = gpd.GeoDataFrame(df_lines, geometry = "geometry", crs = crs) - - ############# BUSES / SUBSTATIONS ###################### - df_substations = _import_substations(input_path_substations) - - - # Create centroids from geometries - df_substations.loc[:, "polygon"] = df_substations["geometry"] - df_substations.loc[:, "geometry"] = df_substations["geometry"].apply(lambda x: x.centroid) - df_substations.loc[:, "lon"] = df_substations["geometry"].apply(lambda x: x.x) - df_substations.loc[:, "lat"] = df_substations["geometry"].apply(lambda x: x.y) - - logger.info("Cleaning substations") - # Clean columns - df_substations["voltage"] = _clean_voltage(df_substations["voltage"]) - df_substations["frequency"] = _clean_frequency(df_substations["frequency"]) - df_substations["frequency"] = df_substations["frequency"].astype(str, errors="ignore") - - list_voltages = df_substations["voltage"].str.split(";").explode().unique().astype(str) - list_voltages = list_voltages[np.vectorize(len)(list_voltages) >= 6] - list_voltages = list_voltages[~np.char.startswith(list_voltages, '1')] - - bool_voltages = df_substations["voltage"].apply(_check_voltage, list_voltages=list_voltages) - df_substations = df_substations[bool_voltages] - - df_substations = _split_cells(df_substations) - bool_voltages = df_substations["voltage"].apply(_check_voltage, list_voltages=list_voltages) - df_substations = df_substations[bool_voltages] - df_substations["split_count"] = df_substations["id"].apply(lambda x: x.split("-")[1] if "-" in x else "0") - df_substations["split_count"] = df_substations["split_count"].astype(int) - - bool_split = df_substations["split_elements"] > 1 - bool_frequency_len = df_substations["frequency"].apply(lambda x: 
len(x.split(";"))) == df_substations["split_elements"] - df_substations.loc[bool_frequency_len & bool_split, "frequency"] = df_substations.loc[bool_frequency_len & bool_split, "frequency"] \ - - op_freq = lambda row: row["frequency"].split(";")[row["split_count"]-1] - - df_substations.loc[bool_frequency_len & bool_split, ["frequency"]] = df_substations.loc[bool_frequency_len & bool_split, ] \ - .apply(op_freq, axis=1) - - df_substations = _split_cells(df_substations, cols=["frequency"]) - bool_invalid_frequency = df_substations["frequency"].apply(lambda x: x not in ["50", "0"]) - df_substations.loc[bool_invalid_frequency, "frequency"] = "50" - df_substations["power"] = "substation" - df_substations["substation"] = "transmission" - df_substations["dc"] = False - df_substations.loc[df_substations["frequency"] == "0", "dc"] = True - df_substations["under_construction"] = False - df_substations["station_id"] = None - df_substations["tag_area"] = None - df_substations["tag_source"] = df_substations["id"] - - gdf_substations_polygon = gpd.GeoDataFrame( - df_substations[["id", "polygon"]], - geometry = "polygon", - crs = "EPSG:4326" + gdf_lines = _remove_lines_within_substations(gdf_lines, gdf_substations_polygon) + + # Add line endings to substations + path_country_shapes = snakemake.input.country_shapes + path_offshore_shapes = snakemake.input.offshore_shapes + df_substations = _add_line_endings_to_substations( + df_substations, + gdf_lines, + path_country_shapes, + path_offshore_shapes, ) - filepath_substations_polygon = snakemake.output["substations_polygon"] - # save substations output - logger.info(f"Exporting clean substations with polygon shapes to {filepath_substations_polygon}") - parentfolder_substations_polygon = os.path.dirname(filepath_substations_polygon) - if not os.path.exists(parentfolder_substations_polygon): - # Create the folder and its parent directories if they don't exist - os.makedirs(parentfolder_substations_polygon) - - logger.info(f"Exporting 
clean substations to {filepath_substations_polygon}") - gdf_substations_polygon.to_file(filepath_substations_polygon, driver="GeoJSON") - - - logger.info("Identifying and removing lines within substation polygons...") - lines_within_substations = gpd.sjoin( - gdf_lines[["line_id", "geometry"]], - gdf_substations_polygon, - how = "inner", - predicate = "within" - )["line_id"] - - logger.info(f"Removed {len(lines_within_substations)}/{len(gdf_lines)} lines within substations.") - gdf_lines = gdf_lines[~gdf_lines["line_id"].isin(lines_within_substations)] - - # # Create an empty list to store the results - # results = [] - - # subset a to find only country equal to "BE" - # a[a["country"] == "BE"] - - # logger.info("Identifying and removing lines within substation polygons...") - # for index, row in tqdm(gdf_lines.iterrows(), total=len(gdf_lines)): - # line = row['geometry'] - # # Check if the LineString is within any Polygon in 'substations_df' - # is_within_any_substation = any(line.within(substation_polygon) for substation_polygon in df_substations["polygon"]) - # results.append(is_within_any_substation) - - # # Add the results to 'gdf_lines' - # gdf_lines['within_substation'] = results - - # gdf_lines = gdf_lines[~gdf_lines["within_substation"]] - # logger.info(f"Removed {sum(results)} lines within substations.") - - filepath_lines = snakemake.output["lines"] - # save substations output - logger.info(f"Exporting clean lines to {filepath_lines}") - parentfolder_lines = os.path.dirname(filepath_lines) - if not os.path.exists(parentfolder_lines): - # Create the folder and its parent directories if they don't exist - os.makedirs(parentfolder_lines) - - logger.info(f"Exporting clean lines to {filepath_lines}") - gdf_lines.to_file(filepath_lines, driver="GeoJSON") - - # rename columns - df_substations.rename( - columns={ - "id": "bus_id", - "power": "symbol", - "substation":"tag_substation", - }, inplace=True) - - df_substations = df_substations[[ - "bus_id", - 
"symbol", - "tag_substation", - "voltage", - "lon", - "lat", - "dc", - "under_construction", - "station_id", - "tag_area", - "country", - "geometry", - "tag_source", - ]] - - df_substations["bus_id"] = df_substations.index - - logger.info("Adding line endings to substations") - df_substations = add_line_endings_tosubstations( - df_substations, gdf_lines - ) - - #group gdf_substations by voltage and and geometry (dropping duplicates) - df_substations = df_substations.groupby(["voltage", "lon", "lat", "dc", "tag_source"]).first().reset_index() - df_substations["bus_id"] = df_substations.index - - gdf_substations = gpd.GeoDataFrame(df_substations, geometry = "geometry", crs = "EPSG:4326") - - # Substation data types - gdf_substations["bus_id"] = gdf_substations["bus_id"].astype(int) - gdf_substations["voltage"] = gdf_substations["voltage"].astype(int) - - filepath_substations = snakemake.output["substations"] - # save substations output - logger.info(f"Exporting clean substations to {filepath_substations}") - parentfolder_substations = os.path.dirname(filepath_substations) - if not os.path.exists(parentfolder_substations): - # Create the folder and its parent directories if they don't exist - os.makedirs(parentfolder_substations) - - logger.info(f"Exporting clean substations to {filepath_substations}") - gdf_substations.to_file(filepath_substations, driver="GeoJSON") + # Drop polygons and create GDF + gdf_substations = gpd.GeoDataFrame(df_substations.drop(columns=["polygon"]), + geometry = "geometry", crs = crs) + + # Export GeoDataFrames to GeoJSON in specified output paths + parentfolder = os.path.dirname(snakemake.output.substations) + if not os.path.exists(parentfolder): + os.makedirs(parentfolder) + output_substations_polygon = snakemake.output["substations_polygon"] + output_substations = snakemake.output["substations"] + output_lines = snakemake.output["lines"] + + logger.info(f"Exporting clean substations with polygon shapes to {output_substations_polygon}") + 
gdf_substations_polygon.to_file(output_substations_polygon, driver="GeoJSON") + logger.info(f"Exporting clean substations to {output_substations}") + gdf_substations.to_file(output_substations, driver="GeoJSON") + logger.info(f"Exporting clean lines to {output_lines}") + gdf_lines.to_file(output_lines, driver="GeoJSON") + + logger.info("Cleaning OSM data completed.") \ No newline at end of file diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py index bab645a48..0ad9743e4 100644 --- a/scripts/retrieve_osm_data.py +++ b/scripts/retrieve_osm_data.py @@ -92,7 +92,6 @@ def retrieve_osm_data( "cables_way", "lines_way", "substations_way", - "substations_node", "substations_relation", ]): """ @@ -111,7 +110,6 @@ def retrieve_osm_data( "cables_way", "lines_way", "substations_way", - "substations_node", "substations_relation", ]. """ @@ -136,7 +134,6 @@ def retrieve_osm_data( 'cables_way': 'way["power"="cable"]', 'lines_way': 'way["power"="line"]', 'substations_way': 'way["power"="substation"]', - 'substations_node': 'node["power"="substation"]', 'substations_relation': 'relation["power"="substation"]', } From da94a964c74ac266d3502f7956f2c27f60f7484b Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Wed, 22 May 2024 09:12:28 +0200 Subject: [PATCH 020/100] Cleaned config. 
--- config/config_backup.yaml | 1181 ++++++++++++++++++++++++++++++++++ rules/build_electricity.smk | 44 +- scripts/build_osm_network.py | 15 +- scripts/solve_network.py | 6 +- 4 files changed, 1214 insertions(+), 32 deletions(-) create mode 100644 config/config_backup.yaml diff --git a/config/config_backup.yaml b/config/config_backup.yaml new file mode 100644 index 000000000..2bcaf173c --- /dev/null +++ b/config/config_backup.yaml @@ -0,0 +1,1181 @@ +# SPDX-FileCopyrightText: : 2017-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: CC0-1.0 + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#top-level-configuration +version: 0.10.0 +tutorial: false + +logging: + level: INFO + format: '%(levelname)s:%(name)s:%(message)s' + +private: + keys: + entsoe_api: + +remote: + ssh: "z1" + path: "~/scratch/projects/pypsa-eur" + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#run +run: + prefix: "" + # name: "test-europe1-gridkit" + name: "test-begb-gridkit" + scenarios: + enable: false + file: config/scenarios.yaml + disable_progressbar: false + shared_resources: + policy: false + exclude: [] + shared_cutouts: true + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#foresight +foresight: overnight + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#scenario +# Wildcard docs in https://pypsa-eur.readthedocs.io/en/latest/wildcards.html +scenario: + simpl: + - '' + ll: + - v1.0 # TODO mit und ohne Netzausbau v1.0 + clusters: + - 40 + # - 128 + # - 256 + # - 512 + # # - 1024 + opts: + - 'Co2L0-169H' + sector_opts: + - '' + planning_horizons: + # - 2020 + # - 2030 + # - 2040 + - 2050 + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#countries +countries: ["BE", "GB"] +# countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 
'RS', 'SE', 'SI', 'SK']
+# countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MD', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK', 'UA']
+
+# Settings related to the high-voltage electricity grid
+electricity_network:
+  base_network: "gridkit" # "osm" or "gridkit"
+  build_osm_network: true # If 'true', the network will be built from scratch (retrieving OSM data, cleaning, and building) and stored under resources, 'false' will use snapshots in data/osm
+
+build_osm_network: # Options of the build_osm_network script; osm = OpenStreetMap
+  group_tolerance_buses: 5000 # [m] (default 5000) Tolerance in meters of the close buses to merge
+  split_overpassing_lines: false # When True, lines overpassing buses are split and connected to the buses
+  overpassing_lines_tolerance: 1 # [m] (default 1) Tolerance to identify lines overpassing buses
+  force_ac: false # When true, it forces all components (lines and substation) to be AC-only. To be used if DC assets create problems.
+ +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#snapshots +snapshots: + start: "2013-01-01" + end: "2014-01-01" + inclusive: 'left' + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#enable +enable: + retrieve: true + prepare_links_p_nom: false + retrieve_databundle: true + retrieve_sector_databundle: true + retrieve_cost_data: true + build_cutout: false + retrieve_irena: false + retrieve_cutout: true + build_natura_raster: false + retrieve_natura_raster: true + custom_busmap: false + drop_leap_day: true + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#co2-budget +co2_budget: + 2020: 0.701 + 2025: 0.524 + 2030: 0.297 + 2035: 0.150 + 2040: 0.071 + 2045: 0.032 + 2050: 0.000 + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity +electricity: + voltages: [200., 220., 300., 380., 400., 500., 750.] + gaslimit_enable: false + gaslimit: false + co2limit_enable: false + co2limit: 7.75e+7 + co2base: 1.487e+9 + agg_p_nom_limits: data/agg_p_nom_minmax.csv + + operational_reserve: + activate: false + epsilon_load: 0.02 + epsilon_vres: 0.02 + contingency: 4000 + + max_hours: + battery: 6 + H2: 168 + + extendable_carriers: + Generator: [solar, onwind, offwind-ac, offwind-dc, OCGT] + StorageUnit: [] # battery, H2 + Store: [battery, H2] + Link: [] # H2 pipeline + + powerplants_filter: (DateOut >= 2023 or DateOut != DateOut) and not (Country == 'Germany' and Fueltype == 'Nuclear') + custom_powerplants: false + everywhere_powerplants: [nuclear, oil, OCGT, CCGT, coal, lignite, geothermal, biomass] + + conventional_carriers: [nuclear, oil, OCGT, CCGT, coal, lignite, geothermal, biomass] + renewable_carriers: [solar, onwind, offwind-ac, offwind-dc, hydro] # hydro removed + + estimate_renewable_capacities: + enable: true + from_opsd: true + year: 2020 + expansion_limit: false + technology_mapping: + Offshore: [offwind-ac, offwind-dc] + Onshore: [onwind] + PV: [solar] + + autarky: 
+ enable: false + by_country: false + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#atlite +atlite: + default_cutout: europe-2013-era5 + nprocesses: 4 + show_progress: false + cutouts: + # use 'base' to determine geographical bounds and time span from config + # base: + # module: era5 + europe-2013-era5: + module: era5 # in priority order + x: [-12., 42.] + y: [33., 72] + dx: 0.3 + dy: 0.3 + time: ['2013', '2013'] + europe-2013-sarah: + module: [sarah, era5] # in priority order + x: [-12., 42.] + y: [33., 65] + dx: 0.2 + dy: 0.2 + time: ['2013', '2013'] + sarah_interpolate: false + sarah_dir: + features: [influx, temperature] + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#renewable +renewable: + onwind: + cutout: europe-2013-era5 + resource: + method: wind + turbine: Vestas_V112_3MW + add_cutout_windspeed: true + capacity_per_sqkm: 3 + # correction_factor: 0.93 + corine: + grid_codes: [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32] + distance: 1000 + distance_grid_codes: [1, 2, 3, 4, 5, 6] + luisa: false + # grid_codes: [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242] + # distance: 1000 + # distance_grid_codes: [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242] + natura: true + excluder_resolution: 100 + clip_p_max_pu: 1.e-2 + offwind-ac: + cutout: europe-2013-era5 + resource: + method: wind + turbine: NREL_ReferenceTurbine_2020ATB_5.5MW + add_cutout_windspeed: true + capacity_per_sqkm: 2 + correction_factor: 0.8855 + corine: [44, 255] + luisa: false # [0, 5230] + natura: true + ship_threshold: 400 + max_depth: 50 + max_shore_distance: 30000 + excluder_resolution: 200 + clip_p_max_pu: 1.e-2 + offwind-dc: + cutout: europe-2013-era5 + resource: + method: wind + turbine: NREL_ReferenceTurbine_2020ATB_5.5MW + add_cutout_windspeed: true + capacity_per_sqkm: 2 + correction_factor: 0.8855 + corine: [44, 255] + luisa: false # [0, 5230] + natura: true + 
ship_threshold: 400 + max_depth: 50 + min_shore_distance: 30000 + excluder_resolution: 200 + clip_p_max_pu: 1.e-2 + solar: + cutout: europe-2013-sarah + resource: + method: pv + panel: CSi + orientation: + slope: 35. + azimuth: 180. + capacity_per_sqkm: 5.1 + # correction_factor: 0.854337 + corine: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 26, 31, 32] + luisa: false # [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242, 1310, 1320, 1330, 1410, 1421, 1422, 2110, 2120, 2130, 2210, 2220, 2230, 2310, 2410, 2420, 3210, 3320, 3330] + natura: true + excluder_resolution: 100 + clip_p_max_pu: 1.e-2 + hydro: + cutout: europe-2013-era5 + carriers: [ror, PHS, hydro] + PHS_max_hours: 6 + hydro_max_hours: "energy_capacity_totals_by_country" # one of energy_capacity_totals_by_country, estimate_by_large_installations or a float + flatten_dispatch: false + flatten_dispatch_buffer: 0.2 + clip_min_inflow: 1.0 + eia_norm_year: false + eia_correct_by_capacity: false + eia_approximate_missing: false + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#conventional +conventional: + unit_commitment: false + dynamic_fuel_price: false + nuclear: + p_max_pu: "data/nuclear_p_max_pu.csv" # float of file name + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#lines +lines: + types: + 200.: "Al/St 240/40 2-bundle 220.0" + 220.: "Al/St 240/40 2-bundle 220.0" + 300.: "Al/St 240/40 3-bundle 300.0" + 380.: "Al/St 240/40 4-bundle 380.0" + 400.: "Al/St 240/40 4-bundle 380.0" + 500.: "Al/St 240/40 4-bundle 380.0" + 750.: "Al/St 560/50 4-bundle 750.0" + dc_types: # setting only for osm + 200.: "HVDC XLPE 1000" + 220.: "HVDC XLPE 1000" + 300.: "HVDC XLPE 1000" + 750.: "HVDC XLPE 1000" + 380.: "HVDC XLPE 1000" + 400.: "HVDC XLPE 1000" + 500.: "HVDC XLPE 1000" + s_max_pu: 0.7 + s_nom_max: .inf + max_extension: 20000 #MW + length_factor: 1.25 + reconnect_crimea: true + under_construction: 'zero' # 'zero': set capacity 
to zero, 'remove': remove, 'keep': with full capacity + dynamic_line_rating: + activate: false + cutout: europe-2013-era5 + correction_factor: 0.95 + max_voltage_difference: false + max_line_rating: false + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#links +links: + p_max_pu: 1.0 + p_nom_max: .inf + max_extension: 30000 #MW + include_tyndp: false + under_construction: 'zero' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#transformers +transformers: + x: 0.1 + s_nom: 2000. + type: '' + +# docs-load in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#load +load: + interpolate_limit: 3 + time_shift_for_large_gaps: 1w + manual_adjustments: true # false + scaling_factor: 1.0 + fixed_year: false # false or year (e.g. 2013) + supplement_synthetic: true + +# docs +# TODO: PyPSA-Eur merge issue in prepare_sector_network.py +# regulate what components with which carriers are kept from PyPSA-Eur; +# some technologies are removed because they are implemented differently +# (e.g. 
battery or H2 storage) or have different year-dependent costs +# in PyPSA-Eur-Sec +pypsa_eur: + Bus: + - AC + Link: + - DC + Generator: + - onwind + - offwind-ac + - offwind-dc + - solar + - ror + - nuclear + StorageUnit: + - PHS + - hydro + Store: [] + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#energy +energy: + energy_totals_year: 2019 + base_emissions_year: 1990 + emissions: CO2 + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#biomass +biomass: + year: 2030 + scenario: ENS_Med + classes: + solid biomass: + - Agricultural waste + - Fuelwood residues + - Secondary Forestry residues - woodchips + - Sawdust + - Residues from landscape care + - Municipal waste + not included: + - Sugar from sugar beet + - Rape seed + - "Sunflower, soya seed " + - Bioethanol barley, wheat, grain maize, oats, other cereals and rye + - Miscanthus, switchgrass, RCG + - Willow + - Poplar + - FuelwoodRW + - C&P_RW + biogas: + - Manure solid, liquid + - Sludge + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#solar-thermal +solar_thermal: + clearsky_model: simple # should be "simple" or "enhanced"? + orientation: + slope: 45. + azimuth: 180. 
+ cutout: default + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#existing-capacities +existing_capacities: + grouping_years_power: [1895, 1920, 1950, 1955, 1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020, 2025, 2030] + grouping_years_heat: [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020] # heat grouping years >= baseyear will be ignored + threshold_capacity: 10 + default_heating_lifetime: 20 + conventional_carriers: + - lignite + - coal + - oil + - uranium + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#sector +sector: + transport: true + heating: true + biomass: true + industry: true + agriculture: true + district_heating: + potential: 0.6 + progress: + 2020: 0.0 + 2025: 0.15 + 2030: 0.3 + 2035: 0.45 + 2040: 0.6 + 2045: 0.8 + 2050: 1.0 + district_heating_loss: 0.15 + cluster_heat_buses: true + heat_demand_cutout: default + bev_dsm_restriction_value: 0.75 + bev_dsm_restriction_time: 7 + transport_heating_deadband_upper: 20. + transport_heating_deadband_lower: 15. 
+ ICE_lower_degree_factor: 0.375 + ICE_upper_degree_factor: 1.6 + EV_lower_degree_factor: 0.98 + EV_upper_degree_factor: 0.63 + bev_dsm: true + bev_availability: 0.5 + bev_energy: 0.05 + bev_charge_efficiency: 0.9 + bev_plug_to_wheel_efficiency: 0.2 + bev_charge_rate: 0.011 + bev_avail_max: 0.95 + bev_avail_mean: 0.8 + v2g: true + land_transport_fuel_cell_share: + 2020: 0 + 2025: 0 + 2030: 0 + 2035: 0 + 2040: 0 + 2045: 0 + 2050: 0 + land_transport_electric_share: + 2020: 0 + 2025: 0.15 + 2030: 0.3 + 2035: 0.45 + 2040: 0.7 + 2045: 0.85 + 2050: 1 + land_transport_ice_share: + 2020: 1 + 2025: 0.85 + 2030: 0.7 + 2035: 0.55 + 2040: 0.3 + 2045: 0.15 + 2050: 0 + transport_fuel_cell_efficiency: 0.5 + transport_internal_combustion_efficiency: 0.3 + agriculture_machinery_electric_share: 0 + agriculture_machinery_oil_share: 1 + agriculture_machinery_fuel_efficiency: 0.7 + agriculture_machinery_electric_efficiency: 0.3 + MWh_MeOH_per_MWh_H2: 0.8787 + MWh_MeOH_per_tCO2: 4.0321 + MWh_MeOH_per_MWh_e: 3.6907 + shipping_hydrogen_liquefaction: false + shipping_hydrogen_share: + 2020: 0 + 2025: 0 + 2030: 0 + 2035: 0 + 2040: 0 + 2045: 0 + 2050: 0 + shipping_methanol_share: + 2020: 0 + 2025: 0.15 + 2030: 0.3 + 2035: 0.5 + 2040: 0.7 + 2045: 0.85 + 2050: 1 + shipping_oil_share: + 2020: 1 + 2025: 0.85 + 2030: 0.7 + 2035: 0.5 + 2040: 0.3 + 2045: 0.15 + 2050: 0 + shipping_methanol_efficiency: 0.46 + shipping_oil_efficiency: 0.40 + aviation_demand_factor: 1. + HVC_demand_factor: 1. + time_dep_hp_cop: true + heat_pump_sink_T: 55. 
+ reduce_space_heat_exogenously: true + reduce_space_heat_exogenously_factor: + 2020: 0.10 # this results in a space heat demand reduction of 10% + 2025: 0.09 # first heat demand increases compared to 2020 because of larger floor area per capita + 2030: 0.09 + 2035: 0.11 + 2040: 0.16 + 2045: 0.21 + 2050: 0.29 + retrofitting: + retro_endogen: false + cost_factor: 1.0 + interest_rate: 0.04 + annualise_cost: true + tax_weighting: false + construction_index: true + tes: true + tes_tau: + decentral: 3 + central: 180 + boilers: true + resistive_heaters: true + oil_boilers: false + biomass_boiler: true + overdimension_individual_heating: 1.1 #to cover demand peaks bigger than data + chp: true + micro_chp: false + solar_thermal: true + solar_cf_correction: 0.788457 # = >>> 1/1.2683 + marginal_cost_storage: 0. #1e-4 + methanation: true + coal_cc: false + dac: true + co2_vent: false + central_heat_vent: false + allam_cycle: false + hydrogen_fuel_cell: true + hydrogen_turbine: false + SMR: true + SMR_cc: true + regional_methanol_demand: false + regional_oil_demand: false + regional_coal_demand: false + regional_co2_sequestration_potential: + enable: false + attribute: + - conservative estimate Mt + - conservative estimate GAS Mt + - conservative estimate OIL Mt + - conservative estimate aquifer Mt + include_onshore: false + min_size: 3 + max_size: 25 + years_of_storage: 25 + co2_sequestration_potential: 200 + co2_sequestration_cost: 10 + co2_sequestration_lifetime: 50 + co2_spatial: false + co2network: false + co2_network_cost_factor: 1 + cc_fraction: 0.9 + hydrogen_underground_storage: true + hydrogen_underground_storage_locations: + # - onshore # more than 50 km from sea + - nearshore # within 50 km of sea + # - offshore + ammonia: false + min_part_load_fischer_tropsch: 0.7 + min_part_load_methanolisation: 0.3 + min_part_load_methanation: 0.3 + use_fischer_tropsch_waste_heat: true + use_haber_bosch_waste_heat: true + use_methanolisation_waste_heat: true + 
use_methanation_waste_heat: true + use_fuel_cell_waste_heat: true + use_electrolysis_waste_heat: true + electricity_transmission_grid: true + electricity_distribution_grid: true + electricity_distribution_grid_cost_factor: 1.0 + electricity_grid_connection: true + transmission_efficiency: + DC: + efficiency_static: 0.98 + efficiency_per_1000km: 0.977 + H2 pipeline: + efficiency_per_1000km: 1 # 0.982 + compression_per_1000km: 0.018 + gas pipeline: + efficiency_per_1000km: 1 #0.977 + compression_per_1000km: 0.01 + H2_network: true + gas_network: false + H2_retrofit: false + H2_retrofit_capacity_per_CH4: 0.6 + gas_network_connectivity_upgrade: 1 + gas_distribution_grid: true + gas_distribution_grid_cost_factor: 1.0 + biomass_spatial: false + biomass_transport: false + biogas_upgrading_cc: false + conventional_generation: + OCGT: gas + biomass_to_liquid: false + biosng: false + limit_max_growth: + enable: false + # allowing 30% larger than max historic growth + factor: 1.3 + max_growth: # unit GW + onwind: 16 # onshore max grow so far 16 GW in Europe https://www.iea.org/reports/renewables-2020/wind + solar: 28 # solar max grow so far 28 GW in Europe https://www.iea.org/reports/renewables-2020/solar-pv + offwind-ac: 35 # offshore max grow so far 3.5 GW in Europe https://windeurope.org/about-wind/statistics/offshore/european-offshore-wind-industry-key-trends-statistics-2019/ + offwind-dc: 35 + max_relative_growth: + onwind: 3 + solar: 3 + offwind-ac: 3 + offwind-dc: 3 + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#industry +industry: + St_primary_fraction: + 2020: 0.6 + 2025: 0.55 + 2030: 0.5 + 2035: 0.45 + 2040: 0.4 + 2045: 0.35 + 2050: 0.3 + DRI_fraction: + 2020: 0 + 2025: 0 + 2030: 0.05 + 2035: 0.2 + 2040: 0.4 + 2045: 0.7 + 2050: 1 + H2_DRI: 1.7 + elec_DRI: 0.322 + Al_primary_fraction: + 2020: 0.4 + 2025: 0.375 + 2030: 0.35 + 2035: 0.325 + 2040: 0.3 + 2045: 0.25 + 2050: 0.2 + MWh_NH3_per_tNH3: 5.166 + MWh_CH4_per_tNH3_SMR: 10.8 + 
MWh_elec_per_tNH3_SMR: 0.7 + MWh_H2_per_tNH3_electrolysis: 5.93 + MWh_elec_per_tNH3_electrolysis: 0.2473 + MWh_NH3_per_MWh_H2_cracker: 1.46 # https://github.com/euronion/trace/blob/44a5ff8401762edbef80eff9cfe5a47c8d3c8be4/data/efficiencies.csv + NH3_process_emissions: 24.5 + petrochemical_process_emissions: 25.5 + #HVC primary/recycling based on values used in Neumann et al https://doi.org/10.1016/j.joule.2023.06.016, linearly interpolated between 2020 and 2050 + #2020 recycling rates based on Agora https://static.agora-energiewende.de/fileadmin/Projekte/2021/2021_02_EU_CEAP/A-EW_254_Mobilising-circular-economy_study_WEB.pdf + #fractions refer to the total primary HVC production in 2020 + #assumes 6.7 Mtplastics produced from recycling in 2020 + HVC_primary_fraction: + 2020: 1.0 + 2025: 0.9 + 2030: 0.8 + 2035: 0.7 + 2040: 0.6 + 2045: 0.5 + 2050: 0.4 + HVC_mechanical_recycling_fraction: + 2020: 0.12 + 2025: 0.15 + 2030: 0.18 + 2035: 0.21 + 2040: 0.24 + 2045: 0.27 + 2050: 0.30 + HVC_chemical_recycling_fraction: + 2020: 0.0 + 2025: 0.0 + 2030: 0.04 + 2035: 0.08 + 2040: 0.12 + 2045: 0.16 + 2050: 0.20 + sector_ratios_fraction_future: + 2020: 0.0 + 2025: 0.1 + 2030: 0.3 + 2035: 0.5 + 2040: 0.7 + 2045: 0.9 + 2050: 1.0 + basic_chemicals_without_NH3_production_today: 69. #Mt/a, = 86 Mtethylene-equiv - 17 MtNH3 + HVC_production_today: 52. 
+ MWh_elec_per_tHVC_mechanical_recycling: 0.547 + MWh_elec_per_tHVC_chemical_recycling: 6.9 + chlorine_production_today: 9.58 + MWh_elec_per_tCl: 3.6 + MWh_H2_per_tCl: -0.9372 + methanol_production_today: 1.5 + MWh_elec_per_tMeOH: 0.167 + MWh_CH4_per_tMeOH: 10.25 + hotmaps_locate_missing: false + reference_year: 2015 + + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#costs +costs: + year: 2030 + version: v0.8.1 + rooftop_share: 0.14 # based on the potentials, assuming (0.1 kW/m2 and 10 m2/person) + social_discountrate: 0.02 + fill_values: + FOM: 0 + VOM: 0 + efficiency: 1 + fuel: 0 + investment: 0 + lifetime: 25 + "CO2 intensity": 0 + "discount rate": 0.07 + # Marginal and capital costs can be overwritten + # capital_cost: + # onwind: 500 + marginal_cost: + solar: 0.01 + onwind: 0.015 + offwind: 0.015 + hydro: 0. + H2: 0. + electrolysis: 0. + fuel cell: 0. + battery: 0. + battery inverter: 0. + emission_prices: + enable: false + co2: 0. + co2_monthly_prices: false + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#clustering +clustering: + focus_weights: false + simplify_network: + to_substations: false + algorithm: kmeans # choose from: [hac, kmeans] + feature: solar+onwind-time + exclude_carriers: [] + remove_stubs: true + remove_stubs_across_borders: true + cluster_network: + algorithm: kmeans + feature: solar+onwind-time + exclude_carriers: [] + consider_efficiency_classes: false + aggregation_strategies: + generators: + committable: any + ramp_limit_up: max + ramp_limit_down: max + temporal: + resolution_elec: 169H + resolution_sector: 169H + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#adjustments +adjustments: + electricity: false + sector: false + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#solving +solving: + #tmpdir: "path/to/tmp" + options: + clip_p_max_pu: 1.e-2 + load_shedding: false + noisy_costs: true + skip_iterations: true + rolling_horizon: 
false + seed: 123 + custom_extra_functionality: "../data/custom_extra_functionality.py" + # io_api: "direct" # Increases performance but only supported for the highs and gurobi solvers + # options that go into the optimize function + track_iterations: false + min_iterations: 4 + max_iterations: 6 + transmission_losses: 2 + linearized_unit_commitment: true + horizon: 365 + + constraints: + CCL: false + EQ: false + BAU: false + SAFE: false + + solver: + name: gurobi + options: gurobi-default + + solver_options: + highs-default: + # refer to https://ergo-code.github.io/HiGHS/options/definitions.html#solver + threads: 4 + solver: "ipm" + run_crossover: "off" + small_matrix_value: 1e-6 + large_matrix_value: 1e9 + primal_feasibility_tolerance: 1e-5 + dual_feasibility_tolerance: 1e-5 + ipm_optimality_tolerance: 1e-4 + parallel: "on" + random_seed: 123 + gurobi-default: + threads: 4 + method: 2 # barrier + crossover: 0 + BarConvTol: 1.e-6 + Seed: 123 + AggFill: 0 + PreDual: 0 + GURO_PAR_BARDENSETHRESH: 200 + gurobi-numeric-focus: + NumericFocus: 3 # Favour numeric stability over speed + method: 2 # barrier + crossover: 0 # do not use crossover + BarHomogeneous: 1 # Use homogeneous barrier if standard does not converge + BarConvTol: 1.e-5 + FeasibilityTol: 1.e-4 + OptimalityTol: 1.e-4 + ObjScale: -0.5 + threads: 8 + Seed: 123 + gurobi-fallback: # Use gurobi defaults + crossover: 0 + method: 2 # barrier + BarHomogeneous: 1 # Use homogeneous barrier if standard does not converge + BarConvTol: 1.e-5 + FeasibilityTol: 1.e-5 + OptimalityTol: 1.e-5 + Seed: 123 + threads: 8 + cplex-default: + threads: 4 + lpmethod: 4 # barrier + solutiontype: 2 # non basic solution, ie no crossover + barrier.convergetol: 1.e-5 + feasopt.tolerance: 1.e-6 + copt-default: + Threads: 8 + LpMethod: 2 + Crossover: 0 + cbc-default: {} # Used in CI + glpk-default: {} # Used in CI + + mem_mb: 100000 #memory in MB; 20 GB enough for 50+B+I+H2; 100 GB for 181+B+I+H2 + runtime: 12h #runtime in humanfriendly 
style https://humanfriendly.readthedocs.io/en/latest/ + + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#plotting + +plotting: + map: + boundaries: + eu_node_location: + x: -5.5 + y: 46. + # costs_max: 1000 + # costs_threshold: 0.0000001 + # energy_max: + # energy_min: + # energy_threshold: 0.000001 + projection: + name: "EqualEarth" + # See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html for alternatives, for example: + # name: "LambertConformal" + # central_longitude: 10. + # central_latitude: 50. + # standard_parallels: [35, 65] + +# plotting: +# map: +# boundaries: [-11, 30, 34, 71] +# color_geomap: +# ocean: white +# land: white +# projection: +# name: "EqualEarth" +# # See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html for alternatives, for example: +# # name: "LambertConformal" +# # central_longitude: 10. +# # central_latitude: 50. +# # standard_parallels: [35, 65] +# eu_node_location: +# x: -5.5 +# y: 46. +# costs_max: 1000 +# costs_threshold: 1 +# energy_max: 20000 +# energy_min: -20000 +# energy_threshold: 50. 
+ + nice_names: + OCGT: "Open-Cycle Gas" + CCGT: "Combined-Cycle Gas" + offwind-ac: "Offshore Wind (AC)" + offwind-dc: "Offshore Wind (DC)" + onwind: "Onshore Wind" + solar: "Solar" + PHS: "Pumped Hydro Storage" + hydro: "Reservoir & Dam" + battery: "Battery Storage" + H2: "Hydrogen Storage" + lines: "Transmission Lines" + ror: "Run of River" + load: "Load Shedding" + ac: "AC" + dc: "DC" + + tech_colors: + # wind + onwind: "#235ebc" + onshore wind: "#235ebc" + offwind: "#6895dd" + offshore wind: "#6895dd" + offwind-ac: "#6895dd" + offshore wind (AC): "#6895dd" + offshore wind ac: "#6895dd" + offwind-dc: "#74c6f2" + offshore wind (DC): "#74c6f2" + offshore wind dc: "#74c6f2" + # water + hydro: '#298c81' + hydro reservoir: '#298c81' + ror: '#3dbfb0' + run of river: '#3dbfb0' + hydroelectricity: '#298c81' + PHS: '#51dbcc' + hydro+PHS: "#08ad97" + # solar + solar: "#f9d002" + solar PV: "#f9d002" + solar thermal: '#ffbf2b' + residential rural solar thermal: '#f1c069' + services rural solar thermal: '#eabf61' + residential urban decentral solar thermal: '#e5bc5a' + services urban decentral solar thermal: '#dfb953' + urban central solar thermal: '#d7b24c' + solar rooftop: '#ffea80' + # gas + OCGT: '#e0986c' + OCGT marginal: '#e0986c' + OCGT-heat: '#e0986c' + gas boiler: '#db6a25' + gas boilers: '#db6a25' + gas boiler marginal: '#db6a25' + residential rural gas boiler: '#d4722e' + residential urban decentral gas boiler: '#cb7a36' + services rural gas boiler: '#c4813f' + services urban decentral gas boiler: '#ba8947' + urban central gas boiler: '#b0904f' + gas: '#e05b09' + fossil gas: '#e05b09' + natural gas: '#e05b09' + biogas to gas: '#e36311' + biogas to gas CC: '#e51245' + CCGT: '#a85522' + CCGT marginal: '#a85522' + allam: '#B98F76' + gas for industry co2 to atmosphere: '#692e0a' + gas for industry co2 to stored: '#8a3400' + gas for industry: '#853403' + gas for industry CC: '#692e0a' + gas pipeline: '#ebbca0' + gas pipeline new: '#a87c62' + # oil + oil: '#c9c9c9' + 
imported oil: '#a3a3a3' + oil boiler: '#adadad' + residential rural oil boiler: '#a9a9a9' + services rural oil boiler: '#a5a5a5' + residential urban decentral oil boiler: '#a1a1a1' + urban central oil boiler: '#9d9d9d' + services urban decentral oil boiler: '#999999' + agriculture machinery oil: '#949494' + shipping oil: "#808080" + land transport oil: '#afafaf' + # nuclear + Nuclear: '#ff8c00' + Nuclear marginal: '#ff8c00' + nuclear: '#ff8c00' + uranium: '#ff8c00' + # coal + Coal: '#545454' + coal: '#545454' + Coal marginal: '#545454' + coal for industry: '#343434' + solid: '#545454' + Lignite: '#826837' + lignite: '#826837' + Lignite marginal: '#826837' + # biomass + biogas: '#e3d37d' + biomass: '#baa741' + solid biomass: '#baa741' + solid biomass transport: '#baa741' + solid biomass for industry: '#7a6d26' + solid biomass for industry CC: '#47411c' + solid biomass for industry co2 from atmosphere: '#736412' + solid biomass for industry co2 to stored: '#47411c' + urban central solid biomass CHP: '#9d9042' + urban central solid biomass CHP CC: '#6c5d28' + biomass boiler: '#8A9A5B' + residential rural biomass boiler: '#a1a066' + residential urban decentral biomass boiler: '#b0b87b' + services rural biomass boiler: '#c6cf98' + services urban decentral biomass boiler: '#dde5b5' + biomass to liquid: '#32CD32' + BioSNG: '#123456' + # power transmission + lines: '#6c9459' + transmission lines: '#6c9459' + electricity distribution grid: '#97ad8c' + low voltage: '#97ad8c' + # electricity demand + Electric load: '#110d63' + electric demand: '#110d63' + electricity: '#110d63' + industry electricity: '#2d2a66' + industry new electricity: '#2d2a66' + agriculture electricity: '#494778' + # battery + EVs + battery: '#ace37f' + battery storage: '#ace37f' + battery charger: '#88a75b' + battery discharger: '#5d4e29' + home battery: '#80c944' + home battery storage: '#80c944' + home battery charger: '#5e8032' + home battery discharger: '#3c5221' + BEV charger: '#baf238' + V2G: 
'#e5ffa8' + land transport EV: '#baf238' + Li ion: '#baf238' + # hot water storage + water tanks: '#e69487' + residential rural water tanks: '#f7b7a3' + services rural water tanks: '#f3afa3' + residential urban decentral water tanks: '#f2b2a3' + services urban decentral water tanks: '#f1b4a4' + urban central water tanks: '#e9977d' + hot water storage: '#e69487' + hot water charging: '#e8998b' + urban central water tanks charger: '#b57a67' + residential rural water tanks charger: '#b4887c' + residential urban decentral water tanks charger: '#b39995' + services rural water tanks charger: '#b3abb0' + services urban decentral water tanks charger: '#b3becc' + hot water discharging: '#e99c8e' + urban central water tanks discharger: '#b9816e' + residential rural water tanks discharger: '#ba9685' + residential urban decentral water tanks discharger: '#baac9e' + services rural water tanks discharger: '#bbc2b8' + services urban decentral water tanks discharger: '#bdd8d3' + # heat demand + Heat load: '#cc1f1f' + heat: '#cc1f1f' + heat vent: '#aa3344' + heat demand: '#cc1f1f' + rural heat: '#ff5c5c' + residential rural heat: '#ff7c7c' + services rural heat: '#ff9c9c' + central heat: '#cc1f1f' + urban central heat: '#d15959' + urban central heat vent: '#a74747' + decentral heat: '#750606' + residential urban decentral heat: '#a33c3c' + services urban decentral heat: '#cc1f1f' + low-temperature heat for industry: '#8f2727' + process heat: '#ff0000' + agriculture heat: '#d9a5a5' + # heat supply + heat pumps: '#2fb537' + heat pump: '#2fb537' + air heat pump: '#36eb41' + residential urban decentral air heat pump: '#48f74f' + services urban decentral air heat pump: '#5af95d' + services rural air heat pump: '#5af95d' + urban central air heat pump: '#6cfb6b' + ground heat pump: '#2fb537' + residential rural ground heat pump: '#48f74f' + residential rural air heat pump: '#48f74f' + services rural ground heat pump: '#5af95d' + Ambient: '#98eb9d' + CHP: '#8a5751' + urban central gas CHP: 
'#8d5e56' + CHP CC: '#634643' + urban central gas CHP CC: '#6e4e4c' + CHP heat: '#8a5751' + CHP electric: '#8a5751' + district heating: '#e8beac' + resistive heater: '#d8f9b8' + residential rural resistive heater: '#bef5b5' + residential urban decentral resistive heater: '#b2f1a9' + services rural resistive heater: '#a5ed9d' + services urban decentral resistive heater: '#98e991' + urban central resistive heater: '#8cdf85' + retrofitting: '#8487e8' + building retrofitting: '#8487e8' + # hydrogen + H2 for industry: "#f073da" + H2 for shipping: "#ebaee0" + H2: '#bf13a0' + hydrogen: '#bf13a0' + retrofitted H2 boiler: '#e5a0d9' + SMR: '#870c71' + SMR CC: '#4f1745' + H2 liquefaction: '#d647bd' + hydrogen storage: '#bf13a0' + H2 Store: '#bf13a0' + H2 storage: '#bf13a0' + land transport fuel cell: '#6b3161' + H2 pipeline: '#f081dc' + H2 pipeline retrofitted: '#ba99b5' + H2 Fuel Cell: '#c251ae' + H2 fuel cell: '#c251ae' + H2 turbine: '#991f83' + H2 Electrolysis: '#ff29d9' + H2 electrolysis: '#ff29d9' + # ammonia + NH3: '#46caf0' + ammonia: '#46caf0' + ammonia store: '#00ace0' + ammonia cracker: '#87d0e6' + Haber-Bosch: '#076987' + # syngas + Sabatier: '#9850ad' + methanation: '#c44ce6' + methane: '#c44ce6' + # synfuels + Fischer-Tropsch: '#25c49a' + liquid: '#25c49a' + kerosene for aviation: '#a1ffe6' + naphtha for industry: '#57ebc4' + methanolisation: '#83d6d5' + methanol: '#468c8b' + shipping methanol: '#468c8b' + # co2 + CC: '#f29dae' + CCS: '#f29dae' + CO2 sequestration: '#f29dae' + DAC: '#ff5270' + co2 stored: '#f2385a' + co2 sequestered: '#f2682f' + co2: '#f29dae' + co2 vent: '#ffd4dc' + CO2 pipeline: '#f5627f' + # emissions + process emissions CC: '#000000' + process emissions: '#222222' + process emissions to stored: '#444444' + process emissions to atmosphere: '#888888' + oil emissions: '#aaaaaa' + shipping oil emissions: "#555555" + shipping methanol emissions: '#666666' + land transport oil emissions: '#777777' + agriculture machinery oil emissions: '#333333' + 
# other + shipping: '#03a2ff' + power-to-heat: '#2fb537' + power-to-gas: '#c44ce6' + power-to-H2: '#ff29d9' + power-to-liquid: '#25c49a' + gas-to-power/heat: '#ee8340' + waste: '#e3d37d' + other: '#000000' + geothermal: '#ba91b1' + AC: "#70af1d" + AC-AC: "#70af1d" + AC line: "#70af1d" + links: "#8a1caf" + HVDC links: "#8a1caf" + DC: "#8a1caf" + DC-DC: "#8a1caf" + DC link: "#8a1caf" + load: "#dd2e23" diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 0bca9ec5e..f998d958d 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -641,27 +641,23 @@ rule clean_osm_data: "../scripts/clean_osm_data.py" -if config["electricity_network"]["build_osm_network"] == True: - rule build_osm_network: - input: - substations=resources("osm/clean/substations.geojson"), - lines=resources("osm/clean/lines.geojson"), - country_shapes=resources("country_shapes.geojson"), - output: - lines=resources("osm/lines.csv"), - converters=resources("osm/converters.csv"), - transformers=resources("osm/transformers.csv"), - substations=resources("osm/buses.csv"), - lines_geojson=resources("osm/lines.geojson"), - converters_geojson=resources("osm/converters.geojson"), - transformers_geojson=resources("osm/transformers.geojson"), - substations_geojson=resources("osm/buses.geojson"), - log: - logs("build_osm_network.log"), - benchmark: - benchmarks("build_osm_network") - script: - "../scripts/build_osm_network.py" - -if config["electricity_network"]["build_osm_network"] == False: - print("Use prebuilt.") \ No newline at end of file +rule build_osm_network: + input: + substations=resources("osm/clean/substations.geojson"), + lines=resources("osm/clean/lines.geojson"), + country_shapes=resources("country_shapes.geojson"), + output: + lines=resources("osm/lines.csv"), + converters=resources("osm/converters.csv"), + transformers=resources("osm/transformers.csv"), + substations=resources("osm/buses.csv"), + lines_geojson=resources("osm/lines.geojson"), + 
converters_geojson=resources("osm/converters.geojson"), + transformers_geojson=resources("osm/transformers.geojson"), + substations_geojson=resources("osm/buses.geojson"), + log: + logs("build_osm_network.log"), + benchmark: + benchmarks("build_osm_network") + script: + "../scripts/build_osm_network.py" \ No newline at end of file diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 4cd5dd315..ac59548b3 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -979,20 +979,21 @@ def build_network( logger.info("Stage 2/5: AC and DC network: enabled") + # TODO pypsa-eur: Remove entirely after testing, not needed for PyPSA-Eur # Address the overpassing line issue Step 3/5 - if build_osm_network_config.get("split_overpassing_lines", False): - tol = build_osm_network_config.get("overpassing_lines_tolerance", 1) - logger.info("Stage 3/5: Avoid nodes overpassing lines: enabled with tolerance") + # if snakemake.config["electricity_network"]["osm_split_overpassing_lines"]: + # tol = snakemake.config["electricity_network"]["osm_overpassing_lines_tolerance"] + # logger.info("Stage 3/5: Avoid nodes overpassing lines: enabled with tolerance") - lines, buses = fix_overpassing_lines(lines, buses, distance_crs, tol=tol) - else: - logger.info("Stage 3/5: Avoid nodes overpassing lines: disabled") + # lines, buses = fix_overpassing_lines(lines, buses, distance_crs, tol=tol) + # else: + logger.info("Stage 3/5: Avoid nodes overpassing lines: disabled") # Add bus to countries with no buses buses = add_buses_to_empty_countries(countries_config, inputs.country_shapes, buses) # METHOD to merge buses with same voltage and within tolerance Step 4/5 - tol = build_osm_network_config.get("group_tolerance_buses", 5000) + tol = snakemake.config["electricity_network"]["osm_group_tolerance_buses"] logger.info( f"Stage 4/5: Aggregate close substations: enabled with tolerance {tol} m" ) diff --git a/scripts/solve_network.py b/scripts/solve_network.py 
index 67f39d16c..db4dc08b9 100644 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -998,7 +998,11 @@ def extra_functionality(n, snapshots): if EQ_o := constraints["EQ"]: add_EQ_constraints(n, EQ_o.replace("EQ", "")) - if {"solar-hsat", "solar"}.issubset(config["renewable"].keys()): + if {"solar-hsat", "solar"}.issubset( + config["electricity"]["renewable_carriers"] + ) and {"solar-hsat", "solar"}.issubset( + config["electricity"]["extendable_carriers"]["Generator"] + ): add_solar_potential_constraints(n, config) add_battery_constraints(n) From 095d936c43264388c5c0826724f5efb82474b8d0 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Wed, 22 May 2024 10:07:26 +0200 Subject: [PATCH 021/100] Fixes. --- rules/build_electricity.smk | 2 +- scripts/build_osm_network.py | 188 +---------------------------------- scripts/clean_osm_data.py | 13 +-- 3 files changed, 11 insertions(+), 192 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index f998d958d..ec4c56f60 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -119,7 +119,7 @@ if config["electricity_network"]["base_network"] == "osm": eg_converters=resources("osm/converters.csv"), eg_transformers=resources("osm/transformers.csv"), links_p_nom="data/links_p_nom.csv", - links_tyndp="data/links_tyndp_osm.csv", + links_tyndp="data/links_tyndp.csv", country_shapes=resources("country_shapes.geojson"), offshore_shapes=resources("offshore_shapes.geojson"), europe_shape=resources("europe_shape.geojson"), diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index ac59548b3..467a7b795 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -760,167 +760,6 @@ def _split_linestring_by_point(linestring, points): return list_linestrings -def fix_overpassing_lines(lines, buses, distance_crs, tol=1): - """ - Function to avoid buses overpassing lines with no connection when the bus - is within a given tolerance from the 
line. - - Parameters - ---------- - lines : GeoDataFrame - Geodataframe of lines - buses : GeoDataFrame - Geodataframe of substations - tol : float - Tolerance in meters of the distance between the substation and the line - below which the line will be split - """ - - lines_to_add = [] # list of lines to be added - lines_to_split = [] # list of lines that have been split - - lines_epsgmod = lines.to_crs(distance_crs) - buses_epsgmod = buses.to_crs(distance_crs) - - # set tqdm options for substation ids - tqdm_kwargs_substation_ids = dict( - ascii=False, - unit=" lines", - total=lines.shape[0], - desc="Verify lines overpassing nodes ", - ) - - for l in tqdm(lines.index, **tqdm_kwargs_substation_ids): - # bus indices being within tolerance from the line - bus_in_tol_epsg = buses_epsgmod[ - buses_epsgmod.geometry.distance(lines_epsgmod.geometry.loc[l]) <= tol - ] - - # exclude endings of the lines - bus_in_tol_epsg = bus_in_tol_epsg[ - ( - ( - bus_in_tol_epsg.geometry.distance( - lines_epsgmod.geometry.loc[l].boundary.geoms[0] - ) - > tol - ) - | ( - bus_in_tol_epsg.geometry.distance( - lines_epsgmod.geometry.loc[l].boundary.geoms[1] - ) - > tol - ) - ) - ] - - if not bus_in_tol_epsg.empty: - # add index of line to split - lines_to_split.append(l) - - buses_locs = buses.geometry.loc[bus_in_tol_epsg.index] - - # get new line geometries - new_geometries = _split_linestring_by_point(lines.geometry[l], buses_locs) - n_geoms = len(new_geometries) - - # create temporary copies of the line - df_append = gpd.GeoDataFrame([lines.loc[l]] * n_geoms) - # update geometries - df_append["geometry"] = new_geometries - # update name of the line - df_append["line_id"] = [ - str(df_append["line_id"].iloc[0]) + f"_{id}" for id in range(n_geoms) - ] - - lines_to_add.append(df_append) - - if not lines_to_add: - return lines, buses - - df_to_add = gpd.GeoDataFrame(pd.concat(lines_to_add, ignore_index=True)) - df_to_add.set_crs(lines.crs, inplace=True) - df_to_add.set_index(lines.index[-1] + 
df_to_add.index, inplace=True) - - # update length - df_to_add["length"] = df_to_add.to_crs(distance_crs).geometry.length - - # update line endings - df_to_add = line_endings_to_bus_conversion(df_to_add) - - # remove original lines - lines.drop(lines_to_split, inplace=True) - - lines = gpd.GeoDataFrame( - pd.concat([lines, df_to_add], ignore_index=True).reset_index(drop=True), - crs=lines.crs, - ) - - return lines, buses - - -def add_buses_to_empty_countries(country_list, fp_country_shapes, buses): - """ - Function to add a bus for countries missing substation data. - """ - country_shapes = gpd.read_file(fp_country_shapes).set_index("name")["geometry"] - bus_country_list = buses["country"].unique().tolist() - - # it may happen that bus_country_list contains entries not relevant as a country name (e.g. "not found") - # difference can't give negative values; the following will return only relevant country names - no_data_countries = list(set(country_list).difference(set(bus_country_list))) - - if len(no_data_countries) > 0: - logger.info( - f"No buses for the following countries: {no_data_countries}. Adding a node for everyone of them." 
- ) - no_data_countries_shape = ( - country_shapes[country_shapes.index.isin(no_data_countries) == True] - .reset_index() - .to_crs(geo_crs) - ) - length = len(no_data_countries) - df = gpd.GeoDataFrame( - { - "voltage": [220000] * length, - "country": no_data_countries_shape["name"], - "x": no_data_countries_shape["geometry"].centroid.x, - "y": no_data_countries_shape["geometry"].centroid.y, - "bus_id": np.arange(len(buses) + 1, len(buses) + (length + 1), 1), - "station_id": [np.nan] * length, - # All lines for the countries with NA bus data are assumed to be AC - "dc": [False] * length, - "under_construction": [False] * length, - "tag_area": [0.0] * length, - "symbol": ["substation"] * length, - "tag_substation": ["transmission"] * length, - "geometry": no_data_countries_shape["geometry"].centroid, - "substation_lv": [True] * length, - }, - crs=geo_crs, - ).astype( - buses.dtypes.to_dict() - ) # keep the same dtypes as buses - buses = gpd.GeoDataFrame( - pd.concat([buses, df], ignore_index=True).reset_index(drop=True), - crs=buses.crs, - ) - - # update country list by buses dataframe - bus_country_list = buses["country"].unique().tolist() - - non_allocated_countries = list( - set(country_list).symmetric_difference(set(bus_country_list)) - ) - - if len(non_allocated_countries) > 0: - logger.error( - f"There following countries could not be allocated properly: {non_allocated_countries}" - ) - - return buses - - def build_network( inputs, outputs, @@ -962,7 +801,7 @@ def build_network( } } - logger.info("Stage 1/5: Read input data") + logger.info("Read input data.") buses = read_geojson( inputs["substations"], osm_clean_columns["substation"].keys(), @@ -976,33 +815,16 @@ def build_network( ) lines = line_endings_to_bus_conversion(lines) - - logger.info("Stage 2/5: AC and DC network: enabled") - - # TODO pypsa-eur: Remove entirely after testing, not needed for PyPSA-Eur - # Address the overpassing line issue Step 3/5 - # if 
snakemake.config["electricity_network"]["osm_split_overpassing_lines"]: - # tol = snakemake.config["electricity_network"]["osm_overpassing_lines_tolerance"] - # logger.info("Stage 3/5: Avoid nodes overpassing lines: enabled with tolerance") - - # lines, buses = fix_overpassing_lines(lines, buses, distance_crs, tol=tol) - # else: - logger.info("Stage 3/5: Avoid nodes overpassing lines: disabled") - # Add bus to countries with no buses - buses = add_buses_to_empty_countries(countries_config, inputs.country_shapes, buses) - - # METHOD to merge buses with same voltage and within tolerance Step 4/5 + # METHOD to merge buses with same voltage and within tolerance tol = snakemake.config["electricity_network"]["osm_group_tolerance_buses"] logger.info( - f"Stage 4/5: Aggregate close substations: enabled with tolerance {tol} m" + f"Aggregate close substations: Enabled with tolerance {tol} m" ) lines, buses = merge_stations_lines_by_station_id_and_voltage( lines, buses, geo_crs, distance_crs, tol=tol ) - logger.info("Stage 5/5: Add augmented substation to country with no data") - # Recalculate lengths of lines utm = lines.estimate_utm_crs(datum_name = "WGS 84") lines["length"] = lines.to_crs(utm).length @@ -1019,7 +841,6 @@ def build_network( if not os.path.exists(outputs["lines"]): os.makedirs(os.path.dirname(outputs["lines"]), exist_ok=True) - ### Convert output to pypsa-eur friendly format # Rename "substation" in buses["symbol"] to "Substation" buses["symbol"] = buses["symbol"].replace({"substation": "Substation"}) @@ -1030,7 +851,6 @@ def build_network( transformers.set_index("transformer_id", inplace=True) buses.set_index("bus_id", inplace=True) - # Convert voltages from V to kV lines["voltage"] = lines["voltage"] / 1000 transformers["voltage_bus0"], transformers["voltage_bus1"] = transformers["voltage_bus0"] / 1000, \ @@ -1051,8 +871,6 @@ def build_network( lines_csv = lines[cols_lines_csv] lines = lines[cols_lines] - - to_csv_nafix(lines_csv, outputs["lines"], 
quotechar="'") # Generate CSV to_csv_nafix(converters, outputs["converters"], quotechar="'") # Generate CSV to_csv_nafix(transformers, outputs["transformers"], quotechar="'") # Generate CSV diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index c4f99858c..cde44d412 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -468,10 +468,12 @@ def _import_lines_and_cables(path_lines): Import lines and cables from the given input paths. Parameters: - - path_lines (dict): A dictionary containing the input paths for lines and cables data. + - path_lines (dict): A dictionary containing the input paths for lines and + cables data. Returns: - - df_lines (DataFrame): A DataFrame containing the imported lines and cables data. + - df_lines (DataFrame): A DataFrame containing the imported lines and + cables data. """ columns = ["id", "bounds", "nodes", "geometry", "country", "power", "cables", "circuits", "frequency", "voltage", @@ -1078,8 +1080,8 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): Parameters: - gdf_lines (GeoDataFrame): A GeoDataFrame containing lines with 'line_id' and 'geometry' columns. - - gdf_substations_polygon (GeoDataFrame): A GeoDataFrame containing substation - polygons. + - gdf_substations_polygon (GeoDataFrame): A GeoDataFrame containing + substation polygons. Returns: GeoDataFrame: A new GeoDataFrame without lines within substation polygons. @@ -1196,5 +1198,4 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): logger.info(f"Exporting clean lines to {output_lines}") gdf_lines.to_file(output_lines, driver="GeoJSON") - logger.info("Cleaning OSM data completed.") - \ No newline at end of file + logger.info("Cleaning OSM data completed.") \ No newline at end of file From 6032f9676a5a302130e08c7cecae5c1c1a4dec72 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Wed, 22 May 2024 15:54:53 +0200 Subject: [PATCH 022/100] Bug fixes. 
--- scripts/base_network_osm.py | 7 +- scripts/build_osm_network.py | 260 ++++++++++++++++++++++------------- scripts/clean_osm_data.py | 1 + 3 files changed, 168 insertions(+), 100 deletions(-) diff --git a/scripts/base_network_osm.py b/scripts/base_network_osm.py index 44b2636d6..81b7339e1 100644 --- a/scripts/base_network_osm.py +++ b/scripts/base_network_osm.py @@ -170,11 +170,14 @@ def _load_buses_from_eg(eg_buses, europe_shape, config_elec): # buses.v_nom.isin(config_elec["voltages"]) | buses.v_nom.isnull() # ) + v_nom_min = min(config_elec["voltages"]) + v_nom_max = max(config_elec["voltages"]) + # Quick fix: - buses_with_v_nom_to_keep_b = (min(config_elec["voltages"]) <= buses.v_nom) & (buses.v_nom <= max(config_elec["voltages"])) + buses_with_v_nom_to_keep_b = (v_nom_min <= buses.v_nom) & (buses.v_nom <= v_nom_max) logger.info( - f'Removing buses with voltages {pd.Index(buses.v_nom.unique()).dropna().difference(config_elec["voltages"])}' + f'Removing buses outside of range {v_nom_min} - {v_nom_max} V' ) return pd.DataFrame(buses.loc[buses_in_europe_b & buses_with_v_nom_to_keep_b]) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 467a7b795..19f9f4ad1 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -39,6 +39,16 @@ def read_csv_nafix(file, **kwargs): def save_to_geojson(df, fn): + """ + Save a (Geo)DataFrame to a GeoJSON file. + + Parameters: + - df: The (Geo)DataFrame to be saved. + - fn: The filename (including the path) of the output GeoJSON file. + + Returns: + None + """ if os.path.exists(fn): os.unlink(fn) # remove file if it exists @@ -82,9 +92,27 @@ def read_geojson(fn, cols=[], dtype=None, crs="EPSG:4326"): def to_csv_nafix(df, path, **kwargs): + """ + Write a pandas DataFrame to a CSV file with NA values replaced. + + Parameters: + - df: pandas DataFrame + The DataFrame to be written to the CSV file. + - path: str + The file path where the CSV file will be saved. 
+ - **kwargs: keyword arguments + Additional arguments to be passed to the `to_csv` function of pandas. + + Returns: + - None + + If the DataFrame is not empty or does not have empty columns, it will be + written to the CSV file with NA values replaced by the first value in the + `NA_VALUES` list. If the DataFrame is empty or has empty columns, an empty + file will be created at the specified path. + """ if "na_rep" in kwargs: del kwargs["na_rep"] - # if len(df) > 0: if not df.empty or not df.columns.empty: return df.to_csv(path, **kwargs, na_rep=NA_VALUES[0]) else: @@ -93,6 +121,20 @@ def to_csv_nafix(df, path, **kwargs): def line_endings_to_bus_conversion(lines): + """ + Converts line endings to bus connections. + + This function takes a df of lines and converts the line endings to bus + connections. It performs the necessary operations to ensure that the line + endings are properly connected to the buses in the network. + + Parameters: + lines (DataFrame) + + Returns: + lines (DataFrame) + + """ # Assign to every line a start and end point lines["bounds"] = lines["geometry"].boundary # create start and end point @@ -414,7 +456,7 @@ def get_transformers(buses, lines): return df_transformers -def get_converters(buses, lines): +def get_converters(buses): """ Function to create fake converter lines that connect buses of the same station_id of different polarities. @@ -578,16 +620,8 @@ def set_lv_substations(buses): return buses -# Note tolerance = 0.01 means around 700m -# TODO: the current tolerance is high to avoid an issue in the Nigeria case where line 565939360-1 -# seems to be interconnected to both ends, but at the eastern one, the node is actually not connected -# another line seems to be exactly touching the node, but from the data point of view it only fly over it. 
-# There may be the need to split a line in several segments in the case the line is within tolerance with -# respect to a node - - def merge_stations_lines_by_station_id_and_voltage( - lines, buses, geo_crs, distance_crs, tol=5000 + lines, buses, distance_crs, tol=5000 ): """ Function to merge close stations and adapt the line datasets to adhere to @@ -595,7 +629,7 @@ def merge_stations_lines_by_station_id_and_voltage( """ logger.info( - "Stage 4a/5: Set substation ids with tolerance of %.2f km" % (tol / 1000) + " - Setting substation ids with tolerance of %.2f m" % (tol) ) # TODO pypsa-eur: Add this fix to pypsa-earth: Buses should not be clustered geographically if they are different @@ -645,7 +679,10 @@ def merge_stations_lines_by_station_id_and_voltage( if dc_boundary_points.empty: all_dc_boundary_points = dc_boundary_points else: - all_dc_boundary_points = pd.concat([all_dc_boundary_points, dc_boundary_points], ignore_index = True) + if all_dc_boundary_points.empty: + all_dc_boundary_points = dc_boundary_points + else: + all_dc_boundary_points = pd.concat([all_dc_boundary_points, dc_boundary_points], ignore_index = True) # TODO pypsa-eur: Add to pypsa-earth for all related entries on is_dclink_boundary_point @@ -655,7 +692,7 @@ def merge_stations_lines_by_station_id_and_voltage( lambda p: any([p.within(l) for l in all_dc_boundary_points]) ) - logger.info("Stage 4b/5: Merge substations with the same id") + logger.info(" - Merging substations with the same id") # merge buses with same station id and voltage if not buses.empty: @@ -665,7 +702,7 @@ def merge_stations_lines_by_station_id_and_voltage( buses = pd.concat([buses_ac, buses_dc], ignore_index=True) set_substations_ids(buses, distance_crs, tol=tol) - logger.info("Stage 4c/5: Specify the bus ids of the line endings") + logger.info(" - Specifying the bus ids of the line endings") # set the bus ids to the line dataset lines, buses = set_lines_ids(lines, buses, distance_crs) @@ -678,7 +715,7 @@ def 
merge_stations_lines_by_station_id_and_voltage( # set substation_lv set_lv_substations(buses) - logger.info("Stage 3d/4: Add converters to lines") + logger.info(" - Adding converters to lines") # append fake converters # lines = pd.concat([lines, converters], ignore_index=True) @@ -691,80 +728,9 @@ def merge_stations_lines_by_station_id_and_voltage( return lines, buses -def create_station_at_equal_bus_locations( - lines, buses, geo_crs, distance_crs, tol=5000 -): - # V1. Create station_id at same bus location - # - We saw that buses are not connected exactly at one point, they are - # usually connected to a substation "area" (analysed on maps) - # - Create station_id at exactly the same location might therefore be not - # always correct - # - Though as you can see below, it might be still sometime the case. - # Examples are **station 4** (2 lines with the same voltage connect at the - # same point) and **station 23** (4 lines with two different voltages connect - # at the same point) - # TODO: Filter out the generator lines - defined as going from generator to - # the next station which is connected to a load. Excluding generator - # lines make probably sense because they are not transmission expansion - # relevant. For now we simplify and include generator lines. - - # If same location/geometry make station - bus_all = buses - - # set substation ids - set_substations_ids(buses, distance_crs, tol=tol) - - # set the bus ids to the line dataset - lines, buses = set_lines_ids(lines, buses, distance_crs) - - # update line endings - lines = line_endings_to_bus_conversion(lines) - - # For each station number with multiple buses make lowest voltage `substation_lv = TRUE` - set_lv_substations(bus_all) - - # TRY: Keep only buses that are not duplicated & lv_substation = True - # TODO: Check if this is necessary. What effect do duplicates have? 
- bus_all = bus_all[bus_all["substation_lv"] == True] - - lines = connect_stations_same_station_id(lines, buses) - - return lines, buses - - -def _split_linestring_by_point(linestring, points): - """ - Function to split a linestring geometry by multiple inner points. - - Parameters - ---------- - lstring : LineString - Linestring of the line to be split - points : list - List of points to split the linestring - - Return - ------ - list_lines : list - List of linestring to split the line - """ - - list_linestrings = [linestring] - - for p in points: - # execute split to all lines and store results - temp_list = [split(l, p) for l in list_linestrings] - # nest all geometries - list_linestrings = [lstring for tval in temp_list for lstring in tval.geoms] - - return list_linestrings - - def build_network( inputs, outputs, - build_osm_network_config, - countries_config, geo_crs, distance_crs, ): @@ -801,7 +767,7 @@ def build_network( } } - logger.info("Read input data.") + logger.info("Reading input data.") buses = read_geojson( inputs["substations"], osm_clean_columns["substation"].keys(), @@ -819,10 +785,10 @@ def build_network( # METHOD to merge buses with same voltage and within tolerance tol = snakemake.config["electricity_network"]["osm_group_tolerance_buses"] logger.info( - f"Aggregate close substations: Enabled with tolerance {tol} m" + f"Aggregating close substations: Enabled with tolerance {tol} m" ) lines, buses = merge_stations_lines_by_station_id_and_voltage( - lines, buses, geo_crs, distance_crs, tol=tol + lines, buses, distance_crs, tol=tol ) # Recalculate lengths of lines @@ -833,9 +799,9 @@ def build_network( transformers = get_transformers(buses, lines) # get converters: currently modelled as links connecting buses with different polarity - converters = get_converters(buses, lines) + converters = get_converters(buses) - logger.info("Save outputs") + logger.info("Saving outputs") # create clean directory if not already exist if not 
os.path.exists(outputs["lines"]): @@ -870,7 +836,7 @@ def build_network( cols_lines_csv = ["bus0", "bus1", "voltage", "circuits", "tag_frequency", "length", "underground", "under_construction", "geometry"] lines_csv = lines[cols_lines_csv] lines = lines[cols_lines] - + to_csv_nafix(lines_csv, outputs["lines"], quotechar="'") # Generate CSV to_csv_nafix(converters, outputs["converters"], quotechar="'") # Generate CSV to_csv_nafix(transformers, outputs["transformers"], quotechar="'") # Generate CSV @@ -894,16 +860,50 @@ def build_network( # Function to check if two lines are connected def are_lines_connected(line1, line2): - # return (line1['geometry'].touches(line2['geometry'])) + """ + Check if two lines are connected. + + Parameters: + line1 (dict): A dictionary representing the first line. + line2 (dict): A dictionary representing the second line. + + Returns: + tuple: A tuple of boolean values indicating the connection status between + the lines. + + The tuple contains four elements: + - True if the first line's bus_0_coors is almost equal to the second line's + bus_0_coors, False otherwise. + - True if the first line's bus_0_coors is almost equal to the second line's + bus_1_coors, False otherwise. + - True if the first line's bus_1_coors is almost equal to the second line's + bus_0_coors, False otherwise. + - True if the first line's bus_1_coors is almost equal to the second line's + bus_1_coors, False otherwise. + """ return ( are_almost_equal(line1["bus_0_coors"], line2["bus_0_coors"]), are_almost_equal(line1["bus_0_coors"], line2["bus_1_coors"]), are_almost_equal(line1["bus_1_coors"], line2["bus_0_coors"]), are_almost_equal(line1["bus_1_coors"], line2["bus_1_coors"]) - ) + ) def _dfs(adj_matrix, visited, current_vertex, path): + """ + Perform a depth-first search (DFS) on a graph represented by an adjacency + matrix. + + Parameters: + - adj_matrix (list of lists): The adjacency matrix representing the graph. 
+ - visited (list of bool): A list to keep track of visited vertices. + - current_vertex (int): The current vertex being visited. + - path (list): The path of vertices visited so far. + + Returns: + - path (list): The path of vertices visited during the DFS. + + """ visited[current_vertex] = True path.append(current_vertex) for neighbor in range(len(adj_matrix)): @@ -914,6 +914,17 @@ def _dfs(adj_matrix, visited, current_vertex, path): # Returns all connected paths as a vector def find_paths(adj_matrix): + """ + Find all paths in a graph represented by an adjacency matrix. + + Parameters: + - adj_matrix (list of lists): The adjacency matrix representing the graph. + + Returns: + - paths (list of lists): A list of lists, where each inner list represents + a path in the graph. + + """ visited = [False] * len(adj_matrix) paths = [] for vertex in range(len(adj_matrix)): @@ -923,6 +934,7 @@ def find_paths(adj_matrix): paths.append(path) return paths + def are_almost_equal(point1, point2, tolerance=1e-6): """ Check if two Shapely points are almost equal with a given tolerance. @@ -938,6 +950,61 @@ def are_almost_equal(point1, point2, tolerance=1e-6): return abs(point1.x - point2.x) < tolerance and abs(point1.y - point2.y) < tolerance +def merge_linestrings(gdf): + """ + Merge LineStrings in a GeoDataFrame wherever the endpoints match. + + Parameters: + gdf (GeoDataFrame): A GeoDataFrame containing LineString geometries. + + Returns: + GeoDataFrame: A GeoDataFrame with merged LineString geometries. 
+ """ + gdf = gdf.copy() + if len(gdf) == 1: + return gdf + + lines = list(gdf.geometry) + merged_lines = [] + while lines: + line = lines.pop(0) + merged_line = line + i = 0 + while i < len(lines): + if are_almost_equal( + Point(merged_line.coords[-1]), + Point(lines[i].coords[0]) + ): + merged_line = LineString(list(merged_line.coords) + list(lines.pop(i).coords[1:])) + i = 0 # Restart the scan after merging + elif are_almost_equal( + Point(merged_line.coords[0]), + Point(lines[i].coords[-1]) + ): + merged_line = LineString(list(lines.pop(i).coords)[:-1] + list(merged_line.coords)) + i = 0 # Restart the scan after merging + elif are_almost_equal( + Point(merged_line.coords[-1]), + Point(lines[i].coords[-1]) + ): + merged_line = LineString(list(merged_line.coords) + list(lines.pop(i).coords[::-1])[1:]) + i = 0 # Restart the scan after merging + elif are_almost_equal( + Point(merged_line.coords[0]), + Point(lines[i].coords[0]) + ): + merged_line = LineString(list(lines.pop(i).coords[::-1])[:-1] + list(merged_line.coords)) + i = 0 # Restart the scan after merging + else: + i += 1 + merged_lines.append(merged_line) + no_coordinates = [len(merged_lines[i].coords) for i in range(len(merged_lines))] + max_index = np.argmax(no_coordinates) + merged_lines = [merged_lines[max_index]] + + return gpd.GeoDataFrame(geometry=merged_lines, crs=gdf.crs) + + if __name__ == "__main__": # Detect running outside of snakemake and mock snakemake for testing if "snakemake" not in globals(): @@ -952,7 +1019,6 @@ def are_almost_equal(point1, point2, tolerance=1e-6): geo_crs = "EPSG:4326" distance_crs = "EPSG:3035" - build_osm_network = snakemake.config["build_osm_network"] countries = snakemake.config["countries"] with memory_logger( @@ -961,8 +1027,6 @@ def are_almost_equal(point1, point2, tolerance=1e-6): build_network( snakemake.input, snakemake.output, - build_osm_network, - countries, geo_crs, distance_crs, ) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 
cde44d412..dadc2c902 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -497,6 +497,7 @@ def _import_lines_and_cables(path_lines): df["id"] = df["id"].astype(str) df["country"] = country + # col_tags = ["power", "cables", "circuits", "frequency", "voltage", "wires", "capacity", "rating"] col_tags = ["power", "cables", "circuits", "frequency", "voltage", "wires"] tags = pd.json_normalize(df["tags"]) \ From 7bd83b4fa14761ba8afedb2fcb61f8ff7d75ac64 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Wed, 22 May 2024 15:57:26 +0200 Subject: [PATCH 023/100] Updated default config --- config/config.default.yaml | 146 ++++++++++++++++++------------------- 1 file changed, 73 insertions(+), 73 deletions(-) diff --git a/config/config.default.yaml b/config/config.default.yaml index d742f806f..8ccafab79 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -15,13 +15,13 @@ private: entsoe_api: remote: - ssh: "z1" - path: "~/scratch/projects/pypsa-eur" + ssh: "" + path: "" # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#run run: prefix: "" - name: "europe-osm-update-hydro" + name: "" scenarios: enable: false file: config/scenarios.yaml @@ -40,15 +40,13 @@ scenario: simpl: - '' ll: - - v1.0 # TODO mit und ohne Netzausbau v1.0 + - vopt clusters: - - 50 + - 37 - 128 - 256 - - 512 - # - 1024 opts: - - 'Co2L0-25H' + - '' sector_opts: - '' planning_horizons: @@ -58,20 +56,7 @@ scenario: - 2050 # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#countries -# countries: ["NO"] countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK'] -# countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MD', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK', 
'UA'] - -# Settings related to the high-voltage electricity grid -electricity_network: - base_network: "osm" # "osm" or "gridkit" - build_osm_network: true # If 'true', the network will be built from scratch (retrieving OSM data, cleaning, and building) and stored under resources, 'false' will use snapshots in data/osm - -build_osm_network: # Options of the build_osm_network script; osm = OpenStreetMap - group_tolerance_buses: 5000 # [m] (default 5000) Tolerance in meters of the close buses to merge - split_overpassing_lines: false # When True, lines overpassing buses are splitted and connected to the bueses - overpassing_lines_tolerance: 1 # [m] (default 1) Tolerance to identify lines overpassing buses - force_ac: false # When true, it forces all components (lines and substation) to be AC-only. To be used if DC assets create problem. # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#snapshots snapshots: @@ -81,19 +66,20 @@ snapshots: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#enable enable: - retrieve: true + retrieve: auto prepare_links_p_nom: false retrieve_databundle: true - retrieve_sector_databundle: true retrieve_cost_data: true build_cutout: false - retrieve_irena: false retrieve_cutout: true - build_natura_raster: false - retrieve_natura_raster: true custom_busmap: false drop_leap_day: true +# Settings related to the high-voltage electricity grid +electricity_network: + base_network: "osm" # "osm" or "gridkit" + osm_group_tolerance_buses: 5000 # [m] (default 5000) Tolerance in meters of the close buses to merge + # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#co2-budget co2_budget: 2020: 0.701 @@ -106,7 +92,7 @@ co2_budget: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity electricity: - voltages: [200., 220., 300., 380., 400., 500., 750.] + voltages: [200., 220., 300., 380., 500., 750.] 
gaslimit_enable: false gaslimit: false co2limit_enable: false @@ -142,7 +128,7 @@ electricity: year: 2020 expansion_limit: false technology_mapping: - Offshore: [offwind-ac, offwind-dc] + Offshore: [offwind-ac, offwind-dc, offwind-float] Onshore: [onwind] PV: [solar] @@ -210,7 +196,7 @@ renewable: luisa: false # [0, 5230] natura: true ship_threshold: 400 - max_depth: 50 + max_depth: 60 max_shore_distance: 30000 excluder_resolution: 200 clip_p_max_pu: 1.e-2 @@ -226,10 +212,28 @@ renewable: luisa: false # [0, 5230] natura: true ship_threshold: 400 - max_depth: 50 + max_depth: 60 min_shore_distance: 30000 excluder_resolution: 200 clip_p_max_pu: 1.e-2 + offwind-float: + cutout: europe-2013-era5 + resource: + method: wind + turbine: NREL_ReferenceTurbine_5MW_offshore + # ScholzPhd Tab 4.3.1: 10MW/km^2 + capacity_per_sqkm: 2 + correction_factor: 0.8855 + # proxy for wake losses + # from 10.1016/j.energy.2018.08.153 + # until done more rigorously in #153 + corine: [44, 255] + natura: true + ship_threshold: 400 + excluder_resolution: 200 + min_depth: 60 + max_depth: 1000 + clip_p_max_pu: 1.e-2 solar: cutout: europe-2013-sarah resource: @@ -289,20 +293,26 @@ lines: 400.: "Al/St 240/40 4-bundle 380.0" 500.: "Al/St 240/40 4-bundle 380.0" 750.: "Al/St 560/50 4-bundle 750.0" - dc_types: # setting only for osm + dc_types: 200.: "HVDC XLPE 1000" - 220.: "HVDC XLPE 1000" + 250.: "HVDC XLPE 1000" + 270.: "HVDC XLPE 1000" + 285.: "HVDC XLPE 1000" 300.: "HVDC XLPE 1000" - 750.: "HVDC XLPE 1000" - 380.: "HVDC XLPE 1000" + 320.: "HVDC XLPE 1000" + 350.: "HVDC XLPE 1000" + 380.: "HVDC Oil filled 1400" 400.: "HVDC XLPE 1000" - 500.: "HVDC XLPE 1000" + 450.: "HVDC XLPE 1000" + 515.: "HVDC XLPE 1000" + 525.: "HVDC XLPE 1000" + 600.: "HVDC XLPE 1000" s_max_pu: 0.7 s_nom_max: .inf max_extension: 20000 #MW length_factor: 1.25 reconnect_crimea: true - under_construction: 'zero' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity + under_construction: 'keep' # 'zero': 
set capacity to zero, 'remove': remove, 'keep': with full capacity dynamic_line_rating: activate: false cutout: europe-2013-era5 @@ -315,7 +325,7 @@ links: p_max_pu: 1.0 p_nom_max: .inf max_extension: 30000 #MW - include_tyndp: false + include_tyndp: true under_construction: 'zero' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#transformers @@ -578,7 +588,7 @@ sector: - nearshore # within 50 km of sea # - offshore ammonia: false - min_part_load_fischer_tropsch: 0.7 + min_part_load_fischer_tropsch: 0.5 min_part_load_methanolisation: 0.3 min_part_load_methanation: 0.3 use_fischer_tropsch_waste_heat: true @@ -696,6 +706,9 @@ industry: 2040: 0.12 2045: 0.16 2050: 0.20 + HVC_environment_sequestration_fraction: 0. + waste_to_energy: false + waste_to_energy_cc: false sector_ratios_fraction_future: 2020: 0.0 2025: 0.1 @@ -829,7 +842,7 @@ solving: solver_options: highs-default: - # refer to https://ergo-code.github.io/HiGHS/options/definitions.html#solver + # refer to https://ergo-code.github.io/HiGHS/dev/options/definitions/ threads: 4 solver: "ipm" run_crossover: "off" @@ -882,23 +895,17 @@ solving: cbc-default: {} # Used in CI glpk-default: {} # Used in CI - mem_mb: 100000 #memory in MB; 20 GB enough for 50+B+I+H2; 100 GB for 181+B+I+H2 - runtime: 12h #runtime in humanfriendly style https://humanfriendly.readthedocs.io/en/latest/ + mem_mb: 30000 #memory in MB; 20 GB enough for 50+B+I+H2; 100 GB for 181+B+I+H2 + runtime: 6h #runtime in humanfriendly style https://humanfriendly.readthedocs.io/en/latest/ # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#plotting - plotting: map: - boundaries: - eu_node_location: - x: -5.5 - y: 46. 
- # costs_max: 1000 - # costs_threshold: 0.0000001 - # energy_max: - # energy_min: - # energy_threshold: 0.000001 + boundaries: [-11, 30, 34, 71] + color_geomap: + ocean: white + land: white projection: name: "EqualEarth" # See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html for alternatives, for example: @@ -906,34 +913,21 @@ plotting: # central_longitude: 10. # central_latitude: 50. # standard_parallels: [35, 65] - -# plotting: -# map: -# boundaries: [-11, 30, 34, 71] -# color_geomap: -# ocean: white -# land: white -# projection: -# name: "EqualEarth" -# # See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html for alternatives, for example: -# # name: "LambertConformal" -# # central_longitude: 10. -# # central_latitude: 50. -# # standard_parallels: [35, 65] -# eu_node_location: -# x: -5.5 -# y: 46. -# costs_max: 1000 -# costs_threshold: 1 -# energy_max: 20000 -# energy_min: -20000 -# energy_threshold: 50. + eu_node_location: + x: -5.5 + y: 46. + costs_max: 1000 + costs_threshold: 1 + energy_max: 20000 + energy_min: -20000 + energy_threshold: 50. 
nice_names: OCGT: "Open-Cycle Gas" CCGT: "Combined-Cycle Gas" offwind-ac: "Offshore Wind (AC)" offwind-dc: "Offshore Wind (DC)" + offwind-float: "Offshore Wind (Floating)" onwind: "Onshore Wind" solar: "Solar" PHS: "Pumped Hydro Storage" @@ -958,6 +952,9 @@ plotting: offwind-dc: "#74c6f2" offshore wind (DC): "#74c6f2" offshore wind dc: "#74c6f2" + offwind-float: "#b5e2fa" + offshore wind (Float): "#b5e2fa" + offshore wind float: "#b5e2fa" # water hydro: '#298c81' hydro reservoir: '#298c81' @@ -1216,3 +1213,6 @@ plotting: DC-DC: "#8a1caf" DC link: "#8a1caf" load: "#dd2e23" + waste CHP: '#e3d37d' + waste CHP CC: '#e3d3ff' + HVC to air: 'k' From d30ad7f632b17e58af6d13f75441c8c55c33e0b3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 22 May 2024 14:30:21 +0000 Subject: [PATCH 024/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- rules/build_electricity.smk | 26 +- scripts/base_network_osm.py | 61 ++- scripts/build_osm_network.py | 270 +++++++------ scripts/clean_osm_data.py | 752 ++++++++++++++++++++--------------- scripts/retrieve_osm_data.py | 113 +++--- 5 files changed, 710 insertions(+), 512 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index ec4c56f60..80789900a 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -66,6 +66,7 @@ rule build_powerplants: if config["electricity_network"]["base_network"] == "gridkit": + rule base_network: params: countries=config_provider("countries"), @@ -104,6 +105,7 @@ if config["electricity_network"]["base_network"] == "gridkit": if config["electricity_network"]["base_network"] == "osm": + rule base_network: params: countries=config_provider("countries"), @@ -618,17 +620,29 @@ rule retrieve_osm_data: log: logs("retrieve_osm_data_{country}.log"), resources: - cores = 2, threads= 1, + cores=2, + threads=1, script: 
"../scripts/retrieve_osm_data.py" rule clean_osm_data: input: - cables_way=[f"data/osm/raw/{country}/cables_way.json" for country in config["countries"]], - lines_way=[f"data/osm/raw/{country}/lines_way.json" for country in config["countries"]], - substations_way=[f"data/osm/raw/{country}/substations_way.json" for country in config["countries"]], - substations_relation=[f"data/osm/raw/{country}/substations_relation.json" for country in config["countries"]], + cables_way=[ + f"data/osm/raw/{country}/cables_way.json" + for country in config["countries"] + ], + lines_way=[ + f"data/osm/raw/{country}/lines_way.json" for country in config["countries"] + ], + substations_way=[ + f"data/osm/raw/{country}/substations_way.json" + for country in config["countries"] + ], + substations_relation=[ + f"data/osm/raw/{country}/substations_relation.json" + for country in config["countries"] + ], offshore_shapes=resources("offshore_shapes.geojson"), country_shapes=resources("country_shapes.geojson"), output: @@ -660,4 +674,4 @@ rule build_osm_network: benchmark: benchmarks("build_osm_network") script: - "../scripts/build_osm_network.py" \ No newline at end of file + "../scripts/build_osm_network.py" diff --git a/scripts/base_network_osm.py b/scripts/base_network_osm.py index 81b7339e1..beec06f84 100644 --- a/scripts/base_network_osm.py +++ b/scripts/base_network_osm.py @@ -164,7 +164,6 @@ def _load_buses_from_eg(eg_buses, europe_shape, config_elec): lambda p: europe_shape_prepped.contains(Point(p)), axis=1 ) - # TODO pypsa-eur: Find a long-term solution # buses_with_v_nom_to_keep_b = ( # buses.v_nom.isin(config_elec["voltages"]) | buses.v_nom.isnull() @@ -176,9 +175,7 @@ def _load_buses_from_eg(eg_buses, europe_shape, config_elec): # Quick fix: buses_with_v_nom_to_keep_b = (v_nom_min <= buses.v_nom) & (buses.v_nom <= v_nom_max) - logger.info( - f'Removing buses outside of range {v_nom_min} - {v_nom_max} V' - ) + logger.info(f"Removing buses outside of range {v_nom_min} - {v_nom_max} 
V") return pd.DataFrame(buses.loc[buses_in_europe_b & buses_with_v_nom_to_keep_b]) @@ -418,6 +415,7 @@ def _reconnect_crimea(lines): # for v_nom in v_noms: # lines.loc[lines["v_nom"] == v_nom, "type"] = linetypes[v_nom] + def _set_electrical_parameters_lines(lines_config, voltages, lines): if lines.empty: lines["type"] = [] @@ -467,6 +465,7 @@ def _set_electrical_parameters_dc_lines(lines_config, voltages, lines): return lines + # TODO pypsa-eur: Clean/fix this, update list p_noms def _set_electrical_parameters_links(links, config, links_p_nom): if links.empty: @@ -795,9 +794,7 @@ def base_network_osm( ): buses = _load_buses_from_eg(eg_buses, europe_shape, config["electricity"]) - - - #TODO pypsa-eur add this + # TODO pypsa-eur add this # links = _load_links_from_eg(buses, eg_links) # if config["links"].get("include_tyndp"): # buses, links = _add_links_from_tyndp(buses, links, links_tyndp, europe_shape) @@ -814,15 +811,11 @@ def base_network_osm( lines_dc = lines[lines.tag_frequency.astype(float) == 0].copy() lines_ac = _set_electrical_parameters_lines( - config["lines"], - config["electricity"]["voltages"], - lines_ac - ) + config["lines"], config["electricity"]["voltages"], lines_ac + ) lines_dc = _set_electrical_parameters_dc_lines( - config["lines"], - config["electricity"]["voltages"], - lines_dc + config["lines"], config["electricity"]["voltages"], lines_dc ) # lines = _set_electrical_parameters_lines(lines, config) @@ -835,7 +828,9 @@ def base_network_osm( time = get_snapshots(snakemake.params.snapshots, snakemake.params.drop_leap_day) n.set_snapshots(time) - n.madd("Carrier", ["AC", "DC"]) # TODO: fix hard code and check if AC/DC truly exist + n.madd( + "Carrier", ["AC", "DC"] + ) # TODO: fix hard code and check if AC/DC truly exist n.import_components_from_dataframe(buses, "Bus") @@ -870,15 +865,15 @@ def base_network_osm( _set_lines_s_nom_from_linetypes(n) - #TODO pypsa-eur add this - # _apply_parameter_corrections(n, parameter_corrections) + # TODO 
pypsa-eur add this + # _apply_parameter_corrections(n, parameter_corrections) # TODO: what about this? n = _remove_unconnected_components(n) _set_countries_and_substations(n, config, country_shapes, offshore_shapes) - #TODO pypsa-eur add this + # TODO pypsa-eur add this _set_links_underwater_fraction(n, offshore_shapes) _replace_b2b_converter_at_country_border_by_link(n) @@ -889,6 +884,7 @@ def base_network_osm( return n + def _get_linetypes_config(line_types, voltages): """ Return the dictionary of linetypes for selected voltages. The dictionary is @@ -914,6 +910,7 @@ def _get_linetypes_config(line_types, voltages): ) return {k: v for k, v in line_types.items() if k in voltages} + def _get_linetype_by_voltage(v_nom, d_linetypes): """ Return the linetype of a specific line based on its voltage v_nom. @@ -1084,24 +1081,24 @@ def append_bus_shapes(n, shapes, type): set_scenario_config(snakemake) n = base_network_osm( - snakemake.input.eg_buses, - snakemake.input.eg_converters, - snakemake.input.eg_transformers, - snakemake.input.eg_lines, - snakemake.input.links_p_nom, - snakemake.input.europe_shape, - snakemake.input.country_shapes, - snakemake.input.offshore_shapes, - snakemake.config, + snakemake.input.eg_buses, + snakemake.input.eg_converters, + snakemake.input.eg_transformers, + snakemake.input.eg_lines, + snakemake.input.links_p_nom, + snakemake.input.europe_shape, + snakemake.input.country_shapes, + snakemake.input.offshore_shapes, + snakemake.config, ) logger.info("Base network created using OSM.") onshore_regions, offshore_regions, shapes = build_bus_shapes( - n, - snakemake.input.country_shapes, - snakemake.input.offshore_shapes, - snakemake.params.countries, + n, + snakemake.input.country_shapes, + snakemake.input.offshore_shapes, + snakemake.params.countries, ) shapes.to_file(snakemake.output.regions_onshore) @@ -1115,4 +1112,4 @@ def append_bus_shapes(n, shapes, type): offshore_shapes.to_frame().to_file(snakemake.output.regions_offshore) n.meta = 
snakemake.config - n.export_to_netcdf(snakemake.output.base_network) \ No newline at end of file + n.export_to_netcdf(snakemake.output.base_network) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 19f9f4ad1..60576a34d 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -11,20 +11,18 @@ import geopandas as gpd import numpy as np import pandas as pd -from _helpers import ( - configure_logging, - set_scenario_config, -) +from _benchmark import memory_logger +from _helpers import configure_logging, set_scenario_config from shapely.geometry import LineString, Point from shapely.ops import linemerge, split from tqdm import tqdm -from _benchmark import memory_logger logger = logging.getLogger(__name__) # list of recognised nan values (NA and na excluded as may be confused with Namibia 2-letter country code) NA_VALUES = ["NULL", "", "N/A", "NAN", "NaN", "nan", "Nan", "n/a", "null"] + def read_csv_nafix(file, **kwargs): "Function to open a csv as pandas file and standardize the na value" if "keep_default_na" not in kwargs: @@ -106,9 +104,9 @@ def to_csv_nafix(df, path, **kwargs): Returns: - None - If the DataFrame is not empty or does not have empty columns, it will be - written to the CSV file with NA values replaced by the first value in the - `NA_VALUES` list. If the DataFrame is empty or has empty columns, an empty + If the DataFrame is not empty or does not have empty columns, it will be + written to the CSV file with NA values replaced by the first value in the + `NA_VALUES` list. If the DataFrame is empty or has empty columns, an empty file will be created at the specified path. """ if "na_rep" in kwargs: @@ -123,17 +121,16 @@ def to_csv_nafix(df, path, **kwargs): def line_endings_to_bus_conversion(lines): """ Converts line endings to bus connections. - - This function takes a df of lines and converts the line endings to bus - connections. 
It performs the necessary operations to ensure that the line + + This function takes a df of lines and converts the line endings to bus + connections. It performs the necessary operations to ensure that the line endings are properly connected to the buses in the network. - + Parameters: lines (DataFrame) - + Returns: lines (DataFrame) - """ # Assign to every line a start and end point @@ -340,7 +337,7 @@ def merge_stations_same_station_id( lon_bus, # "lon" lat_bus, # "lat" bus_row["country"].iloc[0], # "country", - is_dclink_boundary_point, # check if new bus was formed of at least one DC link boundary point + is_dclink_boundary_point, # check if new bus was formed of at least one DC link boundary point Point( lon_bus, lat_bus, @@ -369,9 +366,9 @@ def merge_stations_same_station_id( "geometry", ] - gdf_buses_clean = gpd.GeoDataFrame(buses_clean, columns=buses_clean_columns).set_crs( - crs=buses.crs, inplace=True - ) + gdf_buses_clean = gpd.GeoDataFrame( + buses_clean, columns=buses_clean_columns + ).set_crs(crs=buses.crs, inplace=True) return gdf_buses_clean @@ -497,7 +494,7 @@ def get_converters(buses): g_value.country.loc[id_0], # "country" geom_conv, # "geometry" ] - ) + ) # name of the columns conv_columns = [ @@ -628,14 +625,12 @@ def merge_stations_lines_by_station_id_and_voltage( the merged dataset. 
""" - logger.info( - " - Setting substation ids with tolerance of %.2f m" % (tol) - ) + logger.info(" - Setting substation ids with tolerance of %.2f m" % (tol)) - # TODO pypsa-eur: Add this fix to pypsa-earth: Buses should not be clustered geographically if they are different + # TODO pypsa-eur: Add this fix to pypsa-earth: Buses should not be clustered geographically if they are different # bus types (AC != DC) buses_ac = buses[buses["dc"] == False].reset_index() - buses_dc = buses[buses["dc"] == True].reset_index() + buses_dc = buses[buses["dc"] == True].reset_index() # set substation ids # set_substations_ids(buses, distance_crs, tol=tol) @@ -646,7 +641,7 @@ def merge_stations_lines_by_station_id_and_voltage( # lines_dc_shape = lines[lines["dc"] == True].unary_union # lines_dc_bounds = lines_dc_shape.boundary # lines_dc_points = [p for p in lines_dc_bounds.geoms] - lines_dc = lines[lines['dc'] == True].reset_index() + lines_dc = lines[lines["dc"] == True].reset_index() lines_dc["adj_idx"] = range(0, len(lines_dc)) # Initialize an empty adjacency matrix @@ -667,12 +662,15 @@ def merge_stations_lines_by_station_id_and_voltage( bus_1_coors = lines_dc.iloc[path]["bus_1_coors"] # Create DataFrame containing all points within a path - dc_points = pd.concat([bus_0_coors, bus_1_coors], ignore_index = True) + dc_points = pd.concat([bus_0_coors, bus_1_coors], ignore_index=True) - # Determine the value counts of individual points. If it occurs more than + # Determine the value counts of individual points. 
If it occurs more than # once, it cannot be an end-point of a path - bool_duplicates = dc_points.apply(lambda p: sum([are_almost_equal(p, s) for s in dc_points])) > 1 - + bool_duplicates = ( + dc_points.apply(lambda p: sum([are_almost_equal(p, s) for s in dc_points])) + > 1 + ) + # Drop all duplicates dc_boundary_points = dc_points[~bool_duplicates] @@ -682,8 +680,9 @@ def merge_stations_lines_by_station_id_and_voltage( if all_dc_boundary_points.empty: all_dc_boundary_points = dc_boundary_points else: - all_dc_boundary_points = pd.concat([all_dc_boundary_points, dc_boundary_points], ignore_index = True) - + all_dc_boundary_points = pd.concat( + [all_dc_boundary_points, dc_boundary_points], ignore_index=True + ) # TODO pypsa-eur: Add to pypsa-earth for all related entries on is_dclink_boundary_point # check for each entry in buses_dc whether it is included in lines_dc_points @@ -733,38 +732,38 @@ def build_network( outputs, geo_crs, distance_crs, -): +): osm_clean_columns = { - 'substation': { - 'bus_id': 'object', - 'station_id': 'float', - 'voltage': 'float', - 'dc': 'bool', - 'symbol': 'object', - 'under_construction': 'bool', - 'tag_substation': 'str', - 'tag_area': 'str', - 'lon': 'float', - 'lat': 'float', - 'country': 'str', - 'geometry': 'object', - 'tag_source': 'str', + "substation": { + "bus_id": "object", + "station_id": "float", + "voltage": "float", + "dc": "bool", + "symbol": "object", + "under_construction": "bool", + "tag_substation": "str", + "tag_area": "str", + "lon": "float", + "lat": "float", + "country": "str", + "geometry": "object", + "tag_source": "str", + }, + "line": { + "line_id": "object", + "bus0": "object", + "bus1": "object", + "voltage": "float", + "circuits": "float", + "length": "float", + "underground": "bool", + "under_construction": "bool", + "tag_type": "str", + "tag_frequency": "float", + "dc": "bool", + "country": "object", + "geometry": "object", }, - 'line': { - 'line_id': 'object', - 'bus0': 'object', - 'bus1': 'object', 
- 'voltage': 'float', - 'circuits': 'float', - 'length': 'float', - 'underground': 'bool', - 'under_construction': 'bool', - 'tag_type': 'str', - 'tag_frequency': 'float', - 'dc': 'bool', - 'country': 'object', - 'geometry': 'object', - } } logger.info("Reading input data.") @@ -781,18 +780,16 @@ def build_network( ) lines = line_endings_to_bus_conversion(lines) - + # METHOD to merge buses with same voltage and within tolerance tol = snakemake.config["electricity_network"]["osm_group_tolerance_buses"] - logger.info( - f"Aggregating close substations: Enabled with tolerance {tol} m" - ) + logger.info(f"Aggregating close substations: Enabled with tolerance {tol} m") lines, buses = merge_stations_lines_by_station_id_and_voltage( lines, buses, distance_crs, tol=tol ) # Recalculate lengths of lines - utm = lines.estimate_utm_crs(datum_name = "WGS 84") + utm = lines.estimate_utm_crs(datum_name="WGS 84") lines["length"] = lines.to_crs(utm).length # get transformers: modelled as lines connecting buses with different voltage @@ -819,21 +816,50 @@ def build_network( # Convert voltages from V to kV lines["voltage"] = lines["voltage"] / 1000 - transformers["voltage_bus0"], transformers["voltage_bus1"] = transformers["voltage_bus0"] / 1000, \ - transformers["voltage_bus1"] / 1000 + transformers["voltage_bus0"], transformers["voltage_bus1"] = ( + transformers["voltage_bus0"] / 1000, + transformers["voltage_bus1"] / 1000, + ) buses["voltage"] = buses["voltage"] / 1000 - # Convert 'true' and 'false' to 't' and 'f' + # Convert 'true' and 'false' to 't' and 'f' lines = lines.replace({True: "t", False: "f"}) converters = converters.replace({True: "t", False: "f"}) buses = buses.replace({True: "t", False: "f"}) - + # Change column orders - cols_lines = ["bus0", "bus1", "voltage", "circuits", "length", "underground", "under_construction", "geometry", - "tag_type", "tag_frequency", "country", "bounds", - "bus_0_coors", "bus_1_coors", "bus0_lon", "bus0_lat", "bus1_lon", "bus1_lat"] - - 
cols_lines_csv = ["bus0", "bus1", "voltage", "circuits", "tag_frequency", "length", "underground", "under_construction", "geometry"] + cols_lines = [ + "bus0", + "bus1", + "voltage", + "circuits", + "length", + "underground", + "under_construction", + "geometry", + "tag_type", + "tag_frequency", + "country", + "bounds", + "bus_0_coors", + "bus_1_coors", + "bus0_lon", + "bus0_lat", + "bus1_lon", + "bus1_lat", + ] + + cols_lines_csv = [ + "bus0", + "bus1", + "voltage", + "circuits", + "tag_frequency", + "length", + "underground", + "under_construction", + "geometry", + ] lines_csv = lines[cols_lines_csv] lines = lines[cols_lines] @@ -844,16 +870,32 @@ def build_network( colstodrop = ["bounds", "bus_0_coors", "bus_1_coors"] # Export to GeoJSON for quick validations - save_to_geojson(gpd.GeoDataFrame(lines.drop(columns = colstodrop), geometry = "geometry", crs = geo_crs), outputs["lines_geojson"]) - save_to_geojson(gpd.GeoDataFrame(converters, geometry = "geometry", crs = geo_crs), outputs["converters_geojson"]) - save_to_geojson(gpd.GeoDataFrame(transformers.drop(columns = colstodrop), geometry = "geometry", crs = geo_crs), outputs["transformers_geojson"]) + save_to_geojson( + gpd.GeoDataFrame( + lines.drop(columns=colstodrop), geometry="geometry", crs=geo_crs + ), + outputs["lines_geojson"], + ) + save_to_geojson( + gpd.GeoDataFrame(converters, geometry="geometry", crs=geo_crs), + outputs["converters_geojson"], + ) + save_to_geojson( + gpd.GeoDataFrame( + transformers.drop(columns=colstodrop), geometry="geometry", crs=geo_crs + ), + outputs["transformers_geojson"], + ) # create clean directory if not already exist if not os.path.exists(outputs["substations"]): os.makedirs(os.path.dirname(outputs["substations"]), exist_ok=True) # Generate CSV to_csv_nafix(buses, outputs["substations"], quotechar="'") - save_to_geojson(gpd.GeoDataFrame(buses, geometry = "geometry", crs = geo_crs), outputs["substations_geojson"]) + save_to_geojson( + gpd.GeoDataFrame(buses, 
geometry="geometry", crs=geo_crs), + outputs["substations_geojson"], + ) return None @@ -868,30 +910,30 @@ def are_lines_connected(line1, line2): line2 (dict): A dictionary representing the second line. Returns: - tuple: A tuple of boolean values indicating the connection status between + tuple: A tuple of boolean values indicating the connection status between the lines. The tuple contains four elements: - - True if the first line's bus_0_coors is almost equal to the second line's + - True if the first line's bus_0_coors is almost equal to the second line's bus_0_coors, False otherwise. - - True if the first line's bus_0_coors is almost equal to the second line's + - True if the first line's bus_0_coors is almost equal to the second line's bus_1_coors, False otherwise. - - True if the first line's bus_1_coors is almost equal to the second line's + - True if the first line's bus_1_coors is almost equal to the second line's bus_0_coors, False otherwise. - - True if the first line's bus_1_coors is almost equal to the second line's + - True if the first line's bus_1_coors is almost equal to the second line's bus_1_coors, False otherwise. """ return ( are_almost_equal(line1["bus_0_coors"], line2["bus_0_coors"]), are_almost_equal(line1["bus_0_coors"], line2["bus_1_coors"]), are_almost_equal(line1["bus_1_coors"], line2["bus_0_coors"]), - are_almost_equal(line1["bus_1_coors"], line2["bus_1_coors"]) + are_almost_equal(line1["bus_1_coors"], line2["bus_1_coors"]), ) def _dfs(adj_matrix, visited, current_vertex, path): """ - Perform a depth-first search (DFS) on a graph represented by an adjacency + Perform a depth-first search (DFS) on a graph represented by an adjacency matrix. Parameters: @@ -902,7 +944,6 @@ def _dfs(adj_matrix, visited, current_vertex, path): Returns: - path (list): The path of vertices visited during the DFS. 
- """ visited[current_vertex] = True path.append(current_vertex) @@ -921,9 +962,8 @@ def find_paths(adj_matrix): - adj_matrix (list of lists): The adjacency matrix representing the graph. Returns: - - paths (list of lists): A list of lists, where each inner list represents + - paths (list of lists): A list of lists, where each inner list represents a path in the graph. - """ visited = [False] * len(adj_matrix) paths = [] @@ -938,12 +978,12 @@ def find_paths(adj_matrix): def are_almost_equal(point1, point2, tolerance=1e-6): """ Check if two Shapely points are almost equal with a given tolerance. - + Args: point1 (Point): First Shapely point. point2 (Point): Second Shapely point. tolerance (float): Tolerance for coordinate deviation. - + Returns: bool: True if the points are almost equal, False otherwise. """ @@ -972,28 +1012,32 @@ def merge_linestrings(gdf): i = 0 while i < len(lines): if are_almost_equal( - Point(merged_line.coords[-1]), - Point(lines[i].coords[0]) - ): - merged_line = LineString(list(merged_line.coords) + list(lines.pop(i).coords[1:])) + Point(merged_line.coords[-1]), Point(lines[i].coords[0]) + ): + merged_line = LineString( + list(merged_line.coords) + list(lines.pop(i).coords[1:]) + ) i = 0 # Restart the scan after merging elif are_almost_equal( - Point(merged_line.coords[0]), - Point(lines[i].coords[-1]) - ): - merged_line = LineString(list(lines.pop(i).coords)[:-1] + list(merged_line.coords)) + Point(merged_line.coords[0]), Point(lines[i].coords[-1]) + ): + merged_line = LineString( + list(lines.pop(i).coords)[:-1] + list(merged_line.coords) + ) i = 0 # Restart the scan after merging elif are_almost_equal( - Point(merged_line.coords[-1]), - Point(lines[i].coords[-1]) - ): - merged_line = LineString(list(merged_line.coords) + list(lines.pop(i).coords[::-1])[1:]) + Point(merged_line.coords[-1]), Point(lines[i].coords[-1]) + ): + merged_line = LineString( + list(merged_line.coords) + list(lines.pop(i).coords[::-1])[1:] + ) i = 0 # Restart the 
scan after merging elif are_almost_equal( - Point(merged_line.coords[0]), - Point(lines[i].coords[0]) - ): - merged_line = LineString(list(lines.pop(i).coords[::-1])[:-1] + list(merged_line.coords)) + Point(merged_line.coords[0]), Point(lines[i].coords[0]) + ): + merged_line = LineString( + list(lines.pop(i).coords[::-1])[:-1] + list(merged_line.coords) + ) i = 0 # Restart the scan after merging else: i += 1 @@ -1011,7 +1055,7 @@ def merge_linestrings(gdf): from _helpers import mock_snakemake snakemake = mock_snakemake("build_osm_network") - + configure_logging(snakemake) set_scenario_config(snakemake) @@ -1025,10 +1069,10 @@ def merge_linestrings(gdf): filename=getattr(snakemake.log, "memory", None), interval=30.0 ) as mem: build_network( - snakemake.input, - snakemake.output, - geo_crs, - distance_crs, + snakemake.input, + snakemake.output, + geo_crs, + distance_crs, ) - logger.info(f"Maximum memory usage: {mem.mem_usage}") \ No newline at end of file + logger.info(f"Maximum memory usage: {mem.mem_usage}") diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index dadc2c902..882c1229e 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -3,7 +3,7 @@ # # SPDX-License-Identifier: MIT """ -This script is used to clean OpenStreetMap (OSM) data for the PyPSA-Eur +This script is used to clean OpenStreetMap (OSM) data for the PyPSA-Eur project. The script performs various cleaning operations on the OSM data, including: @@ -18,24 +18,25 @@ python clean_osm_data.py Arguments: - output_file (str): The path to the output file where the cleaned data will + output_file (str): The path to the output file where the cleaned data will be written. 
Example: python clean_osm_data.py cleaned_data.csv """ -import geopandas as gpd import json import logging import os +import re + +import geopandas as gpd import numpy as np import pandas as pd -import re +from _helpers import configure_logging, set_scenario_config from shapely.geometry import LineString, Polygon from shapely.ops import linemerge -from _helpers import configure_logging, set_scenario_config logger = logging.getLogger(__name__) @@ -48,33 +49,32 @@ def _create_linestring(row): Returns: LineString: A LineString object representing the geometry. - """ - coords = [(coord['lon'], coord['lat']) for coord in row["geometry"]] + coords = [(coord["lon"], coord["lat"]) for coord in row["geometry"]] return LineString(coords) def _create_polygon(row): """ Create a Shapely Polygon from a list of coordinate dictionaries. - + Parameters: - coords (list): List of dictionaries with 'lat' and 'lon' keys + coords (list): List of dictionaries with 'lat' and 'lon' keys representing coordinates. - + Returns: shapely.geometry.Polygon: The constructed polygon object. 
""" # Extract coordinates as tuples - point_coords = [(coord['lon'], coord['lat']) for coord in row["geometry"]] - + point_coords = [(coord["lon"], coord["lat"]) for coord in row["geometry"]] + # Ensure closure by repeating the first coordinate as the last coordinate if point_coords[0] != point_coords[-1]: point_coords.append(point_coords[0]) - + # Create Polygon object polygon = Polygon(point_coords) - + return polygon @@ -92,8 +92,7 @@ def _clean_voltage(column): column = column.copy() column = ( - column - .astype(str) + column.astype(str) .str.lower() .str.replace("400/220/110 kV'", "400000;220000;110000") .str.replace("400/220/110/20_kv", "400000;220000;110000;20000") @@ -102,8 +101,7 @@ def _clean_voltage(column): ) column = ( - column - .astype(str) + column.astype(str) .str.lower() .str.replace("(temp 150000)", "") .str.replace("low", "1000") @@ -119,13 +117,13 @@ def _clean_voltage(column): .str.replace(",", ";") .str.replace("kv", "000") .str.replace("kva", "000") - .str.replace("/", ";") + .str.replace("/", ";") .str.replace("nan", "") .str.replace("", "") ) # Remove all remaining non-numeric characters except for semicolons - column = column.apply(lambda x: re.sub(r'[^0-9;]', '', str(x))) + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", str(x))) column.dropna(inplace=True) return column @@ -133,7 +131,7 @@ def _clean_voltage(column): def _clean_circuits(column): """ - Function to clean the raw circuits column: manual fixing and drop nan + Function to clean the raw circuits column: manual fixing and drop nan values Args: @@ -145,8 +143,7 @@ def _clean_circuits(column): logger.info("Cleaning circuits.") column = column.copy() column = ( - column - .astype(str) + column.astype(str) .str.replace("partial", "") .str.replace("1operator=RTE operator:wikidata=Q2178795", "") .str.lower() @@ -157,7 +154,7 @@ def _clean_circuits(column): ) # Remove all remaining non-numeric characters except for semicolons - column = column.apply(lambda x: 
re.sub(r'[^0-9;]', '', x)) + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) column.dropna(inplace=True) return column.astype(str) @@ -176,8 +173,7 @@ def _clean_cables(column): logger.info("Cleaning cables.") column = column.copy() column = ( - column - .astype(str) + column.astype(str) .str.lower() .str.replace("1/3", "1") .str.replace("3x2;2", "3") @@ -186,7 +182,7 @@ def _clean_cables(column): ) # Remove all remaining non-numeric characters except for semicolons - column = column.apply(lambda x: re.sub(r'[^0-9;]', '', x)) + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) column.dropna(inplace=True) return column.astype(str) @@ -205,8 +201,7 @@ def _clean_wires(column): logger.info("Cleaning wires.") column = column.copy() column = ( - column - .astype(str) + column.astype(str) .str.lower() .str.replace("?", "") .str.replace("trzyprzewodowe", "3") @@ -225,7 +220,7 @@ def _clean_wires(column): ) # Remove all remaining non-numeric characters except for semicolons - column = column.apply(lambda x: re.sub(r'[^0-9;]', '', x)) + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) column.dropna(inplace=True) return column.astype(str) @@ -240,19 +235,19 @@ def _check_voltage(voltage, list_voltages): list_voltages (list): A list of allowed voltages. Returns: - bool: True if the voltage is present in the list of allowed voltages, + bool: True if the voltage is present in the list of allowed voltages, False otherwise. 
""" - voltages = voltage.split(';') + voltages = voltage.split(";") for v in voltages: if v in list_voltages: return True return False -def _clean_frequency(column): +def _clean_frequency(column): """ - Function to clean the raw frequency column: manual fixing and drop nan + Function to clean the raw frequency column: manual fixing and drop nan values Args: @@ -264,8 +259,7 @@ def _clean_frequency(column): logger.info("Cleaning frequencies.") column = column.copy() column = ( - column - .astype(str) + column.astype(str) .str.lower() .str.replace("16.67", "16.7") .str.replace("16,7", "16.7") @@ -277,7 +271,7 @@ def _clean_frequency(column): ) # Remove all remaining non-numeric characters except for semicolons - column = column.apply(lambda x: re.sub(r'[^0-9;.]', '', x)) + column = column.apply(lambda x: re.sub(r"[^0-9;.]", "", x)) column.dropna(inplace=True) return column.astype(str) @@ -309,7 +303,7 @@ def _split_cells(df, cols=["voltage"]): # Create a dictionary to store the suffix count for each original ID suffix_counts = {} - # Create a dictionary to store the number of splits associated with each + # Create a dictionary to store the number of splits associated with each # original ID num_splits = {} @@ -318,12 +312,12 @@ def _split_cells(df, cols=["voltage"]): x = x.explode(cols, ignore_index=True) # Count the number of splits associated with each original ID - num_splits = x.groupby('id').size().to_dict() + num_splits = x.groupby("id").size().to_dict() # Update the 'split_elements' column x["split_elements"] = x["id"].map(num_splits) - # Function to generate the new ID with suffix and update the number of + # Function to generate the new ID with suffix and update the number of # splits def generate_new_id(row): original_id = row["id"] @@ -341,17 +335,16 @@ def generate_new_id(row): def _distribute_to_circuits(row): """ - Distributes the number of circuits or cables to individual circuits based + Distributes the number of circuits or cables to individual 
circuits based on the given row data. Parameters: - - row: A dictionary representing a row of data containing information about + - row: A dictionary representing a row of data containing information about circuits and cables. Returns: - - single_circuit: The number of circuits to be assigned to each individual + - single_circuit: The number of circuits to be assigned to each individual circuit. - """ if row["circuits"] != "": circuits = int(row["circuits"]) @@ -366,31 +359,30 @@ def _distribute_to_circuits(row): def _add_line_endings_to_substations( - df_substations, - gdf_lines, - path_country_shapes, - path_offshore_shapes, - ): + df_substations, + gdf_lines, + path_country_shapes, + path_offshore_shapes, +): """ Add line endings to substations. - This function takes two pandas DataFrames, `substations` and `lines`, and - adds line endings to the substations based on the information from the + This function takes two pandas DataFrames, `substations` and `lines`, and + adds line endings to the substations based on the information from the lines DataFrame. Parameters: - - substations (pandas DataFrame): DataFrame containing information about + - substations (pandas DataFrame): DataFrame containing information about substations. - lines (pandas DataFrame): DataFrame containing information about lines. Returns: - - buses (pandas DataFrame): DataFrame containing the updated information + - buses (pandas DataFrame): DataFrame containing the updated information about substations with line endings. 
- """ if gdf_lines.empty: return df_substations - + logger.info("Adding line endings to substations") # extract columns from df_substations bus_s = pd.DataFrame(columns=df_substations.columns) @@ -419,7 +411,9 @@ def _add_line_endings_to_substations( # Group gdf_substations by voltage and and geometry (dropping duplicates) bus_all = bus_all.groupby(["voltage", "lon", "lat", "dc"]).first().reset_index() bus_all = bus_all[df_substations.columns] - bus_all.loc[:, "bus_id"] = bus_all.apply(lambda row: f"line-end/{row.name + 1}", axis=1) + bus_all.loc[:, "bus_id"] = bus_all.apply( + lambda row: f"line-end/{row.name + 1}", axis=1 + ) # Initialize default values bus_all["station_id"] = np.nan @@ -435,30 +429,40 @@ def _add_line_endings_to_substations( buses.set_index("bus_id", inplace=True) # Fix country codes - # TODO pypsa-eur: Temporary solution as long as the shapes have a low, + # TODO pypsa-eur: Temporary solution as long as the shapes have a low, # incomplete resolution (cf. 2500 meters for buffering) bool_multiple_countries = buses["country"].str.contains(";") gdf_offshore = gpd.read_file(path_offshore_shapes).set_index("name")["geometry"] - gdf_offshore = gpd.GeoDataFrame(gdf_offshore, geometry=gdf_offshore, crs = gdf_offshore.crs) + gdf_offshore = gpd.GeoDataFrame( + gdf_offshore, geometry=gdf_offshore, crs=gdf_offshore.crs + ) gdf_countries = gpd.read_file(path_country_shapes).set_index("name")["geometry"] # reproject to enable buffer - gdf_countries = gpd.GeoDataFrame(geometry=gdf_countries, crs = gdf_countries.crs) - gdf_union = gdf_countries.merge(gdf_offshore, how="outer", left_index=True, right_index=True) - gdf_union["geometry"] = gdf_union.apply(lambda row: gpd.GeoSeries([row["geometry_x"], row["geometry_y"]]) \ - .unary_union, axis=1) - gdf_union = gpd.GeoDataFrame(geometry=gdf_union["geometry"], crs = crs) - utm = gdf_union.estimate_utm_crs(datum_name = "WGS 84") + gdf_countries = gpd.GeoDataFrame(geometry=gdf_countries, crs=gdf_countries.crs) + 
gdf_union = gdf_countries.merge( + gdf_offshore, how="outer", left_index=True, right_index=True + ) + gdf_union["geometry"] = gdf_union.apply( + lambda row: gpd.GeoSeries([row["geometry_x"], row["geometry_y"]]).unary_union, + axis=1, + ) + gdf_union = gpd.GeoDataFrame(geometry=gdf_union["geometry"], crs=crs) + utm = gdf_union.estimate_utm_crs(datum_name="WGS 84") gdf_union = gdf_union.to_crs(utm) - gdf_union = gdf_union.buffer(2500) # meters + gdf_union = gdf_union.buffer(2500) # meters gdf_union = gdf_union.to_crs(crs) - gdf_union = gpd.GeoDataFrame(geometry=gdf_union, crs = crs) - gdf_buses_tofix = gpd.GeoDataFrame(buses[bool_multiple_countries], geometry="geometry", crs = crs) + gdf_union = gpd.GeoDataFrame(geometry=gdf_union, crs=crs) + gdf_buses_tofix = gpd.GeoDataFrame( + buses[bool_multiple_countries], geometry="geometry", crs=crs + ) joined = gpd.sjoin(gdf_buses_tofix, gdf_union, how="left", predicate="within") joined.reset_index(inplace=True) joined = joined.drop_duplicates(subset="bus_id") joined.set_index("bus_id", inplace=True) - - buses.loc[bool_multiple_countries, "country"] = joined.loc[bool_multiple_countries, "index_right"] + + buses.loc[bool_multiple_countries, "country"] = joined.loc[ + bool_multiple_countries, "index_right" + ] return buses @@ -468,88 +472,112 @@ def _import_lines_and_cables(path_lines): Import lines and cables from the given input paths. Parameters: - - path_lines (dict): A dictionary containing the input paths for lines and + - path_lines (dict): A dictionary containing the input paths for lines and cables data. Returns: - - df_lines (DataFrame): A DataFrame containing the imported lines and + - df_lines (DataFrame): A DataFrame containing the imported lines and cables data. 
- """ - columns = ["id", "bounds", "nodes", "geometry", "country", "power", "cables", "circuits", "frequency", "voltage", - "wires"] + columns = [ + "id", + "bounds", + "nodes", + "geometry", + "country", + "power", + "cables", + "circuits", + "frequency", + "voltage", + "wires", + ] df_lines = pd.DataFrame(columns=columns) logger.info("Importing lines and cables") for key in path_lines: logger.info(f"Processing {key}...") for idx, ip in enumerate(path_lines[key]): - if os.path.exists(ip) and os.path.getsize(ip) > 400: # unpopulated OSM json is about 51 bytes + if ( + os.path.exists(ip) and os.path.getsize(ip) > 400 + ): # unpopulated OSM json is about 51 bytes country = os.path.basename(os.path.dirname(path_lines[key][idx])) - + logger.info( f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(path_lines[key])).zfill(2)}: {ip}" - ) + ) with open(ip, "r") as f: data = json.load(f) - - df = pd.DataFrame(data['elements']) + + df = pd.DataFrame(data["elements"]) df["id"] = df["id"].astype(str) df["country"] = country # col_tags = ["power", "cables", "circuits", "frequency", "voltage", "wires", "capacity", "rating"] - col_tags = ["power", "cables", "circuits", "frequency", "voltage", "wires"] + col_tags = [ + "power", + "cables", + "circuits", + "frequency", + "voltage", + "wires", + ] + + tags = pd.json_normalize(df["tags"]).map( + lambda x: str(x) if pd.notnull(x) else x + ) - tags = pd.json_normalize(df["tags"]) \ - .map(lambda x: str(x) if pd.notnull(x) else x) - for ct in col_tags: if ct not in tags.columns: tags[ct] = pd.NA - + tags = tags.loc[:, col_tags] - df = pd.concat([df, tags], axis="columns") + df = pd.concat([df, tags], axis="columns") df.drop(columns=["type", "tags"], inplace=True) - + df_lines = pd.concat([df_lines, df], axis="rows") else: logger.info( f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(path_lines[key])).zfill(2)} (empty): {ip}" - ) + ) continue logger.info("---") - + return df_lines def _drop_duplicate_lines(df_lines): """ - Drop 
duplicate lines from the given dataframe. Duplicates are usually lines + Drop duplicate lines from the given dataframe. Duplicates are usually lines cross-border lines or slightly outside the country border of focus. Parameters: - df_lines (pandas.DataFrame): The dataframe containing lines data. Returns: - - df_lines (pandas.DataFrame): The dataframe with duplicate lines removed + - df_lines (pandas.DataFrame): The dataframe with duplicate lines removed and cleaned data. - This function drops duplicate lines from the given dataframe based on the - 'id' column. It groups the duplicate rows by 'id' and aggregates the + This function drops duplicate lines from the given dataframe based on the + 'id' column. It groups the duplicate rows by 'id' and aggregates the 'country' column to a string split by semicolon, as they appear in multiple - country datasets. One example of the duplicates is kept, accordingly. + country datasets. One example of the duplicates is kept, accordingly. Finally, the updated dataframe without multiple duplicates is returned. 
""" logger.info("Dropping duplicate lines.") - duplicate_rows = df_lines[df_lines.duplicated(subset=['id'], keep=False)].copy() + duplicate_rows = df_lines[df_lines.duplicated(subset=["id"], keep=False)].copy() # Group rows by id and aggregate the country column to a string split by semicolon - grouped_duplicates = duplicate_rows.groupby('id')["country"].agg(lambda x: ';'.join(x)).reset_index() + grouped_duplicates = ( + duplicate_rows.groupby("id")["country"].agg(lambda x: ";".join(x)).reset_index() + ) duplicate_rows.drop_duplicates(subset="id", inplace=True) duplicate_rows.drop(columns=["country"], inplace=True) - duplicate_rows = duplicate_rows.join(grouped_duplicates.set_index('id'), on='id', how='left') + duplicate_rows = duplicate_rows.join( + grouped_duplicates.set_index("id"), on="id", how="left" + ) # Drop duplicates and update the df_lines dataframe with the cleaned data df_lines = df_lines[~df_lines["id"].isin(duplicate_rows["id"])] @@ -564,16 +592,18 @@ def _filter_by_voltage(df, voltage_min=200000): Parameters: - df (pandas.DataFrame): The DataFrame containing the substations or lines data. - - voltage_min (int, optional): The minimum voltage value to filter the + - voltage_min (int, optional): The minimum voltage value to filter the rows. Defaults to 200000 [unit: V]. Returns: - - filtered df (pandas.DataFrame): The filtered DataFrame containing + - filtered df (pandas.DataFrame): The filtered DataFrame containing the lines or substations above voltage_min. - list_voltages (list): A list of unique voltage values above voltage_min. The type of the list elements is string. """ - logger.info(f"Filtering dataframe by voltage. Only keeping rows above and including {voltage_min} V.") + logger.info( + f"Filtering dataframe by voltage. Only keeping rows above and including {voltage_min} V." 
+ ) list_voltages = df["voltage"].str.split(";").explode().unique().astype(str) # Keep numeric strings list_voltages = list_voltages[np.vectorize(str.isnumeric)(list_voltages)] @@ -594,13 +624,13 @@ def _clean_substations(df_substations, list_voltages): - Filter substation data based on specified voltages. - Update the frequency values based on the split count. - Split cells in the 'frequency' column. - - Set remaining invalid frequency values that are not in ['0', '50'] + - Set remaining invalid frequency values that are not in ['0', '50'] to '50'. Parameters: - - df_substations (pandas.DataFrame): The input dataframe containing + - df_substations (pandas.DataFrame): The input dataframe containing substation data. - - list_voltages (list): A list of voltages above voltage_min to filter the + - list_voltages (list): A list of voltages above voltage_min to filter the substation data. Returns: @@ -610,22 +640,31 @@ def _clean_substations(df_substations, list_voltages): df_substations = _split_cells(df_substations) - bool_voltages = df_substations["voltage"].apply(_check_voltage, list_voltages=list_voltages) + bool_voltages = df_substations["voltage"].apply( + _check_voltage, list_voltages=list_voltages + ) df_substations = df_substations[bool_voltages] - df_substations.loc[:, "split_count"] = df_substations["id"].apply(lambda x: x.split("-")[1] if "-" in x else "0") + df_substations.loc[:, "split_count"] = df_substations["id"].apply( + lambda x: x.split("-")[1] if "-" in x else "0" + ) df_substations.loc[:, "split_count"] = df_substations["split_count"].astype(int) bool_split = df_substations["split_elements"] > 1 - bool_frequency_len = df_substations["frequency"] \ - .apply(lambda x: len(x.split(";"))) == df_substations["split_elements"] - - op_freq = lambda row: row["frequency"].split(";")[row["split_count"]-1] - - df_substations.loc[bool_frequency_len & bool_split, "frequency"] = df_substations \ - .loc[bool_frequency_len & bool_split, ].apply(op_freq, axis=1) - + 
bool_frequency_len = ( + df_substations["frequency"].apply(lambda x: len(x.split(";"))) + == df_substations["split_elements"] + ) + + op_freq = lambda row: row["frequency"].split(";")[row["split_count"] - 1] + + df_substations.loc[bool_frequency_len & bool_split, "frequency"] = ( + df_substations.loc[bool_frequency_len & bool_split,].apply(op_freq, axis=1) + ) + df_substations = _split_cells(df_substations, cols=["frequency"]) - bool_invalid_frequency = df_substations["frequency"].apply(lambda x: x not in ["50", "0"]) + bool_invalid_frequency = df_substations["frequency"].apply( + lambda x: x not in ["50", "0"] + ) df_substations.loc[bool_invalid_frequency, "frequency"] = "50" return df_substations @@ -633,15 +672,15 @@ def _clean_substations(df_substations, list_voltages): def _clean_lines(df_lines, list_voltages): """ - Cleans and processes the `df_lines` DataFrame heuristically based on the - information available per respective line and cable. - Further checks to ensure data consistency and completeness. + Cleans and processes the `df_lines` DataFrame heuristically based on the + information available per respective line and cable. Further checks to + ensure data consistency and completeness. Parameters ---------- df_lines : pandas.DataFrame - The input DataFrame containing line information with columns such as - 'voltage', 'circuits', 'frequency', 'cables', 'split_elements', 'id', + The input DataFrame containing line information with columns such as + 'voltage', 'circuits', 'frequency', 'cables', 'split_elements', 'id', etc. list_voltages : list A list of unique voltage values above a certain threshold. (type: str) @@ -649,7 +688,7 @@ def _clean_lines(df_lines, list_voltages): Returns ------- df_lines : pandas.DataFrame - The cleaned DataFrame with updated columns 'circuits', 'frequency', and + The cleaned DataFrame with updated columns 'circuits', 'frequency', and 'cleaned' to reflect the applied transformations. 
Description @@ -658,18 +697,18 @@ def _clean_lines(df_lines, list_voltages): - Initializes a 'cleaned' column with False, step-wise updates to True following the respective cleaning step. - - Splits the voltage cells in the DataFrame at semicolons using a helper + - Splits the voltage cells in the DataFrame at semicolons using a helper function `_split_cells`. - Filters the DataFrame to only include rows with valid voltages. - - Sets circuits of remaining lines without any applicable heuristic equal + - Sets circuits of remaining lines without any applicable heuristic equal to 1. - The function ensures that the resulting DataFrame has consistent and - complete information for further processing or analysis while maintaining + The function ensures that the resulting DataFrame has consistent and + complete information for further processing or analysis while maintaining the data of the original OSM data set wherever possible. """ logger.info("Cleaning lines and determining circuits.") - # Initiate boolean with False, only set to true if all cleaning steps are + # Initiate boolean with False, only set to true if all cleaning steps are # passed df_lines = df_lines.copy() df_lines["cleaned"] = False @@ -678,13 +717,17 @@ def _clean_lines(df_lines, list_voltages): df_lines["circuits_original"] = df_lines["circuits"] df_lines = _split_cells(df_lines) - bool_voltages = df_lines["voltage"].apply(_check_voltage, list_voltages=list_voltages) + bool_voltages = df_lines["voltage"].apply( + _check_voltage, list_voltages=list_voltages + ) df_lines = df_lines[bool_voltages] bool_ac = df_lines["frequency"] != "0" bool_dc = ~bool_ac valid_frequency = ["50", "0"] - bool_invalid_frequency = df_lines["frequency"].apply(lambda x: x not in valid_frequency) + bool_invalid_frequency = df_lines["frequency"].apply( + lambda x: x not in valid_frequency + ) bool_noinfo = (df_lines["cables"] == "") & (df_lines["circuits"] == "") # Fill in all values where cables info and circuits does not 
exist. Assuming 1 circuit @@ -693,100 +736,127 @@ def _clean_lines(df_lines, list_voltages): df_lines.loc[bool_noinfo, "cleaned"] = True # Fill in all values where cables info exists and split_elements == 1 - bool_cables_ac = (df_lines["cables"] != "") & \ - (df_lines["split_elements"] == 1) & \ - (df_lines["cables"] != "0") & \ - (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) & \ - (df_lines["circuits"] == "") & \ - (df_lines["cleaned"] == False) & \ - bool_ac - - df_lines.loc[bool_cables_ac, "circuits"] = df_lines.loc[bool_cables_ac, "cables"] \ - .apply(lambda x: str(int(max(1, np.floor_divide(int(x),3))))) - + bool_cables_ac = ( + (df_lines["cables"] != "") + & (df_lines["split_elements"] == 1) + & (df_lines["cables"] != "0") + & (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["circuits"] == "") + & (df_lines["cleaned"] == False) + & bool_ac + ) + + df_lines.loc[bool_cables_ac, "circuits"] = df_lines.loc[ + bool_cables_ac, "cables" + ].apply(lambda x: str(int(max(1, np.floor_divide(int(x), 3))))) + df_lines.loc[bool_cables_ac, "frequency"] = "50" df_lines.loc[bool_cables_ac, "cleaned"] = True - bool_cables_dc = (df_lines["cables"] != "") & \ - (df_lines["split_elements"] == 1) & \ - (df_lines["cables"] != "0") & \ - (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) & \ - (df_lines["circuits"] == "") & \ - (df_lines["cleaned"] == False) & \ - bool_dc - - df_lines.loc[bool_cables_dc, "circuits"] = df_lines.loc[bool_cables_dc, "cables"] \ - .apply(lambda x: str(int(max(1, np.floor_divide(int(x),2))))) - + bool_cables_dc = ( + (df_lines["cables"] != "") + & (df_lines["split_elements"] == 1) + & (df_lines["cables"] != "0") + & (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["circuits"] == "") + & (df_lines["cleaned"] == False) + & bool_dc + ) + + df_lines.loc[bool_cables_dc, "circuits"] = df_lines.loc[ + bool_cables_dc, "cables" + ].apply(lambda x: str(int(max(1, np.floor_divide(int(x), 
2))))) + df_lines.loc[bool_cables_dc, "frequency"] = "0" df_lines.loc[bool_cables_dc, "cleaned"] = True # Fill in all values where circuits info exists and split_elements == 1 - bool_lines = (df_lines["circuits"] != "") & \ - (df_lines["split_elements"] == 1) & \ - (df_lines["circuits"] != "0") & \ - (df_lines["circuits"].apply(lambda x: len(x.split(";")) == 1)) & \ - (df_lines["cleaned"] == False) - + bool_lines = ( + (df_lines["circuits"] != "") + & (df_lines["split_elements"] == 1) + & (df_lines["circuits"] != "0") + & (df_lines["circuits"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["cleaned"] == False) + ) + df_lines.loc[bool_lines & bool_ac, "frequency"] = "50" df_lines.loc[bool_lines & bool_dc, "frequency"] = "0" df_lines.loc[bool_lines, "cleaned"] = True - # Clean those values where number of voltages split by semicolon is larger + # Clean those values where number of voltages split by semicolon is larger # than no cables or no circuits - bool_cables = (df_lines["voltage_original"].apply(lambda x: len(x.split(";")) > 1)) & \ - (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) & \ - (df_lines["circuits"].apply(lambda x: len(x.split(";")) == 1)) & \ - (df_lines["cleaned"] == False) - - df_lines.loc[bool_cables, "circuits"] = df_lines[bool_cables] \ - .apply(_distribute_to_circuits, axis=1) + bool_cables = ( + (df_lines["voltage_original"].apply(lambda x: len(x.split(";")) > 1)) + & (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["circuits"].apply(lambda x: len(x.split(";")) == 1)) + & (df_lines["cleaned"] == False) + ) + + df_lines.loc[bool_cables, "circuits"] = df_lines[bool_cables].apply( + _distribute_to_circuits, axis=1 + ) df_lines.loc[bool_cables & bool_ac, "frequency"] = "50" df_lines.loc[bool_cables & bool_dc, "frequency"] = "0" df_lines.loc[bool_cables, "cleaned"] = True - # Clean those values where multiple circuit values are present, divided by + # Clean those values where multiple circuit values are 
present, divided by # semicolon - bool_cables = (df_lines["circuits"].apply(lambda x: len(x.split(";")) > 1)) & \ - (df_lines.apply(lambda row: len(row["circuits"].split(";")) == row["split_elements"], axis=1)) & \ - (df_lines["cleaned"] == False) - - df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables] \ - .apply(lambda row: str(row["circuits"].split(";")[ - int(row["id"].split("-")[-1])-1 - ]), axis=1) - + bool_cables = ( + (df_lines["circuits"].apply(lambda x: len(x.split(";")) > 1)) + & ( + df_lines.apply( + lambda row: len(row["circuits"].split(";")) == row["split_elements"], + axis=1, + ) + ) + & (df_lines["cleaned"] == False) + ) + + df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables].apply( + lambda row: str(row["circuits"].split(";")[int(row["id"].split("-")[-1]) - 1]), + axis=1, + ) + df_lines.loc[bool_cables & bool_ac, "frequency"] = "50" df_lines.loc[bool_cables & bool_dc, "frequency"] = "0" df_lines.loc[bool_cables, "cleaned"] = True - # Clean those values where multiple cables values are present, divided by + # Clean those values where multiple cables values are present, divided by # semicolon - bool_cables = (df_lines["cables"].apply(lambda x: len(x.split(";")) > 1)) & \ - (df_lines.apply(lambda row: len(row["cables"].split(";")) == row["split_elements"], axis=1)) & \ - (df_lines["cleaned"] == False) + bool_cables = ( + (df_lines["cables"].apply(lambda x: len(x.split(";")) > 1)) + & ( + df_lines.apply( + lambda row: len(row["cables"].split(";")) == row["split_elements"], + axis=1, + ) + ) + & (df_lines["cleaned"] == False) + ) - df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables] \ - .apply(lambda row: - str(max(1, + df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables].apply( + lambda row: str( + max( + 1, np.floor_divide( - int(row["cables"].split(";")[int(row["id"].split("-")[-1])-1]), - 3 - ) - )), - axis=1) - + int(row["cables"].split(";")[int(row["id"].split("-")[-1]) - 1]), 3 + ), + ) + ), + 
axis=1, + ) + df_lines.loc[bool_cables & bool_ac, "frequency"] = "50" df_lines.loc[bool_cables & bool_dc, "frequency"] = "0" df_lines.loc[bool_cables, "cleaned"] = True # All remaining lines to circuits == 1 - bool_leftover = (df_lines["cleaned"] == False) + bool_leftover = df_lines["cleaned"] == False if sum(bool_leftover) > 0: str_id = "; ".join(str(id) for id in df_lines.loc[bool_leftover, "id"]) logger.info(f"Setting circuits of remaining {sum(bool_leftover)} lines to 1...") logger.info(f"Lines affected: {str_id}") - + df_lines.loc[bool_leftover, "circuits"] = "1" df_lines.loc[bool_leftover & bool_ac, "frequency"] = "50" df_lines.loc[bool_leftover & bool_dc, "frequency"] = "0" @@ -800,20 +870,21 @@ def _create_substations_geometry(df_substations): Creates centroids from geometries and keeps the original polygons. Parameters: - df_substations (DataFrame): The input DataFrame containing the substations + df_substations (DataFrame): The input DataFrame containing the substations data. Returns: - df_substations (DataFrame): A new DataFrame with the centroids ["geometry"] + df_substations (DataFrame): A new DataFrame with the centroids ["geometry"] and polygons ["polygon"] of the substations geometries. - """ logger.info("Creating substations geometry.") df_substations = df_substations.copy() - + # Create centroids from geometries and keep the original polygons df_substations.loc[:, "polygon"] = df_substations["geometry"] - df_substations.loc[:, "geometry"] = df_substations["geometry"].apply(lambda x: x.centroid) + df_substations.loc[:, "geometry"] = df_substations["geometry"].apply( + lambda x: x.centroid + ) df_substations.loc[:, "lon"] = df_substations["geometry"].apply(lambda x: x.x) df_substations.loc[:, "lat"] = df_substations["geometry"].apply(lambda x: x.y) @@ -828,21 +899,21 @@ def _create_lines_geometry(df_lines): - df_lines (pandas.DataFrame): DataFrame containing lines data. 
Returns: - - df_lines (pandas.DataFrame): DataFrame with transformed 'geometry' + - df_lines (pandas.DataFrame): DataFrame with transformed 'geometry' column (type: shapely LineString). Notes: - - This function transforms 'geometry' column in the input DataFrame by + - This function transforms 'geometry' column in the input DataFrame by applying the '_create_linestring' function to each row. - - It then drops rows where the geometry has equal start and end points, + - It then drops rows where the geometry has equal start and end points, as these are usually not lines but outlines of areas. """ logger.info("Creating lines geometry.") df_lines = df_lines.copy() - df_lines.loc[:, "geometry"] = df_lines.apply(_create_linestring, axis=1) + df_lines.loc[:, "geometry"] = df_lines.apply(_create_linestring, axis=1) - bool_circle = df_lines["geometry"].apply(lambda x: x.coords[0] == x.coords[-1]) - df_lines = df_lines[~bool_circle] + bool_circle = df_lines["geometry"].apply(lambda x: x.coords[0] == x.coords[-1]) + df_lines = df_lines[~bool_circle] return df_lines @@ -852,11 +923,11 @@ def _finalise_substations(df_substations): Finalises the substations column types. Args: - df_substations (pandas.DataFrame): The input DataFrame + df_substations (pandas.DataFrame): The input DataFrame containing substations data. Returns: - df_substations (pandas.DataFrame(): The DataFrame with finalised column + df_substations (pandas.DataFrame(): The DataFrame with finalised column types and transformed data. 
""" logger.info("Finalising substations column types.") @@ -864,12 +935,14 @@ def _finalise_substations(df_substations): # rename columns df_substations.rename( columns={ - "id": "bus_id", + "id": "bus_id", "power": "symbol", - "substation":"tag_substation", - }, inplace=True) - - # Initiate new columns for subsequent build_osm_network step + "substation": "tag_substation", + }, + inplace=True, + ) + + # Initiate new columns for subsequent build_osm_network step df_substations.loc[:, "symbol"] = "substation" df_substations.loc[:, "tag_substation"] = "transmission" df_substations.loc[:, "dc"] = False @@ -880,23 +953,25 @@ def _finalise_substations(df_substations): df_substations.loc[:, "tag_source"] = df_substations["bus_id"] # Only included needed columns - df_substations = df_substations[[ - "bus_id", - "symbol", - "tag_substation", - "voltage", - "lon", - "lat", - "dc", - "under_construction", - "station_id", - "tag_area", - "country", - "geometry", - "polygon", - "tag_source", - ]] - + df_substations = df_substations[ + [ + "bus_id", + "symbol", + "tag_substation", + "voltage", + "lon", + "lat", + "dc", + "under_construction", + "station_id", + "tag_area", + "country", + "geometry", + "polygon", + "tag_source", + ] + ] + # Substation data types df_substations["voltage"] = df_substations["voltage"].astype(int) @@ -911,7 +986,7 @@ def _finalise_lines(df_lines): df_lines (pandas.DataFrame): The input DataFrame containing lines data. Returns: - df_lines (pandas.DataFrame(): The DataFrame with finalised column types + df_lines (pandas.DataFrame(): The DataFrame with finalised column types and transformed data. 
""" logger.info("Finalising lines column types.") @@ -919,11 +994,13 @@ def _finalise_lines(df_lines): # Rename columns df_lines.rename( columns={ - "id": "line_id", + "id": "line_id", "power": "tag_type", - "frequency":"tag_frequency", - }, inplace=True) - + "frequency": "tag_frequency", + }, + inplace=True, + ) + # Initiate new columns for subsequent build_osm_network step df_lines.loc[:, "bus0"] = None df_lines.loc[:, "bus1"] = None @@ -937,22 +1014,24 @@ def _finalise_lines(df_lines): df_lines.loc[df_lines["tag_frequency"] == "0", "dc"] = True # Only include needed columns - df_lines = df_lines[[ - "line_id", - "circuits", - "tag_type", - "voltage", - "tag_frequency", - "bus0", - "bus1", - "length", - "underground", - "under_construction", - "dc", - "country", - "geometry", - ]] - + df_lines = df_lines[ + [ + "line_id", + "circuits", + "tag_type", + "voltage", + "tag_frequency", + "bus0", + "bus1", + "length", + "underground", + "under_construction", + "dc", + "country", + "geometry", + ] + ] + # Set lines data types df.apply(pd.to_numeric, args=('coerce',)) # This workaround is needed as otherwise the column dtypes remain "objects" df_lines["circuits"] = df_lines["circuits"].astype(int) @@ -965,81 +1044,107 @@ def _finalise_lines(df_lines): def _import_substations(path_substations): """ Import substations from the given input paths. This function imports both - substations from OSM ways as well as relations that contain nested + substations from OSM ways as well as relations that contain nested information on the substations shape and electrical parameters. Ways and - relations are subsequently concatenated to form a single DataFrame + relations are subsequently concatenated to form a single DataFrame containing unique bus ids. Args: - path_substations (dict): A dictionary containing input paths for + path_substations (dict): A dictionary containing input paths for substations. Returns: pd.DataFrame: A DataFrame containing the imported substations data. 
""" - cols_substations_way = ["id", "geometry", "country", "power", "substation", "voltage", "frequency"] - cols_substations_relation = ["id", "country", "power", "substation", "voltage", "frequency"] - df_substations_way = pd.DataFrame(columns = cols_substations_way) - df_substations_relation = pd.DataFrame(columns = cols_substations_relation) + cols_substations_way = [ + "id", + "geometry", + "country", + "power", + "substation", + "voltage", + "frequency", + ] + cols_substations_relation = [ + "id", + "country", + "power", + "substation", + "voltage", + "frequency", + ] + df_substations_way = pd.DataFrame(columns=cols_substations_way) + df_substations_relation = pd.DataFrame(columns=cols_substations_relation) logger.info("Importing substations") for key in path_substations: logger.info(f"Processing {key}...") for idx, ip in enumerate(path_substations[key]): - if os.path.exists(ip) and os.path.getsize(ip) > 400: # unpopulated OSM json is about 51 bytes - country = os.path.basename(os.path.dirname(path_substations[key][idx])) + if ( + os.path.exists(ip) and os.path.getsize(ip) > 400 + ): # unpopulated OSM json is about 51 bytes + country = os.path.basename(os.path.dirname(path_substations[key][idx])) logger.info( f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(path_substations[key])).zfill(2)}: {ip}" - ) + ) with open(ip, "r") as f: data = json.load(f) - - df = pd.DataFrame(data['elements']) + + df = pd.DataFrame(data["elements"]) df["id"] = df["id"].astype(str) # new string that adds "way/" to id - df["id"] = df["id"].apply(lambda x: f"way/{x}" if key == "substations_way" else f"relation/{x}") + df["id"] = df["id"].apply( + lambda x: ( + f"way/{x}" if key == "substations_way" else f"relation/{x}" + ) + ) df["country"] = country col_tags = ["power", "substation", "voltage", "frequency"] - tags = pd.json_normalize(df["tags"]) \ - .map(lambda x: str(x) if pd.notnull(x) else x) - + tags = pd.json_normalize(df["tags"]).map( + lambda x: str(x) if pd.notnull(x) else 
x + ) + for ct in col_tags: if ct not in tags.columns: tags[ct] = pd.NA - + tags = tags.loc[:, col_tags] - df = pd.concat([df, tags], axis="columns") + df = pd.concat([df, tags], axis="columns") if key == "substations_way": df.drop(columns=["type", "tags", "bounds", "nodes"], inplace=True) - df_substations_way = pd.concat([df_substations_way, df], axis="rows") + df_substations_way = pd.concat( + [df_substations_way, df], axis="rows" + ) elif key == "substations_relation": df.drop(columns=["type", "tags", "bounds"], inplace=True) - df_substations_relation = pd.concat([df_substations_relation, df], axis="rows") + df_substations_relation = pd.concat( + [df_substations_relation, df], axis="rows" + ) else: logger.info( f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(path_substations[key])).zfill(2)} (empty): {ip}" - ) + ) continue logger.info("---") - df_substations_way.drop_duplicates(subset='id', keep='first', inplace=True) - df_substations_relation.drop_duplicates(subset='id', keep='first', inplace=True) + df_substations_way.drop_duplicates(subset="id", keep="first", inplace=True) + df_substations_relation.drop_duplicates(subset="id", keep="first", inplace=True) df_substations_way["geometry"] = df_substations_way.apply(_create_polygon, axis=1) # Normalise the members column of df_substations_relation cols_members = ["id", "type", "ref", "role", "geometry"] - df_substations_relation_members = pd.DataFrame(columns = cols_members) + df_substations_relation_members = pd.DataFrame(columns=cols_members) for index, row in df_substations_relation.iterrows(): col_members = ["type", "ref", "role", "geometry"] - df = pd.json_normalize(row["members"]) - + df = pd.json_normalize(row["members"]) + for cm in col_members: if cm not in df.columns: df[cm] = pd.NA @@ -1050,38 +1155,52 @@ def _import_substations(path_substations): df = df[df["type"] != "node"] df = df.dropna(subset=["geometry"]) df = df[~df["role"].isin(["", "incoming_line", "substation", "inner"])] - 
df_substations_relation_members = pd.concat([df_substations_relation_members, df], axis="rows") - + df_substations_relation_members = pd.concat( + [df_substations_relation_members, df], axis="rows" + ) + df_substations_relation_members.reset_index(inplace=True) - df_substations_relation_members["linestring"] = df_substations_relation_members.apply(_create_linestring, axis=1) - df_substations_relation_members_grouped = df_substations_relation_members.groupby('id')['linestring'] \ - .apply(lambda x: linemerge(x.tolist())).reset_index() - df_substations_relation_members_grouped["geometry"] = df_substations_relation_members_grouped["linestring"] \ - .apply(lambda x: x.convex_hull) - - df_substations_relation = df_substations_relation.join( - df_substations_relation_members_grouped.set_index('id'), - on='id', how='left' - ).drop(columns=["members", "linestring"]) \ + df_substations_relation_members["linestring"] = ( + df_substations_relation_members.apply(_create_linestring, axis=1) + ) + df_substations_relation_members_grouped = ( + df_substations_relation_members.groupby("id")["linestring"] + .apply(lambda x: linemerge(x.tolist())) + .reset_index() + ) + df_substations_relation_members_grouped["geometry"] = ( + df_substations_relation_members_grouped["linestring"].apply( + lambda x: x.convex_hull + ) + ) + + df_substations_relation = ( + df_substations_relation.join( + df_substations_relation_members_grouped.set_index("id"), on="id", how="left" + ) + .drop(columns=["members", "linestring"]) .dropna(subset=["geometry"]) - + ) + # reorder columns and concatenate df_substations_relation = df_substations_relation[cols_substations_way] - df_substations = pd.concat([df_substations_way, df_substations_relation], axis="rows") + df_substations = pd.concat( + [df_substations_way, df_substations_relation], axis="rows" + ) return df_substations def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): """ - Removes lines that are within substation polygons from 
the given - GeoDataFrame of lines. These are not needed to create network (e.g. bus + Removes lines that are within substation polygons from the given + GeoDataFrame of lines. These are not needed to create network (e.g. bus bars, switchgear, etc.) Parameters: - - gdf_lines (GeoDataFrame): A GeoDataFrame containing lines with 'line_id' + - gdf_lines (GeoDataFrame): A GeoDataFrame containing lines with 'line_id' and 'geometry' columns. - - gdf_substations_polygon (GeoDataFrame): A GeoDataFrame containing + - gdf_substations_polygon (GeoDataFrame): A GeoDataFrame containing substation polygons. Returns: @@ -1089,13 +1208,15 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): """ logger.info("Identifying and removing lines within substation polygons...") gdf = gpd.sjoin( - gdf_lines[["line_id", "geometry"]], - gdf_substations_polygon, + gdf_lines[["line_id", "geometry"]], + gdf_substations_polygon, how="inner", - predicate="within" + predicate="within", )["line_id"] - logger.info(f"Removed {len(gdf)} lines within substations of original {len(gdf_lines)} lines.") + logger.info( + f"Removed {len(gdf)} lines within substations of original {len(gdf_lines)} lines." + ) gdf_lines = gdf_lines[~gdf_lines["line_id"].isin(gdf)] return gdf_lines @@ -1106,15 +1227,15 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): from _helpers import mock_snakemake snakemake = mock_snakemake("clean_osm_data") - + configure_logging(snakemake) set_scenario_config(snakemake) - + # Parameters - crs = "EPSG:4326" # Correct crs for OSM data - voltage_min = 200000 # [unit: V] Minimum voltage value to filter lines. + crs = "EPSG:4326" # Correct crs for OSM data + voltage_min = 200000 # [unit: V] Minimum voltage value to filter lines. 
- # TODO pypsa-eur: Temporary solution as one AC line between converters will + # TODO pypsa-eur: Temporary solution as one AC line between converters will # create an error in simplify_network: lines_to_drop = ["775580659"] @@ -1129,7 +1250,9 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): # Cleaning process df_substations = _import_substations(path_substations) df_substations["voltage"] = _clean_voltage(df_substations["voltage"]) - df_substations, list_voltages = _filter_by_voltage(df_substations, voltage_min=voltage_min) + df_substations, list_voltages = _filter_by_voltage( + df_substations, voltage_min=voltage_min + ) df_substations["frequency"] = _clean_frequency(df_substations["frequency"]) df_substations = _clean_substations(df_substations, list_voltages) df_substations = _create_substations_geometry(df_substations) @@ -1138,8 +1261,8 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): # Create polygon GeoDataFrame to remove lines within substations gdf_substations_polygon = gpd.GeoDataFrame( df_substations[["bus_id", "polygon", "voltage"]], - geometry = "polygon", - crs = crs, + geometry="polygon", + crs=crs, ) logger.info("---") @@ -1161,28 +1284,31 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): df_lines = _clean_lines(df_lines, list_voltages) df_lines = _create_lines_geometry(df_lines) df_lines = _finalise_lines(df_lines) - + # Dropping specific lines, manually if lines_to_drop in df_lines["line_id"].values: - df_lines.drop(df_lines[df_lines["line_id"].isin(lines_to_drop)].index, inplace=True) - + df_lines.drop( + df_lines[df_lines["line_id"].isin(lines_to_drop)].index, inplace=True + ) + # Create GeoDataFrame - gdf_lines = gpd.GeoDataFrame(df_lines, geometry = "geometry", crs = crs) + gdf_lines = gpd.GeoDataFrame(df_lines, geometry="geometry", crs=crs) gdf_lines = _remove_lines_within_substations(gdf_lines, gdf_substations_polygon) # Add line endings to substations 
path_country_shapes = snakemake.input.country_shapes path_offshore_shapes = snakemake.input.offshore_shapes df_substations = _add_line_endings_to_substations( - df_substations, + df_substations, gdf_lines, path_country_shapes, path_offshore_shapes, - ) - + ) + # Drop polygons and create GDF - gdf_substations = gpd.GeoDataFrame(df_substations.drop(columns=["polygon"]), - geometry = "geometry", crs = crs) + gdf_substations = gpd.GeoDataFrame( + df_substations.drop(columns=["polygon"]), geometry="geometry", crs=crs + ) # Export GeoDataFrames to GeoJSON in specified output paths parentfolder = os.path.dirname(snakemake.output.substations) @@ -1192,11 +1318,13 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): output_substations = snakemake.output["substations"] output_lines = snakemake.output["lines"] - logger.info(f"Exporting clean substations with polygon shapes to {output_substations_polygon}") - gdf_substations_polygon.to_file(output_substations_polygon, driver="GeoJSON") + logger.info( + f"Exporting clean substations with polygon shapes to {output_substations_polygon}" + ) + gdf_substations_polygon.to_file(output_substations_polygon, driver="GeoJSON") logger.info(f"Exporting clean substations to {output_substations}") - gdf_substations.to_file(output_substations, driver="GeoJSON") + gdf_substations.to_file(output_substations, driver="GeoJSON") logger.info(f"Exporting clean lines to {output_lines}") gdf_lines.to_file(output_lines, driver="GeoJSON") - logger.info("Cleaning OSM data completed.") \ No newline at end of file + logger.info("Cleaning OSM data completed.") diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py index 0ad9743e4..901145728 100644 --- a/scripts/retrieve_osm_data.py +++ b/scripts/retrieve_osm_data.py @@ -2,40 +2,42 @@ # SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors # # SPDX-License-Identifier: MIT - """ -Retrieve OSM data for the specified country using the overpass API and save it -to 
the specified output files. Note that overpass requests are based on a fair -use policy. `retrieve_osm_data` is meant to be used in a way that respects this -policy by fetching the needed data once, only. +Retrieve OSM data for the specified country using the overpass API and save it +to the specified output files. + +Note that overpass requests are based on a fair +use policy. `retrieve_osm_data` is meant to be used in a way that respects this +policy by fetching the needed data once, only. """ import json import logging import os -import requests import time +import requests from _helpers import configure_logging + logger = logging.getLogger(__name__) -# Function currently not needed - Kept for backup purposes to retrieve the OSM +# Function currently not needed - Kept for backup purposes to retrieve the OSM # area code if needed in the future def _get_overpass_areas(countries): """ Retrieve the OSM area codes for the specified country codes. - + Parameters ---------- countries : str or list - A single country code or a list of country codes for which the OSM area + A single country code or a list of country codes for which the OSM area codes should be retrieved. Returns ------- dict - A dictionary mapping country codes to their corresponding OSM area + A dictionary mapping country codes to their corresponding OSM area codes. """ @@ -65,37 +67,40 @@ def _get_overpass_areas(countries): # Check if the response contains any results if "elements" in data and len(data["elements"]) > 0: # Extract the area ID from the relation - if c == "FR": # take second one for France + if c == "FR": # take second one for France osm_area_id = data["elements"][1]["id"] else: osm_area_id = data["elements"][0]["id"] osm_areas.append(f"area({osm_area_id})") else: # Print a warning if no results are found for the country code - logger.info(f"No area code found for the specified country " - f"code: {c}. 
Omitted from the list.") + logger.info( + f"No area code found for the specified country " + f"code: {c}. Omitted from the list." + ) except json.JSONDecodeError as e: logger.error(f"JSON decode error for country {c}: {e}") logger.debug(f"Response text: {response.text}") - - # Create a dictionary mapping country codes to their corresponding OSM area + + # Create a dictionary mapping country codes to their corresponding OSM area # codes op_areas_dict = dict(zip(countries, osm_areas)) - + return op_areas_dict - + def retrieve_osm_data( - country, - output, - features=[ - "cables_way", - "lines_way", - "substations_way", - "substations_relation", - ]): + country, + output, + features=[ + "cables_way", + "lines_way", + "substations_way", + "substations_relation", + ], +): """ - Retrieve OSM data for the specified country and save it to the specified + Retrieve OSM data for the specified country and save it to the specified output files. Parameters @@ -103,7 +108,7 @@ def retrieve_osm_data( country : str The country code for which the OSM data should be retrieved. output : dict - A dictionary mapping feature names to the corresponding output file + A dictionary mapping feature names to the corresponding output file paths. Saving the OSM data to .json files. features : list, optional A list of OSM features to retrieve. The default is [ @@ -119,7 +124,7 @@ def retrieve_osm_data( # More features can in theory be retrieved that are currently not needed # to build a functioning network. 
The following power-related # features are supported: - + # features_dict= { # 'cables_way': 'way["power"="cable"]', # 'lines_way': 'way["power"="line"]', @@ -130,38 +135,44 @@ def retrieve_osm_data( # 'route_relations': 'rel["route"="power"]["type"="route"]' # } - features_dict= { - 'cables_way': 'way["power"="cable"]', - 'lines_way': 'way["power"="line"]', - 'substations_way': 'way["power"="substation"]', - 'substations_relation': 'relation["power"="substation"]', + features_dict = { + "cables_way": 'way["power"="cable"]', + "lines_way": 'way["power"="line"]', + "substations_way": 'way["power"="substation"]', + "substations_relation": 'relation["power"="substation"]', } wait_time = 5 for f in features: if f not in features_dict: - logger.info(f"Invalid feature: {f}. Supported features: {list(features_dict.keys())}") - raise ValueError(f"Invalid feature: {f}. Supported features: {list(features_dict.keys())}") + logger.info( + f"Invalid feature: {f}. Supported features: {list(features_dict.keys())}" + ) + raise ValueError( + f"Invalid feature: {f}. Supported features: {list(features_dict.keys())}" + ) retries = 3 for attempt in range(retries): - logger.info(f" - Fetching OSM data for feature '{f}' in {country} (Attempt {attempt+1})...") + logger.info( + f" - Fetching OSM data for feature '{f}' in {country} (Attempt {attempt+1})..." 
+ ) # Build the overpass query op_area = f'area["ISO3166-1"="{country}"]' - op_query = f''' + op_query = f""" [out:json]; {op_area}->.searchArea; ( {features_dict[f]}(area.searchArea); ); out body geom; - ''' + """ try: # Send the request - response = requests.post(overpass_url, data = op_query) - response.raise_for_status() # Raise HTTPError for bad responses + response = requests.post(overpass_url, data=op_query) + response.raise_for_status() # Raise HTTPError for bad responses data = response.json() filepath = output[f] @@ -169,13 +180,15 @@ def retrieve_osm_data( if not os.path.exists(parentfolder): os.makedirs(parentfolder) - with open(filepath, mode = "w") as f: - json.dump(response.json(),f,indent=2) + with open(filepath, mode="w") as f: + json.dump(response.json(), f, indent=2) logger.info(" - Done.") break # Exit the retry loop on success except (json.JSONDecodeError, requests.exceptions.RequestException) as e: logger.error(f"Error for feature '{f}' in country {country}: {e}") - logger.debug(f"Response text: {response.text if response else 'No response'}") + logger.debug( + f"Response text: {response.text if response else 'No response'}" + ) if attempt < retries - 1: wait_time += 15 logger.info(f"Waiting {wait_time} seconds before retrying...") @@ -183,11 +196,13 @@ def retrieve_osm_data( else: logger.error( f"Failed to retrieve data for feature '{f}' in country {country} after {retries} attempts." - ) + ) except Exception as e: - # For now, catch any other exceptions and log them. Treat this + # For now, catch any other exceptions and log them. Treat this # the same as a RequestException and try to run again two times. 
- logger.error(f"Unexpected error for feature '{f}' in country {country}: {e}") + logger.error( + f"Unexpected error for feature '{f}' in country {country}: {e}" + ) if attempt < retries - 1: wait_time += 10 logger.info(f"Waiting {wait_time} seconds before retrying...") @@ -195,7 +210,7 @@ def retrieve_osm_data( else: logger.error( f"Failed to retrieve data for feature '{f}' in country {country} after {retries} attempts." - ) + ) if __name__ == "__main__": @@ -203,11 +218,11 @@ def retrieve_osm_data( from _helpers import mock_snakemake snakemake = mock_snakemake("retrieve_osm_data", country="BE") - + configure_logging(snakemake) # Retrieve the OSM data country = snakemake.wildcards.country output = snakemake.output - retrieve_osm_data(country, output) \ No newline at end of file + retrieve_osm_data(country, output) From f2761a2bb10fabc3f621be18c80cd6708b0deece Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Fri, 24 May 2024 10:09:17 +0200 Subject: [PATCH 025/100] Removed overpass from required packages. Not needed anymore. 
--- envs/environment.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/envs/environment.yaml b/envs/environment.yaml index 2b771f0b9..dd5df3250 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -64,5 +64,4 @@ dependencies: - snakemake-storage-plugin-http - snakemake-executor-plugin-slurm - snakemake-executor-plugin-cluster-generic - - highspy - - overpass + - highspy \ No newline at end of file From c71a3b934b2e50ed2c3c8572887ebe329cb77bd6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 24 May 2024 08:10:45 +0000 Subject: [PATCH 026/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- envs/environment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envs/environment.yaml b/envs/environment.yaml index dd5df3250..fbc61d367 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -64,4 +64,4 @@ dependencies: - snakemake-storage-plugin-http - snakemake-executor-plugin-slurm - snakemake-executor-plugin-cluster-generic - - highspy \ No newline at end of file + - highspy From ff882e69f9aedb4e5aa9f3ecb399e0c85f654355 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Fri, 24 May 2024 14:14:26 +0200 Subject: [PATCH 027/100] Added links_relations (route = power, frequency = 0) to retrieval. This will change how HVDC links are extracted in the near future. 
--- rules/build_electricity.smk | 13 +++--- scripts/retrieve_osm_data.py | 83 +----------------------------------- 2 files changed, 9 insertions(+), 87 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index a1e44f5fa..683b75d57 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -616,6 +616,7 @@ rule retrieve_osm_data: output: cables_way="data/osm/raw/{country}/cables_way.json", lines_way="data/osm/raw/{country}/lines_way.json", + links_relation="data/osm/raw/{country}/links_relation.json", substations_way="data/osm/raw/{country}/substations_way.json", substations_relation="data/osm/raw/{country}/substations_relation.json", log: @@ -630,19 +631,19 @@ rule retrieve_osm_data: rule clean_osm_data: input: cables_way=[ - f"data/osm/raw/{country}/cables_way.json" - for country in config["countries"] + f"data/osm/raw/{country}/cables_way.json" for country in config["countries"] ], lines_way=[ f"data/osm/raw/{country}/lines_way.json" for country in config["countries"] ], + links_relation=[ + f"data/osm/raw/{country}/links_relation.json" for country in config["countries"] + ], substations_way=[ - f"data/osm/raw/{country}/substations_way.json" - for country in config["countries"] + f"data/osm/raw/{country}/substations_way.json" for country in config["countries"] ], substations_relation=[ - f"data/osm/raw/{country}/substations_relation.json" - for country in config["countries"] + f"data/osm/raw/{country}/substations_relation.json" for country in config["countries"] ], offshore_shapes=resources("offshore_shapes.geojson"), country_shapes=resources("country_shapes.geojson"), diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py index 901145728..bad99df3a 100644 --- a/scripts/retrieve_osm_data.py +++ b/scripts/retrieve_osm_data.py @@ -22,79 +22,13 @@ logger = logging.getLogger(__name__) -# Function currently not needed - Kept for backup purposes to retrieve the OSM -# area code if needed in the future 
-def _get_overpass_areas(countries): - """ - Retrieve the OSM area codes for the specified country codes. - - Parameters - ---------- - countries : str or list - A single country code or a list of country codes for which the OSM area - codes should be retrieved. - - Returns - ------- - dict - A dictionary mapping country codes to their corresponding OSM area - codes. - """ - - # If a single country code is provided, convert it to a list - if not isinstance(countries, list): - countries = [countries] - - # Overpass API endpoint URL - overpass_url = "https://overpass-api.de/api/interpreter" - - osm_areas = [] - for c in countries: - # Overpass query to fetch the relation for the specified country code - overpass_query = f""" - [out:json]; - area["ISO3166-1"="{c}"]; - out; - """ - - # Send the request to Overpass API - response = requests.post(overpass_url, data=overpass_query) - - try: - # Parse the response - data = response.json() - - # Check if the response contains any results - if "elements" in data and len(data["elements"]) > 0: - # Extract the area ID from the relation - if c == "FR": # take second one for France - osm_area_id = data["elements"][1]["id"] - else: - osm_area_id = data["elements"][0]["id"] - osm_areas.append(f"area({osm_area_id})") - else: - # Print a warning if no results are found for the country code - logger.info( - f"No area code found for the specified country " - f"code: {c}. Omitted from the list." 
- ) - except json.JSONDecodeError as e: - logger.error(f"JSON decode error for country {c}: {e}") - logger.debug(f"Response text: {response.text}") - - # Create a dictionary mapping country codes to their corresponding OSM area - # codes - op_areas_dict = dict(zip(countries, osm_areas)) - - return op_areas_dict - - def retrieve_osm_data( country, output, features=[ "cables_way", "lines_way", + "links_relation", "substations_way", "substations_relation", ], @@ -121,23 +55,10 @@ def retrieve_osm_data( # Overpass API endpoint URL overpass_url = "https://overpass-api.de/api/interpreter" - # More features can in theory be retrieved that are currently not needed - # to build a functioning network. The following power-related - # features are supported: - - # features_dict= { - # 'cables_way': 'way["power"="cable"]', - # 'lines_way': 'way["power"="line"]', - # 'substations_way': 'way["power"="substation"]', - # 'substations_node': 'node["power"="substation"]', - # 'transformers_way': 'way["power"="transformer"]', - # 'transformers_node': 'node["power"="transformer"]', - # 'route_relations': 'rel["route"="power"]["type"="route"]' - # } - features_dict = { "cables_way": 'way["power"="cable"]', "lines_way": 'way["power"="line"]', + "links_relation": 'relation["route"="power"]["frequency"="0"]', "substations_way": 'way["power"="substation"]', "substations_relation": 'relation["power"="substation"]', } From 0c0aff7cc888b62611ec4cd85b9586fac5f71bff Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 24 May 2024 12:15:10 +0000 Subject: [PATCH 028/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- rules/build_electricity.smk | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 683b75d57..680dedd89 100644 --- a/rules/build_electricity.smk +++ 
b/rules/build_electricity.smk @@ -631,19 +631,23 @@ rule retrieve_osm_data: rule clean_osm_data: input: cables_way=[ - f"data/osm/raw/{country}/cables_way.json" for country in config["countries"] + f"data/osm/raw/{country}/cables_way.json" + for country in config["countries"] ], lines_way=[ f"data/osm/raw/{country}/lines_way.json" for country in config["countries"] ], links_relation=[ - f"data/osm/raw/{country}/links_relation.json" for country in config["countries"] + f"data/osm/raw/{country}/links_relation.json" + for country in config["countries"] ], substations_way=[ - f"data/osm/raw/{country}/substations_way.json" for country in config["countries"] + f"data/osm/raw/{country}/substations_way.json" + for country in config["countries"] ], substations_relation=[ - f"data/osm/raw/{country}/substations_relation.json" for country in config["countries"] + f"data/osm/raw/{country}/substations_relation.json" + for country in config["countries"] ], offshore_shapes=resources("offshore_shapes.geojson"), country_shapes=resources("country_shapes.geojson"), From ea9b3830b2beac8fd6c21468565bc0665766d549 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Tue, 28 May 2024 10:00:32 +0200 Subject: [PATCH 029/100] Work-in-progress clean_osm_data --- scripts/clean_osm_data.py | 122 ++++++++++++++++++++++++++++++++++---- 1 file changed, 111 insertions(+), 11 deletions(-) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 882c1229e..e40bd4234 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -513,7 +513,6 @@ def _import_lines_and_cables(path_lines): df["id"] = df["id"].astype(str) df["country"] = country - # col_tags = ["power", "cables", "circuits", "frequency", "voltage", "wires", "capacity", "rating"] col_tags = [ "power", "cables", @@ -548,6 +547,82 @@ def _import_lines_and_cables(path_lines): return df_lines +def _import_links(path_links): + """ + Import links from the given input paths. 
+ + Parameters: + - path_links (dict): A dictionary containing the input paths for links. + + Returns: + - df_links (DataFrame): A DataFrame containing the imported links data. + """ + columns = [ + "id", + "bounds", + "nodes", + "geometry", + "country", + "circuits", + "frequency", + "rating", + "voltage", + "wires", + ] + df_links = pd.DataFrame(columns=columns) + + logger.info("Importing links") + for key in path_links: + logger.info(f"Processing {key}...") + for idx, ip in enumerate(path_links[key]): + if ( + os.path.exists(ip) and os.path.getsize(ip) > 400 + ): # unpopulated OSM json is about 51 bytes + country = os.path.basename(os.path.dirname(path_links[key][idx])) + + logger.info( + f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(path_links[key])).zfill(2)}: {ip}" + ) + with open(ip, "r") as f: + data = json.load(f) + + df = pd.DataFrame(data["elements"]) + df["id"] = df["id"].astype(str) + df["country"] = country + + col_tags = [ + "circuits", + "frequency", + "rating", + "voltage", + "wires", + ] + + tags = pd.json_normalize(df["tags"]).map( + lambda x: str(x) if pd.notnull(x) else x + ) + + for ct in col_tags: + if ct not in tags.columns: + tags[ct] = pd.NA + + tags = tags.loc[:, col_tags] + + df = pd.concat([df, tags], axis="columns") + df.drop(columns=["type", "tags"], inplace=True) + + df_links = pd.concat([df_links, df], axis="rows") + + else: + logger.info( + f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(path_links[key])).zfill(2)} (empty): {ip}" + ) + continue + logger.info("---") + + return df_links + + def _drop_duplicate_lines(df_lines): """ Drop duplicate lines from the given dataframe. Duplicates are usually lines @@ -586,29 +661,29 @@ def _drop_duplicate_lines(df_lines): return df_lines -def _filter_by_voltage(df, voltage_min=200000): +def _filter_by_voltage(df, min_voltage=200000): """ Filter rows in the DataFrame based on the voltage in V. Parameters: - df (pandas.DataFrame): The DataFrame containing the substations or lines data. 
- - voltage_min (int, optional): The minimum voltage value to filter the + - min_voltage (int, optional): The minimum voltage value to filter the rows. Defaults to 200000 [unit: V]. Returns: - filtered df (pandas.DataFrame): The filtered DataFrame containing - the lines or substations above voltage_min. - - list_voltages (list): A list of unique voltage values above voltage_min. + the lines or substations above min_voltage. + - list_voltages (list): A list of unique voltage values above min_voltage. The type of the list elements is string. """ logger.info( - f"Filtering dataframe by voltage. Only keeping rows above and including {voltage_min} V." + f"Filtering dataframe by voltage. Only keeping rows above and including {min_voltage} V." ) list_voltages = df["voltage"].str.split(";").explode().unique().astype(str) # Keep numeric strings list_voltages = list_voltages[np.vectorize(str.isnumeric)(list_voltages)] list_voltages = list_voltages.astype(int) - list_voltages = list_voltages[list_voltages >= int(voltage_min)] + list_voltages = list_voltages[list_voltages >= int(min_voltage_ac)] list_voltages = list_voltages.astype(str) bool_voltages = df["voltage"].apply(_check_voltage, list_voltages=list_voltages) @@ -630,7 +705,7 @@ def _clean_substations(df_substations, list_voltages): Parameters: - df_substations (pandas.DataFrame): The input dataframe containing substation data. - - list_voltages (list): A list of voltages above voltage_min to filter the + - list_voltages (list): A list of voltages above min_voltage to filter the substation data. Returns: @@ -1233,7 +1308,8 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): # Parameters crs = "EPSG:4326" # Correct crs for OSM data - voltage_min = 200000 # [unit: V] Minimum voltage value to filter lines. + min_voltage_ac = 200000 # [unit: V] Minimum voltage value to filter AC lines. + min_voltage_dc = 150000 # [unit: V] Minimum voltage value to filter DC links. 
# TODO pypsa-eur: Temporary solution as one AC line between converters will # create an error in simplify_network: @@ -1251,7 +1327,7 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): df_substations = _import_substations(path_substations) df_substations["voltage"] = _clean_voltage(df_substations["voltage"]) df_substations, list_voltages = _filter_by_voltage( - df_substations, voltage_min=voltage_min + df_substations, min_voltage=min_voltage_ac ) df_substations["frequency"] = _clean_frequency(df_substations["frequency"]) df_substations = _clean_substations(df_substations, list_voltages) @@ -1276,7 +1352,7 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): df_lines = _import_lines_and_cables(path_lines) df_lines = _drop_duplicate_lines(df_lines) df_lines.loc[:, "voltage"] = _clean_voltage(df_lines["voltage"]) - df_lines, list_voltages = _filter_by_voltage(df_lines, voltage_min=voltage_min) + df_lines, list_voltages = _filter_by_voltage(df_lines, min_voltage=min_voltage_ac) df_lines.loc[:, "circuits"] = _clean_circuits(df_lines["circuits"]) df_lines.loc[:, "cables"] = _clean_cables(df_lines["cables"]) df_lines.loc[:, "frequency"] = _clean_frequency(df_lines["frequency"]) @@ -1327,4 +1403,28 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): logger.info(f"Exporting clean lines to {output_lines}") gdf_lines.to_file(output_lines, driver="GeoJSON") + logger.info("---") + logger.info("HVDC LINKS") + path_links = { + "links": snakemake.input.links_relation, + } + + + ### CONTINUE HERE + # Cleaning process + df_links = _import_links(path_links) + df_links = _drop_duplicate_lines(df_links) + df_links.loc[:, "voltage"] = _clean_voltage(df_links["voltage"]) + df_links, list_voltages = _filter_by_voltage(df_links, min_voltage=min_voltage_dc) + + + df_lines.loc[:, "circuits"] = _clean_circuits(df_lines["circuits"]) + df_lines.loc[:, "cables"] = _clean_cables(df_lines["cables"]) + df_lines.loc[:, 
"frequency"] = _clean_frequency(df_lines["frequency"]) + df_lines.loc[:, "wires"] = _clean_wires(df_lines["wires"]) + df_lines = _clean_lines(df_lines, list_voltages) + df_lines = _create_lines_geometry(df_lines) + df_lines = _finalise_lines(df_lines) + + logger.info("Cleaning OSM data completed.") From 055699ba170d1c73310016d8f84732ac13076b45 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 May 2024 08:02:14 +0000 Subject: [PATCH 030/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/clean_osm_data.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index e40bd4234..531169cdb 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -1309,7 +1309,7 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): # Parameters crs = "EPSG:4326" # Correct crs for OSM data min_voltage_ac = 200000 # [unit: V] Minimum voltage value to filter AC lines. - min_voltage_dc = 150000 # [unit: V] Minimum voltage value to filter DC links. + min_voltage_dc = 150000 # [unit: V] Minimum voltage value to filter DC links. 
# TODO pypsa-eur: Temporary solution as one AC line between converters will # create an error in simplify_network: @@ -1409,15 +1409,13 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): "links": snakemake.input.links_relation, } - ### CONTINUE HERE # Cleaning process df_links = _import_links(path_links) df_links = _drop_duplicate_lines(df_links) df_links.loc[:, "voltage"] = _clean_voltage(df_links["voltage"]) df_links, list_voltages = _filter_by_voltage(df_links, min_voltage=min_voltage_dc) - - + df_lines.loc[:, "circuits"] = _clean_circuits(df_lines["circuits"]) df_lines.loc[:, "cables"] = _clean_cables(df_lines["cables"]) df_lines.loc[:, "frequency"] = _clean_frequency(df_lines["frequency"]) @@ -1426,5 +1424,4 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): df_lines = _create_lines_geometry(df_lines) df_lines = _finalise_lines(df_lines) - logger.info("Cleaning OSM data completed.") From 2b9d6982cbdc57983861f7c43123d3a379fb441e Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Fri, 31 May 2024 18:34:44 +0200 Subject: [PATCH 031/100] Added clean links output to clean_osm_data. Script uses OSM relations to retrieve clean HVDC links. 
--- rules/build_electricity.smk | 1 + scripts/clean_osm_data.py | 164 +++++++++++++++++++++++++++++++++--- 2 files changed, 154 insertions(+), 11 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 680dedd89..b9161423c 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -655,6 +655,7 @@ rule clean_osm_data: substations=resources("osm/clean/substations.geojson"), substations_polygon=resources("osm/clean/substations_polygon.geojson"), lines=resources("osm/clean/lines.geojson"), + links=resources("osm/clean/links.geojson"), log: logs("clean_osm_data.log"), script: diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 531169cdb..5674f53f3 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -34,7 +34,7 @@ import numpy as np import pandas as pd from _helpers import configure_logging, set_scenario_config -from shapely.geometry import LineString, Polygon +from shapely.geometry import LineString, Polygon, MultiLineString from shapely.ops import linemerge logger = logging.getLogger(__name__) @@ -277,6 +277,34 @@ def _clean_frequency(column): return column.astype(str) +def _clean_rating(column): + """ + Function to clean and sum the rating columns: + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning ratings.") + column = column.copy() + column = ( + column.astype(str) + .str.replace("MW", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) + + # Sum up all ratings if there are multiple entries + column = column.str.split(";").apply(lambda x: sum([int(i) for i in x])) + + + column.dropna(inplace=True) + return column.astype(str) + + def _split_cells(df, cols=["voltage"]): """ Split semicolon separated cells i.e. 
[66000;220000] and create new @@ -567,7 +595,6 @@ def _import_links(path_links): "frequency", "rating", "voltage", - "wires", ] df_links = pd.DataFrame(columns=columns) @@ -595,7 +622,6 @@ def _import_links(path_links): "frequency", "rating", "voltage", - "wires", ] tags = pd.json_normalize(df["tags"]).map( @@ -619,10 +645,66 @@ def _import_links(path_links): ) continue logger.info("---") + logger.info("Dropping lines without rating.") + len_before = len(df_links) + df_links = df_links.dropna(subset=["rating"]) + len_after = len(df_links) + logger.info(f"Dropped {len_before-len_after} elements without rating. " + + f"Imported {len_after} elements.") return df_links +def _create_single_link(row): + """ + Create a single link from multiple rows within a OSM link relation. + + Parameters: + - row: A row of OSM data containing information about the link. + + Returns: + - single_link: A single LineString representing the link. + + This function takes a row of OSM data and extracts the relevant information + to create a single link. It filters out elements (substations, electrodes) + with invalid roles and finds the longest link based on its endpoints. + If the longest link is a MultiLineString, it extracts the longest + linestring from it. The resulting single link is returned. 
+ """ + valid_roles = ["line", "cable"] + df = pd.json_normalize(row["members"]) + df = df[df["role"].isin(valid_roles)] + df.loc[:, "geometry"] = df.apply(_create_linestring, axis=1) + df.loc[:, "length"] = df["geometry"].apply(lambda x: x.length) + + list_endpoints = [] + for idx, row in df.iterrows(): + tuple = sorted([row["geometry"].coords[0], row["geometry"].coords[-1]]) + # round tuple to 3 decimals + tuple = ( + round(tuple[0][0], 2), + round(tuple[0][1], 2), + round(tuple[1][0], 2), + round(tuple[1][1], 2) + ) + list_endpoints.append(tuple) + + df.loc[:, "endpoints"] = list_endpoints + df_longest = df.loc[df.groupby("endpoints")["length"].idxmax()] + + single_link = linemerge(df_longest["geometry"].values.tolist()) + + # If the longest component is a MultiLineString, extract the longest linestring from it + if isinstance(single_link, MultiLineString): + # Find connected components + components = list(single_link.geoms) + + # Find the longest connected linestring + single_link = max(components, key=lambda x: x.length) + + return single_link + + def _drop_duplicate_lines(df_lines): """ Drop duplicate lines from the given dataframe. Duplicates are usually lines @@ -654,9 +736,14 @@ def _drop_duplicate_lines(df_lines): grouped_duplicates.set_index("id"), on="id", how="left" ) + len_before = len(df_lines) # Drop duplicates and update the df_lines dataframe with the cleaned data df_lines = df_lines[~df_lines["id"].isin(duplicate_rows["id"])] df_lines = pd.concat([df_lines, duplicate_rows], axis="rows") + len_after = len(df_lines) + + logger.info(f"Dropped {len_before - len_after} duplicate elements. " + + f"Keeping {len_after} elements." 
) return df_lines @@ -687,7 +774,11 @@ def _filter_by_voltage(df, min_voltage=200000): list_voltages = list_voltages.astype(str) bool_voltages = df["voltage"].apply(_check_voltage, list_voltages=list_voltages) + len_before = len(df) df = df[bool_voltages] + len_after = len(df) + logger.info(f"Dropped {len_before - len_after} elements with voltage below {min_voltage}. " + + f"Keeping {len_after} elements." ) return df, list_voltages @@ -1116,6 +1207,54 @@ def _finalise_lines(df_lines): return df_lines +def _finalise_links(df_links): + """ + Finalises the links column types. + + Args: + df_links (pandas.DataFrame): The input DataFrame containing links data. + + Returns: + df_links (pandas.DataFrame(): The DataFrame with finalised column types + and transformed data. + """ + logger.info("Finalising links column types.") + df_links = df_links.copy() + # Rename columns + df_links.rename( + columns={ + "id": "link_id", + "rating": "p_nom", + }, + inplace=True, + ) + + # Initiate new columns for subsequent build_osm_network step + df_links.loc[:, "bus0"] = None + df_links.loc[:, "bus1"] = None + df_links.loc[:, "length"] = None + + # Only include needed columns + df_links = df_links[ + [ + "link_id", + "voltage", + "p_nom", + "bus0", + "bus1", + "length", + "country", + "geometry", + ] + ] + + # Set lines data types df.apply(pd.to_numeric, args=('coerce',)) + # This workaround is needed as otherwise the column dtypes remain "objects" + df_links["p_nom"] = df_links["p_nom"].astype(int) + + return df_links + + def _import_substations(path_substations): """ Import substations from the given input paths. 
This function imports both @@ -1393,6 +1532,7 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): output_substations_polygon = snakemake.output["substations_polygon"] output_substations = snakemake.output["substations"] output_lines = snakemake.output["lines"] + output_links = snakemake.output["links"] logger.info( f"Exporting clean substations with polygon shapes to {output_substations_polygon}" @@ -1412,16 +1552,18 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): ### CONTINUE HERE # Cleaning process df_links = _import_links(path_links) + df_links = _drop_duplicate_lines(df_links) df_links.loc[:, "voltage"] = _clean_voltage(df_links["voltage"]) df_links, list_voltages = _filter_by_voltage(df_links, min_voltage=min_voltage_dc) - - df_lines.loc[:, "circuits"] = _clean_circuits(df_lines["circuits"]) - df_lines.loc[:, "cables"] = _clean_cables(df_lines["cables"]) - df_lines.loc[:, "frequency"] = _clean_frequency(df_lines["frequency"]) - df_lines.loc[:, "wires"] = _clean_wires(df_lines["wires"]) - df_lines = _clean_lines(df_lines, list_voltages) - df_lines = _create_lines_geometry(df_lines) - df_lines = _finalise_lines(df_lines) + df_links.loc[:, "frequency"] = _clean_frequency(df_links["frequency"]) + df_links.loc[:, "rating"] = _clean_rating(df_links["rating"]) + df_links.loc[:, "geometry"] = df_links.apply(_create_single_link, axis=1) + df_links = _finalise_links(df_links) + gdf_links = gpd.GeoDataFrame(df_links, geometry="geometry", crs=crs) + + logger.info(f"Exporting clean links to {output_links}") + gdf_links.to_file(output_links, driver="GeoJSON") + logger.info("Cleaning OSM data completed.") From ff2b8390dc837d6cd3f12d109a209482a80ae963 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 31 May 2024 16:35:14 +0000 Subject: [PATCH 032/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- 
scripts/clean_osm_data.py | 51 ++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 5674f53f3..2bd6454fe 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -34,7 +34,7 @@ import numpy as np import pandas as pd from _helpers import configure_logging, set_scenario_config -from shapely.geometry import LineString, Polygon, MultiLineString +from shapely.geometry import LineString, MultiLineString, Polygon from shapely.ops import linemerge logger = logging.getLogger(__name__) @@ -279,7 +279,7 @@ def _clean_frequency(column): def _clean_rating(column): """ - Function to clean and sum the rating columns: + Function to clean and sum the rating columns: Args: - column: pandas Series, the column to be cleaned @@ -289,17 +289,13 @@ def _clean_rating(column): """ logger.info("Cleaning ratings.") column = column.copy() - column = ( - column.astype(str) - .str.replace("MW", "") - ) + column = column.astype(str).str.replace("MW", "") # Remove all remaining non-numeric characters except for semicolons column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) # Sum up all ratings if there are multiple entries column = column.str.split(";").apply(lambda x: sum([int(i) for i in x])) - column.dropna(inplace=True) return column.astype(str) @@ -649,8 +645,10 @@ def _import_links(path_links): len_before = len(df_links) df_links = df_links.dropna(subset=["rating"]) len_after = len(df_links) - logger.info(f"Dropped {len_before-len_after} elements without rating. " + - f"Imported {len_after} elements.") + logger.info( + f"Dropped {len_before-len_after} elements without rating. " + + f"Imported {len_after} elements." + ) return df_links @@ -665,13 +663,13 @@ def _create_single_link(row): Returns: - single_link: A single LineString representing the link. - This function takes a row of OSM data and extracts the relevant information - to create a single link. 
It filters out elements (substations, electrodes) - with invalid roles and finds the longest link based on its endpoints. - If the longest link is a MultiLineString, it extracts the longest + This function takes a row of OSM data and extracts the relevant information + to create a single link. It filters out elements (substations, electrodes) + with invalid roles and finds the longest link based on its endpoints. + If the longest link is a MultiLineString, it extracts the longest linestring from it. The resulting single link is returned. """ - valid_roles = ["line", "cable"] + valid_roles = ["line", "cable"] df = pd.json_normalize(row["members"]) df = df[df["role"].isin(valid_roles)] df.loc[:, "geometry"] = df.apply(_create_linestring, axis=1) @@ -682,16 +680,16 @@ def _create_single_link(row): tuple = sorted([row["geometry"].coords[0], row["geometry"].coords[-1]]) # round tuple to 3 decimals tuple = ( - round(tuple[0][0], 2), - round(tuple[0][1], 2), - round(tuple[1][0], 2), - round(tuple[1][1], 2) - ) + round(tuple[0][0], 2), + round(tuple[0][1], 2), + round(tuple[1][0], 2), + round(tuple[1][1], 2), + ) list_endpoints.append(tuple) df.loc[:, "endpoints"] = list_endpoints df_longest = df.loc[df.groupby("endpoints")["length"].idxmax()] - + single_link = linemerge(df_longest["geometry"].values.tolist()) # If the longest component is a MultiLineString, extract the longest linestring from it @@ -742,8 +740,10 @@ def _drop_duplicate_lines(df_lines): df_lines = pd.concat([df_lines, duplicate_rows], axis="rows") len_after = len(df_lines) - logger.info(f"Dropped {len_before - len_after} duplicate elements. " + - f"Keeping {len_after} elements." ) + logger.info( + f"Dropped {len_before - len_after} duplicate elements. " + + f"Keeping {len_after} elements." 
+ ) return df_lines @@ -777,8 +777,10 @@ def _filter_by_voltage(df, min_voltage=200000): len_before = len(df) df = df[bool_voltages] len_after = len(df) - logger.info(f"Dropped {len_before - len_after} elements with voltage below {min_voltage}. " + - f"Keeping {len_after} elements." ) + logger.info( + f"Dropped {len_before - len_after} elements with voltage below {min_voltage}. " + + f"Keeping {len_after} elements." + ) return df, list_voltages @@ -1564,6 +1566,5 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): logger.info(f"Exporting clean links to {output_links}") gdf_links.to_file(output_links, driver="GeoJSON") - logger.info("Cleaning OSM data completed.") From 24aa2e0a22bd1baeb7f9ac3622540db08d76f3d7 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Mon, 3 Jun 2024 17:21:48 +0200 Subject: [PATCH 033/100] New code for integrating HVDC links. Using relations. Base network implementation functioning. --- rules/build_electricity.smk | 7 +- scripts/base_network_osm.py | 90 +++----------- scripts/build_osm_network.py | 229 +++++++++++++++++++---------------- scripts/clean_osm_data.py | 164 ++++++++++++++++++++----- 4 files changed, 278 insertions(+), 212 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index b9161423c..324dc6410 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -117,11 +117,9 @@ if config["electricity_network"]["base_network"] == "osm": input: eg_buses=resources("osm/buses.csv"), eg_lines=resources("osm/lines.csv"), - # eg_links="data/entsoegridkit/links.csv", + eg_links=resources("osm/links.csv"), eg_converters=resources("osm/converters.csv"), eg_transformers=resources("osm/transformers.csv"), - links_p_nom="data/links_p_nom.csv", - links_tyndp="data/links_tyndp.csv", country_shapes=resources("country_shapes.geojson"), offshore_shapes=resources("offshore_shapes.geojson"), europe_shape=resources("europe_shape.geojson"), @@ -666,13 +664,16 @@ rule 
build_osm_network: input: substations=resources("osm/clean/substations.geojson"), lines=resources("osm/clean/lines.geojson"), + links=resources("osm/clean/links.geojson"), country_shapes=resources("country_shapes.geojson"), output: lines=resources("osm/lines.csv"), + links=resources("osm/links.csv"), converters=resources("osm/converters.csv"), transformers=resources("osm/transformers.csv"), substations=resources("osm/buses.csv"), lines_geojson=resources("osm/lines.geojson"), + links_geojson=resources("osm/links.geojson"), converters_geojson=resources("osm/converters.geojson"), transformers_geojson=resources("osm/transformers.geojson"), substations_geojson=resources("osm/buses.geojson"), diff --git a/scripts/base_network_osm.py b/scripts/base_network_osm.py index beec06f84..68d3232e5 100644 --- a/scripts/base_network_osm.py +++ b/scripts/base_network_osm.py @@ -215,15 +215,17 @@ def _load_links_from_eg(buses, eg_links): quotechar="'", true_values=["t"], false_values=["f"], - dtype=dict(link_id="str", bus0="str", bus1="str", under_construction="bool"), + dtype=dict( + link_id="str", + bus0="str", + bus1="str", + voltage="int", + p_nom="float", + ), ).set_index("link_id") links["length"] /= 1e3 - # Skagerrak Link is connected to 132kV bus which is removed in _load_buses_from_eg. 
- # Connect to neighboring 380kV bus - links.loc[links.bus1 == "6396", "bus1"] = "6398" - links = _remove_dangling_branches(links, buses) # Add DC line parameters @@ -448,54 +450,14 @@ def _set_lines_s_nom_from_linetypes(n): ) * n.lines.eval("v_nom * num_parallel") -def _set_electrical_parameters_dc_lines(lines_config, voltages, lines): - if lines.empty: - lines["type"] = [] - return lines - - linetypes = _get_linetypes_config(lines_config["dc_types"], voltages) - - lines["carrier"] = "DC" - lines["dc"] = True - lines.loc[:, "type"] = lines.v_nom.apply( - lambda x: _get_linetype_by_voltage(x, linetypes) - ) - - lines["s_max_pu"] = lines_config["s_max_pu"] - - return lines - - # TODO pypsa-eur: Clean/fix this, update list p_noms -def _set_electrical_parameters_links(links, config, links_p_nom): +def _set_electrical_parameters_links(links, config): if links.empty: return links p_max_pu = config["links"].get("p_max_pu", 1.0) links["p_max_pu"] = p_max_pu links["p_min_pu"] = -p_max_pu - - links_p_nom = pd.read_csv(links_p_nom) - - # filter links that are not in operation anymore - removed_b = links_p_nom.Remarks.str.contains("Shut down|Replaced", na=False) - links_p_nom = links_p_nom[~removed_b] - - # find closest link for all links in links_p_nom - links_p_nom["j"] = _find_closest_links(links, links_p_nom) - - links_p_nom = links_p_nom.groupby(["j"], as_index=False).agg({"Power (MW)": "sum"}) - - p_nom = links_p_nom.dropna(subset=["j"]).set_index("j")["Power (MW)"] - - # Don't update p_nom if it's already set - p_nom_unset = ( - p_nom.drop(links.index[links.p_nom.notnull()], errors="ignore") - if "p_nom" in links - else p_nom - ) - links.loc[p_nom_unset.index, "p_nom"] = p_nom_unset - links["carrier"] = "DC" links["dc"] = True @@ -786,7 +748,7 @@ def base_network_osm( eg_converters, eg_transformers, eg_lines, - links_p_nom, + eg_links, europe_shape, country_shapes, offshore_shapes, @@ -795,7 +757,7 @@ def base_network_osm( buses = _load_buses_from_eg(eg_buses, 
europe_shape, config["electricity"]) # TODO pypsa-eur add this - # links = _load_links_from_eg(buses, eg_links) + links = _load_links_from_eg(buses, eg_links) # if config["links"].get("include_tyndp"): # buses, links = _add_links_from_tyndp(buses, links, links_tyndp, europe_shape) @@ -807,20 +769,13 @@ def base_network_osm( if config["lines"].get("reconnect_crimea", True) and "UA" in config["countries"]: lines = _reconnect_crimea(lines) - lines_ac = lines[lines.tag_frequency.astype(float) != 0].copy() - lines_dc = lines[lines.tag_frequency.astype(float) == 0].copy() - - lines_ac = _set_electrical_parameters_lines( - config["lines"], config["electricity"]["voltages"], lines_ac + lines = _set_electrical_parameters_lines( + config["lines"], config["electricity"]["voltages"], lines ) - lines_dc = _set_electrical_parameters_dc_lines( - config["lines"], config["electricity"]["voltages"], lines_dc - ) + links = _set_electrical_parameters_links(links, config) - # lines = _set_electrical_parameters_lines(lines, config) transformers = _set_electrical_parameters_transformers(transformers, config) - # links = _set_electrical_parameters_links(links, config, links_p_nom) converters = _set_electrical_parameters_converters(converters, config) n = pypsa.Network() @@ -833,15 +788,7 @@ def base_network_osm( ) # TODO: fix hard code and check if AC/DC truly exist n.import_components_from_dataframe(buses, "Bus") - - lines_dc = _set_electrical_parameters_links(lines_dc, config, links_p_nom) - # parse line information into p_nom required for converters - lines_dc["p_nom"] = lines_dc.apply( - lambda x: x["v_nom"] * n.line_types.i_nom[x["type"]], - axis=1, - result_type="reduce", - ) - n.import_components_from_dataframe(lines_ac, "Line") + n.import_components_from_dataframe(lines, "Line") # The columns which names starts with "bus" are mixed up with the third-bus specification # when executing additional_linkports() # lines_dc.drop( @@ -856,17 +803,12 @@ def base_network_osm( # axis=1, # 
inplace=True, # ) - n.import_components_from_dataframe(lines_dc, "Link") - - # n.import_components_from_dataframe(lines, "Line") + n.import_components_from_dataframe(links, "Link") n.import_components_from_dataframe(transformers, "Transformer") - # n.import_components_from_dataframe(links, "Link") n.import_components_from_dataframe(converters, "Link") _set_lines_s_nom_from_linetypes(n) - # TODO pypsa-eur add this - # _apply_parameter_corrections(n, parameter_corrections) # TODO: what about this? n = _remove_unconnected_components(n) @@ -1085,7 +1027,7 @@ def append_bus_shapes(n, shapes, type): snakemake.input.eg_converters, snakemake.input.eg_transformers, snakemake.input.eg_lines, - snakemake.input.links_p_nom, + snakemake.input.eg_links, snakemake.input.europe_shape, snakemake.input.country_shapes, snakemake.input.offshore_shapes, diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 60576a34d..8c47cbc91 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -313,7 +313,7 @@ def merge_stations_same_station_id( # average location of the buses having the same station_id station_point_x = np.round(g_value.geometry.x.mean(), precision) station_point_y = np.round(g_value.geometry.y.mean(), precision) - is_dclink_boundary_point = any(g_value["is_dclink_boundary_point"]) + # is_dclink_boundary_point = any(g_value["is_dclink_boundary_point"]) # loop for every voltage level in the bus # The location of the buses is averaged; in the case of multiple voltage levels for the same station_id, @@ -337,7 +337,7 @@ def merge_stations_same_station_id( lon_bus, # "lon" lat_bus, # "lat" bus_row["country"].iloc[0], # "country", - is_dclink_boundary_point, # check if new bus was formed of at least one DC link boundary point + # is_dclink_boundary_point, # check if new bus was formed of at least one DC link boundary point Point( lon_bus, lat_bus, @@ -362,7 +362,7 @@ def merge_stations_same_station_id( "x", "y", "country", - 
"is_dclink_boundary_point", + # "is_dclink_boundary_point", "geometry", ] @@ -483,18 +483,17 @@ def get_converters(buses): ) # check if bus is a dclink boundary point, only then add converter - if g_value["is_dclink_boundary_point"].loc[id_0]: - df_converters.append( - [ - f"convert_{g_name}_{id_0}", # "line_id" - g_value["bus_id"].loc[id_0], # "bus0" - g_value["bus_id"].loc[id_1], # "bus1" - False, # "underground" - False, # "under_construction" - g_value.country.loc[id_0], # "country" - geom_conv, # "geometry" - ] - ) + df_converters.append( + [ + f"convert_{g_name}_{id_0}", # "line_id" + g_value["bus_id"].loc[id_0], # "bus0" + g_value["bus_id"].loc[id_1], # "bus1" + False, # "underground" + False, # "under_construction" + g_value.country.loc[id_0], # "country" + geom_conv, # "geometry" + ] + ) # name of the columns conv_columns = [ @@ -618,7 +617,7 @@ def set_lv_substations(buses): def merge_stations_lines_by_station_id_and_voltage( - lines, buses, distance_crs, tol=5000 + lines, links, buses, distance_crs, tol=5000 ): """ Function to merge close stations and adapt the line datasets to adhere to @@ -637,59 +636,59 @@ def merge_stations_lines_by_station_id_and_voltage( set_substations_ids(buses_ac, distance_crs, tol=tol) set_substations_ids(buses_dc, distance_crs, tol=tol) - # Find boundary points of DC links - # lines_dc_shape = lines[lines["dc"] == True].unary_union - # lines_dc_bounds = lines_dc_shape.boundary - # lines_dc_points = [p for p in lines_dc_bounds.geoms] - lines_dc = lines[lines["dc"] == True].reset_index() - lines_dc["adj_idx"] = range(0, len(lines_dc)) - - # Initialize an empty adjacency matrix - dc_adj_matrix = np.zeros((len(lines_dc), len(lines_dc)), dtype=int) - - # Fill the adjacency matrix - for i in range(len(lines_dc)): - for j in range(len(lines_dc)): - if are_lines_connected(lines_dc.iloc[i], lines_dc.iloc[j]): - dc_adj_matrix[i, j] = 1 - - dc_paths = find_paths(dc_adj_matrix) - - all_dc_boundary_points = pd.Series() - - for path in 
dc_paths: - bus_0_coors = lines_dc.iloc[path]["bus_0_coors"] - bus_1_coors = lines_dc.iloc[path]["bus_1_coors"] - - # Create DataFrame containing all points within a path - dc_points = pd.concat([bus_0_coors, bus_1_coors], ignore_index=True) - - # Determine the value counts of individual points. If it occurs more than - # once, it cannot be an end-point of a path - bool_duplicates = ( - dc_points.apply(lambda p: sum([are_almost_equal(p, s) for s in dc_points])) - > 1 - ) - - # Drop all duplicates - dc_boundary_points = dc_points[~bool_duplicates] - - if dc_boundary_points.empty: - all_dc_boundary_points = dc_boundary_points - else: - if all_dc_boundary_points.empty: - all_dc_boundary_points = dc_boundary_points - else: - all_dc_boundary_points = pd.concat( - [all_dc_boundary_points, dc_boundary_points], ignore_index=True - ) - - # TODO pypsa-eur: Add to pypsa-earth for all related entries on is_dclink_boundary_point - # check for each entry in buses_dc whether it is included in lines_dc_points - buses_ac["is_dclink_boundary_point"] = False - buses_dc["is_dclink_boundary_point"] = buses_dc.geometry.apply( - lambda p: any([p.within(l) for l in all_dc_boundary_points]) - ) + # # Find boundary points of DC links + # # lines_dc_shape = lines[lines["dc"] == True].unary_union + # # lines_dc_bounds = lines_dc_shape.boundary + # # lines_dc_points = [p for p in lines_dc_bounds.geoms] + # lines_dc = lines[lines["dc"] == True].reset_index() + # lines_dc["adj_idx"] = range(0, len(lines_dc)) + + # # Initialize an empty adjacency matrix + # dc_adj_matrix = np.zeros((len(lines_dc), len(lines_dc)), dtype=int) + + # # Fill the adjacency matrix + # for i in range(len(lines_dc)): + # for j in range(len(lines_dc)): + # if are_lines_connected(lines_dc.iloc[i], lines_dc.iloc[j]): + # dc_adj_matrix[i, j] = 1 + + # dc_paths = find_paths(dc_adj_matrix) + + # all_dc_boundary_points = pd.Series() + + # for path in dc_paths: + # bus_0_coors = lines_dc.iloc[path]["bus_0_coors"] + # bus_1_coors 
= lines_dc.iloc[path]["bus_1_coors"] + + # # Create DataFrame containing all points within a path + # dc_points = pd.concat([bus_0_coors, bus_1_coors], ignore_index=True) + + # # Determine the value counts of individual points. If it occurs more than + # # once, it cannot be an end-point of a path + # bool_duplicates = ( + # dc_points.apply(lambda p: sum([are_almost_equal(p, s) for s in dc_points])) + # > 1 + # ) + + # # Drop all duplicates + # dc_boundary_points = dc_points[~bool_duplicates] + + # if dc_boundary_points.empty: + # all_dc_boundary_points = dc_boundary_points + # else: + # if all_dc_boundary_points.empty: + # all_dc_boundary_points = dc_boundary_points + # else: + # all_dc_boundary_points = pd.concat( + # [all_dc_boundary_points, dc_boundary_points], ignore_index=True + # ) + + # # TODO pypsa-eur: Add to pypsa-earth for all related entries on is_dclink_boundary_point + # # check for each entry in buses_dc whether it is included in lines_dc_points + # buses_ac["is_dclink_boundary_point"] = False + # buses_dc["is_dclink_boundary_point"] = buses_dc.geometry.apply( + # lambda p: any([p.within(l) for l in all_dc_boundary_points]) + # ) logger.info(" - Merging substations with the same id") @@ -705,26 +704,25 @@ def merge_stations_lines_by_station_id_and_voltage( # set the bus ids to the line dataset lines, buses = set_lines_ids(lines, buses, distance_crs) + links, buses = set_lines_ids(links, buses, distance_crs) # drop lines starting and ending in the same node lines.drop(lines[lines["bus0"] == lines["bus1"]].index, inplace=True) + links.drop(links[links["bus0"] == links["bus1"]].index, inplace=True) # update line endings lines = line_endings_to_bus_conversion(lines) + links = line_endings_to_bus_conversion(links) # set substation_lv set_lv_substations(buses) - logger.info(" - Adding converters to lines") - - # append fake converters - # lines = pd.concat([lines, converters], ignore_index=True) - # reset index lines.reset_index(drop=True, inplace=True) + 
links.reset_index(drop=True, inplace=True) # if len(links) > 0: # links.reset_index(drop=True, inplace=True) - return lines, buses + return lines, links, buses def build_network( @@ -764,6 +762,17 @@ def build_network( "country": "object", "geometry": "object", }, + "link": { + "link_id": "object", + "bus0": "object", + "bus1": "object", + "voltage": "float", + "length": "float", + "under_construction": "bool", + "dc": "bool", + "country": "object", + "geometry": "object", + }, } logger.info("Reading input data.") @@ -779,19 +788,29 @@ def build_network( dtype=osm_clean_columns["line"], ) + links = read_geojson( + inputs["links"], + osm_clean_columns["link"].keys(), + dtype=osm_clean_columns["link"], + ) + lines = line_endings_to_bus_conversion(lines) + links = line_endings_to_bus_conversion(links) # METHOD to merge buses with same voltage and within tolerance tol = snakemake.config["electricity_network"]["osm_group_tolerance_buses"] logger.info(f"Aggregating close substations: Enabled with tolerance {tol} m") - lines, buses = merge_stations_lines_by_station_id_and_voltage( - lines, buses, distance_crs, tol=tol + + lines, links, buses = merge_stations_lines_by_station_id_and_voltage( + lines, links, buses, distance_crs, tol=tol ) # Recalculate lengths of lines utm = lines.estimate_utm_crs(datum_name="WGS 84") lines["length"] = lines.to_crs(utm).length + links["length"] = links.to_crs(utm).length + # TODO pypsa-eur: check if needed for updated links scripts # get transformers: modelled as lines connecting buses with different voltage transformers = get_transformers(buses, lines) @@ -810,12 +829,14 @@ def build_network( # Drop unncessary index column and set respective element ids as index lines.set_index("line_id", inplace=True) + links.set_index("link_id", inplace=True) converters.set_index("converter_id", inplace=True) transformers.set_index("transformer_id", inplace=True) buses.set_index("bus_id", inplace=True) # Convert voltages from V to kV lines["voltage"] = 
lines["voltage"] / 1000 + links["voltage"] = links["voltage"] / 1000 transformers["voltage_bus0"], transformers["voltage_bus1"] = ( transformers["voltage_bus0"] / 1000, transformers["voltage_bus1"] / 1000, @@ -824,66 +845,68 @@ def build_network( # Convert 'true' and 'false' to 't' and 'f' lines = lines.replace({True: "t", False: "f"}) + links = links.replace({True: "t", False: "f"}) converters = converters.replace({True: "t", False: "f"}) buses = buses.replace({True: "t", False: "f"}) # Change column orders - cols_lines = [ + cols_lines= [ "bus0", "bus1", "voltage", "circuits", + "tag_frequency", "length", "underground", "under_construction", "geometry", - "tag_type", - "tag_frequency", - "country", - "bounds", - "bus_0_coors", - "bus_1_coors", - "bus0_lon", - "bus0_lat", - "bus1_lon", - "bus1_lat", ] - cols_lines_csv = [ + lines = lines[cols_lines] + + cols_links = [ "bus0", "bus1", "voltage", - "circuits", - "tag_frequency", + "p_nom", "length", - "underground", "under_construction", "geometry", ] - lines_csv = lines[cols_lines_csv] - lines = lines[cols_lines] - to_csv_nafix(lines_csv, outputs["lines"], quotechar="'") # Generate CSV + links = links[cols_links] + + cols_transformers = [ + "bus0", + "bus1", + "voltage_bus0", + "voltage_bus1", + "country", + "geometry", + ] + + transformers = transformers[cols_transformers] + + to_csv_nafix(lines, outputs["lines"], quotechar="'") # Generate CSV + to_csv_nafix(links, outputs["links"], quotechar="'") # Generate CSV to_csv_nafix(converters, outputs["converters"], quotechar="'") # Generate CSV to_csv_nafix(transformers, outputs["transformers"], quotechar="'") # Generate CSV - colstodrop = ["bounds", "bus_0_coors", "bus_1_coors"] - # Export to GeoJSON for quick validations save_to_geojson( - gpd.GeoDataFrame( - lines.drop(columns=colstodrop), geometry="geometry", crs=geo_crs - ), + gpd.GeoDataFrame(lines), outputs["lines_geojson"], ) + save_to_geojson( + gpd.GeoDataFrame(links), + outputs["links_geojson"], + ) 
save_to_geojson( gpd.GeoDataFrame(converters, geometry="geometry", crs=geo_crs), outputs["converters_geojson"], ) save_to_geojson( - gpd.GeoDataFrame( - transformers.drop(columns=colstodrop), geometry="geometry", crs=geo_crs - ), + gpd.GeoDataFrame(transformers, geometry="geometry", crs=geo_crs), outputs["transformers_geojson"], ) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 5674f53f3..e80f0b709 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -78,6 +78,72 @@ def _create_polygon(row): return polygon +def _extended_linemerge(lines): + """ + Merges a list of LineStrings into a single LineString by finding the + closest pair of points between all pairs of LineStrings. + + Parameters: + lines (list): A list of LineStrings to be merged. + + Returns: + merged_line (LineString): The merged LineString. + + Raises: + TypeError: If the input is not a list of LineStrings. + + """ + # Ensure we have a list of LineStrings + if not isinstance(lines, list): + raise TypeError("Input should be a list of LineStrings") + if any(not isinstance(line, LineString) for line in lines): + raise TypeError("All elements in the list should be LineStrings") + + if len(lines) == 1: + return lines[0] + + merged_linestring = linemerge(lines) + + if isinstance(merged_linestring, LineString): + return merged_linestring + else: + def find_closest_points(line1, line2): + min_dist = np.inf + closest_points = (None, None) + for point1 in line1.coords: + for point2 in line2.coords: + dist = np.linalg.norm(np.array(point1) - np.array(point2)) + if dist < min_dist: + min_dist = dist + closest_points = (point1, point2) + return closest_points + + def merge_lines(lines): + while len(lines) > 1: + min_distance = np.inf + closest_pair = (None, None) + pair_indices = (None, None) + for i in range(len(lines)): + for j in range(i + 1, len(lines)): + point1, point2 = find_closest_points(lines[i], lines[j]) + distance = np.linalg.norm(np.array(point1) - 
np.array(point2)) + if distance < min_distance: + min_distance = distance + closest_pair = (point1, point2) + pair_indices = (i, j) + + connecting_line = LineString([closest_pair[0], closest_pair[1]]) + combined_line = linemerge(MultiLineString([lines[pair_indices[0]], lines[pair_indices[1]], connecting_line])) + + new_lines = [line for k, line in enumerate(lines) if k not in pair_indices] + new_lines.append(combined_line) + lines = new_lines + + return lines[0] + lines = list(merged_linestring.geoms) + return merge_lines(lines) + + def _clean_voltage(column): """ Function to clean the raw voltage column: manual fixing and drop nan values @@ -391,6 +457,7 @@ def _add_line_endings_to_substations( gdf_lines, path_country_shapes, path_offshore_shapes, + prefix, ): """ Add line endings to substations. @@ -440,18 +507,18 @@ def _add_line_endings_to_substations( bus_all = bus_all.groupby(["voltage", "lon", "lat", "dc"]).first().reset_index() bus_all = bus_all[df_substations.columns] bus_all.loc[:, "bus_id"] = bus_all.apply( - lambda row: f"line-end/{row.name + 1}", axis=1 + lambda row: f"{prefix}/{row.name + 1}", axis=1 ) # Initialize default values - bus_all["station_id"] = np.nan + bus_all["station_id"] = None # Assuming substations completed for installed lines bus_all["under_construction"] = False bus_all["tag_area"] = None bus_all["symbol"] = "substation" # TODO: this tag may be improved, maybe depending on voltage levels bus_all["tag_substation"] = "transmission" - bus_all["tag_source"] = "line-end" + bus_all["tag_source"] = prefix buses = pd.concat([df_substations, bus_all], ignore_index=True) buses.set_index("bus_id", inplace=True) @@ -492,7 +559,7 @@ def _add_line_endings_to_substations( bool_multiple_countries, "index_right" ] - return buses + return buses.reset_index() def _import_lines_and_cables(path_lines): @@ -615,6 +682,11 @@ def _import_links(path_links): df = pd.DataFrame(data["elements"]) df["id"] = df["id"].astype(str) + df["id"] = df["id"].apply( + 
lambda x: ( + f"relation/{x}" + ) + ) df["country"] = country col_tags = [ @@ -682,18 +754,18 @@ def _create_single_link(row): tuple = sorted([row["geometry"].coords[0], row["geometry"].coords[-1]]) # round tuple to 3 decimals tuple = ( - round(tuple[0][0], 2), - round(tuple[0][1], 2), - round(tuple[1][0], 2), - round(tuple[1][1], 2) + round(tuple[0][0], 3), + round(tuple[0][1], 3), + round(tuple[1][0], 3), + round(tuple[1][1], 3) ) list_endpoints.append(tuple) df.loc[:, "endpoints"] = list_endpoints - df_longest = df.loc[df.groupby("endpoints")["length"].idxmax()] + df_longest = df.loc[df.groupby("endpoints")["length"].idxmin()] single_link = linemerge(df_longest["geometry"].values.tolist()) - + # If the longest component is a MultiLineString, extract the longest linestring from it if isinstance(single_link, MultiLineString): # Find connected components @@ -1233,6 +1305,8 @@ def _finalise_links(df_links): df_links.loc[:, "bus0"] = None df_links.loc[:, "bus1"] = None df_links.loc[:, "length"] = None + df_links.loc[:, "under_construction"] = False + df_links.loc[:, "dc"] = True # Only include needed columns df_links = df_links[ @@ -1243,6 +1317,8 @@ def _finalise_links(df_links): "bus0", "bus1", "length", + "under_construction", + "dc", "country", "geometry", ] @@ -1251,6 +1327,9 @@ def _finalise_links(df_links): # Set lines data types df.apply(pd.to_numeric, args=('coerce',)) # This workaround is needed as otherwise the column dtypes remain "objects" df_links["p_nom"] = df_links["p_nom"].astype(int) + # Set lines data types df.apply(pd.to_numeric, args=('coerce',)) + # This workaround is needed as otherwise the column dtypes remain "objects" + df_links["voltage"] = df_links["voltage"].astype(int) return df_links @@ -1497,6 +1576,13 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): df_lines.loc[:, "frequency"] = _clean_frequency(df_lines["frequency"]) df_lines.loc[:, "wires"] = _clean_wires(df_lines["wires"]) df_lines = 
_clean_lines(df_lines, list_voltages) + + # Drop DC lines, will be added through relations later + len_before = len(df_lines) + df_lines = df_lines[df_lines["frequency"] == "50"] + len_after = len(df_lines) + logger.info(f"Dropped {len_before - len_after} DC lines. Keeping {len_after} AC lines.") + df_lines = _create_lines_geometry(df_lines) df_lines = _finalise_lines(df_lines) @@ -1510,14 +1596,49 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): gdf_lines = gpd.GeoDataFrame(df_lines, geometry="geometry", crs=crs) gdf_lines = _remove_lines_within_substations(gdf_lines, gdf_substations_polygon) + logger.info("---") + logger.info("HVDC LINKS") + path_links = { + "links": snakemake.input.links_relation, + } + + ### CONTINUE HERE + # Cleaning process + df_links = _import_links(path_links) + + df_links = _drop_duplicate_lines(df_links) + df_links.loc[:, "voltage"] = _clean_voltage(df_links["voltage"]) + df_links, list_voltages = _filter_by_voltage(df_links, min_voltage=min_voltage_dc) + # Keep only highest voltage of split string + df_links.loc[:, "voltage"] = df_links["voltage"].apply( + lambda x: str(max(map(int, x.split(";")))) + ) + df_links.loc[:, "frequency"] = _clean_frequency(df_links["frequency"]) + df_links.loc[:, "rating"] = _clean_rating(df_links["rating"]) + + df_links.loc[:, "geometry"] = df_links.apply(_create_single_link, axis=1) + df_links = _finalise_links(df_links) + gdf_links = gpd.GeoDataFrame(df_links, geometry="geometry", crs=crs) + + # Add line endings to substations path_country_shapes = snakemake.input.country_shapes path_offshore_shapes = snakemake.input.offshore_shapes + df_substations = _add_line_endings_to_substations( df_substations, gdf_lines, path_country_shapes, path_offshore_shapes, + prefix="line-end", + ) + + df_substations = _add_line_endings_to_substations( + df_substations, + gdf_links, + path_country_shapes, + path_offshore_shapes, + prefix="link-end", ) # Drop polygons and create GDF @@ -1542,28 
+1663,7 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): gdf_substations.to_file(output_substations, driver="GeoJSON") logger.info(f"Exporting clean lines to {output_lines}") gdf_lines.to_file(output_lines, driver="GeoJSON") - - logger.info("---") - logger.info("HVDC LINKS") - path_links = { - "links": snakemake.input.links_relation, - } - - ### CONTINUE HERE - # Cleaning process - df_links = _import_links(path_links) - - df_links = _drop_duplicate_lines(df_links) - df_links.loc[:, "voltage"] = _clean_voltage(df_links["voltage"]) - df_links, list_voltages = _filter_by_voltage(df_links, min_voltage=min_voltage_dc) - df_links.loc[:, "frequency"] = _clean_frequency(df_links["frequency"]) - df_links.loc[:, "rating"] = _clean_rating(df_links["rating"]) - df_links.loc[:, "geometry"] = df_links.apply(_create_single_link, axis=1) - df_links = _finalise_links(df_links) - gdf_links = gpd.GeoDataFrame(df_links, geometry="geometry", crs=crs) - logger.info(f"Exporting clean links to {output_links}") gdf_links.to_file(output_links, driver="GeoJSON") - - logger.info("Cleaning OSM data completed.") + logger.info("Cleaning OSM data completed.") \ No newline at end of file From f9e3eec81990d2fc0d7fd4df4c5aa11c9ad84b3c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 15:24:00 +0000 Subject: [PATCH 034/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/base_network_osm.py | 7 ++-- scripts/build_osm_network.py | 4 +-- scripts/clean_osm_data.py | 64 ++++++++++++++++++++---------------- 3 files changed, 41 insertions(+), 34 deletions(-) diff --git a/scripts/base_network_osm.py b/scripts/base_network_osm.py index 68d3232e5..6e78f9454 100644 --- a/scripts/base_network_osm.py +++ b/scripts/base_network_osm.py @@ -216,12 +216,12 @@ def _load_links_from_eg(buses, eg_links): true_values=["t"], false_values=["f"], 
dtype=dict( - link_id="str", - bus0="str", + link_id="str", + bus0="str", bus1="str", voltage="int", p_nom="float", - ), + ), ).set_index("link_id") links["length"] /= 1e3 @@ -809,7 +809,6 @@ def base_network_osm( _set_lines_s_nom_from_linetypes(n) - # TODO: what about this? n = _remove_unconnected_components(n) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 8c47cbc91..66ba5cd24 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -682,7 +682,7 @@ def merge_stations_lines_by_station_id_and_voltage( # all_dc_boundary_points = pd.concat( # [all_dc_boundary_points, dc_boundary_points], ignore_index=True # ) - + # # TODO pypsa-eur: Add to pypsa-earth for all related entries on is_dclink_boundary_point # # check for each entry in buses_dc whether it is included in lines_dc_points # buses_ac["is_dclink_boundary_point"] = False @@ -850,7 +850,7 @@ def build_network( buses = buses.replace({True: "t", False: "f"}) # Change column orders - cols_lines= [ + cols_lines = [ "bus0", "bus1", "voltage", diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index a8cd57ace..14e8005c1 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -80,7 +80,7 @@ def _create_polygon(row): def _extended_linemerge(lines): """ - Merges a list of LineStrings into a single LineString by finding the + Merges a list of LineStrings into a single LineString by finding the closest pair of points between all pairs of LineStrings. Parameters: @@ -91,22 +91,22 @@ def _extended_linemerge(lines): Raises: TypeError: If the input is not a list of LineStrings. 
- """ # Ensure we have a list of LineStrings if not isinstance(lines, list): raise TypeError("Input should be a list of LineStrings") if any(not isinstance(line, LineString) for line in lines): raise TypeError("All elements in the list should be LineStrings") - + if len(lines) == 1: return lines[0] - + merged_linestring = linemerge(lines) if isinstance(merged_linestring, LineString): return merged_linestring else: + def find_closest_points(line1, line2): min_dist = np.inf closest_points = (None, None) @@ -117,7 +117,7 @@ def find_closest_points(line1, line2): min_dist = dist closest_points = (point1, point2) return closest_points - + def merge_lines(lines): while len(lines) > 1: min_distance = np.inf @@ -131,15 +131,26 @@ def merge_lines(lines): min_distance = distance closest_pair = (point1, point2) pair_indices = (i, j) - + connecting_line = LineString([closest_pair[0], closest_pair[1]]) - combined_line = linemerge(MultiLineString([lines[pair_indices[0]], lines[pair_indices[1]], connecting_line])) - - new_lines = [line for k, line in enumerate(lines) if k not in pair_indices] + combined_line = linemerge( + MultiLineString( + [ + lines[pair_indices[0]], + lines[pair_indices[1]], + connecting_line, + ] + ) + ) + + new_lines = [ + line for k, line in enumerate(lines) if k not in pair_indices + ] new_lines.append(combined_line) lines = new_lines - + return lines[0] + lines = list(merged_linestring.geoms) return merge_lines(lines) @@ -678,11 +689,7 @@ def _import_links(path_links): df = pd.DataFrame(data["elements"]) df["id"] = df["id"].astype(str) - df["id"] = df["id"].apply( - lambda x: ( - f"relation/{x}" - ) - ) + df["id"] = df["id"].apply(lambda x: (f"relation/{x}")) df["country"] = country col_tags = [ @@ -752,18 +759,18 @@ def _create_single_link(row): tuple = sorted([row["geometry"].coords[0], row["geometry"].coords[-1]]) # round tuple to 3 decimals tuple = ( - round(tuple[0][0], 3), - round(tuple[0][1], 3), - round(tuple[1][0], 3), - round(tuple[1][1], 3) - ) 
+ round(tuple[0][0], 3), + round(tuple[0][1], 3), + round(tuple[1][0], 3), + round(tuple[1][1], 3), + ) list_endpoints.append(tuple) df.loc[:, "endpoints"] = list_endpoints df_longest = df.loc[df.groupby("endpoints")["length"].idxmin()] - + single_link = linemerge(df_longest["geometry"].values.tolist()) - + # If the longest component is a MultiLineString, extract the longest linestring from it if isinstance(single_link, MultiLineString): # Find connected components @@ -1329,7 +1336,7 @@ def _finalise_links(df_links): # Set lines data types df.apply(pd.to_numeric, args=('coerce',)) # This workaround is needed as otherwise the column dtypes remain "objects" df_links["p_nom"] = df_links["p_nom"].astype(int) - # Set lines data types df.apply(pd.to_numeric, args=('coerce',)) + # Set lines data types df.apply(pd.to_numeric, args=('coerce',)) # This workaround is needed as otherwise the column dtypes remain "objects" df_links["voltage"] = df_links["voltage"].astype(int) @@ -1583,7 +1590,9 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): len_before = len(df_lines) df_lines = df_lines[df_lines["frequency"] == "50"] len_after = len(df_lines) - logger.info(f"Dropped {len_before - len_after} DC lines. Keeping {len_after} AC lines.") + logger.info( + f"Dropped {len_before - len_after} DC lines. Keeping {len_after} AC lines." 
+ ) df_lines = _create_lines_geometry(df_lines) df_lines = _finalise_lines(df_lines) @@ -1622,11 +1631,10 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): df_links = _finalise_links(df_links) gdf_links = gpd.GeoDataFrame(df_links, geometry="geometry", crs=crs) - # Add line endings to substations path_country_shapes = snakemake.input.country_shapes path_offshore_shapes = snakemake.input.offshore_shapes - + df_substations = _add_line_endings_to_substations( df_substations, gdf_lines, @@ -1667,5 +1675,5 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): gdf_lines.to_file(output_lines, driver="GeoJSON") logger.info(f"Exporting clean links to {output_links}") gdf_links.to_file(output_links, driver="GeoJSON") - - logger.info("Cleaning OSM data completed.") \ No newline at end of file + + logger.info("Cleaning OSM data completed.") From aeb80a26495e690d8d8485a7f28d2e570f465ded Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Tue, 4 Jun 2024 16:06:46 +0200 Subject: [PATCH 035/100] removed manual line dropping. --- Snakefile | 1 + scripts/clean_osm_data.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index 412d520d7..318deb980 100644 --- a/Snakefile +++ b/Snakefile @@ -137,4 +137,5 @@ rule sync: rsync -uvarh --no-g {params.cluster}/resources . || echo "No resources directory, skipping rsync" rsync -uvarh --no-g {params.cluster}/results . || echo "No results directory, skipping rsync" rsync -uvarh --no-g {params.cluster}/logs . || echo "No logs directory, skipping rsync" + rsync -uvarh --no-g {params.cluster}/data/osm . 
|| echo "No data directory, skipping rsync" """ diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 14e8005c1..b38b3c1a4 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -1540,7 +1540,8 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): # TODO pypsa-eur: Temporary solution as one AC line between converters will # create an error in simplify_network: - lines_to_drop = ["775580659"] + # lines_to_drop = ["775580659"] + lines_to_drop = [""] logger.info("---") logger.info("SUBSTATIONS") From 637d28c755988aec814e19ddb82096dbfe238b18 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Thu, 6 Jun 2024 10:27:17 +0200 Subject: [PATCH 036/100] Updated clean script --- rules/build_electricity.smk | 2 +- scripts/build_osm_network.py | 54 ------------------------------------ scripts/clean_osm_data.py | 31 +++++++++++++-------- 3 files changed, 21 insertions(+), 66 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 324dc6410..99f39423a 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -317,7 +317,7 @@ rule build_renewable_profiles: benchmarks("build_renewable_profiles_{technology}") threads: config["atlite"].get("nprocesses", 4) resources: - mem_mb=config["atlite"].get("nprocesses", 4) * 5000, + mem_mb=config["atlite"].get("nprocesses", 4) * 10000, wildcard_constraints: technology="(?!hydro).*", # Any technology other than hydro conda: diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 66ba5cd24..0372692f9 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -636,60 +636,6 @@ def merge_stations_lines_by_station_id_and_voltage( set_substations_ids(buses_ac, distance_crs, tol=tol) set_substations_ids(buses_dc, distance_crs, tol=tol) - # # Find boundary points of DC links - # # lines_dc_shape = lines[lines["dc"] == True].unary_union - # # lines_dc_bounds = lines_dc_shape.boundary - # # 
lines_dc_points = [p for p in lines_dc_bounds.geoms] - # lines_dc = lines[lines["dc"] == True].reset_index() - # lines_dc["adj_idx"] = range(0, len(lines_dc)) - - # # Initialize an empty adjacency matrix - # dc_adj_matrix = np.zeros((len(lines_dc), len(lines_dc)), dtype=int) - - # # Fill the adjacency matrix - # for i in range(len(lines_dc)): - # for j in range(len(lines_dc)): - # if are_lines_connected(lines_dc.iloc[i], lines_dc.iloc[j]): - # dc_adj_matrix[i, j] = 1 - - # dc_paths = find_paths(dc_adj_matrix) - - # all_dc_boundary_points = pd.Series() - - # for path in dc_paths: - # bus_0_coors = lines_dc.iloc[path]["bus_0_coors"] - # bus_1_coors = lines_dc.iloc[path]["bus_1_coors"] - - # # Create DataFrame containing all points within a path - # dc_points = pd.concat([bus_0_coors, bus_1_coors], ignore_index=True) - - # # Determine the value counts of individual points. If it occurs more than - # # once, it cannot be an end-point of a path - # bool_duplicates = ( - # dc_points.apply(lambda p: sum([are_almost_equal(p, s) for s in dc_points])) - # > 1 - # ) - - # # Drop all duplicates - # dc_boundary_points = dc_points[~bool_duplicates] - - # if dc_boundary_points.empty: - # all_dc_boundary_points = dc_boundary_points - # else: - # if all_dc_boundary_points.empty: - # all_dc_boundary_points = dc_boundary_points - # else: - # all_dc_boundary_points = pd.concat( - # [all_dc_boundary_points, dc_boundary_points], ignore_index=True - # ) - - # # TODO pypsa-eur: Add to pypsa-earth for all related entries on is_dclink_boundary_point - # # check for each entry in buses_dc whether it is included in lines_dc_points - # buses_ac["is_dclink_boundary_point"] = False - # buses_dc["is_dclink_boundary_point"] = buses_dc.geometry.apply( - # lambda p: any([p.within(l) for l in all_dc_boundary_points]) - # ) - logger.info(" - Merging substations with the same id") # merge buses with same station id and voltage diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 
b38b3c1a4..ce1639667 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -1279,8 +1279,6 @@ def _finalise_lines(df_lines): ] ] - # Set lines data types df.apply(pd.to_numeric, args=('coerce',)) - # This workaround is needed as otherwise the column dtypes remain "objects" df_lines["circuits"] = df_lines["circuits"].astype(int) df_lines["voltage"] = df_lines["voltage"].astype(int) df_lines["tag_frequency"] = df_lines["tag_frequency"].astype(int) @@ -1333,11 +1331,7 @@ def _finalise_links(df_links): ] ] - # Set lines data types df.apply(pd.to_numeric, args=('coerce',)) - # This workaround is needed as otherwise the column dtypes remain "objects" df_links["p_nom"] = df_links["p_nom"].astype(int) - # Set lines data types df.apply(pd.to_numeric, args=('coerce',)) - # This workaround is needed as otherwise the column dtypes remain "objects" df_links["voltage"] = df_links["voltage"].astype(int) return df_links @@ -1524,6 +1518,11 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): return gdf_lines +# Define a function to check if a polygon intersects any line in the lines GeoDataFrame +def intersects_any_line(polygon, lines): + return lines.intersects(polygon).any() + + if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake @@ -1538,9 +1537,6 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): min_voltage_ac = 200000 # [unit: V] Minimum voltage value to filter AC lines. min_voltage_dc = 150000 # [unit: V] Minimum voltage value to filter DC links. 
- # TODO pypsa-eur: Temporary solution as one AC line between converters will - # create an error in simplify_network: - # lines_to_drop = ["775580659"] lines_to_drop = [""] logger.info("---") @@ -1614,8 +1610,6 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): "links": snakemake.input.links_relation, } - ### CONTINUE HERE - # Cleaning process df_links = _import_links(path_links) df_links = _drop_duplicate_lines(df_links) @@ -1652,6 +1646,21 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): prefix="link-end", ) + # # Drop df_substations.dc == True and tag_source != "link-end" + # df_substations = df_substations[ + # ~((df_substations.dc == True) & (df_substations.tag_source != "link-end")) + # ] + + # # Apply the function to each polygon in the substations GeoDataFrame + # gdf_substations_polygon["connected"] = False + # gdf_substations_polygon['connected'] = gdf_substations_polygon['polygon'].apply(intersects_any_line, lines=gdf_lines) + + # list_buses_disconnected = gdf_substations_polygon[gdf_substations_polygon['connected'] == False]['bus_id'].tolist() + + # # Drop islanded substations + # gdf_substations_polygon = gdf_substations_polygon[~gdf_substations_polygon['bus_id'].isin(list_buses_disconnected)] + # df_substations = df_substations[~df_substations['bus_id'].isin(list_buses_disconnected)] + # Drop polygons and create GDF gdf_substations = gpd.GeoDataFrame( df_substations.drop(columns=["polygon"]), geometry="geometry", crs=crs From f5e51711bad66f6c79ca6fefe8c3aac4dfee5f8e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 6 Jun 2024 08:30:05 +0000 Subject: [PATCH 037/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/clean_osm_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 
ce1639667..d64bcec97 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -1656,7 +1656,7 @@ def intersects_any_line(polygon, lines): # gdf_substations_polygon['connected'] = gdf_substations_polygon['polygon'].apply(intersects_any_line, lines=gdf_lines) # list_buses_disconnected = gdf_substations_polygon[gdf_substations_polygon['connected'] == False]['bus_id'].tolist() - + # # Drop islanded substations # gdf_substations_polygon = gdf_substations_polygon[~gdf_substations_polygon['bus_id'].isin(list_buses_disconnected)] # df_substations = df_substations[~df_substations['bus_id'].isin(list_buses_disconnected)] From 3bebcc0204eb784720dd73f0b0990a082b71dfd6 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Thu, 6 Jun 2024 10:43:58 +0200 Subject: [PATCH 038/100] reverted Snakefile to default: sync settings --- Snakefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Snakefile b/Snakefile index 318deb980..726b8d2ba 100644 --- a/Snakefile +++ b/Snakefile @@ -136,6 +136,4 @@ rule sync: rsync -uvarh --ignore-missing-args --files-from=.sync-send . {params.cluster} rsync -uvarh --no-g {params.cluster}/resources . || echo "No resources directory, skipping rsync" rsync -uvarh --no-g {params.cluster}/results . || echo "No results directory, skipping rsync" - rsync -uvarh --no-g {params.cluster}/logs . || echo "No logs directory, skipping rsync" - rsync -uvarh --no-g {params.cluster}/data/osm . || echo "No data directory, skipping rsync" """ From a2ee16f82490eb838d0b98a62072b7ecd0a3b212 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Thu, 6 Jun 2024 18:23:52 +0200 Subject: [PATCH 039/100] added prebuilt functionality. 
--- rules/build_electricity.smk | 10 +++++----- scripts/base_network_osm.py | 6 ++++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 99f39423a..80b694323 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -115,11 +115,11 @@ if config["electricity_network"]["base_network"] == "osm": links=config_provider("links"), transformers=config_provider("transformers"), input: - eg_buses=resources("osm/buses.csv"), - eg_lines=resources("osm/lines.csv"), - eg_links=resources("osm/links.csv"), - eg_converters=resources("osm/converters.csv"), - eg_transformers=resources("osm/transformers.csv"), + eg_buses="data/osm/prebuilt/buses.csv" if config["electricity_network"]["osm_use_prebuilt"] == True else resources("osm/buses.csv"), + eg_lines="data/osm/prebuilt/lines.csv" if config["electricity_network"]["osm_use_prebuilt"] == True else resources("osm/lines.csv"), + eg_links="data/osm/prebuilt/links.csv" if config["electricity_network"]["osm_use_prebuilt"] == True else resources("osm/links.csv"), + eg_converters="data/osm/prebuilt/converters.csv" if config["electricity_network"]["osm_use_prebuilt"] == True else resources("osm/converters.csv"), + eg_transformers="data/osm/prebuilt/transformers.csv" if config["electricity_network"]["osm_use_prebuilt"] == True else resources("osm/transformers.csv"), country_shapes=resources("country_shapes.geojson"), offshore_shapes=resources("offshore_shapes.geojson"), europe_shape=resources("europe_shape.geojson"), diff --git a/scripts/base_network_osm.py b/scripts/base_network_osm.py index 6e78f9454..f9b0daf57 100644 --- a/scripts/base_network_osm.py +++ b/scripts/base_network_osm.py @@ -145,11 +145,13 @@ def _load_buses_from_eg(eg_buses, europe_shape, config_elec): dtype=dict(bus_id="str"), ) .set_index("bus_id") - .drop(["station_id"], axis=1) .rename(columns=dict(voltage="v_nom")) ) - buses["carrier"] = buses.pop("dc").map({True: "DC", False: 
"AC"}) + if "station_id" in buses.columns: + buses.drop("station_id", axis=1, inplace=True) + + # buses["carrier"] = buses.pop("dc").map({True: "DC", False: "AC"}) buses["under_construction"] = buses.under_construction.where( lambda s: s.notnull(), False ).astype(bool) From 10b51465c62c5b01331230f0040e9023945128f5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 6 Jun 2024 16:24:21 +0000 Subject: [PATCH 040/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- rules/build_electricity.smk | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 80b694323..fe3194141 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -115,11 +115,31 @@ if config["electricity_network"]["base_network"] == "osm": links=config_provider("links"), transformers=config_provider("transformers"), input: - eg_buses="data/osm/prebuilt/buses.csv" if config["electricity_network"]["osm_use_prebuilt"] == True else resources("osm/buses.csv"), - eg_lines="data/osm/prebuilt/lines.csv" if config["electricity_network"]["osm_use_prebuilt"] == True else resources("osm/lines.csv"), - eg_links="data/osm/prebuilt/links.csv" if config["electricity_network"]["osm_use_prebuilt"] == True else resources("osm/links.csv"), - eg_converters="data/osm/prebuilt/converters.csv" if config["electricity_network"]["osm_use_prebuilt"] == True else resources("osm/converters.csv"), - eg_transformers="data/osm/prebuilt/transformers.csv" if config["electricity_network"]["osm_use_prebuilt"] == True else resources("osm/transformers.csv"), + eg_buses=( + "data/osm/prebuilt/buses.csv" + if config["electricity_network"]["osm_use_prebuilt"] == True + else resources("osm/buses.csv") + ), + eg_lines=( + "data/osm/prebuilt/lines.csv" + if config["electricity_network"]["osm_use_prebuilt"] 
== True + else resources("osm/lines.csv") + ), + eg_links=( + "data/osm/prebuilt/links.csv" + if config["electricity_network"]["osm_use_prebuilt"] == True + else resources("osm/links.csv") + ), + eg_converters=( + "data/osm/prebuilt/converters.csv" + if config["electricity_network"]["osm_use_prebuilt"] == True + else resources("osm/converters.csv") + ), + eg_transformers=( + "data/osm/prebuilt/transformers.csv" + if config["electricity_network"]["osm_use_prebuilt"] == True + else resources("osm/transformers.csv") + ), country_shapes=resources("country_shapes.geojson"), offshore_shapes=resources("offshore_shapes.geojson"), europe_shape=resources("europe_shape.geojson"), From 38de271d05da52a377d8aa73c4b6417e1dcca063 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Fri, 7 Jun 2024 15:29:07 +0200 Subject: [PATCH 041/100] Updated build_electricity.smk to work with scenario management. --- rules/build_electricity.smk | 86 +++++++++++++++++++++++++++--------- scripts/retrieve_osm_data.py | 6 ++- 2 files changed, 70 insertions(+), 22 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index fe3194141..7849031a8 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -104,7 +104,7 @@ if config["electricity_network"]["base_network"] == "gridkit": "../scripts/base_network.py" -if config["electricity_network"]["base_network"] == "osm": +if config["electricity_network"]["base_network"] == "osm": rule base_network: params: @@ -638,7 +638,7 @@ rule retrieve_osm_data: substations_way="data/osm/raw/{country}/substations_way.json", substations_relation="data/osm/raw/{country}/substations_relation.json", log: - logs("retrieve_osm_data_{country}.log"), + "logs/retrieve_osm_data_{country}.log", resources: cores=2, threads=1, @@ -646,27 +646,37 @@ rule retrieve_osm_data: "../scripts/retrieve_osm_data.py" +rule retrieve_osm_data_all: + input: + expand("data/osm/raw/{country}/cables_way.json", country=config_provider("countries")), + 
expand("data/osm/raw/{country}/lines_way.json", country=config_provider("countries")), + expand("data/osm/raw/{country}/links_relation.json", country=config_provider("countries")), + expand("data/osm/raw/{country}/substations_way.json", country=config_provider("countries")), + expand("data/osm/raw/{country}/substations_relation.json", country=config_provider("countries")), + + rule clean_osm_data: input: - cables_way=[ - f"data/osm/raw/{country}/cables_way.json" - for country in config["countries"] - ], - lines_way=[ - f"data/osm/raw/{country}/lines_way.json" for country in config["countries"] - ], - links_relation=[ - f"data/osm/raw/{country}/links_relation.json" - for country in config["countries"] - ], - substations_way=[ - f"data/osm/raw/{country}/substations_way.json" - for country in config["countries"] - ], - substations_relation=[ - f"data/osm/raw/{country}/substations_relation.json" - for country in config["countries"] - ], + cables_way=expand( + "data/osm/raw/{country}/cables_way.json", + country = config_provider("countries") + ), + lines_way=expand( + "data/osm/raw/{country}/lines_way.json", + country = config_provider("countries") + ), + links_relation=expand( + "data/osm/raw/{country}/links_relation.json", + country = config_provider("countries") + ), + substations_way=expand( + "data/osm/raw/{country}/substations_way.json", + country = config_provider("countries") + ), + substations_relation=expand( + "data/osm/raw/{country}/substations_relation.json", + country = config_provider("countries") + ), offshore_shapes=resources("offshore_shapes.geojson"), country_shapes=resources("country_shapes.geojson"), output: @@ -680,6 +690,40 @@ rule clean_osm_data: "../scripts/clean_osm_data.py" +# rule clean_osm_data: +# input: +# cables_way=[ +# f"data/osm/raw/{country}/cables_way.json" +# for country in config["countries"] +# ], +# lines_way=[ +# f"data/osm/raw/{country}/lines_way.json" for country in config["countries"] +# ], +# links_relation=[ +# 
f"data/osm/raw/{country}/links_relation.json" +# for country in config["countries"] +# ], +# substations_way=[ +# f"data/osm/raw/{country}/substations_way.json" +# for country in config["countries"] +# ], +# substations_relation=[ +# f"data/osm/raw/{country}/substations_relation.json" +# for country in config["countries"] +# ], +# offshore_shapes=resources("offshore_shapes.geojson"), +# country_shapes=resources("country_shapes.geojson"), +# output: +# substations=resources("osm/clean/substations.geojson"), +# substations_polygon=resources("osm/clean/substations_polygon.geojson"), +# lines=resources("osm/clean/lines.geojson"), +# links=resources("osm/clean/links.geojson"), +# log: +# logs("clean_osm_data.log"), +# script: +# "../scripts/clean_osm_data.py" + + rule build_osm_network: input: substations=resources("osm/clean/substations.geojson"), diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py index bad99df3a..899337f89 100644 --- a/scripts/retrieve_osm_data.py +++ b/scripts/retrieve_osm_data.py @@ -17,7 +17,11 @@ import time import requests -from _helpers import configure_logging +from _helpers import ( + configure_logging, + # set_scenario_config, + # update_config_from_wildcards, +) logger = logging.getLogger(__name__) From bb55ad70df4d4a503404c736b7ebd55467ee6642 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Fri, 7 Jun 2024 15:31:46 +0200 Subject: [PATCH 042/100] removed commented-out code. 
--- rules/build_electricity.smk | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 7849031a8..185f5692a 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -690,40 +690,6 @@ rule clean_osm_data: "../scripts/clean_osm_data.py" -# rule clean_osm_data: -# input: -# cables_way=[ -# f"data/osm/raw/{country}/cables_way.json" -# for country in config["countries"] -# ], -# lines_way=[ -# f"data/osm/raw/{country}/lines_way.json" for country in config["countries"] -# ], -# links_relation=[ -# f"data/osm/raw/{country}/links_relation.json" -# for country in config["countries"] -# ], -# substations_way=[ -# f"data/osm/raw/{country}/substations_way.json" -# for country in config["countries"] -# ], -# substations_relation=[ -# f"data/osm/raw/{country}/substations_relation.json" -# for country in config["countries"] -# ], -# offshore_shapes=resources("offshore_shapes.geojson"), -# country_shapes=resources("country_shapes.geojson"), -# output: -# substations=resources("osm/clean/substations.geojson"), -# substations_polygon=resources("osm/clean/substations_polygon.geojson"), -# lines=resources("osm/clean/lines.geojson"), -# links=resources("osm/clean/links.geojson"), -# log: -# logs("clean_osm_data.log"), -# script: -# "../scripts/clean_osm_data.py" - - rule build_osm_network: input: substations=resources("osm/clean/substations.geojson"), From a629dbad4e0748adf791b10c93cf58db8af51d76 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 7 Jun 2024 13:33:35 +0000 Subject: [PATCH 043/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- rules/build_electricity.smk | 37 +++++++++++++++++++++++++----------- scripts/retrieve_osm_data.py | 4 +--- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/rules/build_electricity.smk 
b/rules/build_electricity.smk index 185f5692a..5b3b432f6 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -104,7 +104,7 @@ if config["electricity_network"]["base_network"] == "gridkit": "../scripts/base_network.py" -if config["electricity_network"]["base_network"] == "osm": +if config["electricity_network"]["base_network"] == "osm": rule base_network: params: @@ -648,34 +648,49 @@ rule retrieve_osm_data: rule retrieve_osm_data_all: input: - expand("data/osm/raw/{country}/cables_way.json", country=config_provider("countries")), - expand("data/osm/raw/{country}/lines_way.json", country=config_provider("countries")), - expand("data/osm/raw/{country}/links_relation.json", country=config_provider("countries")), - expand("data/osm/raw/{country}/substations_way.json", country=config_provider("countries")), - expand("data/osm/raw/{country}/substations_relation.json", country=config_provider("countries")), + expand( + "data/osm/raw/{country}/cables_way.json", + country=config_provider("countries"), + ), + expand( + "data/osm/raw/{country}/lines_way.json", + country=config_provider("countries"), + ), + expand( + "data/osm/raw/{country}/links_relation.json", + country=config_provider("countries"), + ), + expand( + "data/osm/raw/{country}/substations_way.json", + country=config_provider("countries"), + ), + expand( + "data/osm/raw/{country}/substations_relation.json", + country=config_provider("countries"), + ), rule clean_osm_data: input: cables_way=expand( "data/osm/raw/{country}/cables_way.json", - country = config_provider("countries") + country=config_provider("countries"), ), lines_way=expand( "data/osm/raw/{country}/lines_way.json", - country = config_provider("countries") + country=config_provider("countries"), ), links_relation=expand( "data/osm/raw/{country}/links_relation.json", - country = config_provider("countries") + country=config_provider("countries"), ), substations_way=expand( "data/osm/raw/{country}/substations_way.json", - 
country = config_provider("countries") + country=config_provider("countries"), ), substations_relation=expand( "data/osm/raw/{country}/substations_relation.json", - country = config_provider("countries") + country=config_provider("countries"), ), offshore_shapes=resources("offshore_shapes.geojson"), country_shapes=resources("country_shapes.geojson"), diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py index 899337f89..67fc810ef 100644 --- a/scripts/retrieve_osm_data.py +++ b/scripts/retrieve_osm_data.py @@ -17,10 +17,8 @@ import time import requests -from _helpers import ( +from _helpers import ( # set_scenario_config,; update_config_from_wildcards, configure_logging, - # set_scenario_config, - # update_config_from_wildcards, ) logger = logging.getLogger(__name__) From 3e2f7d3d2278bed44b6237b2c0756e2a94b66e85 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Mon, 10 Jun 2024 13:36:29 +0200 Subject: [PATCH 044/100] removed commented-out code. --- config/config.default.yaml | 4 +- rules/build_electricity.smk | 157 +++-- scripts/base_network.py | 195 ++++++- scripts/base_network_osm.py | 1058 ---------------------------------- scripts/retrieve_osm_data.py | 4 +- 5 files changed, 244 insertions(+), 1174 deletions(-) delete mode 100644 scripts/base_network_osm.py diff --git a/config/config.default.yaml b/config/config.default.yaml index d7ea86dab..7de26d9df 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -77,8 +77,8 @@ enable: # Settings related to the high-voltage electricity grid electricity_network: - base_network: "osm" # "osm" or "gridkit" - osm_group_tolerance_buses: 5000 # [m] (default 5000) Tolerance in meters of the close buses to merge + base_network: "gridkit" # "gridkit", "osm-prebuilt" (prebuilt network from OSM data), "osm-raw" (retrieve and build network from raw OSM data, takes longer) + osm_group_tolerance_buses: 5000 # only relevant for "osm-raw" setting: [m] (default 5000) Tolerance in meters of the 
close buses to merge # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#co2-budget co2_budget: diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 185f5692a..c1c2dc0b9 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -65,99 +65,80 @@ rule build_powerplants: "../scripts/build_powerplants.py" -if config["electricity_network"]["base_network"] == "gridkit": - - rule base_network: - params: - countries=config_provider("countries"), - snapshots=config_provider("snapshots"), - drop_leap_day=config_provider("enable", "drop_leap_day"), - lines=config_provider("lines"), - links=config_provider("links"), - transformers=config_provider("transformers"), - input: - eg_buses="data/entsoegridkit/buses.csv", - eg_lines="data/entsoegridkit/lines.csv", - eg_links="data/entsoegridkit/links.csv", - eg_converters="data/entsoegridkit/converters.csv", - eg_transformers="data/entsoegridkit/transformers.csv", - parameter_corrections="data/parameter_corrections.yaml", - links_p_nom="data/links_p_nom.csv", - links_tyndp="data/links_tyndp.csv", - country_shapes=resources("country_shapes.geojson"), - offshore_shapes=resources("offshore_shapes.geojson"), - europe_shape=resources("europe_shape.geojson"), - output: - base_network=resources("networks/base.nc"), - regions_onshore=resources("regions_onshore.geojson"), - regions_offshore=resources("regions_offshore.geojson"), - log: - logs("base_network.log"), - benchmark: - benchmarks("base_network") - threads: 1 - resources: - mem_mb=1500, - conda: - "../envs/environment.yaml" - script: - "../scripts/base_network.py" - - -if config["electricity_network"]["base_network"] == "osm": - - rule base_network: - params: - countries=config_provider("countries"), - snapshots=config_provider("snapshots"), - drop_leap_day=config_provider("enable", "drop_leap_day"), - lines=config_provider("lines"), - links=config_provider("links"), - transformers=config_provider("transformers"), - input: 
- eg_buses=( - "data/osm/prebuilt/buses.csv" - if config["electricity_network"]["osm_use_prebuilt"] == True +rule base_network: + params: + countries=config_provider("countries"), + snapshots=config_provider("snapshots"), + drop_leap_day=config_provider("enable", "drop_leap_day"), + lines=config_provider("lines"), + links=config_provider("links"), + transformers=config_provider("transformers"), + input: + eg_buses=lambda w: ( + "data/entsoegridkit/buses.csv" if config_provider("electricity_network", "base_network")(w) == "gridkit" + else ( + "data/osm/prebuilt/buses.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" else resources("osm/buses.csv") - ), - eg_lines=( - "data/osm/prebuilt/lines.csv" - if config["electricity_network"]["osm_use_prebuilt"] == True + ) + ), + eg_lines=lambda w: ( + "data/entsoegridkit/lines.csv" if config_provider("electricity_network", "base_network")(w) == "gridkit" + else ( + "data/osm/prebuilt/lines.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" else resources("osm/lines.csv") - ), - eg_links=( - "data/osm/prebuilt/links.csv" - if config["electricity_network"]["osm_use_prebuilt"] == True + ) + ), + eg_links=lambda w: ( + "data/entsoegridkit/links.csv" if config_provider("electricity_network", "base_network")(w) == "gridkit" + else ( + "data/osm/prebuilt/links.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" else resources("osm/links.csv") - ), - eg_converters=( - "data/osm/prebuilt/converters.csv" - if config["electricity_network"]["osm_use_prebuilt"] == True + ) + ), + eg_converters=lambda w: ( + "data/entsoegridkit/converters.csv" if config_provider("electricity_network", "base_network")(w) == "gridkit" + else ( + "data/osm/prebuilt/converters.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" else resources("osm/converters.csv") - ), - eg_transformers=( - "data/osm/prebuilt/transformers.csv" - if 
config["electricity_network"]["osm_use_prebuilt"] == True + ) + ), + eg_transformers=lambda w: ( + "data/entsoegridkit/transformers.csv" if config_provider("electricity_network", "base_network")(w) == "gridkit" + else ( + "data/osm/prebuilt/transformers.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" else resources("osm/transformers.csv") - ), - country_shapes=resources("country_shapes.geojson"), - offshore_shapes=resources("offshore_shapes.geojson"), - europe_shape=resources("europe_shape.geojson"), - output: - base_network=resources("networks/base.nc"), - regions_onshore=resources("regions_onshore.geojson"), - regions_offshore=resources("regions_offshore.geojson"), - log: - logs("base_network.log"), - benchmark: - benchmarks("base_network") - threads: 1 - resources: - mem_mb=1500, - conda: - "../envs/environment.yaml" - script: - "../scripts/base_network_osm.py" + ) + ), + parameter_corrections=lambda w: ( + "data/parameter_corrections.yaml" if config_provider("electricity_network", "base_network")(w) == "gridkit" + else [] + ), + links_p_nom=lambda w: ( + "data/links_p_nom.csv" if config_provider("electricity_network", "base_network")(w) == "gridkit" + else [] + ), + links_tyndp=lambda w: ( + "data/links_tyndp.csv" if config_provider("electricity_network", "base_network")(w) == "gridkit" + else [] + ), + country_shapes=resources("country_shapes.geojson"), + offshore_shapes=resources("offshore_shapes.geojson"), + europe_shape=resources("europe_shape.geojson"), + output: + base_network=resources("networks/base.nc"), + regions_onshore=resources("regions_onshore.geojson"), + regions_offshore=resources("regions_offshore.geojson"), + log: + logs("base_network.log"), + benchmark: + benchmarks("base_network") + threads: 1 + resources: + mem_mb=1500, + conda: + "../envs/environment.yaml" + script: + "../scripts/base_network.py" rule build_shapes: diff --git a/scripts/base_network.py b/scripts/base_network.py index df3bc2b2c..963234a50 
100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -5,7 +5,10 @@ # coding: utf-8 """ -Creates the network topology from an `ENTSO-E map extract `_ (March 2022) as a PyPSA network. +Creates the network topology from a `ENTSO-E map extract. + +`_ (March 2022) as a PyPSA +network. Relevant Settings ----------------- @@ -142,29 +145,39 @@ def _load_buses_from_eg(eg_buses, europe_shape, config_elec): dtype=dict(bus_id="str"), ) .set_index("bus_id") - .drop(["station_id"], axis=1) .rename(columns=dict(voltage="v_nom")) ) - buses["carrier"] = buses.pop("dc").map({True: "DC", False: "AC"}) + if "station_id" in buses.columns: + buses.drop("station_id", axis=1, inplace=True) + + # buses["carrier"] = buses.pop("dc").map({True: "DC", False: "AC"}) buses["under_construction"] = buses.under_construction.where( lambda s: s.notnull(), False ).astype(bool) # remove all buses outside of all countries including exclusive economic zones (offshore) europe_shape = gpd.read_file(europe_shape).loc[0, "geometry"] + # TODO pypsa-eur: Temporary fix: Convex hull, this is important when nodes are between countries + # europe_shape = europe_shape.convex_hull + europe_shape_prepped = shapely.prepared.prep(europe_shape) buses_in_europe_b = buses[["x", "y"]].apply( lambda p: europe_shape_prepped.contains(Point(p)), axis=1 ) - buses_with_v_nom_to_keep_b = ( - buses.v_nom.isin(config_elec["voltages"]) | buses.v_nom.isnull() - ) - logger.info( - f'Removing buses with voltages {pd.Index(buses.v_nom.unique()).dropna().difference(config_elec["voltages"])}' - ) + # TODO pypsa-eur: Find a long-term solution + # buses_with_v_nom_to_keep_b = ( + # buses.v_nom.isin(config_elec["voltages"]) | buses.v_nom.isnull() + # ) + v_nom_min = min(config_elec["voltages"]) + v_nom_max = max(config_elec["voltages"]) + + # Quick fix: + buses_with_v_nom_to_keep_b = (v_nom_min <= buses.v_nom) & (buses.v_nom <= v_nom_max) + + logger.info(f"Removing buses outside of range {v_nom_min} - {v_nom_max} V") return 
pd.DataFrame(buses.loc[buses_in_europe_b & buses_with_v_nom_to_keep_b]) @@ -221,6 +234,31 @@ def _load_links_from_eg(buses, eg_links): return links +def _load_links_from_osm(buses, eg_links): + links = pd.read_csv( + eg_links, + quotechar="'", + true_values=["t"], + false_values=["f"], + dtype=dict( + link_id="str", + bus0="str", + bus1="str", + voltage="int", + p_nom="float", + ), + ).set_index("link_id") + + links["length"] /= 1e3 + + links = _remove_dangling_branches(links, buses) + + # Add DC line parameters + links["carrier"] = "DC" + + return links + + def _add_links_from_tyndp(buses, links, links_tyndp, europe_shape): links_tyndp = pd.read_csv(links_tyndp) @@ -347,7 +385,8 @@ def _load_lines_from_eg(buses, eg_lines): ) lines["length"] /= 1e3 - lines["carrier"] = "AC" + + lines["carrier"] = "AC" #TODO pypsa-eur check lines = _remove_dangling_branches(lines, buses) return lines @@ -397,7 +436,7 @@ def _reconnect_crimea(lines): return pd.concat([lines, lines_to_crimea]) -def _set_electrical_parameters_lines(lines, config): +def _set_electrical_parameters_lines_eg(lines, config): v_noms = config["electricity"]["voltages"] linetypes = config["lines"]["types"] @@ -409,16 +448,35 @@ def _set_electrical_parameters_lines(lines, config): return lines +def _set_electrical_parameters_lines_osm(lines_config, voltages, lines): + if lines.empty: + lines["type"] = [] + return lines + + linetypes = _get_linetypes_config(lines_config["types"], voltages) + + lines["carrier"] = "AC" + lines["dc"] = False + + lines.loc[:, "type"] = lines.v_nom.apply( + lambda x: _get_linetype_by_voltage(x, linetypes) + ) + + lines["s_max_pu"] = lines_config["s_max_pu"] + + return lines + + def _set_lines_s_nom_from_linetypes(n): n.lines["s_nom"] = ( np.sqrt(3) * n.lines["type"].map(n.line_types.i_nom) * n.lines["v_nom"] - * n.lines.num_parallel + * n.lines["num_parallel"] ) -def _set_electrical_parameters_links(links, config, links_p_nom): +def _set_electrical_parameters_links_eg(links, config, 
links_p_nom): if links.empty: return links @@ -450,6 +508,19 @@ def _set_electrical_parameters_links(links, config, links_p_nom): return links +def _set_electrical_parameters_links_osm(links, config): + if links.empty: + return links + + p_max_pu = config["links"].get("p_max_pu", 1.0) + links["p_max_pu"] = p_max_pu + links["p_min_pu"] = -p_max_pu + links["carrier"] = "DC" + links["dc"] = True + + return links + + def _set_electrical_parameters_converters(converters, config): p_max_pu = config["links"].get("p_max_pu", 1.0) converters["p_max_pu"] = p_max_pu @@ -570,7 +641,7 @@ def prefer_voltage(x, which): buses["substation_lv"] = ( lv_b & onshore_b & (~buses["under_construction"]) & has_connections_b ) - buses["substation_off"] = ((hv_b & offshore_b) | (hv_b & onshore_b)) & ( + buses["substation_off"] = (offshore_b | (hv_b & onshore_b)) & ( ~buses["under_construction"] ) @@ -737,31 +808,55 @@ def base_network( parameter_corrections, config, ): + buses = _load_buses_from_eg(eg_buses, europe_shape, config["electricity"]) - links = _load_links_from_eg(buses, eg_links) - if config["links"].get("include_tyndp"): + if config["electricity_network"].get("base_network") == "gridkit": + links = _load_links_from_eg(buses, eg_links) + elif "osm" in config["electricity_network"].get("base_network"): + links = _load_links_from_osm(buses, eg_links) + else: + raise ValueError("base_network must be either 'gridkit' or 'osm'") + + if (config["links"].get("include_tyndp") & (config["electricity_network"].get("base_network") == "gridkit")): buses, links = _add_links_from_tyndp(buses, links, links_tyndp, europe_shape) converters = _load_converters_from_eg(buses, eg_converters) + transformers = _load_transformers_from_eg(buses, eg_transformers) lines = _load_lines_from_eg(buses, eg_lines) - transformers = _load_transformers_from_eg(buses, eg_transformers) if config["lines"].get("reconnect_crimea", True) and "UA" in config["countries"]: lines = _reconnect_crimea(lines) - lines = 
_set_electrical_parameters_lines(lines, config) + if config["electricity_network"].get("base_network") == "gridkit": + lines = _set_electrical_parameters_lines_eg(lines, config) + links = _set_electrical_parameters_links_eg(links, config, links_p_nom) + elif "osm" in config["electricity_network"].get("base_network"): + lines = _set_electrical_parameters_lines_osm( + config["lines"], config["electricity"]["voltages"], lines + ) + links = _set_electrical_parameters_links_osm(links, config) + else: + raise ValueError("base_network must be either 'gridkit' or 'osm'") + transformers = _set_electrical_parameters_transformers(transformers, config) - links = _set_electrical_parameters_links(links, config, links_p_nom) converters = _set_electrical_parameters_converters(converters, config) n = pypsa.Network() - n.name = "PyPSA-Eur" + + if config["electricity_network"].get("base_network") == "gridkit": + n.name = "PyPSA-Eur (GridKit)" + elif "osm" in config["electricity_network"].get("base_network"): + n.name = "PyPSA-Eur (OSM)" + else: + raise ValueError("base_network must be either 'gridkit' or 'osm'") time = get_snapshots(snakemake.params.snapshots, snakemake.params.drop_leap_day) n.set_snapshots(time) - n.madd("Carrier", ["AC", "DC"]) + n.madd( + "Carrier", ["AC", "DC"] + ) # TODO: fix hard code and check if AC/DC truly exist n.import_components_from_dataframe(buses, "Bus") n.import_components_from_dataframe(lines, "Line") @@ -770,13 +865,15 @@ def base_network( n.import_components_from_dataframe(converters, "Link") _set_lines_s_nom_from_linetypes(n) + if config["electricity_network"].get("base_network") == "gridkit": + _apply_parameter_corrections(n, parameter_corrections) - _apply_parameter_corrections(n, parameter_corrections) - + # TODO: what about this? 
n = _remove_unconnected_components(n) _set_countries_and_substations(n, config, country_shapes, offshore_shapes) + # TODO pypsa-eur add this _set_links_underwater_fraction(n, offshore_shapes) _replace_b2b_converter_at_country_border_by_link(n) @@ -785,9 +882,59 @@ def base_network( _set_shapes(n, country_shapes, offshore_shapes) + logger.info(f"Base network created using {config['electricity_network'].get('base_network')}.") + return n +def _get_linetypes_config(line_types, voltages): + """ + Return the dictionary of linetypes for selected voltages. The dictionary is + a subset of the dictionary line_types, whose keys match the selected + voltages. + + Parameters + ---------- + line_types : dict + Dictionary of linetypes: keys are nominal voltages and values are linetypes. + voltages : list + List of selected voltages. + + Returns + ------- + Dictionary of linetypes for selected voltages. + """ + # get voltages value that are not availabile in the line types + vnoms_diff = set(voltages).symmetric_difference(set(line_types.keys())) + if vnoms_diff: + logger.warning( + f"Voltages {vnoms_diff} not in the {line_types} or {voltages} list." + ) + return {k: v for k, v in line_types.items() if k in voltages} + + +def _get_linetype_by_voltage(v_nom, d_linetypes): + """ + Return the linetype of a specific line based on its voltage v_nom. + + Parameters + ---------- + v_nom : float + The voltage of the line. + d_linetypes : dict + Dictionary of linetypes: keys are nominal voltages and values are linetypes. + + Returns + ------- + The linetype of the line whose nominal voltage is closest to the line voltage. 
+ """ + v_nom_min, line_type_min = min( + d_linetypes.items(), + key=lambda x: abs(x[0] - v_nom), + ) + return line_type_min + + def voronoi_partition_pts(points, outline): """ Compute the polygons of a voronoi partition of `points` within the polygon @@ -968,4 +1115,4 @@ def append_bus_shapes(n, shapes, type): offshore_shapes.to_frame().to_file(snakemake.output.regions_offshore) n.meta = snakemake.config - n.export_to_netcdf(snakemake.output.base_network) + n.export_to_netcdf(snakemake.output.base_network) \ No newline at end of file diff --git a/scripts/base_network_osm.py b/scripts/base_network_osm.py deleted file mode 100644 index f9b0daf57..000000000 --- a/scripts/base_network_osm.py +++ /dev/null @@ -1,1058 +0,0 @@ -# -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: : 2017-2024 The PyPSA-Eur Authors -# -# SPDX-License-Identifier: MIT - -# coding: utf-8 -""" -Creates the network topology from a `ENTSO-E map extract. - -`_ (March 2022) as a PyPSA -network. - -Relevant Settings ------------------ - -.. code:: yaml - - countries: - - electricity: - voltages: - - lines: - types: - s_max_pu: - under_construction: - - links: - p_max_pu: - under_construction: - include_tyndp: - - transformers: - x: - s_nom: - type: - -.. seealso:: - Documentation of the configuration file ``config/config.yaml`` at - :ref:`snapshots_cf`, :ref:`toplevel_cf`, :ref:`electricity_cf`, :ref:`load_cf`, - :ref:`lines_cf`, :ref:`links_cf`, :ref:`transformers_cf` - -Inputs ------- - -- ``data/entsoegridkit``: Extract from the geographical vector data of the online `ENTSO-E Interactive Map `_ by the `GridKit `_ toolkit dating back to March 2022. -- ``data/parameter_corrections.yaml``: Corrections for ``data/entsoegridkit`` -- ``data/links_p_nom.csv``: confer :ref:`links` -- ``data/links_tyndp.csv``: List of projects in the `TYNDP 2018 `_ that are at least *in permitting* with fields for start- and endpoint (names and coordinates), length, capacity, construction status, and project reference ID. 
-- ``resources/country_shapes.geojson``: confer :ref:`shapes` -- ``resources/offshore_shapes.geojson``: confer :ref:`shapes` -- ``resources/europe_shape.geojson``: confer :ref:`shapes` - -Outputs -------- - -- ``networks/base.nc`` - - .. image:: img/base.png - :scale: 33 % - -- ``resources/regions_onshore.geojson``: - - .. image:: img/regions_onshore.png - :scale: 33 % - -- ``resources/regions_offshore.geojson``: - - .. image:: img/regions_offshore.png - :scale: 33 % - -Description ------------ -Creates the network topology from an ENTSO-E map extract, and create Voronoi shapes for each bus representing both onshore and offshore regions. -""" - -import logging -from itertools import product - -import geopandas as gpd -import networkx as nx -import numpy as np -import pandas as pd -import pypsa -import shapely -import shapely.prepared -import shapely.wkt -import yaml -from _helpers import REGION_COLS, configure_logging, get_snapshots, set_scenario_config -from packaging.version import Version, parse -from scipy import spatial -from scipy.sparse import csgraph -from shapely.geometry import LineString, Point, Polygon - -PD_GE_2_2 = parse(pd.__version__) >= Version("2.2") - -logger = logging.getLogger(__name__) - - -def _get_oid(df): - if "tags" in df.columns: - return df.tags.str.extract('"oid"=>"(\d+)"', expand=False) - else: - return pd.Series(np.nan, df.index) - - -def _get_country(df): - if "tags" in df.columns: - return df.tags.str.extract('"country"=>"([A-Z]{2})"', expand=False) - else: - return pd.Series(np.nan, df.index) - - -def _find_closest_links(links, new_links, distance_upper_bound=1.5): - treecoords = np.asarray( - [ - np.asarray(shapely.wkt.loads(s).coords)[[0, -1]].flatten() - for s in links.geometry - ] - ) - querycoords = np.vstack( - [new_links[["x1", "y1", "x2", "y2"]], new_links[["x2", "y2", "x1", "y1"]]] - ) - tree = spatial.KDTree(treecoords) - dist, ind = tree.query(querycoords, distance_upper_bound=distance_upper_bound) - found_b = ind < 
len(links) - found_i = np.arange(len(new_links) * 2)[found_b] % len(new_links) - return ( - pd.DataFrame( - dict(D=dist[found_b], i=links.index[ind[found_b] % len(links)]), - index=new_links.index[found_i], - ) - .sort_values(by="D")[lambda ds: ~ds.index.duplicated(keep="first")] - .sort_index()["i"] - ) - - -def _load_buses_from_eg(eg_buses, europe_shape, config_elec): - buses = ( - pd.read_csv( - eg_buses, - quotechar="'", - true_values=["t"], - false_values=["f"], - dtype=dict(bus_id="str"), - ) - .set_index("bus_id") - .rename(columns=dict(voltage="v_nom")) - ) - - if "station_id" in buses.columns: - buses.drop("station_id", axis=1, inplace=True) - - # buses["carrier"] = buses.pop("dc").map({True: "DC", False: "AC"}) - buses["under_construction"] = buses.under_construction.where( - lambda s: s.notnull(), False - ).astype(bool) - - # remove all buses outside of all countries including exclusive economic zones (offshore) - europe_shape = gpd.read_file(europe_shape).loc[0, "geometry"] - # TODO pypsa-eur: Temporary fix: Convex hull, this is important when nodes are between countries - # europe_shape = europe_shape.convex_hull - - europe_shape_prepped = shapely.prepared.prep(europe_shape) - buses_in_europe_b = buses[["x", "y"]].apply( - lambda p: europe_shape_prepped.contains(Point(p)), axis=1 - ) - - # TODO pypsa-eur: Find a long-term solution - # buses_with_v_nom_to_keep_b = ( - # buses.v_nom.isin(config_elec["voltages"]) | buses.v_nom.isnull() - # ) - - v_nom_min = min(config_elec["voltages"]) - v_nom_max = max(config_elec["voltages"]) - - # Quick fix: - buses_with_v_nom_to_keep_b = (v_nom_min <= buses.v_nom) & (buses.v_nom <= v_nom_max) - - logger.info(f"Removing buses outside of range {v_nom_min} - {v_nom_max} V") - return pd.DataFrame(buses.loc[buses_in_europe_b & buses_with_v_nom_to_keep_b]) - - -def _load_transformers_from_eg(buses, eg_transformers): - transformers = pd.read_csv( - eg_transformers, - quotechar="'", - true_values=["t"], - false_values=["f"], 
- dtype=dict(transformer_id="str", bus0="str", bus1="str"), - ).set_index("transformer_id") - - transformers = _remove_dangling_branches(transformers, buses) - - return transformers - - -def _load_converters_from_eg(buses, eg_converters): - converters = pd.read_csv( - eg_converters, - quotechar="'", - true_values=["t"], - false_values=["f"], - dtype=dict(converter_id="str", bus0="str", bus1="str"), - ).set_index("converter_id") - - converters = _remove_dangling_branches(converters, buses) - - converters["carrier"] = "B2B" - - return converters - - -def _load_links_from_eg(buses, eg_links): - links = pd.read_csv( - eg_links, - quotechar="'", - true_values=["t"], - false_values=["f"], - dtype=dict( - link_id="str", - bus0="str", - bus1="str", - voltage="int", - p_nom="float", - ), - ).set_index("link_id") - - links["length"] /= 1e3 - - links = _remove_dangling_branches(links, buses) - - # Add DC line parameters - links["carrier"] = "DC" - - return links - - -def _add_links_from_tyndp(buses, links, links_tyndp, europe_shape): - links_tyndp = pd.read_csv(links_tyndp) - - # remove all links from list which lie outside all of the desired countries - europe_shape = gpd.read_file(europe_shape).loc[0, "geometry"] - europe_shape_prepped = shapely.prepared.prep(europe_shape) - x1y1_in_europe_b = links_tyndp[["x1", "y1"]].apply( - lambda p: europe_shape_prepped.contains(Point(p)), axis=1 - ) - x2y2_in_europe_b = links_tyndp[["x2", "y2"]].apply( - lambda p: europe_shape_prepped.contains(Point(p)), axis=1 - ) - is_within_covered_countries_b = x1y1_in_europe_b & x2y2_in_europe_b - - if not is_within_covered_countries_b.all(): - logger.info( - "TYNDP links outside of the covered area (skipping): " - + ", ".join(links_tyndp.loc[~is_within_covered_countries_b, "Name"]) - ) - - links_tyndp = links_tyndp.loc[is_within_covered_countries_b] - if links_tyndp.empty: - return buses, links - - has_replaces_b = links_tyndp.replaces.notnull() - oids = dict(Bus=_get_oid(buses), 
Link=_get_oid(links)) - keep_b = dict( - Bus=pd.Series(True, index=buses.index), Link=pd.Series(True, index=links.index) - ) - for reps in links_tyndp.loc[has_replaces_b, "replaces"]: - for comps in reps.split(":"): - oids_to_remove = comps.split(".") - c = oids_to_remove.pop(0) - keep_b[c] &= ~oids[c].isin(oids_to_remove) - buses = buses.loc[keep_b["Bus"]] - links = links.loc[keep_b["Link"]] - - links_tyndp["j"] = _find_closest_links( - links, links_tyndp, distance_upper_bound=0.20 - ) - # Corresponds approximately to 20km tolerances - - if links_tyndp["j"].notnull().any(): - logger.info( - "TYNDP links already in the dataset (skipping): " - + ", ".join(links_tyndp.loc[links_tyndp["j"].notnull(), "Name"]) - ) - links_tyndp = links_tyndp.loc[links_tyndp["j"].isnull()] - if links_tyndp.empty: - return buses, links - - tree = spatial.KDTree(buses[["x", "y"]]) - _, ind0 = tree.query(links_tyndp[["x1", "y1"]]) - ind0_b = ind0 < len(buses) - links_tyndp.loc[ind0_b, "bus0"] = buses.index[ind0[ind0_b]] - - _, ind1 = tree.query(links_tyndp[["x2", "y2"]]) - ind1_b = ind1 < len(buses) - links_tyndp.loc[ind1_b, "bus1"] = buses.index[ind1[ind1_b]] - - links_tyndp_located_b = ( - links_tyndp["bus0"].notnull() & links_tyndp["bus1"].notnull() - ) - if not links_tyndp_located_b.all(): - logger.warning( - "Did not find connected buses for TYNDP links (skipping): " - + ", ".join(links_tyndp.loc[~links_tyndp_located_b, "Name"]) - ) - links_tyndp = links_tyndp.loc[links_tyndp_located_b] - - logger.info("Adding the following TYNDP links: " + ", ".join(links_tyndp["Name"])) - - links_tyndp = links_tyndp[["bus0", "bus1"]].assign( - carrier="DC", - p_nom=links_tyndp["Power (MW)"], - length=links_tyndp["Length (given) (km)"].fillna( - links_tyndp["Length (distance*1.2) (km)"] - ), - under_construction=True, - underground=False, - geometry=( - links_tyndp[["x1", "y1", "x2", "y2"]].apply( - lambda s: str(LineString([[s.x1, s.y1], [s.x2, s.y2]])), axis=1 - ) - ), - tags=( - '"name"=>"' - + 
links_tyndp["Name"] - + '", ' - + '"ref"=>"' - + links_tyndp["Ref"] - + '", ' - + '"status"=>"' - + links_tyndp["status"] - + '"' - ), - ) - - links_tyndp.index = "T" + links_tyndp.index.astype(str) - - links = pd.concat([links, links_tyndp], sort=True) - - return buses, links - - -def _load_lines_from_eg(buses, eg_lines): - lines = ( - pd.read_csv( - eg_lines, - quotechar="'", - true_values=["t"], - false_values=["f"], - dtype=dict( - line_id="str", - bus0="str", - bus1="str", - underground="bool", - under_construction="bool", - ), - ) - .set_index("line_id") - .rename(columns=dict(voltage="v_nom", circuits="num_parallel")) - ) - - lines["length"] /= 1e3 - - # lines["carrier"] = "AC" #TODO pypsa-eur clean/remove this - lines = _remove_dangling_branches(lines, buses) - - return lines - - -def _apply_parameter_corrections(n, parameter_corrections): - with open(parameter_corrections) as f: - corrections = yaml.safe_load(f) - - if corrections is None: - return - - for component, attrs in corrections.items(): - df = n.df(component) - oid = _get_oid(df) - if attrs is None: - continue - - for attr, repls in attrs.items(): - for i, r in repls.items(): - if i == "oid": - r = oid.map(repls["oid"]).dropna() - elif i == "index": - r = pd.Series(repls["index"]) - else: - raise NotImplementedError() - inds = r.index.intersection(df.index) - df.loc[inds, attr] = r[inds].astype(df[attr].dtype) - - -def _reconnect_crimea(lines): - logger.info("Reconnecting Crimea to the Ukrainian grid.") - lines_to_crimea = pd.DataFrame( - { - "bus0": ["3065", "3181", "3181"], - "bus1": ["3057", "3055", "3057"], - "v_nom": [300, 300, 300], - "num_parallel": [1, 1, 1], - "length": [140, 120, 140], - "carrier": ["AC", "AC", "AC"], - "underground": [False, False, False], - "under_construction": [False, False, False], - }, - index=["Melitopol", "Liubymivka left", "Luibymivka right"], - ) - - return pd.concat([lines, lines_to_crimea]) - - -# def _set_electrical_parameters_lines(lines, config): -# 
v_noms = config["electricity"]["voltages"] -# linetypes = config["lines"]["types"] - -# for v_nom in v_noms: -# lines.loc[lines["v_nom"] == v_nom, "type"] = linetypes[v_nom] - - -def _set_electrical_parameters_lines(lines_config, voltages, lines): - if lines.empty: - lines["type"] = [] - return lines - - linetypes = _get_linetypes_config(lines_config["types"], voltages) - - lines["carrier"] = "AC" - lines["dc"] = False - - lines.loc[:, "type"] = lines.v_nom.apply( - lambda x: _get_linetype_by_voltage(x, linetypes) - ) - - lines["s_max_pu"] = lines_config["s_max_pu"] - - return lines - - -def _set_lines_s_nom_from_linetypes(n): - n.lines["s_nom"] = ( - np.sqrt(3) - * n.lines["type"].map(n.line_types.i_nom) - * n.lines["v_nom"] - * n.lines["num_parallel"] - ) - # Re-define s_nom for DC lines - n.lines.loc[n.lines["carrier"] == "DC", "s_nom"] = n.lines["type"].map( - n.line_types.i_nom - ) * n.lines.eval("v_nom * num_parallel") - - -# TODO pypsa-eur: Clean/fix this, update list p_noms -def _set_electrical_parameters_links(links, config): - if links.empty: - return links - - p_max_pu = config["links"].get("p_max_pu", 1.0) - links["p_max_pu"] = p_max_pu - links["p_min_pu"] = -p_max_pu - links["carrier"] = "DC" - links["dc"] = True - - return links - - -def _set_electrical_parameters_converters(converters, config): - p_max_pu = config["links"].get("p_max_pu", 1.0) - converters["p_max_pu"] = p_max_pu - converters["p_min_pu"] = -p_max_pu - - converters["p_nom"] = 2000 - - # Converters are combined with links - converters["under_construction"] = False - converters["underground"] = False - - return converters - - -def _set_electrical_parameters_transformers(transformers, config): - config = config["transformers"] - - ## Add transformer parameters - transformers["x"] = config.get("x", 0.1) - transformers["s_nom"] = config.get("s_nom", 2000) - transformers["type"] = config.get("type", "") - - return transformers - - -def _remove_dangling_branches(branches, buses): - return 
pd.DataFrame( - branches.loc[branches.bus0.isin(buses.index) & branches.bus1.isin(buses.index)] - ) - - -def _remove_unconnected_components(network, threshold=6): - _, labels = csgraph.connected_components(network.adjacency_matrix(), directed=False) - component = pd.Series(labels, index=network.buses.index) - - component_sizes = component.value_counts() - components_to_remove = component_sizes.loc[component_sizes < threshold] - - logger.info( - f"Removing {len(components_to_remove)} unconnected network components with less than {components_to_remove.max()} buses. In total {components_to_remove.sum()} buses." - ) - - return network[component == component_sizes.index[0]] - - -def _set_countries_and_substations(n, config, country_shapes, offshore_shapes): - buses = n.buses - - def buses_in_shape(shape): - shape = shapely.prepared.prep(shape) - return pd.Series( - np.fromiter( - ( - shape.contains(Point(x, y)) - for x, y in buses.loc[:, ["x", "y"]].values - ), - dtype=bool, - count=len(buses), - ), - index=buses.index, - ) - - countries = config["countries"] - country_shapes = gpd.read_file(country_shapes).set_index("name")["geometry"] - # reindexing necessary for supporting empty geo-dataframes - offshore_shapes = gpd.read_file(offshore_shapes) - offshore_shapes = offshore_shapes.reindex(columns=["name", "geometry"]).set_index( - "name" - )["geometry"] - substation_b = buses["symbol"].str.contains( - "substation|converter station", case=False - ) - - def prefer_voltage(x, which): - index = x.index - if len(index) == 1: - return pd.Series(index, index) - key = ( - x.index[0] - if x["v_nom"].isnull().all() - else getattr(x["v_nom"], "idx" + which)() - ) - return pd.Series(key, index) - - compat_kws = dict(include_groups=False) if PD_GE_2_2 else {} - gb = buses.loc[substation_b].groupby( - ["x", "y"], as_index=False, group_keys=False, sort=False - ) - bus_map_low = gb.apply(prefer_voltage, "min", **compat_kws) - lv_b = (bus_map_low == 
bus_map_low.index).reindex(buses.index, fill_value=False) - bus_map_high = gb.apply(prefer_voltage, "max", **compat_kws) - hv_b = (bus_map_high == bus_map_high.index).reindex(buses.index, fill_value=False) - - onshore_b = pd.Series(False, buses.index) - offshore_b = pd.Series(False, buses.index) - - for country in countries: - onshore_shape = country_shapes[country] - onshore_country_b = buses_in_shape(onshore_shape) - onshore_b |= onshore_country_b - - buses.loc[onshore_country_b, "country"] = country - - if country not in offshore_shapes.index: - continue - offshore_country_b = buses_in_shape(offshore_shapes[country]) - offshore_b |= offshore_country_b - - buses.loc[offshore_country_b, "country"] = country - - # Only accept buses as low-voltage substations (where load is attached), if - # they have at least one connection which is not under_construction - has_connections_b = pd.Series(False, index=buses.index) - for b, df in product(("bus0", "bus1"), (n.lines, n.links)): - has_connections_b |= ~df.groupby(b).under_construction.min() - - buses["onshore_bus"] = onshore_b - buses["substation_lv"] = ( - lv_b & onshore_b & (~buses["under_construction"]) & has_connections_b - ) - - # TODO: fix this in pypsa-eur master branch - # buses["substation_off"] = offshore_b & ( - # ~buses["under_construction"] - # ) - - buses["substation_off"] = (offshore_b | (hv_b & onshore_b)) & ( - ~buses["under_construction"] - ) - - c_nan_b = buses.country.fillna("na") == "na" - if c_nan_b.sum() > 0: - c_tag = _get_country(buses.loc[c_nan_b]) - c_tag.loc[~c_tag.isin(countries)] = np.nan - n.buses.loc[c_nan_b, "country"] = c_tag - - c_tag_nan_b = n.buses.country.isnull() - - # Nearest country in path length defines country of still homeless buses - # Work-around until commit 705119 lands in pypsa release - n.transformers["length"] = 0.0 - graph = n.graph(weight="length") - n.transformers.drop("length", axis=1, inplace=True) - - for b in n.buses.index[c_tag_nan_b]: - df = ( - pd.DataFrame( - 
dict( - pathlength=nx.single_source_dijkstra_path_length( - graph, b, cutoff=200 - ) - ) - ) - .join(n.buses.country) - .dropna() - ) - assert ( - not df.empty - ), "No buses with defined country within 200km of bus `{}`".format(b) - n.buses.at[b, "country"] = df.loc[df.pathlength.idxmin(), "country"] - - logger.warning( - "{} buses are not in any country or offshore shape," - " {} have been assigned from the tag of the entsoe map," - " the rest from the next bus in terms of pathlength.".format( - c_nan_b.sum(), c_nan_b.sum() - c_tag_nan_b.sum() - ) - ) - - return buses - - -def _replace_b2b_converter_at_country_border_by_link(n): - # Affects only the B2B converter in Lithuania at the Polish border at the moment - buscntry = n.buses.country - linkcntry = n.links.bus0.map(buscntry) - converters_i = n.links.index[ - (n.links.carrier == "B2B") & (linkcntry == n.links.bus1.map(buscntry)) - ] - - def findforeignbus(G, i): - cntry = linkcntry.at[i] - for busattr in ("bus0", "bus1"): - b0 = n.links.at[i, busattr] - for b1 in G[b0]: - if buscntry[b1] != cntry: - return busattr, b0, b1 - return None, None, None - - for i in converters_i: - G = n.graph() - busattr, b0, b1 = findforeignbus(G, i) - if busattr is not None: - comp, line = next(iter(G[b0][b1])) - if comp != "Line": - logger.warning( - "Unable to replace B2B `{}` expected a Line, but found a {}".format( - i, comp - ) - ) - continue - - n.links.at[i, busattr] = b1 - n.links.at[i, "p_nom"] = min( - n.links.at[i, "p_nom"], n.lines.at[line, "s_nom"] - ) - n.links.at[i, "carrier"] = "DC" - n.links.at[i, "underwater_fraction"] = 0.0 - n.links.at[i, "length"] = n.lines.at[line, "length"] - - n.remove("Line", line) - n.remove("Bus", b0) - - logger.info( - "Replacing B2B converter `{}` together with bus `{}` and line `{}` by an HVDC tie-line {}-{}".format( - i, b0, line, linkcntry.at[i], buscntry.at[b1] - ) - ) - - -def _set_links_underwater_fraction(n, offshore_shapes): - if n.links.empty: - return - - if not 
hasattr(n.links, "geometry"): - n.links["underwater_fraction"] = 0.0 - else: - offshore_shape = gpd.read_file(offshore_shapes).unary_union - links = gpd.GeoSeries(n.links.geometry.dropna().map(shapely.wkt.loads)) - n.links["underwater_fraction"] = ( - links.intersection(offshore_shape).length / links.length - ) - - -def _adjust_capacities_of_under_construction_branches(n, config): - lines_mode = config["lines"].get("under_construction", "undef") - if lines_mode == "zero": - n.lines.loc[n.lines.under_construction, "num_parallel"] = 0.0 - n.lines.loc[n.lines.under_construction, "s_nom"] = 0.0 - elif lines_mode == "remove": - n.mremove("Line", n.lines.index[n.lines.under_construction]) - elif lines_mode != "keep": - logger.warning( - "Unrecognized configuration for `lines: under_construction` = `{}`. Keeping under construction lines." - ) - - links_mode = config["links"].get("under_construction", "undef") - if links_mode == "zero": - n.links.loc[n.links.under_construction, "p_nom"] = 0.0 - elif links_mode == "remove": - n.mremove("Link", n.links.index[n.links.under_construction]) - elif links_mode != "keep": - logger.warning( - "Unrecognized configuration for `links: under_construction` = `{}`. Keeping under construction links." 
- ) - - if lines_mode == "remove" or links_mode == "remove": - # We might need to remove further unconnected components - n = _remove_unconnected_components(n) - - return n - - -def _set_shapes(n, country_shapes, offshore_shapes): - # Write the geodataframes country_shapes and offshore_shapes to the network.shapes component - country_shapes = gpd.read_file(country_shapes).rename(columns={"name": "idx"}) - country_shapes["type"] = "country" - offshore_shapes = gpd.read_file(offshore_shapes).rename(columns={"name": "idx"}) - offshore_shapes["type"] = "offshore" - all_shapes = pd.concat([country_shapes, offshore_shapes], ignore_index=True) - n.madd( - "Shape", - all_shapes.index, - geometry=all_shapes.geometry, - idx=all_shapes.idx, - type=all_shapes["type"], - ) - - -def base_network_osm( - eg_buses, - eg_converters, - eg_transformers, - eg_lines, - eg_links, - europe_shape, - country_shapes, - offshore_shapes, - config, -): - buses = _load_buses_from_eg(eg_buses, europe_shape, config["electricity"]) - - # TODO pypsa-eur add this - links = _load_links_from_eg(buses, eg_links) - # if config["links"].get("include_tyndp"): - # buses, links = _add_links_from_tyndp(buses, links, links_tyndp, europe_shape) - - converters = _load_converters_from_eg(buses, eg_converters) - - lines = _load_lines_from_eg(buses, eg_lines) - transformers = _load_transformers_from_eg(buses, eg_transformers) - - if config["lines"].get("reconnect_crimea", True) and "UA" in config["countries"]: - lines = _reconnect_crimea(lines) - - lines = _set_electrical_parameters_lines( - config["lines"], config["electricity"]["voltages"], lines - ) - - links = _set_electrical_parameters_links(links, config) - - transformers = _set_electrical_parameters_transformers(transformers, config) - converters = _set_electrical_parameters_converters(converters, config) - - n = pypsa.Network() - n.name = "PyPSA-Eur (OSM)" - - time = get_snapshots(snakemake.params.snapshots, snakemake.params.drop_leap_day) - 
n.set_snapshots(time) - n.madd( - "Carrier", ["AC", "DC"] - ) # TODO: fix hard code and check if AC/DC truly exist - - n.import_components_from_dataframe(buses, "Bus") - n.import_components_from_dataframe(lines, "Line") - # The columns which names starts with "bus" are mixed up with the third-bus specification - # when executing additional_linkports() - # lines_dc.drop( - # labels=[ - # "bus0_lon", - # "bus0_lat", - # "bus1_lon", - # "bus1_lat", - # "bus_0_coors", - # "bus_1_coors", - # ], - # axis=1, - # inplace=True, - # ) - n.import_components_from_dataframe(links, "Link") - n.import_components_from_dataframe(transformers, "Transformer") - n.import_components_from_dataframe(converters, "Link") - - _set_lines_s_nom_from_linetypes(n) - - # TODO: what about this? - n = _remove_unconnected_components(n) - - _set_countries_and_substations(n, config, country_shapes, offshore_shapes) - - # TODO pypsa-eur add this - _set_links_underwater_fraction(n, offshore_shapes) - - _replace_b2b_converter_at_country_border_by_link(n) - - n = _adjust_capacities_of_under_construction_branches(n, config) - - _set_shapes(n, country_shapes, offshore_shapes) - - return n - - -def _get_linetypes_config(line_types, voltages): - """ - Return the dictionary of linetypes for selected voltages. The dictionary is - a subset of the dictionary line_types, whose keys match the selected - voltages. - - Parameters - ---------- - line_types : dict - Dictionary of linetypes: keys are nominal voltages and values are linetypes. - voltages : list - List of selected voltages. - - Returns - ------- - Dictionary of linetypes for selected voltages. - """ - # get voltages value that are not availabile in the line types - vnoms_diff = set(voltages).symmetric_difference(set(line_types.keys())) - if vnoms_diff: - logger.warning( - f"Voltages {vnoms_diff} not in the {line_types} or {voltages} list." 
- ) - return {k: v for k, v in line_types.items() if k in voltages} - - -def _get_linetype_by_voltage(v_nom, d_linetypes): - """ - Return the linetype of a specific line based on its voltage v_nom. - - Parameters - ---------- - v_nom : float - The voltage of the line. - d_linetypes : dict - Dictionary of linetypes: keys are nominal voltages and values are linetypes. - - Returns - ------- - The linetype of the line whose nominal voltage is closest to the line voltage. - """ - v_nom_min, line_type_min = min( - d_linetypes.items(), - key=lambda x: abs(x[0] - v_nom), - ) - return line_type_min - - -def voronoi_partition_pts(points, outline): - """ - Compute the polygons of a voronoi partition of `points` within the polygon - `outline`. Taken from - https://github.com/FRESNA/vresutils/blob/master/vresutils/graph.py. - - Attributes - ---------- - points : Nx2 - ndarray[dtype=float] - outline : Polygon - Returns - ------- - polygons : N - ndarray[dtype=Polygon|MultiPolygon] - """ - points = np.asarray(points) - - if len(points) == 1: - polygons = [outline] - else: - xmin, ymin = np.amin(points, axis=0) - xmax, ymax = np.amax(points, axis=0) - xspan = xmax - xmin - yspan = ymax - ymin - - # to avoid any network positions outside all Voronoi cells, append - # the corners of a rectangle framing these points - vor = spatial.Voronoi( - np.vstack( - ( - points, - [ - [xmin - 3.0 * xspan, ymin - 3.0 * yspan], - [xmin - 3.0 * xspan, ymax + 3.0 * yspan], - [xmax + 3.0 * xspan, ymin - 3.0 * yspan], - [xmax + 3.0 * xspan, ymax + 3.0 * yspan], - ], - ) - ) - ) - - polygons = [] - for i in range(len(points)): - poly = Polygon(vor.vertices[vor.regions[vor.point_region[i]]]) - - if not poly.is_valid: - poly = poly.buffer(0) - - with np.errstate(invalid="ignore"): - poly = poly.intersection(outline) - - polygons.append(poly) - - return polygons - - -def build_bus_shapes(n, country_shapes, offshore_shapes, countries): - country_shapes = 
gpd.read_file(country_shapes).set_index("name")["geometry"] - offshore_shapes = gpd.read_file(offshore_shapes) - offshore_shapes = offshore_shapes.reindex(columns=REGION_COLS).set_index("name")[ - "geometry" - ] - - onshore_regions = [] - offshore_regions = [] - - for country in countries: - c_b = n.buses.country == country - - onshore_shape = country_shapes[country] - onshore_locs = ( - n.buses.loc[c_b & n.buses.onshore_bus] - .sort_values( - by="substation_lv", ascending=False - ) # preference for substations - .drop_duplicates(subset=["x", "y"], keep="first")[["x", "y"]] - ) - onshore_regions.append( - gpd.GeoDataFrame( - { - "name": onshore_locs.index, - "x": onshore_locs["x"], - "y": onshore_locs["y"], - "geometry": voronoi_partition_pts( - onshore_locs.values, onshore_shape - ), - "country": country, - } - ) - ) - - if country not in offshore_shapes.index: - continue - offshore_shape = offshore_shapes[country] - offshore_locs = n.buses.loc[c_b & n.buses.substation_off, ["x", "y"]] - offshore_regions_c = gpd.GeoDataFrame( - { - "name": offshore_locs.index, - "x": offshore_locs["x"], - "y": offshore_locs["y"], - "geometry": voronoi_partition_pts(offshore_locs.values, offshore_shape), - "country": country, - } - ) - offshore_regions_c = offshore_regions_c.loc[offshore_regions_c.area > 1e-2] - offshore_regions.append(offshore_regions_c) - - shapes = pd.concat(onshore_regions, ignore_index=True) - - return onshore_regions, offshore_regions, shapes - - -def append_bus_shapes(n, shapes, type): - """ - Append shapes to the network. If shapes with the same component and type - already exist, they will be removed. - - Parameters: - n (pypsa.Network): The network to which the shapes will be appended. - shapes (geopandas.GeoDataFrame): The shapes to be appended. - **kwargs: Additional keyword arguments used in `n.madd`. 
- - Returns: - None - """ - remove = n.shapes.query("component == 'Bus' and type == @type").index - n.mremove("Shape", remove) - - offset = n.shapes.index.astype(int).max() + 1 if not n.shapes.empty else 0 - shapes = shapes.rename(lambda x: int(x) + offset) - n.madd( - "Shape", - shapes.index, - geometry=shapes.geometry, - idx=shapes.name, - component="Bus", - type=type, - ) - - -if __name__ == "__main__": - if "snakemake" not in globals(): - from _helpers import mock_snakemake - - snakemake = mock_snakemake("base_network") - configure_logging(snakemake) - set_scenario_config(snakemake) - - n = base_network_osm( - snakemake.input.eg_buses, - snakemake.input.eg_converters, - snakemake.input.eg_transformers, - snakemake.input.eg_lines, - snakemake.input.eg_links, - snakemake.input.europe_shape, - snakemake.input.country_shapes, - snakemake.input.offshore_shapes, - snakemake.config, - ) - - logger.info("Base network created using OSM.") - - onshore_regions, offshore_regions, shapes = build_bus_shapes( - n, - snakemake.input.country_shapes, - snakemake.input.offshore_shapes, - snakemake.params.countries, - ) - - shapes.to_file(snakemake.output.regions_onshore) - append_bus_shapes(n, shapes, "onshore") - - if offshore_regions: - shapes = pd.concat(offshore_regions, ignore_index=True) - shapes.to_file(snakemake.output.regions_offshore) - append_bus_shapes(n, shapes, "offshore") - else: - offshore_shapes.to_frame().to_file(snakemake.output.regions_offshore) - - n.meta = snakemake.config - n.export_to_netcdf(snakemake.output.base_network) diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py index 899337f89..b36ffd176 100644 --- a/scripts/retrieve_osm_data.py +++ b/scripts/retrieve_osm_data.py @@ -19,7 +19,7 @@ import requests from _helpers import ( configure_logging, - # set_scenario_config, + set_scenario_config, # update_config_from_wildcards, ) @@ -143,8 +143,8 @@ def retrieve_osm_data( from _helpers import mock_snakemake snakemake = 
mock_snakemake("retrieve_osm_data", country="BE") - configure_logging(snakemake) + set_scenario_config(snakemake) # Retrieve the OSM data country = snakemake.wildcards.country From 6469ff47d67a7c45a022c97ef77f3050aa2b69fd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Jun 2024 11:39:16 +0000 Subject: [PATCH 045/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- rules/build_electricity.smk | 44 +++++++++++++++++++++++++----------- scripts/base_network.py | 14 ++++++++---- scripts/retrieve_osm_data.py | 3 +-- 3 files changed, 41 insertions(+), 20 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index b51913e94..8aaf16a83 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -75,50 +75,68 @@ rule base_network: transformers=config_provider("transformers"), input: eg_buses=lambda w: ( - "data/entsoegridkit/buses.csv" if config_provider("electricity_network", "base_network")(w) == "gridkit" + "data/entsoegridkit/buses.csv" + if config_provider("electricity_network", "base_network")(w) == "gridkit" else ( - "data/osm/prebuilt/buses.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" + "data/osm/prebuilt/buses.csv" + if config_provider("electricity_network", "base_network")(w) + == "osm-prebuilt" else resources("osm/buses.csv") ) ), eg_lines=lambda w: ( - "data/entsoegridkit/lines.csv" if config_provider("electricity_network", "base_network")(w) == "gridkit" + "data/entsoegridkit/lines.csv" + if config_provider("electricity_network", "base_network")(w) == "gridkit" else ( - "data/osm/prebuilt/lines.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" + "data/osm/prebuilt/lines.csv" + if config_provider("electricity_network", "base_network")(w) + == "osm-prebuilt" else resources("osm/lines.csv") ) ), eg_links=lambda w: ( - 
"data/entsoegridkit/links.csv" if config_provider("electricity_network", "base_network")(w) == "gridkit" + "data/entsoegridkit/links.csv" + if config_provider("electricity_network", "base_network")(w) == "gridkit" else ( - "data/osm/prebuilt/links.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" + "data/osm/prebuilt/links.csv" + if config_provider("electricity_network", "base_network")(w) + == "osm-prebuilt" else resources("osm/links.csv") ) ), eg_converters=lambda w: ( - "data/entsoegridkit/converters.csv" if config_provider("electricity_network", "base_network")(w) == "gridkit" + "data/entsoegridkit/converters.csv" + if config_provider("electricity_network", "base_network")(w) == "gridkit" else ( - "data/osm/prebuilt/converters.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" + "data/osm/prebuilt/converters.csv" + if config_provider("electricity_network", "base_network")(w) + == "osm-prebuilt" else resources("osm/converters.csv") ) ), eg_transformers=lambda w: ( - "data/entsoegridkit/transformers.csv" if config_provider("electricity_network", "base_network")(w) == "gridkit" + "data/entsoegridkit/transformers.csv" + if config_provider("electricity_network", "base_network")(w) == "gridkit" else ( - "data/osm/prebuilt/transformers.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" + "data/osm/prebuilt/transformers.csv" + if config_provider("electricity_network", "base_network")(w) + == "osm-prebuilt" else resources("osm/transformers.csv") ) ), parameter_corrections=lambda w: ( - "data/parameter_corrections.yaml" if config_provider("electricity_network", "base_network")(w) == "gridkit" + "data/parameter_corrections.yaml" + if config_provider("electricity_network", "base_network")(w) == "gridkit" else [] ), links_p_nom=lambda w: ( - "data/links_p_nom.csv" if config_provider("electricity_network", "base_network")(w) == "gridkit" + "data/links_p_nom.csv" + if 
config_provider("electricity_network", "base_network")(w) == "gridkit" else [] ), links_tyndp=lambda w: ( - "data/links_tyndp.csv" if config_provider("electricity_network", "base_network")(w) == "gridkit" + "data/links_tyndp.csv" + if config_provider("electricity_network", "base_network")(w) == "gridkit" else [] ), country_shapes=resources("country_shapes.geojson"), diff --git a/scripts/base_network.py b/scripts/base_network.py index 963234a50..393b3e0d6 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -386,7 +386,7 @@ def _load_lines_from_eg(buses, eg_lines): lines["length"] /= 1e3 - lines["carrier"] = "AC" #TODO pypsa-eur check + lines["carrier"] = "AC" # TODO pypsa-eur check lines = _remove_dangling_branches(lines, buses) return lines @@ -808,7 +808,7 @@ def base_network( parameter_corrections, config, ): - + buses = _load_buses_from_eg(eg_buses, europe_shape, config["electricity"]) if config["electricity_network"].get("base_network") == "gridkit": @@ -818,7 +818,9 @@ def base_network( else: raise ValueError("base_network must be either 'gridkit' or 'osm'") - if (config["links"].get("include_tyndp") & (config["electricity_network"].get("base_network") == "gridkit")): + if config["links"].get("include_tyndp") & ( + config["electricity_network"].get("base_network") == "gridkit" + ): buses, links = _add_links_from_tyndp(buses, links, links_tyndp, europe_shape) converters = _load_converters_from_eg(buses, eg_converters) @@ -882,7 +884,9 @@ def base_network( _set_shapes(n, country_shapes, offshore_shapes) - logger.info(f"Base network created using {config['electricity_network'].get('base_network')}.") + logger.info( + f"Base network created using {config['electricity_network'].get('base_network')}." 
+ ) return n @@ -1115,4 +1119,4 @@ def append_bus_shapes(n, shapes, type): offshore_shapes.to_frame().to_file(snakemake.output.regions_offshore) n.meta = snakemake.config - n.export_to_netcdf(snakemake.output.base_network) \ No newline at end of file + n.export_to_netcdf(snakemake.output.base_network) diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py index e3f21443e..745533cff 100644 --- a/scripts/retrieve_osm_data.py +++ b/scripts/retrieve_osm_data.py @@ -17,10 +17,9 @@ import time import requests -from _helpers import ( # set_scenario_config,; update_config_from_wildcards, +from _helpers import ( # set_scenario_config,; update_config_from_wildcards,; update_config_from_wildcards, configure_logging, set_scenario_config, - # update_config_from_wildcards, ) logger = logging.getLogger(__name__) From 221b1656c7422975d7947fd4e8a7abd0d2cab8da Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Tue, 11 Jun 2024 14:40:48 +0200 Subject: [PATCH 046/100] Fixed bug in pdf export by substituting pdf export with svg. 
--- config/config.default.yaml | 34 +++++++--------------------------- rules/postprocess.smk | 6 +++--- scripts/plot_summary.py | 4 ++-- 3 files changed, 12 insertions(+), 32 deletions(-) diff --git a/config/config.default.yaml b/config/config.default.yaml index 7de26d9df..83801c611 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -75,10 +75,6 @@ enable: custom_busmap: false drop_leap_day: true -# Settings related to the high-voltage electricity grid -electricity_network: - base_network: "gridkit" # "gridkit", "osm-prebuilt" (prebuilt network from OSM data), "osm-raw" (retrieve and build network from raw OSM data, takes longer) - osm_group_tolerance_buses: 5000 # only relevant for "osm-raw" setting: [m] (default 5000) Tolerance in meters of the close buses to merge # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#co2-budget co2_budget: @@ -92,7 +88,7 @@ co2_budget: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity electricity: - voltages: [200., 220., 300., 380., 500., 750.] + voltages: [220., 300., 380., 500., 750.] 
gaslimit_enable: false gaslimit: false co2limit_enable: false @@ -286,27 +282,11 @@ conventional: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#lines lines: types: - 200.: "Al/St 240/40 2-bundle 220.0" 220.: "Al/St 240/40 2-bundle 220.0" 300.: "Al/St 240/40 3-bundle 300.0" 380.: "Al/St 240/40 4-bundle 380.0" - 400.: "Al/St 240/40 4-bundle 380.0" 500.: "Al/St 240/40 4-bundle 380.0" 750.: "Al/St 560/50 4-bundle 750.0" - dc_types: - 200.: "HVDC XLPE 1000" - 250.: "HVDC XLPE 1000" - 270.: "HVDC XLPE 1000" - 285.: "HVDC XLPE 1000" - 300.: "HVDC XLPE 1000" - 320.: "HVDC XLPE 1000" - 350.: "HVDC XLPE 1000" - 380.: "HVDC Oil filled 1400" - 400.: "HVDC XLPE 1000" - 450.: "HVDC XLPE 1000" - 515.: "HVDC XLPE 1000" - 525.: "HVDC XLPE 1000" - 600.: "HVDC XLPE 1000" s_max_pu: 0.7 s_nom_max: .inf max_extension: 20000 #MW @@ -923,11 +903,11 @@ plotting: eu_node_location: x: -5.5 y: 46. - costs_max: 1000 - costs_threshold: 1 - energy_max: 20000 - energy_min: -20000 - energy_threshold: 50. + # costs_max: 1000 + # costs_threshold: 1 + # energy_max: 20000 + # energy_min: -20000 + # energy_threshold: 50. 
nice_names: OCGT: "Open-Cycle Gas" @@ -1225,4 +1205,4 @@ plotting: load: "#dd2e23" waste CHP: '#e3d37d' waste CHP CC: '#e3d3ff' - HVC to air: 'k' + HVC to air: 'k' \ No newline at end of file diff --git a/rules/postprocess.smk b/rules/postprocess.smk index 39fd46c9d..edeff1ef4 100644 --- a/rules/postprocess.smk +++ b/rules/postprocess.smk @@ -233,9 +233,9 @@ rule plot_summary: eurostat="data/eurostat/Balances-April2023", co2="data/bundle/eea/UNFCCC_v23.csv", output: - costs=RESULTS + "graphs/costs.pdf", - energy=RESULTS + "graphs/energy.pdf", - balances=RESULTS + "graphs/balances-energy.pdf", + costs=RESULTS + "graphs/costs.svg", + energy=RESULTS + "graphs/energy.svg", + balances=RESULTS + "graphs/balances-energy.svg", threads: 2 resources: mem_mb=10000, diff --git a/scripts/plot_summary.py b/scripts/plot_summary.py index 39fbba030..d131e9378 100644 --- a/scripts/plot_summary.py +++ b/scripts/plot_summary.py @@ -353,7 +353,7 @@ def plot_balances(): frameon=False, ) - fig.savefig(snakemake.output.balances[:-10] + k + ".pdf", bbox_inches="tight") + fig.savefig(snakemake.output.balances[:-10] + k + ".svg", bbox_inches="tight") def historical_emissions(countries): @@ -563,7 +563,7 @@ def plot_carbon_budget_distribution(input_eurostat, options): ) plt.grid(axis="y") - path = snakemake.output.balances.split("balances")[0] + "carbon_budget.pdf" + path = snakemake.output.balances.split("balances")[0] + "carbon_budget.svg" plt.savefig(path, bbox_inches="tight") From 9bcdbc0ed86748ec100851b38f2e11ad2c43ee86 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 11 Jun 2024 12:41:47 +0000 Subject: [PATCH 047/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- config/config.default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.default.yaml b/config/config.default.yaml index 83801c611..40fe65ed3 100644 --- 
a/config/config.default.yaml +++ b/config/config.default.yaml @@ -1205,4 +1205,4 @@ plotting: load: "#dd2e23" waste CHP: '#e3d37d' waste CHP CC: '#e3d3ff' - HVC to air: 'k' \ No newline at end of file + HVC to air: 'k' From afb9e52b530e1ab55e92fba5838599deaa548197 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Tue, 11 Jun 2024 15:35:11 +0200 Subject: [PATCH 048/100] Bug-fix Snakefile --- Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index 726b8d2ba..4a3d3eebc 100644 --- a/Snakefile +++ b/Snakefile @@ -73,7 +73,7 @@ if config["foresight"] == "perfect": rule all: input: - expand(RESULTS + "graphs/costs.pdf", run=config["run"]["name"]), + expand(RESULTS + "graphs/costs.svg", run=config["run"]["name"]), default_target: True From 3d2169a39c880f196a879a0a40596aca96a372b1 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Mon, 17 Jun 2024 18:28:39 +0200 Subject: [PATCH 049/100] dropped not needed columns from build_osm_network. --- scripts/build_osm_network.py | 1 - scripts/build_shapes.py | 1 - 2 files changed, 2 deletions(-) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 0372692f9..b052cd173 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -801,7 +801,6 @@ def build_network( "bus1", "voltage", "circuits", - "tag_frequency", "length", "underground", "under_construction", diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index 85afdaea4..402b6e6ed 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -108,7 +108,6 @@ def _simplify_polys(polys, minarea=0.1, tolerance=0.01, filterremote=True): polys = mainpoly return polys.simplify(tolerance=tolerance) - def countries(naturalearth, country_list): if "RS" in country_list: country_list.append("KV") From f2bd9bf32855c626e6e732ebece00f89a877cbb9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Jun 2024 16:33:29 
+0000 Subject: [PATCH 050/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/build_shapes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index 402b6e6ed..85afdaea4 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -108,6 +108,7 @@ def _simplify_polys(polys, minarea=0.1, tolerance=0.01, filterremote=True): polys = mainpoly return polys.simplify(tolerance=tolerance) + def countries(naturalearth, country_list): if "RS" in country_list: country_list.append("KV") From acda4c41c6da369204679eca26d643e1de5bf2ad Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Tue, 18 Jun 2024 14:28:32 +0200 Subject: [PATCH 051/100] Updated build_shapes, config.default and clean_osm_data. --- config/config.default.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/config/config.default.yaml b/config/config.default.yaml index d8dcff2ed..cc8ef4b63 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -86,6 +86,10 @@ co2_budget: 2045: 0.032 2050: 0.000 +electricity_network: + base_network: gridkit # Options: gridkit, osm-prebuilt, osm-raw (built from scratch using OSM data, takes longer) + osm_group_tolerance_buses: 5000 # unit: meters, default 5000 - Buses within this distance are grouped together + # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity electricity: voltages: [220., 300., 380., 500., 750.] From 777f7eafd247934ffa05af09502503038541fd78 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Tue, 18 Jun 2024 17:14:17 +0200 Subject: [PATCH 052/100] pre-commit changes. 
--- scripts/build_shapes.py | 4 ++-- scripts/clean_osm_data.py | 25 ++++++++++++++++++++----- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index 85afdaea4..74d138800 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -91,7 +91,7 @@ def _get_country(target, **keys): return np.nan -def _simplify_polys(polys, minarea=0.1, tolerance=0.01, filterremote=True): +def _simplify_polys(polys, minarea=0.1, filterremote=True): if isinstance(polys, MultiPolygon): polys = sorted(polys.geoms, key=attrgetter("area"), reverse=True) mainpoly = polys[0] @@ -106,7 +106,7 @@ def _simplify_polys(polys, minarea=0.1, tolerance=0.01, filterremote=True): ) else: polys = mainpoly - return polys.simplify(tolerance=tolerance) + return polys def countries(naturalearth, country_list): diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index d64bcec97..917071954 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -78,6 +78,19 @@ def _create_polygon(row): return polygon +def find_closest_polygon(gdf, point): + # Compute the distance to each polygon + gdf["distance"] = gdf["geometry"].apply(lambda geom: point.distance(geom)) + + # Find the index of the closest polygon + closest_idx = gdf["distance"].idxmin() + + # Get the closest polygon's row + closest_polygon = gdf.loc[closest_idx] + + return closest_idx + + def _extended_linemerge(lines): """ Merges a list of LineStrings into a single LineString by finding the @@ -549,15 +562,17 @@ def _add_line_endings_to_substations( axis=1, ) gdf_union = gpd.GeoDataFrame(geometry=gdf_union["geometry"], crs=crs) - utm = gdf_union.estimate_utm_crs(datum_name="WGS 84") - gdf_union = gdf_union.to_crs(utm) - gdf_union = gdf_union.buffer(2500) # meters - gdf_union = gdf_union.to_crs(crs) - gdf_union = gpd.GeoDataFrame(geometry=gdf_union, crs=crs) gdf_buses_tofix = gpd.GeoDataFrame( buses[bool_multiple_countries], geometry="geometry", crs=crs ) 
joined = gpd.sjoin(gdf_buses_tofix, gdf_union, how="left", predicate="within") + + # For all remaining rows where the country/index_right column is NaN, + # find the closest polygon index + joined.loc[joined["index_right"].isna(), "index_right"] = joined.loc[ + joined["index_right"].isna(), "geometry" + ].apply(lambda x: find_closest_polygon(gdf_union, x)) + joined.reset_index(inplace=True) joined = joined.drop_duplicates(subset="bus_id") joined.set_index("bus_id", inplace=True) From 58e5129ed48fc1bc40ec82804c4cac5b72fc8b65 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Wed, 19 Jun 2024 13:40:19 +0200 Subject: [PATCH 053/100] test --- scripts/build_osm_network.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index b052cd173..5ce5a804a 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur and PyPSA-Earth Authors # -# SPDX-License-Identifier: AGPL-3.0-or-later - -# -*- coding: utf-8 -*- +# SPDX-License-Identifier: MIT import logging import os @@ -773,7 +771,7 @@ def build_network( # Rename "substation" in buses["symbol"] to "Substation" buses["symbol"] = buses["symbol"].replace({"substation": "Substation"}) - # Drop unncessary index column and set respective element ids as index + # Drop unnecessary index column and set respective element ids as index lines.set_index("line_id", inplace=True) links.set_index("link_id", inplace=True) converters.set_index("converter_id", inplace=True) From 917c52c8597c9a7a4b2f1126bd46e02a9729e281 Mon Sep 17 00:00:00 2001 From: Bobby Xiong Date: Wed, 19 Jun 2024 23:33:13 +0200 Subject: [PATCH 054/100] Added initial prepare_osm_network_release.py script --- Snakefile | 1 + rules/build_electricity.smk | 30 +++++------ rules/development.smk | 20 +++++++ 
scripts/base_network.py | 30 +++++++++-- scripts/build_osm_network.py | 2 +- scripts/prepare_osm_network_release.py | 74 ++++++++++++++++++++++++++ 6 files changed, 138 insertions(+), 19 deletions(-) create mode 100644 rules/development.smk create mode 100644 scripts/prepare_osm_network_release.py diff --git a/Snakefile b/Snakefile index 4a3d3eebc..56a704dec 100644 --- a/Snakefile +++ b/Snakefile @@ -54,6 +54,7 @@ include: "rules/build_sector.smk" include: "rules/solve_electricity.smk" include: "rules/postprocess.smk" include: "rules/validate.smk" +include: "rules/development.smk" if config["foresight"] == "overnight": diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 8aaf16a83..2b5437029 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -81,7 +81,7 @@ rule base_network: "data/osm/prebuilt/buses.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" - else resources("osm/buses.csv") + else resources("osm/pre-base/buses.csv") ) ), eg_lines=lambda w: ( @@ -91,7 +91,7 @@ rule base_network: "data/osm/prebuilt/lines.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" - else resources("osm/lines.csv") + else resources("osm/pre-base/lines.csv") ) ), eg_links=lambda w: ( @@ -101,7 +101,7 @@ rule base_network: "data/osm/prebuilt/links.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" - else resources("osm/links.csv") + else resources("osm/pre-base/links.csv") ) ), eg_converters=lambda w: ( @@ -111,7 +111,7 @@ rule base_network: "data/osm/prebuilt/converters.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" - else resources("osm/converters.csv") + else resources("osm/pre-base/converters.csv") ) ), eg_transformers=lambda w: ( @@ -121,7 +121,7 @@ rule base_network: "data/osm/prebuilt/transformers.csv" if config_provider("electricity_network", "base_network")(w) == "osm-prebuilt" - else 
resources("osm/transformers.csv") + else resources("osm/pre-base/transformers.csv") ) ), parameter_corrections=lambda w: ( @@ -711,16 +711,16 @@ rule build_osm_network: links=resources("osm/clean/links.geojson"), country_shapes=resources("country_shapes.geojson"), output: - lines=resources("osm/lines.csv"), - links=resources("osm/links.csv"), - converters=resources("osm/converters.csv"), - transformers=resources("osm/transformers.csv"), - substations=resources("osm/buses.csv"), - lines_geojson=resources("osm/lines.geojson"), - links_geojson=resources("osm/links.geojson"), - converters_geojson=resources("osm/converters.geojson"), - transformers_geojson=resources("osm/transformers.geojson"), - substations_geojson=resources("osm/buses.geojson"), + lines=resources("osm/pre-base/lines.csv"), + links=resources("osm/pre-base/links.csv"), + converters=resources("osm/pre-base/converters.csv"), + transformers=resources("osm/pre-base/transformers.csv"), + substations=resources("osm/pre-base/buses.csv"), + lines_geojson=resources("osm/pre-base/lines.geojson"), + links_geojson=resources("osm/pre-base/links.geojson"), + converters_geojson=resources("osm/pre-base/converters.geojson"), + transformers_geojson=resources("osm/pre-base/transformers.geojson"), + substations_geojson=resources("osm/pre-base/buses.geojson"), log: logs("build_osm_network.log"), benchmark: diff --git a/rules/development.smk b/rules/development.smk new file mode 100644 index 000000000..2316428a6 --- /dev/null +++ b/rules/development.smk @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: : 2023-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT + + +rule prepare_osm_network_release: + input: + base_network=resources("networks/base.nc"), + output: + lines=resources("osm/release/lines.csv"), + links=resources("osm/release/links.csv"), + converters=resources("osm/release/converters.csv"), + transformers=resources("osm/release/transformers.csv"), + buses=resources("osm/release/buses.csv"), + log: + 
logs("prepare_osm_network_release.log"), + benchmark: + benchmarks("prepare_osm_network_release") + script: + "../scripts/prepare_osm_network_release.py" diff --git a/scripts/base_network.py b/scripts/base_network.py index 393b3e0d6..d97ecc219 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -211,6 +211,22 @@ def _load_converters_from_eg(buses, eg_converters): return converters +def _load_converters_from_osm(buses, eg_converters): + converters = pd.read_csv( + eg_converters, + quotechar="'", + true_values=["t"], + false_values=["f"], + dtype=dict(converter_id="str", bus0="str", bus1="str"), + ).set_index("converter_id") + + converters = _remove_dangling_branches(converters, buses) + + converters["carrier"] = "" + + return converters + + def _load_links_from_eg(buses, eg_links): links = pd.read_csv( eg_links, @@ -823,12 +839,20 @@ def base_network( ): buses, links = _add_links_from_tyndp(buses, links, links_tyndp, europe_shape) - converters = _load_converters_from_eg(buses, eg_converters) + if config["electricity_network"].get("base_network") == "gridkit": + converters = _load_converters_from_eg(buses, eg_converters) + elif "osm" in config["electricity_network"].get("base_network"): + converters = _load_converters_from_osm(buses, eg_converters) + transformers = _load_transformers_from_eg(buses, eg_transformers) lines = _load_lines_from_eg(buses, eg_lines) - if config["lines"].get("reconnect_crimea", True) and "UA" in config["countries"]: + if ( + (config["electricity_network"].get("base_network") == "gridkit") + & (config["lines"].get("reconnect_crimea", True)) + & ("UA" in config["countries"]) + ): lines = _reconnect_crimea(lines) if config["electricity_network"].get("base_network") == "gridkit": @@ -908,7 +932,7 @@ def _get_linetypes_config(line_types, voltages): ------- Dictionary of linetypes for selected voltages. 
""" - # get voltages value that are not availabile in the line types + # get voltages value that are not available in the line types vnoms_diff = set(voltages).symmetric_difference(set(line_types.keys())) if vnoms_diff: logger.warning( diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 5ce5a804a..39b20aec7 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -227,7 +227,7 @@ def set_lines_ids(lines, buses, distance_crs): ascii=False, unit=" lines", total=lines.shape[0], - desc="Set line bus ids ", + desc="Set line/link bus ids ", ) # initialization diff --git a/scripts/prepare_osm_network_release.py b/scripts/prepare_osm_network_release.py new file mode 100644 index 000000000..1e877e233 --- /dev/null +++ b/scripts/prepare_osm_network_release.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT + +import logging + +import pypsa +from _helpers import configure_logging, set_scenario_config + +logger = logging.getLogger(__name__) + + +def prepare_osm_network_release(network): + return None + + +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("prepare_osm_network_release") + + configure_logging(snakemake) + set_scenario_config(snakemake) + + network = pypsa.Network(snakemake.input.base_network) + + buses_columns = [ + "bus_id", + "voltage", + "dc", + "symbol", + "under_construction", + "x", + "y", + "country", + "geometry", + ] + + lines_columns = [ + "line_id", + "bus0", + "bus1", + "voltage", + "circuits", + "length", + "underground", + "under_construction", + "geometry", + ] + + links_columns = [ + "link_id", + "bus0", + "bus1", + "voltage", + "p_nom", + "length", + "underground", + "under_construction", + "geometry", + ] + + transformers_columns = [ + "transformer_id", + "bus0", + "bus1", + "voltage_bus0", + "voltage_bus1", + "geometry", + ] + 
+ converters_columns = [] From 2a3ad5c3f9ebc1e1c5ca10ddc6a87d4578d3b0ca Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Fri, 21 Jun 2024 14:59:22 +0200 Subject: [PATCH 055/100] Finalised prepare_osm_network_release script to build clean and stable OSM base_network input files. --- scripts/_helpers.py | 4 +- scripts/prepare_osm_network_release.py | 81 ++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index ff304f5fa..a3b77c1c0 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -406,13 +406,13 @@ def mock_snakemake( from snakemake.api import Workflow from snakemake.common import SNAKEFILE_CHOICES from snakemake.script import Snakemake - from snakemake.settings import ( + from snakemake.settings.types import ( + ConfigSettings, DAGSettings, ResourceSettings, StorageSettings, WorkflowSettings, ) - from snakemake.settings.types import ConfigSettings script_dir = Path(__file__).parent.resolve() if root_dir is None: diff --git a/scripts/prepare_osm_network_release.py b/scripts/prepare_osm_network_release.py index 1e877e233..70c6f6982 100644 --- a/scripts/prepare_osm_network_release.py +++ b/scripts/prepare_osm_network_release.py @@ -4,14 +4,46 @@ # SPDX-License-Identifier: MIT import logging +import os +import pandas as pd import pypsa from _helpers import configure_logging, set_scenario_config logger = logging.getLogger(__name__) -def prepare_osm_network_release(network): +def export_clean_csv(df, columns, output_file): + """ + Export a cleaned DataFrame to a CSV file. + + Args: + df (pandas.DataFrame): The DataFrame to be exported. + columns (list): A list of column names to include in the exported CSV file. + output_file (str): The path to the output CSV file. 
+ + Returns: + None + """ + rename_dict = { + "Bus": "bus_id", + "Line": "line_id", + "Link": "link_id", + "Transformer": "transformer_id", + "v_nom": "voltage", + "num_parallel": "circuits", + } + + if "converter_id" in columns: + rename_dict["Link"] = "converter_id" + + # Create the directory if it doesn't exist + os.makedirs(os.path.dirname(output_file), exist_ok=True) + + df.reset_index().rename(columns=rename_dict).loc[:, columns].replace( + {True: "t", False: "f"} + ).to_csv(output_file, index=False, quotechar="'") + return None @@ -24,8 +56,6 @@ def prepare_osm_network_release(network): configure_logging(snakemake) set_scenario_config(snakemake) - network = pypsa.Network(snakemake.input.base_network) - buses_columns = [ "bus_id", "voltage", @@ -71,4 +101,47 @@ def prepare_osm_network_release(network): "geometry", ] - converters_columns = [] + converters_columns = [ + "converter_id", + "bus0", + "bus1", + "geometry", + ] + + network = pypsa.Network(snakemake.input.base_network) + + # Export to clean csv for release + logger.info(f"Exporting {len(network.buses)} buses to %s", snakemake.output.buses) + export_clean_csv(network.buses, buses_columns, snakemake.output.buses) + + logger.info( + f"Exporting {len(network.transformers)} transformers to %s", + snakemake.output.transformers, + ) + export_clean_csv( + network.transformers, transformers_columns, snakemake.output.transformers + ) + + logger.info(f"Exporting {len(network.lines)} lines to %s", snakemake.output.lines) + export_clean_csv(network.lines, lines_columns, snakemake.output.lines) + + # Boolean that specifies if link element is a converter + is_converter = network.links.index.str.startswith("conv") == True + + logger.info( + f"Exporting {len(network.links[~is_converter])} links to %s", + snakemake.output.links, + ) + export_clean_csv( + network.links[~is_converter], links_columns, snakemake.output.links + ) + + logger.info( + f"Exporting {len(network.links[is_converter])} converters to %s", + 
snakemake.output.converters, + ) + export_clean_csv( + network.links[is_converter], converters_columns, snakemake.output.converters + ) + + logger.info("Export of OSM network for release complete.") From 38ee3da62add1121a69eb7f7c2a03504e2906f4b Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Fri, 21 Jun 2024 14:59:57 +0200 Subject: [PATCH 056/100] Added new rules/development.smk --- rules/development.smk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rules/development.smk b/rules/development.smk index 2316428a6..24c46a159 100644 --- a/rules/development.smk +++ b/rules/development.smk @@ -7,11 +7,11 @@ rule prepare_osm_network_release: input: base_network=resources("networks/base.nc"), output: + buses=resources("osm/release/buses.csv"), + converters=resources("osm/release/converters.csv"), lines=resources("osm/release/lines.csv"), links=resources("osm/release/links.csv"), - converters=resources("osm/release/converters.csv"), transformers=resources("osm/release/transformers.csv"), - buses=resources("osm/release/buses.csv"), log: logs("prepare_osm_network_release.log"), benchmark: From 0d9ba2ecec55c4001c15c68d5f8dd623dcbaaf7f Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 25 Jun 2024 20:36:34 +0200 Subject: [PATCH 057/100] Updated clean_osm_data to add substation_centroid to linestrings --- scripts/clean_osm_data.py | 239 ++++++++++++++++++++++---------------- 1 file changed, 136 insertions(+), 103 deletions(-) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 917071954..dc1e79915 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -34,8 +34,8 @@ import numpy as np import pandas as pd from _helpers import configure_logging, set_scenario_config -from shapely.geometry import LineString, MultiLineString, Polygon -from shapely.ops import linemerge +from shapely.geometry import LineString, MultiLineString, Point, Polygon +from shapely.ops import linemerge, unary_union logger = logging.getLogger(__name__) 
@@ -78,7 +78,17 @@ def _create_polygon(row): return polygon -def find_closest_polygon(gdf, point): +def _find_closest_polygon(gdf, point): + """ + Find the closest polygon in a GeoDataFrame to a given point. + + Parameters: + gdf (GeoDataFrame): A GeoDataFrame containing polygons. + point (Point): A Point object representing the target point. + + Returns: + int: The index of the closest polygon in the GeoDataFrame. + """ # Compute the distance to each polygon gdf["distance"] = gdf["geometry"].apply(lambda geom: point.distance(geom)) @@ -91,83 +101,6 @@ def find_closest_polygon(gdf, point): return closest_idx -def _extended_linemerge(lines): - """ - Merges a list of LineStrings into a single LineString by finding the - closest pair of points between all pairs of LineStrings. - - Parameters: - lines (list): A list of LineStrings to be merged. - - Returns: - merged_line (LineString): The merged LineString. - - Raises: - TypeError: If the input is not a list of LineStrings. - """ - # Ensure we have a list of LineStrings - if not isinstance(lines, list): - raise TypeError("Input should be a list of LineStrings") - if any(not isinstance(line, LineString) for line in lines): - raise TypeError("All elements in the list should be LineStrings") - - if len(lines) == 1: - return lines[0] - - merged_linestring = linemerge(lines) - - if isinstance(merged_linestring, LineString): - return merged_linestring - else: - - def find_closest_points(line1, line2): - min_dist = np.inf - closest_points = (None, None) - for point1 in line1.coords: - for point2 in line2.coords: - dist = np.linalg.norm(np.array(point1) - np.array(point2)) - if dist < min_dist: - min_dist = dist - closest_points = (point1, point2) - return closest_points - - def merge_lines(lines): - while len(lines) > 1: - min_distance = np.inf - closest_pair = (None, None) - pair_indices = (None, None) - for i in range(len(lines)): - for j in range(i + 1, len(lines)): - point1, point2 = find_closest_points(lines[i], 
lines[j]) - distance = np.linalg.norm(np.array(point1) - np.array(point2)) - if distance < min_distance: - min_distance = distance - closest_pair = (point1, point2) - pair_indices = (i, j) - - connecting_line = LineString([closest_pair[0], closest_pair[1]]) - combined_line = linemerge( - MultiLineString( - [ - lines[pair_indices[0]], - lines[pair_indices[1]], - connecting_line, - ] - ) - ) - - new_lines = [ - line for k, line in enumerate(lines) if k not in pair_indices - ] - new_lines.append(combined_line) - lines = new_lines - - return lines[0] - - lines = list(merged_linestring.geoms) - return merge_lines(lines) - - def _clean_voltage(column): """ Function to clean the raw voltage column: manual fixing and drop nan values @@ -571,7 +504,7 @@ def _add_line_endings_to_substations( # find the closest polygon index joined.loc[joined["index_right"].isna(), "index_right"] = joined.loc[ joined["index_right"].isna(), "geometry" - ].apply(lambda x: find_closest_polygon(gdf_union, x)) + ].apply(lambda x: _find_closest_polygon(gdf_union, x)) joined.reset_index(inplace=True) joined = joined.drop_duplicates(subset="bus_id") @@ -1128,6 +1061,27 @@ def _clean_lines(df_lines, list_voltages): def _create_substations_geometry(df_substations): + """ + Creates geometries. + + Parameters: + df_substations (DataFrame): The input DataFrame containing the substations + data. + + Returns: + df_substations (DataFrame): A new DataFrame with the + polygons ["polygon"] of the substations geometries. + """ + logger.info("Creating substations geometry.") + df_substations = df_substations.copy() + + # Create centroids from geometries and keep the original polygons + df_substations.loc[:, "polygon"] = df_substations["geometry"] + + return df_substations + + +def _create_substations_centroid(df_substations): """ Creates centroids from geometries and keeps the original polygons. 
@@ -1142,11 +1096,10 @@ def _create_substations_geometry(df_substations): logger.info("Creating substations geometry.") df_substations = df_substations.copy() - # Create centroids from geometries and keep the original polygons - df_substations.loc[:, "polygon"] = df_substations["geometry"] - df_substations.loc[:, "geometry"] = df_substations["geometry"].apply( + df_substations.loc[:, "geometry"] = df_substations["polygon"].apply( lambda x: x.centroid ) + df_substations.loc[:, "lon"] = df_substations["geometry"].apply(lambda x: x.x) df_substations.loc[:, "lat"] = df_substations["geometry"].apply(lambda x: x.y) @@ -1180,6 +1133,34 @@ def _create_lines_geometry(df_lines): return df_lines +def _add_bus_centroid_to_line(linestring, point): + """ + Adds the centroid of a substation to a linestring by extending the + linestring with a new segment. + + Parameters: + linestring (LineString): The original linestring to extend. + point (Point): The centroid of the bus. + + Returns: + merged (LineString): The extended linestring with the new segment. + """ + start = linestring.coords[0] + end = linestring.coords[-1] + + dist_to_start = point.distance(Point(start)) + dist_to_end = point.distance(Point(end)) + + if dist_to_start < dist_to_end: + new_segment = LineString([point.coords[0], start]) + else: + new_segment = LineString([point.coords[0], end]) + + merged = linemerge([linestring, new_segment]) + + return merged + + def _finalise_substations(df_substations): """ Finalises the substations column types. @@ -1533,9 +1514,65 @@ def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon): return gdf_lines -# Define a function to check if a polygon intersects any line in the lines GeoDataFrame -def intersects_any_line(polygon, lines): - return lines.intersects(polygon).any() +def _merge_touching_polygons(df): + """ + Merge touching polygons in a GeoDataFrame. 
+ + Parameters: + - df: pandas.DataFrame or geopandas.GeoDataFrame + The input DataFrame containing the polygons to be merged. + + Returns: + - gdf: geopandas.GeoDataFrame + The GeoDataFrame with merged polygons. + """ + + gdf = gpd.GeoDataFrame(df, geometry="polygon", crs=crs) + combined_polygons = unary_union(gdf.geometry) + if combined_polygons.geom_type == "MultiPolygon": + gdf_combined = gpd.GeoDataFrame( + geometry=[poly for poly in combined_polygons.geoms], crs=crs + ) + else: + gdf_combined = gpd.GeoDataFrame(geometry=[combined_polygons], crs=crs) + + gdf.reset_index(drop=True, inplace=True) + + for i, combined_geom in gdf_combined.iterrows(): + mask = gdf.intersects(combined_geom.geometry) + gdf.loc[mask, "polygon_merged"] = combined_geom.geometry + + gdf.drop(columns=["polygon"], inplace=True) + gdf.rename(columns={"polygon_merged": "polygon"}, inplace=True) + + return gdf + + +def _extend_lines_to_substations(gdf_lines, gdf_substations_polygon): + """ + Extends the lines in the given GeoDataFrame `gdf_lines` to the centroid of + the nearest substations represented by the polygons in the + `gdf_substations_polygon` GeoDataFrame. + + Parameters: + gdf_lines (GeoDataFrame): A GeoDataFrame containing the lines to be extended. + gdf_substations_polygon (GeoDataFrame): A GeoDataFrame containing the polygons representing substations. + + Returns: + GeoDataFrame: A new GeoDataFrame with the lines extended to the substations. + """ + gdf = gpd.sjoin( + gdf_lines, + gdf_substations_polygon.drop_duplicates(subset="polygon", inplace=False), + how="left", + lsuffix="line", + rsuffix="bus", + predicate="intersects", + ).drop(columns="index_bus") + + # Rest of the code... 
+ + return gdf_lines if __name__ == "__main__": @@ -1571,6 +1608,12 @@ def intersects_any_line(polygon, lines): df_substations["frequency"] = _clean_frequency(df_substations["frequency"]) df_substations = _clean_substations(df_substations, list_voltages) df_substations = _create_substations_geometry(df_substations) + # Merge touching polygons + df_substations = _merge_touching_polygons(df_substations) + # df_substations["polygon"] = df_substations["polygon"].apply( + # lambda x: x.convex_hull + # ) + df_substations = _create_substations_centroid(df_substations) df_substations = _finalise_substations(df_substations) # Create polygon GeoDataFrame to remove lines within substations @@ -1580,6 +1623,8 @@ def intersects_any_line(polygon, lines): crs=crs, ) + gdf_substations_polygon["geometry"] = gdf_substations_polygon.polygon.copy() + logger.info("---") logger.info("LINES AND CABLES") path_lines = { @@ -1618,6 +1663,7 @@ def intersects_any_line(polygon, lines): # Create GeoDataFrame gdf_lines = gpd.GeoDataFrame(df_lines, geometry="geometry", crs=crs) gdf_lines = _remove_lines_within_substations(gdf_lines, gdf_substations_polygon) + gdf_lines = _extend_lines_to_substations(gdf_lines, gdf_substations_polygon) logger.info("---") logger.info("HVDC LINKS") @@ -1661,21 +1707,6 @@ def intersects_any_line(polygon, lines): prefix="link-end", ) - # # Drop df_substations.dc == True and tag_source != "link-end" - # df_substations = df_substations[ - # ~((df_substations.dc == True) & (df_substations.tag_source != "link-end")) - # ] - - # # Apply the function to each polygon in the substations GeoDataFrame - # gdf_substations_polygon["connected"] = False - # gdf_substations_polygon['connected'] = gdf_substations_polygon['polygon'].apply(intersects_any_line, lines=gdf_lines) - - # list_buses_disconnected = gdf_substations_polygon[gdf_substations_polygon['connected'] == False]['bus_id'].tolist() - - # # Drop islanded substations - # gdf_substations_polygon = 
gdf_substations_polygon[~gdf_substations_polygon['bus_id'].isin(list_buses_disconnected)] - # df_substations = df_substations[~df_substations['bus_id'].isin(list_buses_disconnected)] - # Drop polygons and create GDF gdf_substations = gpd.GeoDataFrame( df_substations.drop(columns=["polygon"]), geometry="geometry", crs=crs @@ -1693,7 +1724,9 @@ def intersects_any_line(polygon, lines): logger.info( f"Exporting clean substations with polygon shapes to {output_substations_polygon}" ) - gdf_substations_polygon.to_file(output_substations_polygon, driver="GeoJSON") + gdf_substations_polygon.drop(columns=["geometry"]).to_file( + output_substations_polygon, driver="GeoJSON" + ) logger.info(f"Exporting clean substations to {output_substations}") gdf_substations.to_file(output_substations, driver="GeoJSON") logger.info(f"Exporting clean lines to {output_lines}") From c818cd999920822583f38754b894f427871da2d2 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 25 Jun 2024 20:38:17 +0200 Subject: [PATCH 058/100] Updated clean_osm_data to add substation_centroid to linestrings --- scripts/clean_osm_data.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index dc1e79915..0a8617f59 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -1133,34 +1133,6 @@ def _create_lines_geometry(df_lines): return df_lines -def _add_bus_centroid_to_line(linestring, point): - """ - Adds the centroid of a substation to a linestring by extending the - linestring with a new segment. - - Parameters: - linestring (LineString): The original linestring to extend. - point (Point): The centroid of the bus. - - Returns: - merged (LineString): The extended linestring with the new segment. 
- """ - start = linestring.coords[0] - end = linestring.coords[-1] - - dist_to_start = point.distance(Point(start)) - dist_to_end = point.distance(Point(end)) - - if dist_to_start < dist_to_end: - new_segment = LineString([point.coords[0], start]) - else: - new_segment = LineString([point.coords[0], end]) - - merged = linemerge([linestring, new_segment]) - - return merged - - def _finalise_substations(df_substations): """ Finalises the substations column types. From af3b4155f45817a5b843a3d3e54a1c9c63283e29 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 25 Jun 2024 21:06:25 +0200 Subject: [PATCH 059/100] Updated clean_osm_data to add substation_centroid to linestrings --- scripts/clean_osm_data.py | 121 +++++++++++++++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 1 deletion(-) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 0a8617f59..e40f510fb 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -1133,6 +1133,34 @@ def _create_lines_geometry(df_lines): return df_lines +def _add_bus_centroid_to_line(linestring, point): + """ + Adds the centroid of a substation to a linestring by extending the + linestring with a new segment. + + Parameters: + linestring (LineString): The original linestring to extend. + point (Point): The centroid of the bus. + + Returns: + merged (LineString): The extended linestring with the new segment. + """ + start = linestring.coords[0] + end = linestring.coords[-1] + + dist_to_start = point.distance(Point(start)) + dist_to_end = point.distance(Point(end)) + + if dist_to_start < dist_to_end: + new_segment = LineString([point.coords[0], start]) + else: + new_segment = LineString([point.coords[0], end]) + + merged = linemerge([linestring, new_segment]) + + return merged + + def _finalise_substations(df_substations): """ Finalises the substations column types. 
@@ -1520,6 +1548,63 @@ def _merge_touching_polygons(df): return gdf +def _add_endpoints_to_line(linestring, polygon_dict): + """ + Adds endpoints to a line by removing any overlapping areas with polygons. + + Parameters: + linestring (LineString): The original line to add endpoints to. + polygon_dict (dict): A dictionary of polygons, where the keys are bus IDs and the values are the corresponding polygons. + + Returns: + LineString: The modified line with added endpoints. + """ + if not polygon_dict: + return linestring + + polygon_centroids = { + bus_id: polygon.centroid for bus_id, polygon in polygon_dict.items() + } + polygon_unary = polygons = unary_union(list(polygon_dict.values())) + + # difference with polygon + linestring_new = linestring.difference(polygon_unary) + + if type(linestring_new) == MultiLineString: + # keep the longest line in the multilinestring + linestring_new = max(linestring_new.geoms, key=lambda x: x.length) + + for p in polygon_centroids: + linestring_new = _add_bus_centroid_to_line(linestring_new, polygon_centroids[p]) + + return linestring_new + + +def _get_polygons_at_endpoints(linestring, polygon_dict): + """ + Get the polygons that contain the endpoints of a given linestring. + + Parameters: + linestring (LineString): The linestring for which to find the polygons at the endpoints. + polygon_dict (dict): A dictionary containing polygons as values, with bus_ids as keys. + + Returns: + dict: A dictionary containing bus_ids as keys and polygons as values, where the polygons contain the endpoints of the linestring. 
+ """ + # Get the endpoints of the linestring + start_point = Point(linestring.coords[0]) + end_point = Point(linestring.coords[-1]) + + # Initialize dictionary to store bus_ids as keys and polygons as values + bus_id_polygon_dict = {} + + for bus_id, polygon in polygon_dict.items(): + if polygon.contains(start_point) or polygon.contains(end_point): + bus_id_polygon_dict[bus_id] = polygon + + return bus_id_polygon_dict + + def _extend_lines_to_substations(gdf_lines, gdf_substations_polygon): """ Extends the lines in the given GeoDataFrame `gdf_lines` to the centroid of @@ -1542,7 +1627,41 @@ def _extend_lines_to_substations(gdf_lines, gdf_substations_polygon): predicate="intersects", ).drop(columns="index_bus") - # Rest of the code... + # Group by 'line_id' and create a dictionary mapping 'bus_id' to 'geometry_bus', excluding the grouping columns + gdf = ( + gdf.groupby("line_id") + .apply( + lambda x: x[["bus_id", "geometry_bus"]] + .dropna() + .set_index("bus_id")["geometry_bus"] + .to_dict(), + include_groups=False, + ) + .reset_index() + ) + gdf.columns = ["line_id", "bus_dict"] + + gdf["intersects_bus"] = gdf.apply(lambda row: len(row["bus_dict"]) > 0, axis=1) + + gdf.loc[:, "line_geometry"] = gdf.join( + gdf_lines.set_index("line_id")["geometry"], on="line_id" + )["geometry"] + + # Polygons at the endpoints of the linestring + gdf["bus_endpoints"] = gdf.apply( + lambda row: _get_polygons_at_endpoints(row["line_geometry"], row["bus_dict"]), + axis=1, + ) + + gdf.loc[:, "line_geometry_new"] = gdf.apply( + lambda row: _add_endpoints_to_line(row["line_geometry"], row["bus_endpoints"]), + axis=1, + ) + + gdf.set_index("line_id", inplace=True) + gdf_lines.set_index("line_id", inplace=True) + + gdf_lines.loc[:, "geometry"] = gdf["line_geometry_new"] return gdf_lines From 45bccfcd994cdea6a45c5cd95a9771bf9d24af48 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 25 Jun 2024 21:16:21 +0200 Subject: [PATCH 060/100] Updated clean_osm_data to add substation_centroid to 
linestrings --- scripts/clean_osm_data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index e40f510fb..d17c5b172 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -1561,7 +1561,6 @@ def _add_endpoints_to_line(linestring, polygon_dict): """ if not polygon_dict: return linestring - polygon_centroids = { bus_id: polygon.centroid for bus_id, polygon in polygon_dict.items() } From 0db49f2772514ca35c038238d63b6483856e9fc5 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 25 Jun 2024 22:55:31 +0200 Subject: [PATCH 061/100] Added osm-prebuilt functionality and zenodo sandbox repository. --- rules/retrieve.smk | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/rules/retrieve.smk b/rules/retrieve.smk index 10ad9684a..33f36d0ec 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -319,3 +319,34 @@ if config["enable"]["retrieve"]: "../envs/retrieve.yaml" script: "../scripts/retrieve_monthly_fuel_prices.py" + + +if config["enable"]["retrieve"] and ( + config["electricity_network"]["base_network"] == "osm-prebuilt" +): + + rule retrieve_osm_prebuilt: + input: + buses=storage("https://sandbox.zenodo.org/records/74826/files/buses.csv"), + converters=storage( + "https://sandbox.zenodo.org/records/74826/files/converters.csv" + ), + lines=storage("https://sandbox.zenodo.org/records/74826/files/lines.csv"), + links=storage("https://sandbox.zenodo.org/records/74826/files/links.csv"), + transformers=storage( + "https://sandbox.zenodo.org/records/74826/files/transformers.csv" + ), + output: + buses="data/osm/prebuilt/buses.csv", + converters="data/osm/prebuilt/converters.csv", + lines="data/osm/prebuilt/lines.csv", + links="data/osm/prebuilt/links.csv", + transformers="data/osm/prebuilt/transformers.csv", + log: + "logs/retrieve_osm_prebuilt.log", + resources: + mem_mb=500, + retries: 2 + run: + for key in input.keys(): + move(input[key], output[key]) From 
4f4f7c6783989c7d24874a5090d6ecca049ef8a2 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Wed, 10 Jul 2024 16:44:06 +0200 Subject: [PATCH 062/100] Updated clean_osm_data to geopandas v.1.01 --- scripts/clean_osm_data.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index d17c5b172..babd06243 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -491,19 +491,21 @@ def _add_line_endings_to_substations( gdf_offshore, how="outer", left_index=True, right_index=True ) gdf_union["geometry"] = gdf_union.apply( - lambda row: gpd.GeoSeries([row["geometry_x"], row["geometry_y"]]).unary_union, + lambda row: gpd.GeoSeries([row["geometry_x"], row["geometry_y"]]).union_all(), axis=1, ) gdf_union = gpd.GeoDataFrame(geometry=gdf_union["geometry"], crs=crs) gdf_buses_tofix = gpd.GeoDataFrame( buses[bool_multiple_countries], geometry="geometry", crs=crs ) - joined = gpd.sjoin(gdf_buses_tofix, gdf_union, how="left", predicate="within") + joined = gpd.sjoin( + gdf_buses_tofix, gdf_union.reset_index(), how="left", predicate="within" + ) # For all remaining rows where the country/index_right column is NaN, find # find the closest polygon index - joined.loc[joined["index_right"].isna(), "index_right"] = joined.loc[ - joined["index_right"].isna(), "geometry" + joined.loc[joined["name"].isna(), "name"] = joined.loc[ + joined["name"].isna(), "geometry" ].apply(lambda x: _find_closest_polygon(gdf_union, x)) joined.reset_index(inplace=True) @@ -511,7 +513,7 @@ def _add_line_endings_to_substations( joined.set_index("bus_id", inplace=True) buses.loc[bool_multiple_countries, "country"] = joined.loc[ - bool_multiple_countries, "index_right" + bool_multiple_countries, "name" ] return buses.reset_index() From 0b5a1feca99cd4f640e01850e11d1c78f3654dfc Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Wed, 10 Jul 2024 23:18:00 +0200 Subject: [PATCH 063/100] Made base_network and build_osm_network 
function more robust for empty links. --- scripts/build_osm_network.py | 12 +++++++++--- scripts/clean_osm_data.py | 13 ++++++++----- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 39b20aec7..586b2807b 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -773,14 +773,16 @@ def build_network( # Drop unnecessary index column and set respective element ids as index lines.set_index("line_id", inplace=True) - links.set_index("link_id", inplace=True) + if not links.empty: + links.set_index("link_id", inplace=True) converters.set_index("converter_id", inplace=True) transformers.set_index("transformer_id", inplace=True) buses.set_index("bus_id", inplace=True) # Convert voltages from V to kV lines["voltage"] = lines["voltage"] / 1000 - links["voltage"] = links["voltage"] / 1000 + if not links.empty: + links["voltage"] = links["voltage"] / 1000 transformers["voltage_bus0"], transformers["voltage_bus1"] = ( transformers["voltage_bus0"] / 1000, transformers["voltage_bus1"] / 1000, @@ -817,7 +819,8 @@ def build_network( "geometry", ] - links = links[cols_links] + if not links.empty: + links = links[cols_links] cols_transformers = [ "bus0", @@ -830,6 +833,9 @@ def build_network( transformers = transformers[cols_transformers] + if links.empty: # create empty dataframe with cols_links as columns + links = pd.DataFrame(columns=["link_id"] + cols_links) + to_csv_nafix(lines, outputs["lines"], quotechar="'") # Generate CSV to_csv_nafix(links, outputs["links"], quotechar="'") # Generate CSV to_csv_nafix(converters, outputs["converters"], quotechar="'") # Generate CSV diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index babd06243..9992dba6d 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -792,6 +792,9 @@ def _filter_by_voltage(df, min_voltage=200000): - list_voltages (list): A list of unique voltage values above min_voltage. 
The type of the list elements is string. """ + if df.empty: + return df, [] + logger.info( f"Filtering dataframe by voltage. Only keeping rows above and including {min_voltage} V." ) @@ -1307,11 +1310,11 @@ def _finalise_links(df_links): ) # Initiate new columns for subsequent build_osm_network step - df_links.loc[:, "bus0"] = None - df_links.loc[:, "bus1"] = None - df_links.loc[:, "length"] = None - df_links.loc[:, "under_construction"] = False - df_links.loc[:, "dc"] = True + df_links["bus0"] = None + df_links["bus1"] = None + df_links["length"] = None + df_links["under_construction"] = False + df_links["dc"] = True # Only include needed columns df_links = df_links[ From e0ae315044b47684f0266c1ce9d7fe0c0760aa51 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Wed, 10 Jul 2024 23:31:21 +0200 Subject: [PATCH 064/100] Made base_network and build_osm_network function more robust for empty links. --- scripts/build_osm_network.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 586b2807b..b96c43321 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -835,6 +835,7 @@ def build_network( if links.empty: # create empty dataframe with cols_links as columns links = pd.DataFrame(columns=["link_id"] + cols_links) + links.set_index("link_id", inplace=True) to_csv_nafix(lines, outputs["lines"], quotechar="'") # Generate CSV to_csv_nafix(links, outputs["links"], quotechar="'") # Generate CSV From 299029909b45b3aeb99505862332393fcf1263ba Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Fri, 12 Jul 2024 13:52:44 +0200 Subject: [PATCH 065/100] Bug fix in base_network. Voltage level null is now kept (relevant e.g. 
for Corsica) --- scripts/base_network.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/scripts/base_network.py b/scripts/base_network.py index 923c1a148..7e229591e 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -151,31 +151,23 @@ def _load_buses_from_eg(eg_buses, europe_shape, config_elec): if "station_id" in buses.columns: buses.drop("station_id", axis=1, inplace=True) - # buses["carrier"] = buses.pop("dc").map({True: "DC", False: "AC"}) + buses["carrier"] = buses.pop("dc").map({True: "DC", False: "AC"}) buses["under_construction"] = buses.under_construction.where( lambda s: s.notnull(), False ).astype(bool) - - # remove all buses outside of all countries including exclusive economic zones (offshore) europe_shape = gpd.read_file(europe_shape).loc[0, "geometry"] - # TODO pypsa-eur: Temporary fix: Convex hull, this is important when nodes are between countries - # europe_shape = europe_shape.convex_hull - europe_shape_prepped = shapely.prepared.prep(europe_shape) buses_in_europe_b = buses[["x", "y"]].apply( lambda p: europe_shape_prepped.contains(Point(p)), axis=1 ) - # TODO pypsa-eur: Find a long-term solution - # buses_with_v_nom_to_keep_b = ( - # buses.v_nom.isin(config_elec["voltages"]) | buses.v_nom.isnull() - # ) - v_nom_min = min(config_elec["voltages"]) v_nom_max = max(config_elec["voltages"]) # Quick fix: - buses_with_v_nom_to_keep_b = (v_nom_min <= buses.v_nom) & (buses.v_nom <= v_nom_max) + buses_with_v_nom_to_keep_b = (v_nom_min <= buses.v_nom) & ( + buses.v_nom <= v_nom_max + ) | buses.v_nom.isnull() logger.info(f"Removing buses outside of range {v_nom_min} - {v_nom_max} V") return pd.DataFrame(buses.loc[buses_in_europe_b & buses_with_v_nom_to_keep_b]) From c0e6e43a24212c8cd1bcd649b3640f89e273a5f1 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Mon, 15 Jul 2024 10:08:55 +0200 Subject: [PATCH 066/100] Merge with changes in upstream PR 1146. Fixing UA and MD.
--- rules/build_electricity.smk | 3 +- rules/retrieve.smk | 17 +++ scripts/_helpers.py | 21 ++++ scripts/add_electricity.py | 114 ++++++++++++++++-- .../determine_availability_matrix_MD_UA.py | 2 + scripts/retrieve_gdp_uamd.py | 34 ++++++ 6 files changed, 183 insertions(+), 8 deletions(-) create mode 100644 scripts/retrieve_gdp_uamd.py diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index cdcbd6662..1896ce9d5 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -468,7 +468,8 @@ rule add_electricity: ), load=resources("electricity_demand.csv"), nuts3_shapes=resources("nuts3_shapes.geojson"), - ua_md_gdp="data/GDP_PPP_30arcsec_v3_mapped_default.csv", + gdp_file="data/GDP_per_capita_PPP_1990_2015_v2.nc", + ppp_file="data/ppp_2013_1km_Aggregated.tif", output: resources("networks/elec.nc"), log: diff --git a/rules/retrieve.smk b/rules/retrieve.smk index 33f36d0ec..0590d766a 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -321,6 +321,23 @@ if config["enable"]["retrieve"]: "../scripts/retrieve_monthly_fuel_prices.py" +if config["enable"]["retrieve"] and any(c in ["UA", "MD"] for c in config["countries"]): + + rule retrieve_gdp_uamd: + output: + gdp="data/GDP_per_capita_PPP_1990_2015_v2.nc", + ppp="data/ppp_2013_1km_Aggregated.tif", + log: + "logs/retrieve_gdp_uamd.log", + resources: + mem_mb=5000, + retries: 2 + conda: + "../envs/retrieve.yaml" + script: + "../scripts/retrieve_gdp_uamd.py" + + if config["enable"]["retrieve"] and ( config["electricity_network"]["base_network"] == "osm-prebuilt" ): diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 0bf92e396..c40945ad1 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -370,6 +370,27 @@ def update_to(b=1, bsize=1, tsize=None): urllib.request.urlretrieve(url, file, reporthook=update_to) +def retrieve_file(url, destination): + """ + Downloads a file from a specified URL to a local destination using custom + headers that mimic a Firefox browser 
request. + + This function is useful for overcoming 'HTTP Error 403: Forbidden' + issues, which often occur when the server requires more typical + browser-like headers for access. + """ + + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" + } + response = requests.get(url, headers=headers) + response.raise_for_status() + + with open(destination, "wb") as f: + f.write(response.content) + logger.info(f"File downloaded and saved as {destination}") + + def mock_snakemake( rulename, root_dir=None, diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index f90d6c851..ae7167562 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -91,6 +91,7 @@ import pandas as pd import powerplantmatching as pm import pypsa +import rasterio import scipy.sparse as sparse import xarray as xr from _helpers import ( @@ -100,6 +101,8 @@ update_p_nom_max, ) from powerplantmatching.export import map_country_bus +from rasterio.mask import mask +from shapely.geometry import box from shapely.prepared import prep idx = pd.IndexSlice @@ -294,13 +297,13 @@ def shapes_to_shapes(orig, dest): return transfer -def attach_load(n, regions, load, nuts3_shapes, ua_md_gdp, countries, scaling=1.0): +def attach_load( + n, regions, load, nuts3_shapes, gdp_file, ppp_file, countries, scaling=1.0 +): substation_lv_i = n.buses.index[n.buses["substation_lv"]] - regions = gpd.read_file(regions).set_index("name").reindex(substation_lv_i) + gdf_regions = gpd.read_file(regions).set_index("name").reindex(substation_lv_i) opsd_load = pd.read_csv(load, index_col=0, parse_dates=True).filter(items=countries) - ua_md_gdp = pd.read_csv(ua_md_gdp, dtype={"name": "str"}).set_index("name") - logger.info(f"Load data scaled by factor {scaling}.") opsd_load *= scaling @@ -325,7 +328,12 @@ def upsample(cntry, group): factors = normed(0.6 * normed(gdp_n) + 0.4 * normed(pop_n)) if cntry in ["UA", "MD"]: # 
overwrite factor because nuts3 provides no data for UA+MD - factors = normed(ua_md_gdp.loc[group.index, "GDP_PPP"].squeeze()) + gdp_ua_md, ppp_ua_md = calculate_ua_md_gdp_ppp( + gdf_regions[gdf_regions.country == cntry], gdp_file, ppp_file + ) + factors = normed( + 0.6 * normed(gdp_ua_md["gdp"]) + 0.4 * normed(ppp_ua_md["ppp"]) + ) return pd.DataFrame( factors.values * load.values[:, np.newaxis], index=load.index, @@ -335,7 +343,7 @@ def upsample(cntry, group): load = pd.concat( [ upsample(cntry, group) - for cntry, group in regions.geometry.groupby(regions.country) + for cntry, group in gdf_regions.geometry.groupby(gdf_regions.country) ], axis=1, ) @@ -791,6 +799,97 @@ def attach_line_rating( n.lines_t.s_max_pu *= s_max_pu +def calculate_ua_md_gdp_ppp(gdf_regions, gdp_file, ppp_file): + """ + Calculate the GDP and PPP values for the regions within the bounding box of + UA and MD. + + Parameters: + gdf_regions (GeoDataFrame): A GeoDataFrame containing the regions. + gdp_file (str): The file path to the dataset containing the GDP values for UA and MD. + ppp_file (str): The file path to the dataset containing the PPP values for UA and MD. + + Returns: + tuple: A tuple containing two GeoDataFrames: + - gdp_ua_md: A GeoDataFrame with the aggregated GDP values mapped to each bus. + - ppp_ua_md: A GeoDataFrame with the aggregated PPP values mapped to each bus. 
+ """ + # Create a bounding box for UA, MD from region shape, including a buffer of 10000 metres + box_ua_md = ( + gpd.GeoDataFrame(geometry=[box(*gdf_regions.total_bounds)], crs=gdf_regions.crs) + .to_crs(epsg=3857) + .buffer(10000) + .to_crs(gdf_regions.crs) + ) + + # GDP + with xr.open_dataset(gdp_file) as src_gdp_ua_md: + src_gdp_ua_md = src_gdp_ua_md.where( + (src_gdp_ua_md.longitude >= box_ua_md.bounds.minx.min()) + & (src_gdp_ua_md.longitude <= box_ua_md.bounds.maxx.max()) + & (src_gdp_ua_md.latitude >= box_ua_md.bounds.miny.min()) + & (src_gdp_ua_md.latitude <= box_ua_md.bounds.maxy.max()), + drop=True, + ) + gdp_ua_md = src_gdp_ua_md.to_dataframe().reset_index() + + gdp_ua_md = gdp_ua_md.rename(columns={"GDP_per_capita_PPP": "gdp"}) + gdp_ua_md = gdp_ua_md[gdp_ua_md.time == gdp_ua_md.time.max()] + gdp_ua_md = gpd.GeoDataFrame( + gdp_ua_md, + geometry=gpd.points_from_xy(gdp_ua_md.longitude, gdp_ua_md.latitude), + crs="EPSG:4326", + ) + + gdp_ua_md = gpd.sjoin( + gdp_ua_md, gdf_regions.reset_index(), predicate="within" + ).drop(columns=["index_right"]) + gdp_ua_md = ( + gdp_ua_md.groupby(["Bus", "country", "time"]) + .agg({"gdp": "sum"}) + .reset_index(level=["country", "time"]) + ) + + # PPP + with rasterio.open(ppp_file) as src_ppp_ua_md: + # Mask the raster with the bounding box + out_image, out_transform = mask(src_ppp_ua_md, box_ua_md, crop=True) + out_image, + out_meta = src_ppp_ua_md.meta.copy() + out_meta.update( + { + "driver": "GTiff", + "height": out_image.shape[1], + "width": out_image.shape[2], + "transform": out_transform, + } + ) + + masked_data = out_image[0] # Use the first band (rest is empty) + row_indices, col_indices = np.where(masked_data != src_ppp_ua_md.nodata) + values = masked_data[row_indices, col_indices] + + # Affine transformation from pixel coordinates to geo coordinates + x_coords, y_coords = rasterio.transform.xy(out_transform, row_indices, col_indices) + ppp_ua_md = pd.DataFrame({"x": x_coords, "y": y_coords, "ppp": values}) 
+ + ppp_ua_md = gpd.GeoDataFrame( + ppp_ua_md, + geometry=gpd.points_from_xy(ppp_ua_md.x, ppp_ua_md.y), + crs=src_ppp_ua_md.crs, + ) + + ppp_ua_md = gpd.sjoin(ppp_ua_md, gdf_regions.reset_index(), predicate="within") + ppp_ua_md = ( + ppp_ua_md.groupby(["Bus", "country"]) + .agg({"ppp": "sum"}) + .reset_index() + .set_index("Bus") + ) + + return gdp_ua_md, ppp_ua_md + + if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake @@ -821,7 +920,8 @@ def attach_line_rating( snakemake.input.regions, snakemake.input.load, snakemake.input.nuts3_shapes, - snakemake.input.ua_md_gdp, + snakemake.input.gdp_file, + snakemake.input.ppp_file, params.countries, params.scaling_factor, ) diff --git a/scripts/determine_availability_matrix_MD_UA.py b/scripts/determine_availability_matrix_MD_UA.py index 678ef025d..f19919e39 100644 --- a/scripts/determine_availability_matrix_MD_UA.py +++ b/scripts/determine_availability_matrix_MD_UA.py @@ -49,6 +49,8 @@ def get_wdpa_layer_name(wdpa_fn, layer_substring): gpd.read_file(snakemake.input.regions).set_index("name").rename_axis("bus") ) buses = regions.index + buses = regions.loc[regions["country"].isin(["UA", "MD"])].index.values + regions = regions.loc[buses] excluder = atlite.ExclusionContainer(crs=3035, res=100) diff --git a/scripts/retrieve_gdp_uamd.py b/scripts/retrieve_gdp_uamd.py new file mode 100644 index 000000000..3da3be4f0 --- /dev/null +++ b/scripts/retrieve_gdp_uamd.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2023-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT +""" +Retrieve GDP and PPP data for Ukraine and Moldova.
+""" + +import logging +from pathlib import Path + +from _helpers import configure_logging, retrieve_file, set_scenario_config + +logger = logging.getLogger(__name__) + + +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("retrieve_gdp_uamd") + configure_logging(snakemake) + set_scenario_config(snakemake) + +dict_urls = dict( + { + "gdp": "https://datadryad.org/stash/downloads/file_stream/241947", + "ppp": "https://github.com/ecohealthalliance/sars_cov_risk/releases/download/v2.0.1/ppp_2020_1km_Aggregated.tif", + } +) + +# Download and validate each dataset +for key, path in snakemake.output.items(): + retrieve_file(dict_urls[key], path) From 84bf9ec66b9065b8cee876097f0f60d5e033ba6a Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Mon, 15 Jul 2024 10:46:10 +0200 Subject: [PATCH 067/100] Updated Zenodo and fixed prepare_osm_network_release --- rules/retrieve.smk | 10 +++++----- scripts/prepare_osm_network_release.py | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/rules/retrieve.smk b/rules/retrieve.smk index 0590d766a..6cf1efc7c 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -344,14 +344,14 @@ if config["enable"]["retrieve"] and ( rule retrieve_osm_prebuilt: input: - buses=storage("https://sandbox.zenodo.org/records/74826/files/buses.csv"), + buses=storage("https://sandbox.zenodo.org/records/87601/files/buses.csv"), converters=storage( - "https://sandbox.zenodo.org/records/74826/files/converters.csv" + "https://sandbox.zenodo.org/records/87601/files/converters.csv" ), - lines=storage("https://sandbox.zenodo.org/records/74826/files/lines.csv"), - links=storage("https://sandbox.zenodo.org/records/74826/files/links.csv"), + lines=storage("https://sandbox.zenodo.org/records/87601/files/lines.csv"), + links=storage("https://sandbox.zenodo.org/records/87601/files/links.csv"), transformers=storage( - 
"https://sandbox.zenodo.org/records/74826/files/transformers.csv" + "https://sandbox.zenodo.org/records/87601/files/transformers.csv" ), output: buses="data/osm/prebuilt/buses.csv", diff --git a/scripts/prepare_osm_network_release.py b/scripts/prepare_osm_network_release.py index 70c6f6982..70f1f3be1 100644 --- a/scripts/prepare_osm_network_release.py +++ b/scripts/prepare_osm_network_release.py @@ -59,7 +59,6 @@ def export_clean_csv(df, columns, output_file): buses_columns = [ "bus_id", "voltage", - "dc", "symbol", "under_construction", "x", From 7d96dd2d70924089d09fe5802f7855217d69ef0c Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Mon, 15 Jul 2024 11:02:40 +0200 Subject: [PATCH 068/100] Updated osm network release. --- rules/retrieve.smk | 10 +++++----- scripts/prepare_osm_network_release.py | 3 +++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/rules/retrieve.smk b/rules/retrieve.smk index 6cf1efc7c..91d72d8ac 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -344,14 +344,14 @@ if config["enable"]["retrieve"] and ( rule retrieve_osm_prebuilt: input: - buses=storage("https://sandbox.zenodo.org/records/87601/files/buses.csv"), + buses=storage("https://sandbox.zenodo.org/records/87612/files/buses.csv"), converters=storage( - "https://sandbox.zenodo.org/records/87601/files/converters.csv" + "https://sandbox.zenodo.org/records/87612/files/converters.csv" ), - lines=storage("https://sandbox.zenodo.org/records/87601/files/lines.csv"), - links=storage("https://sandbox.zenodo.org/records/87601/files/links.csv"), + lines=storage("https://sandbox.zenodo.org/records/87612/files/lines.csv"), + links=storage("https://sandbox.zenodo.org/records/87612/files/links.csv"), transformers=storage( - "https://sandbox.zenodo.org/records/87601/files/transformers.csv" + "https://sandbox.zenodo.org/records/87612/files/transformers.csv" ), output: buses="data/osm/prebuilt/buses.csv", diff --git a/scripts/prepare_osm_network_release.py 
b/scripts/prepare_osm_network_release.py index 70f1f3be1..41ebd5c83 100644 --- a/scripts/prepare_osm_network_release.py +++ b/scripts/prepare_osm_network_release.py @@ -59,6 +59,7 @@ def export_clean_csv(df, columns, output_file): buses_columns = [ "bus_id", "voltage", + "dc", "symbol", "under_construction", "x", @@ -109,6 +110,8 @@ def export_clean_csv(df, columns, output_file): network = pypsa.Network(snakemake.input.base_network) + network.buses["dc"] = network.buses.pop("carrier").map({"DC": "t", "AC": "f"}) + # Export to clean csv for release logger.info(f"Exporting {len(network.buses)} buses to %s", snakemake.output.buses) export_clean_csv(network.buses, buses_columns, snakemake.output.buses) From 867de1edaf39d46f6e84de703a4e142fc40a5d19 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Mon, 15 Jul 2024 20:32:19 +0200 Subject: [PATCH 069/100] Updated prepare osm network release. --- rules/retrieve.smk | 10 +++++----- scripts/prepare_osm_network_release.py | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/rules/retrieve.smk b/rules/retrieve.smk index 91d72d8ac..33f1ccc6d 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -344,14 +344,14 @@ if config["enable"]["retrieve"] and ( rule retrieve_osm_prebuilt: input: - buses=storage("https://sandbox.zenodo.org/records/87612/files/buses.csv"), + buses=storage("https://sandbox.zenodo.org/records/87679/files/buses.csv"), converters=storage( - "https://sandbox.zenodo.org/records/87612/files/converters.csv" + "https://sandbox.zenodo.org/records/87679/files/converters.csv" ), - lines=storage("https://sandbox.zenodo.org/records/87612/files/lines.csv"), - links=storage("https://sandbox.zenodo.org/records/87612/files/links.csv"), + lines=storage("https://sandbox.zenodo.org/records/87679/files/lines.csv"), + links=storage("https://sandbox.zenodo.org/records/87679/files/links.csv"), transformers=storage( - "https://sandbox.zenodo.org/records/87612/files/transformers.csv" + 
"https://sandbox.zenodo.org/records/87679/files/transformers.csv" ), output: buses="data/osm/prebuilt/buses.csv", diff --git a/scripts/prepare_osm_network_release.py b/scripts/prepare_osm_network_release.py index 41ebd5c83..b33009e0b 100644 --- a/scripts/prepare_osm_network_release.py +++ b/scripts/prepare_osm_network_release.py @@ -111,6 +111,8 @@ def export_clean_csv(df, columns, output_file): network = pypsa.Network(snakemake.input.base_network) network.buses["dc"] = network.buses.pop("carrier").map({"DC": "t", "AC": "f"}) + network.lines.length = network.lines.length * 1e3 + network.links.length = network.links.length * 1e3 # Export to clean csv for release logger.info(f"Exporting {len(network.buses)} buses to %s", snakemake.output.buses) From 41dff328bb3880c99e723dc3e1fbac4d192961d5 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 16 Jul 2024 10:04:21 +0200 Subject: [PATCH 070/100] Updated MD, UA scripts. --- rules/build_electricity.smk | 211 ++++-------------- rules/retrieve.smk | 33 +-- scripts/build_gdp_ppp_non_nuts3.py | 151 +++++++++++++ .../determine_availability_matrix_MD_UA.py | 3 +- 4 files changed, 194 insertions(+), 204 deletions(-) create mode 100644 scripts/build_gdp_ppp_non_nuts3.py diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 1896ce9d5..4e71d9f19 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -74,71 +74,14 @@ rule base_network: links=config_provider("links"), transformers=config_provider("transformers"), input: - eg_buses=lambda w: ( - "data/entsoegridkit/buses.csv" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else ( - "data/osm/prebuilt/buses.csv" - if config_provider("electricity_network", "base_network")(w) - == "osm-prebuilt" - else resources("osm/pre-base/buses.csv") - ) - ), - eg_lines=lambda w: ( - "data/entsoegridkit/lines.csv" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else ( - 
"data/osm/prebuilt/lines.csv" - if config_provider("electricity_network", "base_network")(w) - == "osm-prebuilt" - else resources("osm/pre-base/lines.csv") - ) - ), - eg_links=lambda w: ( - "data/entsoegridkit/links.csv" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else ( - "data/osm/prebuilt/links.csv" - if config_provider("electricity_network", "base_network")(w) - == "osm-prebuilt" - else resources("osm/pre-base/links.csv") - ) - ), - eg_converters=lambda w: ( - "data/entsoegridkit/converters.csv" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else ( - "data/osm/prebuilt/converters.csv" - if config_provider("electricity_network", "base_network")(w) - == "osm-prebuilt" - else resources("osm/pre-base/converters.csv") - ) - ), - eg_transformers=lambda w: ( - "data/entsoegridkit/transformers.csv" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else ( - "data/osm/prebuilt/transformers.csv" - if config_provider("electricity_network", "base_network")(w) - == "osm-prebuilt" - else resources("osm/pre-base/transformers.csv") - ) - ), - parameter_corrections=lambda w: ( - "data/parameter_corrections.yaml" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else [] - ), - links_p_nom=lambda w: ( - "data/links_p_nom.csv" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else [] - ), - links_tyndp=lambda w: ( - "data/links_tyndp.csv" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else [] - ), + eg_buses="data/entsoegridkit/buses.csv", + eg_lines="data/entsoegridkit/lines.csv", + eg_links="data/entsoegridkit/links.csv", + eg_converters="data/entsoegridkit/converters.csv", + eg_transformers="data/entsoegridkit/transformers.csv", + parameter_corrections="data/parameter_corrections.yaml", + links_p_nom="data/links_p_nom.csv", + links_tyndp="data/links_tyndp.csv", 
country_shapes=resources("country_shapes.geojson"), offshore_shapes=resources("offshore_shapes.geojson"), europe_shape=resources("europe_shape.geojson"), @@ -336,7 +279,7 @@ rule build_renewable_profiles: benchmarks("build_renewable_profiles_{technology}") threads: config["atlite"].get("nprocesses", 4) resources: - mem_mb=config["atlite"].get("nprocesses", 4) * 10000, + mem_mb=config["atlite"].get("nprocesses", 4) * 5000, wildcard_constraints: technology="(?!hydro).*", # Any technology other than hydro conda: @@ -432,6 +375,35 @@ def input_conventional(w): } +# Optional input when having Ukraine (UA) or Moldova (MD) in the countries list +def input_gdp_ppp_non_nuts3(w): + countries = set(config_provider("countries")(w)) + if {"UA", "MD"}.intersection(countries): + return {"gdp_ppp_non_nuts3": resources("gdp_ppp_non_nuts3.geojson")} + return {} + + +rule build_gdp_ppp_non_nuts3: + params: + countries=config_provider("countries"), + input: + base_network=resources("networks/base.nc"), + regions=resources("regions_onshore.geojson"), + gdp_non_nuts3="data/GDP_per_capita_PPP_1990_2015_v2.nc", + ppp_non_nuts3="data/ppp_2013_1km_Aggregated.tif", + output: + resources("gdp_ppp_non_nuts3.geojson"), + log: + logs("build_gdp_ppp_non_nuts3.log"), + threads: 1 + resources: + mem_mb=1500, + conda: + "../envs/environment.yaml" + script: + "../scripts/build_gdp_ppp_non_nuts3.py" + + rule add_electricity: params: length_factor=config_provider("lines", "length_factor"), @@ -447,6 +419,7 @@ rule add_electricity: input: unpack(input_profile_tech), unpack(input_conventional), + unpack(input_gdp_ppp_non_nuts3), base_network=resources("networks/base.nc"), line_rating=lambda w: ( resources("networks/line_rating.nc") @@ -468,8 +441,6 @@ rule add_electricity: ), load=resources("electricity_demand.csv"), nuts3_shapes=resources("nuts3_shapes.geojson"), - gdp_file="data/GDP_per_capita_PPP_1990_2015_v2.nc", - ppp_file="data/ppp_2013_1km_Aggregated.tif", output: resources("networks/elec.nc"), 
log: @@ -515,7 +486,7 @@ rule simplify_network: benchmarks("simplify_network/elec_s{simpl}") threads: 1 resources: - mem_mb=40000, + mem_mb=12000, conda: "../envs/environment.yaml" script: @@ -562,7 +533,7 @@ rule cluster_network: benchmarks("cluster_network/elec_s{simpl}_{clusters}") threads: 1 resources: - mem_mb=40000, + mem_mb=10000, conda: "../envs/environment.yaml" script: @@ -628,103 +599,3 @@ rule prepare_network: "../envs/environment.yaml" script: "../scripts/prepare_network.py" - - -rule retrieve_osm_data: - output: - cables_way="data/osm/raw/{country}/cables_way.json", - lines_way="data/osm/raw/{country}/lines_way.json", - links_relation="data/osm/raw/{country}/links_relation.json", - substations_way="data/osm/raw/{country}/substations_way.json", - substations_relation="data/osm/raw/{country}/substations_relation.json", - log: - "logs/retrieve_osm_data_{country}.log", - resources: - cores=2, - threads=1, - script: - "../scripts/retrieve_osm_data.py" - - -rule retrieve_osm_data_all: - input: - expand( - "data/osm/raw/{country}/cables_way.json", - country=config_provider("countries"), - ), - expand( - "data/osm/raw/{country}/lines_way.json", - country=config_provider("countries"), - ), - expand( - "data/osm/raw/{country}/links_relation.json", - country=config_provider("countries"), - ), - expand( - "data/osm/raw/{country}/substations_way.json", - country=config_provider("countries"), - ), - expand( - "data/osm/raw/{country}/substations_relation.json", - country=config_provider("countries"), - ), - - -rule clean_osm_data: - input: - cables_way=expand( - "data/osm/raw/{country}/cables_way.json", - country=config_provider("countries"), - ), - lines_way=expand( - "data/osm/raw/{country}/lines_way.json", - country=config_provider("countries"), - ), - links_relation=expand( - "data/osm/raw/{country}/links_relation.json", - country=config_provider("countries"), - ), - substations_way=expand( - "data/osm/raw/{country}/substations_way.json", - 
country=config_provider("countries"), - ), - substations_relation=expand( - "data/osm/raw/{country}/substations_relation.json", - country=config_provider("countries"), - ), - offshore_shapes=resources("offshore_shapes.geojson"), - country_shapes=resources("country_shapes.geojson"), - output: - substations=resources("osm/clean/substations.geojson"), - substations_polygon=resources("osm/clean/substations_polygon.geojson"), - lines=resources("osm/clean/lines.geojson"), - links=resources("osm/clean/links.geojson"), - log: - logs("clean_osm_data.log"), - script: - "../scripts/clean_osm_data.py" - - -rule build_osm_network: - input: - substations=resources("osm/clean/substations.geojson"), - lines=resources("osm/clean/lines.geojson"), - links=resources("osm/clean/links.geojson"), - country_shapes=resources("country_shapes.geojson"), - output: - lines=resources("osm/pre-base/lines.csv"), - links=resources("osm/pre-base/links.csv"), - converters=resources("osm/pre-base/converters.csv"), - transformers=resources("osm/pre-base/transformers.csv"), - substations=resources("osm/pre-base/buses.csv"), - lines_geojson=resources("osm/pre-base/lines.geojson"), - links_geojson=resources("osm/pre-base/links.geojson"), - converters_geojson=resources("osm/pre-base/converters.geojson"), - transformers_geojson=resources("osm/pre-base/transformers.geojson"), - substations_geojson=resources("osm/pre-base/buses.geojson"), - log: - logs("build_osm_network.log"), - benchmark: - benchmarks("build_osm_network") - script: - "../scripts/build_osm_network.py" diff --git a/rules/retrieve.smk b/rules/retrieve.smk index 33f1ccc6d..2f9fe21df 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -321,7 +321,7 @@ if config["enable"]["retrieve"]: "../scripts/retrieve_monthly_fuel_prices.py" -if config["enable"]["retrieve"] and any(c in ["UA", "MD"] for c in config["countries"]): +if config["enable"]["retrieve"] and {"UA", "MD"}.intersection(config["countries"]): rule retrieve_gdp_uamd: output: @@ 
-336,34 +336,3 @@ if config["enable"]["retrieve"] and any(c in ["UA", "MD"] for c in config["count "../envs/retrieve.yaml" script: "../scripts/retrieve_gdp_uamd.py" - - -if config["enable"]["retrieve"] and ( - config["electricity_network"]["base_network"] == "osm-prebuilt" -): - - rule retrieve_osm_prebuilt: - input: - buses=storage("https://sandbox.zenodo.org/records/87679/files/buses.csv"), - converters=storage( - "https://sandbox.zenodo.org/records/87679/files/converters.csv" - ), - lines=storage("https://sandbox.zenodo.org/records/87679/files/lines.csv"), - links=storage("https://sandbox.zenodo.org/records/87679/files/links.csv"), - transformers=storage( - "https://sandbox.zenodo.org/records/87679/files/transformers.csv" - ), - output: - buses="data/osm/prebuilt/buses.csv", - converters="data/osm/prebuilt/converters.csv", - lines="data/osm/prebuilt/lines.csv", - links="data/osm/prebuilt/links.csv", - transformers="data/osm/prebuilt/transformers.csv", - log: - "logs/retrieve_osm_prebuilt.log", - resources: - mem_mb=500, - retries: 2 - run: - for key in input.keys(): - move(input[key], output[key]) diff --git a/scripts/build_gdp_ppp_non_nuts3.py b/scripts/build_gdp_ppp_non_nuts3.py new file mode 100644 index 000000000..80dd1bb32 --- /dev/null +++ b/scripts/build_gdp_ppp_non_nuts3.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2017-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT +""" + +""" + +import logging + +import geopandas as gpd +import numpy as np +import pandas as pd +import pypsa +import rasterio +import xarray as xr +from _helpers import configure_logging, set_scenario_config +from rasterio.mask import mask +from shapely.geometry import box + +logger = logging.getLogger(__name__) + + +def calc_gdp_ppp(country, regions, gdp_non_nuts3, ppp_non_nuts3): + """ + Calculate the GDP and PPP values for non NUTS3 regions. + + Parameters: + country (str): The two-letter country code of the non-NUTS3 region. 
+ regions (GeoDataFrame): A GeoDataFrame containing the regions. + gdp_non_nuts3 (str): The file path to the dataset containing the GDP values + for non NUTS3 countries (e.g. MD, UA) + ppp_non_nuts3 (str): The file path to the dataset containing the PPP values + for non NUTS3 countries (e.g. MD, UA) + + Returns: + tuple: A tuple containing two GeoDataFrames: + - gdp: A GeoDataFrame with the aggregated GDP values mapped to each bus. + - ppp: A GeoDataFrame with the aggregated PPP values mapped to each bus. + """ + regions = regions.drop(columns=["x", "y"]) + regions = regions[regions.country == country] + # Create a bounding box for UA, MD from region shape, including a buffer of 10000 metres + bounding_box = ( + gpd.GeoDataFrame(geometry=[box(*regions.total_bounds)], crs=regions.crs) + .to_crs(epsg=3857) + .buffer(10000) + .to_crs(regions.crs) + ) + + # GDP + logger.info(f"Mapping GDP values to non-NUTS3 region: {regions.country.unique()}") + with xr.open_dataset(gdp_non_nuts3) as src_gdp: + src_gdp = src_gdp.where( + (src_gdp.longitude >= bounding_box.bounds.minx.min()) + & (src_gdp.longitude <= bounding_box.bounds.maxx.max()) + & (src_gdp.latitude >= bounding_box.bounds.miny.min()) + & (src_gdp.latitude <= bounding_box.bounds.maxy.max()), + drop=True, + ) + gdp = src_gdp.to_dataframe().reset_index() + gdp = gdp.rename(columns={"GDP_per_capita_PPP": "gdp"}) + gdp = gdp[gdp.time == gdp.time.max()] + gdp = gpd.GeoDataFrame( + gdp, + geometry=gpd.points_from_xy(gdp.longitude, gdp.latitude), + crs="EPSG:4326", + ) + gdp = gpd.sjoin(gdp, regions, predicate="within") + gdp = ( + gdp.groupby(["Bus", "country"]) + .agg({"gdp": "sum"}) + .reset_index(level=["country"]) + ) + + # PPP + logger.info(f"Mapping PPP values to non-NUTS3 region: {regions.country.unique()}") + with rasterio.open(ppp_non_nuts3) as src_ppp: + # Mask the raster with the bounding box + out_image, out_transform = mask(src_ppp, bounding_box, crop=True) + out_image, + out_meta = src_ppp.meta.copy() + 
out_meta.update( + { + "driver": "GTiff", + "height": out_image.shape[1], + "width": out_image.shape[2], + "transform": out_transform, + } + ) + masked_data = out_image[0] # Use the first band (rest is empty) + row_indices, col_indices = np.where(masked_data != src_ppp.nodata) + values = masked_data[row_indices, col_indices] + + # Affine transformation from pixel coordinates to geo coordinates + x_coords, y_coords = rasterio.transform.xy(out_transform, row_indices, col_indices) + ppp = pd.DataFrame({"x": x_coords, "y": y_coords, "ppp": values}) + ppp = gpd.GeoDataFrame( + ppp, + geometry=gpd.points_from_xy(ppp.x, ppp.y), + crs=src_ppp.crs, + ) + ppp = gpd.sjoin(ppp, regions, predicate="within") + ppp = ( + ppp.groupby(["Bus", "country"]) + .agg({"ppp": "sum"}) + .reset_index() + .set_index("Bus") + ) + gdp_ppp = regions.join(gdp.drop(columns="country"), on="Bus").join( + ppp.drop(columns="country"), on="Bus" + ) + gdp_ppp.fillna(0, inplace=True) + + return gdp_ppp + + +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("build_gdp_ppp_non_nuts3") + configure_logging(snakemake) + set_scenario_config(snakemake) + + n = pypsa.Network(snakemake.input.base_network) + substation_lv_i = n.buses.index[n.buses["substation_lv"]] + regions = ( + gpd.read_file(snakemake.input.regions) + .set_index("name") + .reindex(substation_lv_i) + ) + + gdp_non_nuts3 = snakemake.input.gdp_non_nuts3 + ppp_non_nuts3 = snakemake.input.ppp_non_nuts3 + + countries_non_nuts3 = pd.Index(("MD", "UA")) + subset = set(countries_non_nuts3) & set(snakemake.params.countries) + + gdp_ppp = pd.concat( + [ + calc_gdp_ppp(country, regions, gdp_non_nuts3, ppp_non_nuts3) + for country in subset + ], + axis=0, + ) + + logger.info( + f"Exporting GDP and PPP values for non-NUTS3 regions {snakemake.output}" + ) + gdp_ppp.reset_index().to_file(snakemake.output, driver="GeoJSON") diff --git 
a/scripts/determine_availability_matrix_MD_UA.py b/scripts/determine_availability_matrix_MD_UA.py index f19919e39..f6f416dfb 100644 --- a/scripts/determine_availability_matrix_MD_UA.py +++ b/scripts/determine_availability_matrix_MD_UA.py @@ -49,6 +49,7 @@ def get_wdpa_layer_name(wdpa_fn, layer_substring): gpd.read_file(snakemake.input.regions).set_index("name").rename_axis("bus") ) buses = regions.index + # Limit to "UA" and "MD" regions buses = regions.loc[regions["country"].isin(["UA", "MD"])].index.values regions = regions.loc[buses] @@ -154,8 +155,6 @@ def get_wdpa_layer_name(wdpa_fn, layer_substring): plt.axis("off") plt.savefig(snakemake.output.availability_map, bbox_inches="tight", dpi=500) - # Limit results only to buses for UA and MD - buses = regions.loc[regions["country"].isin(["UA", "MD"])].index.values availability = availability.sel(bus=buses) # Save and plot for verification From 75f146cec17c695c16fbb0dfc64e001bce231145 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 16 Jul 2024 10:06:38 +0200 Subject: [PATCH 071/100] Cleaned determine_availability_matrix_MD_UA.py, removed redundant code --- scripts/determine_availability_matrix_MD_UA.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/determine_availability_matrix_MD_UA.py b/scripts/determine_availability_matrix_MD_UA.py index f6f416dfb..2ed11d3c0 100644 --- a/scripts/determine_availability_matrix_MD_UA.py +++ b/scripts/determine_availability_matrix_MD_UA.py @@ -48,7 +48,6 @@ def get_wdpa_layer_name(wdpa_fn, layer_substring): regions = ( gpd.read_file(snakemake.input.regions).set_index("name").rename_axis("bus") ) - buses = regions.index # Limit to "UA" and "MD" regions buses = regions.loc[regions["country"].isin(["UA", "MD"])].index.values regions = regions.loc[buses] From 44f46362bd49a4f4defaf6238955906a5cd0c558 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 16 Jul 2024 15:04:31 +0200 Subject: [PATCH 072/100] Bug fixes. 
--- rules/build_electricity.smk | 137 +++++++++++++++++++++++++++-- rules/retrieve.smk | 80 +++++++++++++++++ scripts/add_electricity.py | 120 ++++--------------------- scripts/build_gdp_ppp_non_nuts3.py | 4 + 4 files changed, 229 insertions(+), 112 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 4e71d9f19..27b3ac58e 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -74,14 +74,71 @@ rule base_network: links=config_provider("links"), transformers=config_provider("transformers"), input: - eg_buses="data/entsoegridkit/buses.csv", - eg_lines="data/entsoegridkit/lines.csv", - eg_links="data/entsoegridkit/links.csv", - eg_converters="data/entsoegridkit/converters.csv", - eg_transformers="data/entsoegridkit/transformers.csv", - parameter_corrections="data/parameter_corrections.yaml", - links_p_nom="data/links_p_nom.csv", - links_tyndp="data/links_tyndp.csv", + eg_buses=lambda w: ( + "data/entsoegridkit/buses.csv" + if config_provider("electricity_network", "base_network")(w) == "gridkit" + else ( + "data/osm/prebuilt/buses.csv" + if config_provider("electricity_network", "base_network")(w) + == "osm-prebuilt" + else resources("osm/pre-base/buses.csv") + ) + ), + eg_lines=lambda w: ( + "data/entsoegridkit/lines.csv" + if config_provider("electricity_network", "base_network")(w) == "gridkit" + else ( + "data/osm/prebuilt/lines.csv" + if config_provider("electricity_network", "base_network")(w) + == "osm-prebuilt" + else resources("osm/pre-base/lines.csv") + ) + ), + eg_links=lambda w: ( + "data/entsoegridkit/links.csv" + if config_provider("electricity_network", "base_network")(w) == "gridkit" + else ( + "data/osm/prebuilt/links.csv" + if config_provider("electricity_network", "base_network")(w) + == "osm-prebuilt" + else resources("osm/pre-base/links.csv") + ) + ), + eg_converters=lambda w: ( + "data/entsoegridkit/converters.csv" + if config_provider("electricity_network", "base_network")(w) == "gridkit" 
+ else ( + "data/osm/prebuilt/converters.csv" + if config_provider("electricity_network", "base_network")(w) + == "osm-prebuilt" + else resources("osm/pre-base/converters.csv") + ) + ), + eg_transformers=lambda w: ( + "data/entsoegridkit/transformers.csv" + if config_provider("electricity_network", "base_network")(w) == "gridkit" + else ( + "data/osm/prebuilt/transformers.csv" + if config_provider("electricity_network", "base_network")(w) + == "osm-prebuilt" + else resources("osm/pre-base/transformers.csv") + ) + ), + parameter_corrections=lambda w: ( + "data/parameter_corrections.yaml" + if config_provider("electricity_network", "base_network")(w) == "gridkit" + else [] + ), + links_p_nom=lambda w: ( + "data/links_p_nom.csv" + if config_provider("electricity_network", "base_network")(w) == "gridkit" + else [] + ), + links_tyndp=lambda w: ( + "data/links_tyndp.csv" + if config_provider("electricity_network", "base_network")(w) == "gridkit" + else [] + ), country_shapes=resources("country_shapes.geojson"), offshore_shapes=resources("offshore_shapes.geojson"), europe_shape=resources("europe_shape.geojson"), @@ -599,3 +656,67 @@ rule prepare_network: "../envs/environment.yaml" script: "../scripts/prepare_network.py" + + +if config["electricity_network"]["base_network"] == "osm-raw": + + rule clean_osm_data: + input: + cables_way=expand( + "data/osm/raw/{country}/cables_way.json", + country=config_provider("countries"), + ), + lines_way=expand( + "data/osm/raw/{country}/lines_way.json", + country=config_provider("countries"), + ), + links_relation=expand( + "data/osm/raw/{country}/links_relation.json", + country=config_provider("countries"), + ), + substations_way=expand( + "data/osm/raw/{country}/substations_way.json", + country=config_provider("countries"), + ), + substations_relation=expand( + "data/osm/raw/{country}/substations_relation.json", + country=config_provider("countries"), + ), + offshore_shapes=resources("offshore_shapes.geojson"), + 
country_shapes=resources("country_shapes.geojson"), + output: + substations=resources("osm/clean/substations.geojson"), + substations_polygon=resources("osm/clean/substations_polygon.geojson"), + lines=resources("osm/clean/lines.geojson"), + links=resources("osm/clean/links.geojson"), + log: + logs("clean_osm_data.log"), + script: + "../scripts/clean_osm_data.py" + + +if config["electricity_network"]["base_network"] == "osm-raw": + + rule build_osm_network: + input: + substations=resources("osm/clean/substations.geojson"), + lines=resources("osm/clean/lines.geojson"), + links=resources("osm/clean/links.geojson"), + country_shapes=resources("country_shapes.geojson"), + output: + lines=resources("osm/pre-base/lines.csv"), + links=resources("osm/pre-base/links.csv"), + converters=resources("osm/pre-base/converters.csv"), + transformers=resources("osm/pre-base/transformers.csv"), + substations=resources("osm/pre-base/buses.csv"), + lines_geojson=resources("osm/pre-base/lines.geojson"), + links_geojson=resources("osm/pre-base/links.geojson"), + converters_geojson=resources("osm/pre-base/converters.geojson"), + transformers_geojson=resources("osm/pre-base/transformers.geojson"), + substations_geojson=resources("osm/pre-base/buses.geojson"), + log: + logs("build_osm_network.log"), + benchmark: + benchmarks("build_osm_network") + script: + "../scripts/build_osm_network.py" diff --git a/rules/retrieve.smk b/rules/retrieve.smk index 2f9fe21df..11312b600 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -336,3 +336,83 @@ if config["enable"]["retrieve"] and {"UA", "MD"}.intersection(config["countries" "../envs/retrieve.yaml" script: "../scripts/retrieve_gdp_uamd.py" + + +if config["enable"]["retrieve"] and ( + config["electricity_network"]["base_network"] == "osm-prebuilt" +): + + rule retrieve_osm_prebuilt: + input: + buses=storage("https://sandbox.zenodo.org/records/87679/files/buses.csv"), + converters=storage( + 
"https://sandbox.zenodo.org/records/87679/files/converters.csv" + ), + lines=storage("https://sandbox.zenodo.org/records/87679/files/lines.csv"), + links=storage("https://sandbox.zenodo.org/records/87679/files/links.csv"), + transformers=storage( + "https://sandbox.zenodo.org/records/87679/files/transformers.csv" + ), + output: + buses="data/osm/prebuilt/buses.csv", + converters="data/osm/prebuilt/converters.csv", + lines="data/osm/prebuilt/lines.csv", + links="data/osm/prebuilt/links.csv", + transformers="data/osm/prebuilt/transformers.csv", + log: + "logs/retrieve_osm_prebuilt.log", + resources: + mem_mb=500, + retries: 2 + run: + for key in input.keys(): + move(input[key], output[key]) + + + +if config["enable"]["retrieve"] and ( + config["electricity_network"]["base_network"] == "osm-raw" +): + + rule retrieve_osm_data: + output: + cables_way="data/osm/raw/{country}/cables_way.json", + lines_way="data/osm/raw/{country}/lines_way.json", + links_relation="data/osm/raw/{country}/links_relation.json", + substations_way="data/osm/raw/{country}/substations_way.json", + substations_relation="data/osm/raw/{country}/substations_relation.json", + log: + "logs/retrieve_osm_data_{country}.log", + resources: + cores=2, + threads=1, + script: + "../scripts/retrieve_osm_data.py" + + +if config["enable"]["retrieve"] and ( + config["electricity_network"]["base_network"] == "osm-raw" +): + + rule retrieve_osm_data_all: + input: + expand( + "data/osm/raw/{country}/cables_way.json", + country=config_provider("countries"), + ), + expand( + "data/osm/raw/{country}/lines_way.json", + country=config_provider("countries"), + ), + expand( + "data/osm/raw/{country}/links_relation.json", + country=config_provider("countries"), + ), + expand( + "data/osm/raw/{country}/substations_way.json", + country=config_provider("countries"), + ), + expand( + "data/osm/raw/{country}/substations_relation.json", + country=config_provider("countries"), + ), diff --git a/scripts/add_electricity.py 
b/scripts/add_electricity.py index ae7167562..df40a8007 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -91,7 +91,6 @@ import pandas as pd import powerplantmatching as pm import pypsa -import rasterio import scipy.sparse as sparse import xarray as xr from _helpers import ( @@ -101,8 +100,6 @@ update_p_nom_max, ) from powerplantmatching.export import map_country_bus -from rasterio.mask import mask -from shapely.geometry import box from shapely.prepared import prep idx = pd.IndexSlice @@ -298,7 +295,7 @@ def shapes_to_shapes(orig, dest): def attach_load( - n, regions, load, nuts3_shapes, gdp_file, ppp_file, countries, scaling=1.0 + n, regions, load, nuts3_shapes, gdp_ppp_non_nuts3, countries, scaling=1.0 ): substation_lv_i = n.buses.index[n.buses["substation_lv"]] gdf_regions = gpd.read_file(regions).set_index("name").reindex(substation_lv_i) @@ -309,7 +306,7 @@ def attach_load( nuts3 = gpd.read_file(nuts3_shapes).set_index("index") - def upsample(cntry, group): + def upsample(cntry, group, gdp_ppp_non_nuts3): load = opsd_load[cntry] if len(group) == 1: @@ -326,13 +323,15 @@ def upsample(cntry, group): # relative factors 0.6 and 0.4 have been determined from a linear # regression on the country to continent load data factors = normed(0.6 * normed(gdp_n) + 0.4 * normed(pop_n)) - if cntry in ["UA", "MD"]: + if cntry in ["UA", "MD"] and gdp_ppp_non_nuts3 is not None: # overwrite factor because nuts3 provides no data for UA+MD - gdp_ua_md, ppp_ua_md = calculate_ua_md_gdp_ppp( - gdf_regions[gdf_regions.country == cntry], gdp_file, ppp_file - ) + gdp_ppp_non_nuts3 = gpd.read_file(gdp_ppp_non_nuts3).set_index("Bus") + gdp_ppp_non_nuts3 = gdp_ppp_non_nuts3.loc[ + gdp_ppp_non_nuts3.country == cntry + ] factors = normed( - 0.6 * normed(gdp_ua_md["gdp"]) + 0.4 * normed(ppp_ua_md["ppp"]) + 0.6 * normed(gdp_ppp_non_nuts3["gdp"]) + + 0.4 * normed(gdp_ppp_non_nuts3["ppp"]) ) return pd.DataFrame( factors.values * load.values[:, np.newaxis], @@ -342,7 
+341,7 @@ def upsample(cntry, group): load = pd.concat( [ - upsample(cntry, group) + upsample(cntry, group, gdp_ppp_non_nuts3) for cntry, group in gdf_regions.geometry.groupby(gdf_regions.country) ], axis=1, @@ -799,97 +798,6 @@ def attach_line_rating( n.lines_t.s_max_pu *= s_max_pu -def calculate_ua_md_gdp_ppp(gdf_regions, gdp_file, ppp_file): - """ - Calculate the GDP and PPP values for the regions within the bounding box of - UA and MD. - - Parameters: - gdf_regions (GeoDataFrame): A GeoDataFrame containing the regions. - gdp_file (str): The file path to the dataset containing the GDP values for UA and MD. - ppp_file (str): The file path to the dataset containing the PPP values for UA and MD. - - Returns: - tuple: A tuple containing two GeoDataFrames: - - gdp_ua_md: A GeoDataFrame with the aggregated GDP values mapped to each bus. - - ppp_ua_md: A GeoDataFrame with the aggregated PPP values mapped to each bus. - """ - # Create a bounding box for UA, MD from region shape, including a buffer of 10000 metres - box_ua_md = ( - gpd.GeoDataFrame(geometry=[box(*gdf_regions.total_bounds)], crs=gdf_regions.crs) - .to_crs(epsg=3857) - .buffer(10000) - .to_crs(gdf_regions.crs) - ) - - # GDP - with xr.open_dataset(gdp_file) as src_gdp_ua_md: - src_gdp_ua_md = src_gdp_ua_md.where( - (src_gdp_ua_md.longitude >= box_ua_md.bounds.minx.min()) - & (src_gdp_ua_md.longitude <= box_ua_md.bounds.maxx.max()) - & (src_gdp_ua_md.latitude >= box_ua_md.bounds.miny.min()) - & (src_gdp_ua_md.latitude <= box_ua_md.bounds.maxy.max()), - drop=True, - ) - gdp_ua_md = src_gdp_ua_md.to_dataframe().reset_index() - - gdp_ua_md = gdp_ua_md.rename(columns={"GDP_per_capita_PPP": "gdp"}) - gdp_ua_md = gdp_ua_md[gdp_ua_md.time == gdp_ua_md.time.max()] - gdp_ua_md = gpd.GeoDataFrame( - gdp_ua_md, - geometry=gpd.points_from_xy(gdp_ua_md.longitude, gdp_ua_md.latitude), - crs="EPSG:4326", - ) - - gdp_ua_md = gpd.sjoin( - gdp_ua_md, gdf_regions.reset_index(), predicate="within" - 
).drop(columns=["index_right"]) - gdp_ua_md = ( - gdp_ua_md.groupby(["Bus", "country", "time"]) - .agg({"gdp": "sum"}) - .reset_index(level=["country", "time"]) - ) - - # PPP - with rasterio.open(ppp_file) as src_ppp_ua_md: - # Mask the raster with the bounding box - out_image, out_transform = mask(src_ppp_ua_md, box_ua_md, crop=True) - out_image, - out_meta = src_ppp_ua_md.meta.copy() - out_meta.update( - { - "driver": "GTiff", - "height": out_image.shape[1], - "width": out_image.shape[2], - "transform": out_transform, - } - ) - - masked_data = out_image[0] # Use the first band (rest is empty) - row_indices, col_indices = np.where(masked_data != src_ppp_ua_md.nodata) - values = masked_data[row_indices, col_indices] - - # Affine transformation from pixel coordinates to geo coordinates - x_coords, y_coords = rasterio.transform.xy(out_transform, row_indices, col_indices) - ppp_ua_md = pd.DataFrame({"x": x_coords, "y": y_coords, "ppp": values}) - - ppp_ua_md = gpd.GeoDataFrame( - ppp_ua_md, - geometry=gpd.points_from_xy(ppp_ua_md.x, ppp_ua_md.y), - crs=src_ppp_ua_md.crs, - ) - - ppp_ua_md = gpd.sjoin(ppp_ua_md, gdf_regions.reset_index(), predicate="within") - ppp_ua_md = ( - ppp_ua_md.groupby(["Bus", "country"]) - .agg({"ppp": "sum"}) - .reset_index() - .set_index("Bus") - ) - - return gdp_ua_md, ppp_ua_md - - if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake @@ -915,13 +823,17 @@ def calculate_ua_md_gdp_ppp(gdf_regions, gdp_file, ppp_file): ) ppl = load_powerplants(snakemake.input.powerplants) + if "gdp_ppp_non_nuts3" in snakemake.input.keys(): + gdp_ppp_non_nuts3 = snakemake.input.gdp_ppp_non_nuts3 + else: + gdp_ppp_non_nuts3 = None + attach_load( n, snakemake.input.regions, snakemake.input.load, snakemake.input.nuts3_shapes, - snakemake.input.gdp_file, - snakemake.input.ppp_file, + gdp_ppp_non_nuts3, params.countries, params.scaling_factor, ) diff --git a/scripts/build_gdp_ppp_non_nuts3.py 
b/scripts/build_gdp_ppp_non_nuts3.py index 80dd1bb32..4c9cda265 100644 --- a/scripts/build_gdp_ppp_non_nuts3.py +++ b/scripts/build_gdp_ppp_non_nuts3.py @@ -3,7 +3,11 @@ # # SPDX-License-Identifier: MIT """ +Maps the GDP and PPP values to non-NUTS3 regions. +The script takes as input the country code, a GeoDataFrame containing +the regions, and the file paths to the datasets containing the GDP and +PPP values for non-NUTS3 countries. """ import logging From 712c476260fa33b3df071f024a97702b6481c4fa Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 16 Jul 2024 21:54:29 +0200 Subject: [PATCH 073/100] Bug fixes for UA MD scripts. --- rules/build_electricity.smk | 16 ++-- rules/retrieve.smk | 4 +- scripts/add_electricity.py | 26 +++--- scripts/build_gdp_ppp_non_nuts3.py | 136 +++++++++++++++++++---------- scripts/retrieve_gdp_uamd.py | 4 +- 5 files changed, 114 insertions(+), 72 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 27b3ac58e..ce73d1d5c 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -433,32 +433,32 @@ def input_conventional(w): # Optional input when having Ukraine (UA) or Moldova (MD) in the countries list -def input_gdp_ppp_non_nuts3(w): +def input_gdp_pop_non_nuts3(w): countries = set(config_provider("countries")(w)) if {"UA", "MD"}.intersection(countries): - return {"gdp_ppp_non_nuts3": resources("gdp_ppp_non_nuts3.geojson")} + return {"gdp_pop_non_nuts3": resources("gdp_pop_non_nuts3.geojson")} return {} -rule build_gdp_ppp_non_nuts3: +rule build_gdp_pop_non_nuts3: params: countries=config_provider("countries"), input: base_network=resources("networks/base.nc"), regions=resources("regions_onshore.geojson"), gdp_non_nuts3="data/GDP_per_capita_PPP_1990_2015_v2.nc", - ppp_non_nuts3="data/ppp_2013_1km_Aggregated.tif", + pop_non_nuts3="data/ppp_2013_1km_Aggregated.tif", output: - resources("gdp_ppp_non_nuts3.geojson"), + resources("gdp_pop_non_nuts3.geojson"), log: - 
logs("build_gdp_ppp_non_nuts3.log"), + logs("build_gdp_pop_non_nuts3.log"), threads: 1 resources: mem_mb=1500, conda: "../envs/environment.yaml" script: - "../scripts/build_gdp_ppp_non_nuts3.py" + "../scripts/build_gdp_pop_non_nuts3.py" rule add_electricity: @@ -476,7 +476,7 @@ rule add_electricity: input: unpack(input_profile_tech), unpack(input_conventional), - unpack(input_gdp_ppp_non_nuts3), + unpack(input_gdp_pop_non_nuts3), base_network=resources("networks/base.nc"), line_rating=lambda w: ( resources("networks/line_rating.nc") diff --git a/rules/retrieve.smk b/rules/retrieve.smk index 11312b600..a939ecd9a 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -325,8 +325,8 @@ if config["enable"]["retrieve"] and {"UA", "MD"}.intersection(config["countries" rule retrieve_gdp_uamd: output: - gdp="data/GDP_per_capita_PPP_1990_2015_v2.nc", - ppp="data/ppp_2013_1km_Aggregated.tif", + gdp_non_nuts3="data/GDP_per_capita_PPP_1990_2015_v2.nc", + pop_non_nuts3="data/ppp_2013_1km_Aggregated.tif", log: "logs/retrieve_gdp_uamd.log", resources: diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index df40a8007..1bd139189 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -295,7 +295,7 @@ def shapes_to_shapes(orig, dest): def attach_load( - n, regions, load, nuts3_shapes, gdp_ppp_non_nuts3, countries, scaling=1.0 + n, regions, load, nuts3_shapes, gdp_pop_non_nuts3, countries, scaling=1.0 ): substation_lv_i = n.buses.index[n.buses["substation_lv"]] gdf_regions = gpd.read_file(regions).set_index("name").reindex(substation_lv_i) @@ -306,7 +306,7 @@ def attach_load( nuts3 = gpd.read_file(nuts3_shapes).set_index("index") - def upsample(cntry, group, gdp_ppp_non_nuts3): + def upsample(cntry, group, gdp_pop_non_nuts3): load = opsd_load[cntry] if len(group) == 1: @@ -323,15 +323,15 @@ def upsample(cntry, group, gdp_ppp_non_nuts3): # relative factors 0.6 and 0.4 have been determined from a linear # regression on the country to continent 
load data factors = normed(0.6 * normed(gdp_n) + 0.4 * normed(pop_n)) - if cntry in ["UA", "MD"] and gdp_ppp_non_nuts3 is not None: + if cntry in ["UA", "MD"] and gdp_pop_non_nuts3 is not None: # overwrite factor because nuts3 provides no data for UA+MD - gdp_ppp_non_nuts3 = gpd.read_file(gdp_ppp_non_nuts3).set_index("Bus") - gdp_ppp_non_nuts3 = gdp_ppp_non_nuts3.loc[ - gdp_ppp_non_nuts3.country == cntry + gdp_pop_non_nuts3 = gpd.read_file(gdp_pop_non_nuts3).set_index("Bus") + gdp_pop_non_nuts3 = gdp_pop_non_nuts3.loc[ + gdp_pop_non_nuts3.country == cntry ] factors = normed( - 0.6 * normed(gdp_ppp_non_nuts3["gdp"]) - + 0.4 * normed(gdp_ppp_non_nuts3["ppp"]) + 0.6 * normed(gdp_pop_non_nuts3["gdp"]) + + 0.4 * normed(gdp_pop_non_nuts3["pop"]) ) return pd.DataFrame( factors.values * load.values[:, np.newaxis], @@ -341,7 +341,7 @@ def upsample(cntry, group, gdp_ppp_non_nuts3): load = pd.concat( [ - upsample(cntry, group, gdp_ppp_non_nuts3) + upsample(cntry, group, gdp_pop_non_nuts3) for cntry, group in gdf_regions.geometry.groupby(gdf_regions.country) ], axis=1, @@ -823,17 +823,17 @@ def attach_line_rating( ) ppl = load_powerplants(snakemake.input.powerplants) - if "gdp_ppp_non_nuts3" in snakemake.input.keys(): - gdp_ppp_non_nuts3 = snakemake.input.gdp_ppp_non_nuts3 + if "gdp_pop_non_nuts3" in snakemake.input.keys(): + gdp_pop_non_nuts3 = snakemake.input.gdp_pop_non_nuts3 else: - gdp_ppp_non_nuts3 = None + gdp_pop_non_nuts3 = None attach_load( n, snakemake.input.regions, snakemake.input.load, snakemake.input.nuts3_shapes, - gdp_ppp_non_nuts3, + gdp_pop_non_nuts3, params.countries, params.scaling_factor, ) diff --git a/scripts/build_gdp_ppp_non_nuts3.py b/scripts/build_gdp_ppp_non_nuts3.py index 4c9cda265..7d45da748 100644 --- a/scripts/build_gdp_ppp_non_nuts3.py +++ b/scripts/build_gdp_ppp_non_nuts3.py @@ -3,16 +3,20 @@ # # SPDX-License-Identifier: MIT """ -Maps the GDP and PPP values to non-NUTS3 regions. +Maps the GDP p.c. 
-The script takes as input the country code, a GeoDataFrame containing -the regions, and the file paths to the datasets containing the GDP and -PPP values for non-NUTS3 countries. +and population values to non-NUTS3 regions. The script takes as input +the country code, a GeoDataFrame containing the regions, and the file +paths to the datasets containing the GDP and POP values for non-NUTS3 +countries. """ import logging +import cartopy.crs as ccrs import geopandas as gpd +import matplotlib.colors as colors +import matplotlib.pyplot as plt import numpy as np import pandas as pd import pypsa @@ -25,24 +29,28 @@ logger = logging.getLogger(__name__) -def calc_gdp_ppp(country, regions, gdp_non_nuts3, ppp_non_nuts3): +def calc_gdp_pop(country, regions, gdp_non_nuts3, pop_non_nuts3): """ - Calculate the GDP and PPP values for non NUTS3 regions. + Calculate the GDP p.c. and population values for non NUTS3 regions. Parameters: country (str): The two-letter country code of the non-NUTS3 region. regions (GeoDataFrame): A GeoDataFrame containing the regions. - gdp_non_nuts3 (str): The file path to the dataset containing the GDP values + gdp_non_nuts3 (str): The file path to the dataset containing the GDP p.c values for non NUTS3 countries (e.g. MD, UA) - ppp_non_nuts3 (str): The file path to the dataset containing the PPP values + pop_non_nuts3 (str): The file path to the dataset containing the POP values for non NUTS3 countries (e.g. MD, UA) Returns: tuple: A tuple containing two GeoDataFrames: - - gdp: A GeoDataFrame with the aggregated GDP values mapped to each bus. - - ppp: A GeoDataFrame with the aggregated PPP values mapped to each bus. + - gdp: A GeoDataFrame with the mean GDP p.c. values mapped to each bus. + - pop: A GeoDataFrame with the summed POP values mapped to each bus. 
""" - regions = regions.drop(columns=["x", "y"]) + regions = ( + regions.rename(columns={"name": "Bus"}) + .drop(columns=["x", "y"]) + .set_index("Bus") + ) regions = regions[regions.country == country] # Create a bounding box for UA, MD from region shape, including a buffer of 10000 metres bounding_box = ( @@ -53,7 +61,9 @@ def calc_gdp_ppp(country, regions, gdp_non_nuts3, ppp_non_nuts3): ) # GDP - logger.info(f"Mapping GDP values to non-NUTS3 region: {regions.country.unique()}") + logger.info( + f"Mapping mean GDP p.c. to non-NUTS3 region: {regions.country.unique()}" + ) with xr.open_dataset(gdp_non_nuts3) as src_gdp: src_gdp = src_gdp.where( (src_gdp.longitude >= bounding_box.bounds.minx.min()) @@ -65,25 +75,28 @@ def calc_gdp_ppp(country, regions, gdp_non_nuts3, ppp_non_nuts3): gdp = src_gdp.to_dataframe().reset_index() gdp = gdp.rename(columns={"GDP_per_capita_PPP": "gdp"}) gdp = gdp[gdp.time == gdp.time.max()] - gdp = gpd.GeoDataFrame( + gdp_raster = gpd.GeoDataFrame( gdp, geometry=gpd.points_from_xy(gdp.longitude, gdp.latitude), crs="EPSG:4326", ) - gdp = gpd.sjoin(gdp, regions, predicate="within") + gdp_mapped = gpd.sjoin(gdp_raster, regions, predicate="within") gdp = ( - gdp.groupby(["Bus", "country"]) - .agg({"gdp": "sum"}) + gdp_mapped.copy() + .groupby(["Bus", "country"]) + .agg({"gdp": "mean"}) .reset_index(level=["country"]) ) - # PPP - logger.info(f"Mapping PPP values to non-NUTS3 region: {regions.country.unique()}") - with rasterio.open(ppp_non_nuts3) as src_ppp: + # POP + logger.info( + f"Mapping summed population to non-NUTS3 region: {regions.country.unique()}" + ) + with rasterio.open(pop_non_nuts3) as src_pop: # Mask the raster with the bounding box - out_image, out_transform = mask(src_ppp, bounding_box, crop=True) + out_image, out_transform = mask(src_pop, bounding_box, crop=True) out_image, - out_meta = src_ppp.meta.copy() + out_meta = src_pop.meta.copy() out_meta.update( { "driver": "GTiff", @@ -93,63 +106,92 @@ def calc_gdp_ppp(country, 
regions, gdp_non_nuts3, ppp_non_nuts3): } ) masked_data = out_image[0] # Use the first band (rest is empty) - row_indices, col_indices = np.where(masked_data != src_ppp.nodata) + row_indices, col_indices = np.where(masked_data != src_pop.nodata) values = masked_data[row_indices, col_indices] # Affine transformation from pixel coordinates to geo coordinates x_coords, y_coords = rasterio.transform.xy(out_transform, row_indices, col_indices) - ppp = pd.DataFrame({"x": x_coords, "y": y_coords, "ppp": values}) - ppp = gpd.GeoDataFrame( - ppp, - geometry=gpd.points_from_xy(ppp.x, ppp.y), - crs=src_ppp.crs, + pop_raster = pd.DataFrame({"x": x_coords, "y": y_coords, "pop": values}) + pop_raster = gpd.GeoDataFrame( + pop_raster, + geometry=gpd.points_from_xy(pop_raster.x, pop_raster.y), + crs=src_pop.crs, ) - ppp = gpd.sjoin(ppp, regions, predicate="within") - ppp = ( - ppp.groupby(["Bus", "country"]) - .agg({"ppp": "sum"}) + pop_mapped = gpd.sjoin(pop_raster, regions, predicate="within") + pop = ( + pop_mapped.groupby(["Bus", "country"]) + .agg({"pop": "sum"}) .reset_index() .set_index("Bus") ) - gdp_ppp = regions.join(gdp.drop(columns="country"), on="Bus").join( - ppp.drop(columns="country"), on="Bus" + gdp_pop = regions.join(gdp.drop(columns="country"), on="Bus").join( + pop.drop(columns="country"), on="Bus" + ) + gdp_pop.fillna(0, inplace=True) + + # Plot for validation purposes + cmap = plt.get_cmap("viridis") + norm = colors.Normalize(vmin=0, vmax=gdp_mapped.gdp.max()) + crs = ccrs.AlbersEqualArea() + # two column plot + fig, axes = plt.subplots(1, 2, figsize=(10, 5), subplot_kw={"projection": crs}) + gpd.GeoDataFrame( + regions.join(gdp.drop(columns="country"), on="Bus"), + crs=src_gdp.attrs["projection"], + ).to_crs(crs.proj4_init).plot( + ax=axes[0], + column="gdp", + cmap=cmap, + norm=norm, + legend=True, + legend_kwds={ + "label": "Mean GDP (mapped to bus regions)", + "orientation": "horizontal", + }, + ) + gpd.GeoDataFrame( + 
regions.join(pop.drop(columns="country"), on="Bus"), crs=src_pop.crs + ).to_crs(crs.proj4_init).plot( + ax=axes[1], + column="pop", + cmap=cmap, + legend=True, + legend_kwds={ + "label": "Abs. population (mapped to bus regions)", + "orientation": "horizontal", + }, ) - gdp_ppp.fillna(0, inplace=True) + plt.show() - return gdp_ppp + return gdp_pop if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake - snakemake = mock_snakemake("build_gdp_ppp_non_nuts3") + snakemake = mock_snakemake("build_gdp_pop_non_nuts3") configure_logging(snakemake) set_scenario_config(snakemake) n = pypsa.Network(snakemake.input.base_network) - substation_lv_i = n.buses.index[n.buses["substation_lv"]] - regions = ( - gpd.read_file(snakemake.input.regions) - .set_index("name") - .reindex(substation_lv_i) - ) + regions = gpd.read_file(snakemake.input.regions) gdp_non_nuts3 = snakemake.input.gdp_non_nuts3 - ppp_non_nuts3 = snakemake.input.ppp_non_nuts3 + pop_non_nuts3 = snakemake.input.pop_non_nuts3 countries_non_nuts3 = pd.Index(("MD", "UA")) subset = set(countries_non_nuts3) & set(snakemake.params.countries) - gdp_ppp = pd.concat( + gdp_pop = pd.concat( [ - calc_gdp_ppp(country, regions, gdp_non_nuts3, ppp_non_nuts3) + calc_gdp_pop(country, regions, gdp_non_nuts3, pop_non_nuts3) for country in subset ], axis=0, ) logger.info( - f"Exporting GDP and PPP values for non-NUTS3 regions {snakemake.output}" + f"Exporting GDP and POP values for non-NUTS3 regions {snakemake.output}" ) - gdp_ppp.reset_index().to_file(snakemake.output, driver="GeoJSON") + gdp_pop.reset_index().to_file(snakemake.output, driver="GeoJSON") diff --git a/scripts/retrieve_gdp_uamd.py b/scripts/retrieve_gdp_uamd.py index 3da3be4f0..780f2ea65 100644 --- a/scripts/retrieve_gdp_uamd.py +++ b/scripts/retrieve_gdp_uamd.py @@ -24,8 +24,8 @@ dict_urls = dict( { - "gdp": "https://datadryad.org/stash/downloads/file_stream/241947", - "ppp": 
"https://github.com/ecohealthalliance/sars_cov_risk/releases/download/v2.0.1/ppp_2020_1km_Aggregated.tif", + "gdp_non_nuts3": "https://datadryad.org/stash/downloads/file_stream/241947", + "pop_non_nuts3": "https://github.com/ecohealthalliance/sars_cov_risk/releases/download/v2.0.1/ppp_2020_1km_Aggregated.tif", } ) From 5071c298785f79f91964740b0d4b68a26d538877 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 16 Jul 2024 22:45:05 +0200 Subject: [PATCH 074/100] Rename of build script. --- .../{build_gdp_ppp_non_nuts3.py => build_gdp_pop_non_nuts3.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/{build_gdp_ppp_non_nuts3.py => build_gdp_pop_non_nuts3.py} (100%) diff --git a/scripts/build_gdp_ppp_non_nuts3.py b/scripts/build_gdp_pop_non_nuts3.py similarity index 100% rename from scripts/build_gdp_ppp_non_nuts3.py rename to scripts/build_gdp_pop_non_nuts3.py From d8941f22ab8930134e3a7faaf5bffad04933ccd8 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 16 Jul 2024 23:28:04 +0200 Subject: [PATCH 075/100] Bug fix: only distribute load to buses with substation. --- scripts/add_electricity.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index 1bd139189..0f6ca7f6e 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -327,7 +327,8 @@ def upsample(cntry, group, gdp_pop_non_nuts3): # overwrite factor because nuts3 provides no data for UA+MD gdp_pop_non_nuts3 = gpd.read_file(gdp_pop_non_nuts3).set_index("Bus") gdp_pop_non_nuts3 = gdp_pop_non_nuts3.loc[ - gdp_pop_non_nuts3.country == cntry + (gdp_pop_non_nuts3.country == cntry) + & (gdp_pop_non_nuts3.index.isin(substation_lv_i)) ] factors = normed( 0.6 * normed(gdp_pop_non_nuts3["gdp"]) From f234c9c2dd17d3344b7302908ea96f41835b2e3d Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Wed, 17 Jul 2024 23:33:58 +0200 Subject: [PATCH 076/100] Updated zenodo sandbox repository. 
--- rules/retrieve.smk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rules/retrieve.smk b/rules/retrieve.smk index a939ecd9a..22c2b65a0 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -344,14 +344,14 @@ if config["enable"]["retrieve"] and ( rule retrieve_osm_prebuilt: input: - buses=storage("https://sandbox.zenodo.org/records/87679/files/buses.csv"), + buses=storage("https://sandbox.zenodo.org/records/89508/files/buses.csv"), converters=storage( - "https://sandbox.zenodo.org/records/87679/files/converters.csv" + "https://sandbox.zenodo.org/records/89508/files/converters.csv" ), - lines=storage("https://sandbox.zenodo.org/records/87679/files/lines.csv"), - links=storage("https://sandbox.zenodo.org/records/87679/files/links.csv"), + lines=storage("https://sandbox.zenodo.org/records/89508/files/lines.csv"), + links=storage("https://sandbox.zenodo.org/records/89508/files/links.csv"), transformers=storage( - "https://sandbox.zenodo.org/records/87679/files/transformers.csv" + "https://sandbox.zenodo.org/records/89508/files/transformers.csv" ), output: buses="data/osm/prebuilt/buses.csv", From f7d8992e816d43f92ebcd015b1f59fd20d1d4e88 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Thu, 18 Jul 2024 11:29:19 +0200 Subject: [PATCH 077/100] Updated config.default --- config/config.default.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/config/config.default.yaml b/config/config.default.yaml index e2b5f3ee5..a0932761a 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -907,11 +907,11 @@ plotting: eu_node_location: x: -5.5 y: 46. - # costs_max: 1000 - # costs_threshold: 1 - # energy_max: 20000 - # energy_min: -20000 - # energy_threshold: 50. + costs_max: 1000 + costs_threshold: 1 + energy_max: 20000 + energy_min: -20000 + energy_threshold: 50. 
nice_names: OCGT: "Open-Cycle Gas" From 4c9a055e574bc90762b0a59feeffe1d24bb55f97 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 23 Jul 2024 10:07:10 +0200 Subject: [PATCH 078/100] Cleaned config.default.yaml: Related settings grouped together and redundant voltage settings aggregated. --- scripts/base_network.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/scripts/base_network.py b/scripts/base_network.py index 7e229591e..291777996 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -135,7 +135,7 @@ def _find_closest_links(links, new_links, distance_upper_bound=1.5): ) -def _load_buses_from_eg(eg_buses, europe_shape, config_elec): +def _load_buses_from_eg(eg_buses, europe_shape, config): buses = ( pd.read_csv( eg_buses, @@ -161,8 +161,8 @@ def _load_buses_from_eg(eg_buses, europe_shape, config_elec): lambda p: europe_shape_prepped.contains(Point(p)), axis=1 ) - v_nom_min = min(config_elec["voltages"]) - v_nom_max = max(config_elec["voltages"]) + v_nom_min = min(config["lines"]["types"].keys()) + v_nom_max = max(config["lines"]["types"].keys()) # Quick fix: buses_with_v_nom_to_keep_b = (v_nom_min <= buses.v_nom) & ( @@ -445,7 +445,7 @@ def _reconnect_crimea(lines): def _set_electrical_parameters_lines_eg(lines, config): - v_noms = config["electricity"]["voltages"] + v_noms = list(config["lines"]["types"].keys()) linetypes = config["lines"]["types"] for v_nom in v_noms: @@ -456,12 +456,13 @@ def _set_electrical_parameters_lines_eg(lines, config): return lines -def _set_electrical_parameters_lines_osm(lines_config, voltages, lines): +def _set_electrical_parameters_lines_osm(lines, config): if lines.empty: lines["type"] = [] return lines - linetypes = _get_linetypes_config(lines_config["types"], voltages) + v_noms = list(config["lines"]["types"].keys()) + linetypes = _get_linetypes_config(config["lines"]["types"], v_noms) lines["carrier"] = "AC" lines["dc"] = False @@ -470,7 +471,7 @@ def 
_set_electrical_parameters_lines_osm(lines_config, voltages, lines): lambda x: _get_linetype_by_voltage(x, linetypes) ) - lines["s_max_pu"] = lines_config["s_max_pu"] + lines["s_max_pu"] = config["lines"]["s_max_pu"] return lines @@ -817,7 +818,7 @@ def base_network( config, ): - buses = _load_buses_from_eg(eg_buses, europe_shape, config["electricity"]) + buses = _load_buses_from_eg(eg_buses, europe_shape, config) if config["electricity_network"].get("base_network") == "gridkit": links = _load_links_from_eg(buses, eg_links) @@ -851,9 +852,7 @@ def base_network( lines = _set_electrical_parameters_lines_eg(lines, config) links = _set_electrical_parameters_links_eg(links, config, links_p_nom) elif "osm" in config["electricity_network"].get("base_network"): - lines = _set_electrical_parameters_lines_osm( - config["lines"], config["electricity"]["voltages"], lines - ) + lines = _set_electrical_parameters_lines_osm(lines, config) links = _set_electrical_parameters_links_osm(links, config) else: raise ValueError("base_network must be either 'gridkit' or 'osm'") From 306883f1dd2559a778c3d2553baed58d83c9d7ae Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 23 Jul 2024 10:31:04 +0200 Subject: [PATCH 079/100] Cleaned config.default.yaml: Related settings grouped together and redundant voltage settings aggregated. Added release notes. 
--- config/config.default.yaml | 72 ++++++++++++++++++-------------------- doc/release_notes.rst | 4 +++ 2 files changed, 39 insertions(+), 37 deletions(-) diff --git a/config/config.default.yaml b/config/config.default.yaml index d13541bad..87d8e4f09 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -75,7 +75,6 @@ enable: custom_busmap: false drop_leap_day: true - # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#co2-budget co2_budget: 2020: 0.701 @@ -90,9 +89,43 @@ electricity_network: base_network: gridkit # Options: gridkit, osm-prebuilt, osm-raw (built from scratch using OSM data, takes longer) osm_group_tolerance_buses: 5000 # unit: meters, default 5000 - Buses within this distance are grouped together +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#lines +lines: + types: # Specify voltages (keys) and line types (values) for lines + 220.: "Al/St 240/40 2-bundle 220.0" + 300.: "Al/St 240/40 3-bundle 300.0" + 380.: "Al/St 240/40 4-bundle 380.0" + 500.: "Al/St 240/40 4-bundle 380.0" + 750.: "Al/St 560/50 4-bundle 750.0" + s_max_pu: 0.7 + s_nom_max: .inf + max_extension: 20000 #MW + length_factor: 1.25 # Note that 'osm-raw' and 'osm-prebuilt' already contain higher spatial resolution line routes and lengths + reconnect_crimea: true # Only needed for 'gridkit' base_network, in OSM, the lines are already connected + under_construction: 'keep' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity + dynamic_line_rating: + activate: false + cutout: europe-2013-era5 + correction_factor: 0.95 + max_voltage_difference: false + max_line_rating: false + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#links +links: + p_max_pu: 1.0 + p_nom_max: .inf + max_extension: 30000 #MW + include_tyndp: true + under_construction: 'zero' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity + +# docs in 
https://pypsa-eur.readthedocs.io/en/latest/configuration.html#transformers +transformers: + x: 0.1 + s_nom: 2000. + type: '' + # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity electricity: - voltages: [220., 300., 380., 500., 750.] gaslimit_enable: false gaslimit: false co2limit_enable: false @@ -283,41 +316,6 @@ conventional: nuclear: p_max_pu: "data/nuclear_p_max_pu.csv" # float of file name -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#lines -lines: - types: - 220.: "Al/St 240/40 2-bundle 220.0" - 300.: "Al/St 240/40 3-bundle 300.0" - 380.: "Al/St 240/40 4-bundle 380.0" - 500.: "Al/St 240/40 4-bundle 380.0" - 750.: "Al/St 560/50 4-bundle 750.0" - s_max_pu: 0.7 - s_nom_max: .inf - max_extension: 20000 #MW - length_factor: 1.25 - reconnect_crimea: true - under_construction: 'keep' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity - dynamic_line_rating: - activate: false - cutout: europe-2013-era5 - correction_factor: 0.95 - max_voltage_difference: false - max_line_rating: false - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#links -links: - p_max_pu: 1.0 - p_nom_max: .inf - max_extension: 30000 #MW - include_tyndp: true - under_construction: 'zero' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#transformers -transformers: - x: 0.1 - s_nom: 2000. - type: '' - # docs-load in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#load load: interpolate_limit: 3 diff --git a/doc/release_notes.rst b/doc/release_notes.rst index eb29ce4b1..640ec5d6d 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -41,6 +41,10 @@ Upcoming Release * Enable parallelism in :mod:`determine_availability_matrix_MD_UA.py` and remove plots. This requires the use of temporary files. 
+* Added new feature to base the electricity network on OpenStreetMap (OSM) data (PR https://github.com/PyPSA/pypsa-eur/pull/1079). Note that a heuristics-based cleaning process is used for lines and links where electrical parameters are incomplete, missing, or ambiguous.
--- rules/retrieve.smk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rules/retrieve.smk b/rules/retrieve.smk index 999100c1c..3b563d783 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -346,14 +346,14 @@ if config["enable"]["retrieve"] and ( rule retrieve_osm_prebuilt: input: - buses=storage("https://sandbox.zenodo.org/records/89508/files/buses.csv"), + buses=storage("https://zenodo.org/records/12799202/files/buses.csv"), converters=storage( - "https://sandbox.zenodo.org/records/89508/files/converters.csv" + "https://zenodo.org/records/12799202/files/converters.csv" ), - lines=storage("https://sandbox.zenodo.org/records/89508/files/lines.csv"), - links=storage("https://sandbox.zenodo.org/records/89508/files/links.csv"), + lines=storage("https://zenodo.org/records/12799202/files/lines.csv"), + links=storage("https://zenodo.org/records/12799202/files/links.csv"), transformers=storage( - "https://sandbox.zenodo.org/records/89508/files/transformers.csv" + "https://zenodo.org/records/12799202/files/transformers.csv" ), output: buses="data/osm/prebuilt/buses.csv", From f6717925412fed1679846ab68eae30cca627cd0e Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 30 Jul 2024 11:14:07 +0200 Subject: [PATCH 081/100] Updated configtables --- config/config_backup.yaml | 262 +++++++++++++---------- doc/configtables/electricity_network.csv | 3 + 2 files changed, 150 insertions(+), 115 deletions(-) create mode 100644 doc/configtables/electricity_network.csv diff --git a/config/config_backup.yaml b/config/config_backup.yaml index 2bcaf173c..9ebeea351 100644 --- a/config/config_backup.yaml +++ b/config/config_backup.yaml @@ -3,7 +3,7 @@ # SPDX-License-Identifier: CC0-1.0 # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#top-level-configuration -version: 0.10.0 +version: 0.11.0 tutorial: false logging: @@ -15,14 +15,13 @@ private: entsoe_api: remote: - ssh: "z1" - path: "~/scratch/projects/pypsa-eur" + ssh: z1 + path: 
~/scratch/projects/pypsa-eur # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#run run: prefix: "" - # name: "test-europe1-gridkit" - name: "test-begb-gridkit" + name: "europe-ua-md-gridkit-custom" scenarios: enable: false file: config/scenarios.yaml @@ -41,60 +40,40 @@ scenario: simpl: - '' ll: - - v1.0 # TODO mit und ohne Netzausbau v1.0 + - v1.0 clusters: - - 40 - # - 128 - # - 256 - # - 512 - # # - 1024 + - 320 opts: - - 'Co2L0-169H' + - '' sector_opts: - '' planning_horizons: # - 2020 - # - 2030 + - 2030 # - 2040 - - 2050 + # - 2050 # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#countries -countries: ["BE", "GB"] -# countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK'] -# countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MD', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK', 'UA'] - -# Settings related to the high-voltage electricity grid -electricity_network: - base_network: "gridkit" # "osm" or "gridkit" - build_osm_network: true # If 'true', the network will be built from scratch (retrieving OSM data, cleaning, and building) and stored under resources, 'false' will use snapshots in data/osm - -build_osm_network: # Options of the build_osm_network script; osm = OpenStreetMap - group_tolerance_buses: 5000 # [m] (default 5000) Tolerance in meters of the close buses to merge - split_overpassing_lines: false # When True, lines overpassing buses are splitted and connected to the bueses - overpassing_lines_tolerance: 1 # [m] (default 1) Tolerance to identify lines overpassing buses - force_ac: false # When true, it forces all components (lines and substation) to be AC-only. To be used if DC assets create problem. 
+countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK', 'UA', 'MD'] # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#snapshots snapshots: - start: "2013-01-01" - end: "2014-01-01" + start: '2013-01-01' + end: '2014-01-01' inclusive: 'left' # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#enable enable: - retrieve: true + retrieve: auto prepare_links_p_nom: false retrieve_databundle: true - retrieve_sector_databundle: true retrieve_cost_data: true build_cutout: false - retrieve_irena: false retrieve_cutout: true - build_natura_raster: false - retrieve_natura_raster: true - custom_busmap: false + custom_busmap: true drop_leap_day: true + # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#co2-budget co2_budget: 2020: 0.701 @@ -105,6 +84,10 @@ co2_budget: 2045: 0.032 2050: 0.000 +electricity_network: + base_network: gridkit # Options: gridkit, osm-prebuilt, osm-raw (built from scratch using OSM data, takes longer) + osm_group_tolerance_buses: 5000 # unit: meters, default 5000 - Buses within this distance are grouped together + # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity electricity: voltages: [200., 220., 300., 380., 400., 500., 750.] 
@@ -113,7 +96,6 @@ electricity: co2limit_enable: false co2limit: 7.75e+7 co2base: 1.487e+9 - agg_p_nom_limits: data/agg_p_nom_minmax.csv operational_reserve: activate: false @@ -126,17 +108,18 @@ electricity: H2: 168 extendable_carriers: - Generator: [solar, onwind, offwind-ac, offwind-dc, OCGT] + Generator: [solar, solar-hsat, onwind, offwind-ac, offwind-dc, offwind-float, OCGT, CCGT] StorageUnit: [] # battery, H2 Store: [battery, H2] Link: [] # H2 pipeline powerplants_filter: (DateOut >= 2023 or DateOut != DateOut) and not (Country == 'Germany' and Fueltype == 'Nuclear') custom_powerplants: false - everywhere_powerplants: [nuclear, oil, OCGT, CCGT, coal, lignite, geothermal, biomass] + everywhere_powerplants: [] conventional_carriers: [nuclear, oil, OCGT, CCGT, coal, lignite, geothermal, biomass] - renewable_carriers: [solar, onwind, offwind-ac, offwind-dc, hydro] # hydro removed + renewable_carriers: [solar, onwind, offwind-ac, offwind-dc, hydro] + # renewable_carriers: [solar, solar-hsat, onwind, offwind-ac, offwind-dc, offwind-float, hydro] estimate_renewable_capacities: enable: true @@ -144,7 +127,7 @@ electricity: year: 2020 expansion_limit: false technology_mapping: - Offshore: [offwind-ac, offwind-dc] + Offshore: [offwind-ac, offwind-dc, offwind-float] Onshore: [onwind] PV: [solar] @@ -212,7 +195,7 @@ renewable: luisa: false # [0, 5230] natura: true ship_threshold: 400 - max_depth: 50 + max_depth: 60 max_shore_distance: 30000 excluder_resolution: 200 clip_p_max_pu: 1.e-2 @@ -228,10 +211,28 @@ renewable: luisa: false # [0, 5230] natura: true ship_threshold: 400 - max_depth: 50 + max_depth: 60 min_shore_distance: 30000 excluder_resolution: 200 clip_p_max_pu: 1.e-2 + offwind-float: + cutout: europe-2013-era5 + resource: + method: wind + turbine: NREL_ReferenceTurbine_5MW_offshore + # ScholzPhd Tab 4.3.1: 10MW/km^2 + capacity_per_sqkm: 2 + correction_factor: 0.8855 + # proxy for wake losses + # from 10.1016/j.energy.2018.08.153 + # until done more rigorously 
in #153 + corine: [44, 255] + natura: true + ship_threshold: 400 + excluder_resolution: 200 + min_depth: 60 + max_depth: 1000 + clip_p_max_pu: 1.e-2 solar: cutout: europe-2013-sarah resource: @@ -247,6 +248,21 @@ renewable: natura: true excluder_resolution: 100 clip_p_max_pu: 1.e-2 + solar-hsat: + cutout: europe-2013-sarah + resource: + method: pv + panel: CSi + orientation: + slope: 35. + azimuth: 180. + tracking: horizontal + capacity_per_sqkm: 4.43 # 15% higher land usage acc. to NREL + corine: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 26, 31, 32] + luisa: false # [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242, 1310, 1320, 1330, 1410, 1421, 1422, 2110, 2120, 2130, 2210, 2220, 2230, 2310, 2410, 2420, 3210, 3320, 3330] + natura: true + excluder_resolution: 100 + clip_p_max_pu: 1.e-2 hydro: cutout: europe-2013-era5 carriers: [ror, PHS, hydro] @@ -269,27 +285,19 @@ conventional: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#lines lines: types: - 200.: "Al/St 240/40 2-bundle 220.0" - 220.: "Al/St 240/40 2-bundle 220.0" - 300.: "Al/St 240/40 3-bundle 300.0" - 380.: "Al/St 240/40 4-bundle 380.0" - 400.: "Al/St 240/40 4-bundle 380.0" - 500.: "Al/St 240/40 4-bundle 380.0" - 750.: "Al/St 560/50 4-bundle 750.0" - dc_types: # setting only for osm - 200.: "HVDC XLPE 1000" - 220.: "HVDC XLPE 1000" - 300.: "HVDC XLPE 1000" - 750.: "HVDC XLPE 1000" - 380.: "HVDC XLPE 1000" - 400.: "HVDC XLPE 1000" - 500.: "HVDC XLPE 1000" + 200.0: Al/St 240/40 2-bundle 220.0 + 220.0: Al/St 240/40 2-bundle 220.0 + 300.0: Al/St 240/40 3-bundle 300.0 + 380.0: Al/St 240/40 4-bundle 380.0 + 400.0: Al/St 240/40 4-bundle 380.0 + 500.0: Al/St 240/40 4-bundle 380.0 + 750.0: Al/St 560/50 4-bundle 750.0 s_max_pu: 0.7 s_nom_max: .inf max_extension: 20000 #MW length_factor: 1.25 reconnect_crimea: true - under_construction: 'zero' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity + under_construction: 'keep' 
# 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity dynamic_line_rating: activate: false cutout: europe-2013-era5 @@ -302,7 +310,7 @@ links: p_max_pu: 1.0 p_nom_max: .inf max_extension: 30000 #MW - include_tyndp: false + include_tyndp: true under_construction: 'zero' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#transformers @@ -335,6 +343,8 @@ pypsa_eur: - onwind - offwind-ac - offwind-dc + - offwind-float + - solar-hsat - solar - ror - nuclear @@ -385,8 +395,8 @@ solar_thermal: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#existing-capacities existing_capacities: - grouping_years_power: [1895, 1920, 1950, 1955, 1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020, 2025, 2030] - grouping_years_heat: [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020] # heat grouping years >= baseyear will be ignored + grouping_years_power: [1920, 1950, 1955, 1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020, 2025] + grouping_years_heat: [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2019] # heat grouping years >= baseyear will be ignored threshold_capacity: 10 default_heating_lifetime: 20 conventional_carriers: @@ -427,7 +437,6 @@ sector: bev_availability: 0.5 bev_energy: 0.05 bev_charge_efficiency: 0.9 - bev_plug_to_wheel_efficiency: 0.2 bev_charge_rate: 0.011 bev_avail_max: 0.95 bev_avail_mean: 0.8 @@ -456,8 +465,9 @@ sector: 2040: 0.3 2045: 0.15 2050: 0 - transport_fuel_cell_efficiency: 0.5 - transport_internal_combustion_efficiency: 0.3 + transport_electric_efficiency: 53.19 # 1 MWh_el = 53.19*100 km + transport_fuel_cell_efficiency: 30.003 # 1 MWh_H2 = 30.003*100 km + transport_ice_efficiency: 16.0712 # 1 MWh_oil = 16.0712 * 100 km agriculture_machinery_electric_share: 0 agriculture_machinery_oil_share: 1 agriculture_machinery_fuel_efficiency: 0.7 @@ -563,15 +573,15 @@ 
sector: - nearshore # within 50 km of sea # - offshore ammonia: false - min_part_load_fischer_tropsch: 0.7 + min_part_load_fischer_tropsch: 0.5 min_part_load_methanolisation: 0.3 min_part_load_methanation: 0.3 - use_fischer_tropsch_waste_heat: true - use_haber_bosch_waste_heat: true - use_methanolisation_waste_heat: true - use_methanation_waste_heat: true - use_fuel_cell_waste_heat: true - use_electrolysis_waste_heat: true + use_fischer_tropsch_waste_heat: 0.25 + use_haber_bosch_waste_heat: 0.25 + use_methanolisation_waste_heat: 0.25 + use_methanation_waste_heat: 0.25 + use_fuel_cell_waste_heat: 0.25 + use_electrolysis_waste_heat: 0.25 electricity_transmission_grid: true electricity_distribution_grid: true electricity_distribution_grid_cost_factor: 1.0 @@ -586,6 +596,8 @@ sector: gas pipeline: efficiency_per_1000km: 1 #0.977 compression_per_1000km: 0.01 + electricity distribution grid: + efficiency_static: 0.97 H2_network: true gas_network: false H2_retrofit: false @@ -614,6 +626,13 @@ sector: solar: 3 offwind-ac: 3 offwind-dc: 3 + enhanced_geothermal: + enable: false + flexible: true + max_hours: 240 + max_boost: 0.25 + var_cf: true + sustainability_factor: 0.0025 # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#industry industry: @@ -679,6 +698,9 @@ industry: 2040: 0.12 2045: 0.16 2050: 0.20 + HVC_environment_sequestration_fraction: 0. 
+ waste_to_energy: false + waste_to_energy_cc: false sector_ratios_fraction_future: 2020: 0.0 2025: 0.1 @@ -697,6 +719,7 @@ industry: methanol_production_today: 1.5 MWh_elec_per_tMeOH: 0.167 MWh_CH4_per_tMeOH: 10.25 + MWh_MeOH_per_tMeOH: 5.528 hotmaps_locate_missing: false reference_year: 2015 @@ -704,8 +727,7 @@ industry: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#costs costs: year: 2030 - version: v0.8.1 - rooftop_share: 0.14 # based on the potentials, assuming (0.1 kW/m2 and 10 m2/person) + version: v0.9.0 social_discountrate: 0.02 fill_values: FOM: 0 @@ -730,8 +752,8 @@ costs: battery: 0. battery inverter: 0. emission_prices: - enable: false - co2: 0. + enable: true + co2: 100. co2_monthly_prices: false # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#clustering @@ -755,8 +777,8 @@ clustering: ramp_limit_up: max ramp_limit_down: max temporal: - resolution_elec: 169H - resolution_sector: 169H + resolution_elec: 25H + resolution_sector: 25H # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#adjustments adjustments: @@ -777,11 +799,28 @@ solving: # io_api: "direct" # Increases performance but only supported for the highs and gurobi solvers # options that go into the optimize function track_iterations: false - min_iterations: 4 - max_iterations: 6 + min_iterations: 2 + max_iterations: 3 transmission_losses: 2 linearized_unit_commitment: true horizon: 365 + post_discretization: + enable: false + line_unit_size: 1700 + line_threshold: 0.3 + link_unit_size: + DC: 2000 + H2 pipeline: 1200 + gas pipeline: 1500 + link_threshold: + DC: 0.3 + H2 pipeline: 0.3 + gas pipeline: 0.3 + + agg_p_nom_limits: + agg_offwind: false + include_existing: false + file: data/agg_p_nom_minmax.csv constraints: CCL: false @@ -795,7 +834,7 @@ solving: solver_options: highs-default: - # refer to https://ergo-code.github.io/HiGHS/options/definitions.html#solver + # refer to 
https://ergo-code.github.io/HiGHS/dev/options/definitions/ threads: 4 solver: "ipm" run_crossover: "off" @@ -848,23 +887,17 @@ solving: cbc-default: {} # Used in CI glpk-default: {} # Used in CI - mem_mb: 100000 #memory in MB; 20 GB enough for 50+B+I+H2; 100 GB for 181+B+I+H2 - runtime: 12h #runtime in humanfriendly style https://humanfriendly.readthedocs.io/en/latest/ + mem_mb: 30000 #memory in MB; 20 GB enough for 50+B+I+H2; 100 GB for 181+B+I+H2 + runtime: 6h #runtime in humanfriendly style https://humanfriendly.readthedocs.io/en/latest/ # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#plotting - plotting: map: - boundaries: - eu_node_location: - x: -5.5 - y: 46. - # costs_max: 1000 - # costs_threshold: 0.0000001 - # energy_max: - # energy_min: - # energy_threshold: 0.000001 + boundaries: [-11, 30, 34, 71] + color_geomap: + ocean: white + land: white projection: name: "EqualEarth" # See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html for alternatives, for example: @@ -872,34 +905,21 @@ plotting: # central_longitude: 10. # central_latitude: 50. # standard_parallels: [35, 65] - -# plotting: -# map: -# boundaries: [-11, 30, 34, 71] -# color_geomap: -# ocean: white -# land: white -# projection: -# name: "EqualEarth" -# # See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html for alternatives, for example: -# # name: "LambertConformal" -# # central_longitude: 10. -# # central_latitude: 50. -# # standard_parallels: [35, 65] -# eu_node_location: -# x: -5.5 -# y: 46. -# costs_max: 1000 -# costs_threshold: 1 -# energy_max: 20000 -# energy_min: -20000 -# energy_threshold: 50. + eu_node_location: + x: -5.5 + y: 46. + costs_max: 1000 + costs_threshold: 1 + energy_max: 20000 + energy_min: -20000 + energy_threshold: 50. 
nice_names: OCGT: "Open-Cycle Gas" CCGT: "Combined-Cycle Gas" offwind-ac: "Offshore Wind (AC)" offwind-dc: "Offshore Wind (DC)" + offwind-float: "Offshore Wind (Floating)" onwind: "Onshore Wind" solar: "Solar" PHS: "Pumped Hydro Storage" @@ -924,6 +944,9 @@ plotting: offwind-dc: "#74c6f2" offshore wind (DC): "#74c6f2" offshore wind dc: "#74c6f2" + offwind-float: "#b5e2fa" + offshore wind (Float): "#b5e2fa" + offshore wind float: "#b5e2fa" # water hydro: '#298c81' hydro reservoir: '#298c81' @@ -935,6 +958,7 @@ plotting: # solar solar: "#f9d002" solar PV: "#f9d002" + solar-hsat: "#fdb915" solar thermal: '#ffbf2b' residential rural solar thermal: '#f1c069' services rural solar thermal: '#eabf61' @@ -1036,6 +1060,7 @@ plotting: BEV charger: '#baf238' V2G: '#e5ffa8' land transport EV: '#baf238' + land transport demand: '#38baf2' Li ion: '#baf238' # hot water storage water tanks: '#e69487' @@ -1140,6 +1165,7 @@ plotting: methanolisation: '#83d6d5' methanol: '#468c8b' shipping methanol: '#468c8b' + industry methanol: '#468c8b' # co2 CC: '#f29dae' CCS: '#f29dae' @@ -1170,6 +1196,9 @@ plotting: waste: '#e3d37d' other: '#000000' geothermal: '#ba91b1' + geothermal heat: '#ba91b1' + geothermal district heat: '#d19D00' + geothermal organic rankine cycle: '#ffbf00' AC: "#70af1d" AC-AC: "#70af1d" AC line: "#70af1d" @@ -1179,3 +1208,6 @@ plotting: DC-DC: "#8a1caf" DC link: "#8a1caf" load: "#dd2e23" + waste CHP: '#e3d37d' + waste CHP CC: '#e3d3ff' + HVC to air: 'k' diff --git a/doc/configtables/electricity_network.csv b/doc/configtables/electricity_network.csv new file mode 100644 index 000000000..f7a51ef1f --- /dev/null +++ b/doc/configtables/electricity_network.csv @@ -0,0 +1,3 @@ +,Unit,Values,Description +base_network, --, "Any value in {'gridkit', 'osm-prebuilt', 'osm-raw}", "Specify the underlying base network, i.e. GridKit (based on ENTSO-E web map extract, OpenStreetMap (OSM) prebuilt or raw (built from raw OSM data), takes longer." 
+osm_group_tolerance_buses, meters, float, "Specifies the radius in which substations shall be clustered to a single bus. Default recommendation: 5000 (meters)" From 008df4bcc8406db3a0f3ed0224b77ee6acb68ef7 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Fri, 9 Aug 2024 16:42:04 +0200 Subject: [PATCH 082/100] Updated links.csv: Under_construction lines to in commission. --- data/entsoegridkit/links.csv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/data/entsoegridkit/links.csv b/data/entsoegridkit/links.csv index 00a488ddf..4a94c32bb 100644 --- a/data/entsoegridkit/links.csv +++ b/data/entsoegridkit/links.csv @@ -6,7 +6,7 @@ link_id,bus0,bus1,length,underground,under_construction,tags,geometry 5587,1377,2382,76847.0139826037,f,f,'"MW"=>"None", "TSO"=>"None", "oid"=>"32533", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>"200", "symbol"=>"DC-Line", "country"=>"IT", "t9_code"=>"0", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"0", "oneCircuit"=>"0", "CreatedDate"=>"None", "DeletedDate"=>"None", "ModifiedDate"=>"None", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(8.67675371049727 40.6777653795244,9.03900099999999 40.979898,9.22164899999999 41.133159,9.19977299501706 41.2082924934473)' 5640,1422,1638,234733.218840324,f,f,'"MW"=>"None", "TSO"=>"None", "oid"=>"32590", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>"RĂłmulo", "symbol"=>"DC-Line", "country"=>"ES", "t9_code"=>"0", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"0", "oneCircuit"=>"0", "CreatedDate"=>"None", "DeletedDate"=>"None", "ModifiedDate"=>"None", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(2.48932993486183 39.561252379133,1.13159199999999 39.610978,0 39.710356,-0.234388957535875 39.7314420592468)' 13589,2262,7428,316517.539537871,f,f,,'LINESTRING(9.17009350125146 41.2967653544603,9.38095099999999 41.331451,9.858856 41.352072,10.70755 41.479776,11.25 41.448903,12.100067 
41.432431,12.380219 41.426253,12.418671 41.401536,12.704315 41.347948,12.805939 41.368564,12.9016442293009 41.3921592955445)' -14802,2258,7029,391819.608605717,f,t,,'LINESTRING(14.0986517070226 42.4133438660838,14.412689 42.431566,15.115814 42.363618,16.269379 42.067646,16.875 42.126747,16.962891 42.135913,18.531189 42.271212,18.7271798293119 42.3522936900005)' +14802,2258,7029,391819.608605717,f,f,,'LINESTRING(14.0986517070226 42.4133438660838,14.412689 42.431566,15.115814 42.363618,16.269379 42.067646,16.875 42.126747,16.962891 42.135913,18.531189 42.271212,18.7271798293119 42.3522936900005)' 14668,2333,3671,146536.932669904,f,t,,'LINESTRING(6.04271995139229 45.4637174756646,6.16607700000001 45.327048,6.351471 45.183973,6.54922499999999 45.148148,6.62338299999999 45.101638,6.642609 45.089036,6.70440700000001 45.05121,6.980438 45.089036,7.00653099999999 45.092914,7.21939099999999 45.094853,7.223511 45.089036,7.378693 44.871443,7.32136143270145 44.8385424366672)' 14808,2379,2383,103628.671904731,f,f,,'LINESTRING(9.37725891362686 42.7057449479108,9.79980499999999 42.799431,10.5931379465185 42.9693952059839)' 5575,2379,2380,24868.4258834249,f,f,'"MW"=>"None", "TSO"=>"None", "oid"=>"32521", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>" ", "symbol"=>"DC-Line", "country"=>"FR", "t9_code"=>"0", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"0", "oneCircuit"=>"0", "CreatedDate"=>"None", "DeletedDate"=>"None", "ModifiedDate"=>"None", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(9.37679000208623 42.7053229039427,9.357605 42.552069,9.45054814341409 42.5389781005166)' @@ -15,7 +15,7 @@ link_id,bus0,bus1,length,underground,under_construction,tags,geometry 5583,2382,7428,11623.019620339,f,f,'"MW"=>"None", "TSO"=>"None", "oid"=>"32529", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>" ", "symbol"=>"DC-Line", "country"=>"IT", "t9_code"=>"FR-IT-01", "visible"=>"1", "EIC_code"=>"None", 
"tie_line"=>"1", "oneCircuit"=>"0", "CreatedDate"=>"None", "DeletedDate"=>"None", "ModifiedDate"=>"1.555323123e+12", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(9.17008474107272 41.2967639130447,9.168091 41.303603,9.18319700000001 41.250968,9.1995514318356 41.2089447559651)' 14825,2476,2585,45367.7245799963,f,f,,'LINESTRING(2.98259070757654 42.2776059846425,2.90313700000001 42.397094,2.867432 42.467032,2.77404800000001 42.655172)' 8745,3611,8302,9361.61122972312,f,f,'"MW"=>"None", "TSO"=>"None", "oid"=>"120591", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>"None", "symbol"=>"DC-Line", "country"=>"CH", "t9_code"=>"None", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"0", "oneCircuit"=>"1", "CreatedDate"=>"1.556535027e+12", "DeletedDate"=>"None", "ModifiedDate"=>"None", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(7.95410166666667 47.5542867377085,7.928009 47.555214,7.937622 47.526475,7.96895162362761 47.4961125343931)' -14801,4709,4781,50206.4589537583,f,t,,'LINESTRING(6.43068069229957 50.8136946409214,6.020508 50.766865,5.925751 50.755572,5.73118285928413 50.7304278585398)' +14801,4709,4781,50206.4589537583,f,f,,'LINESTRING(6.43068069229957 50.8136946409214,6.020508 50.766865,5.925751 50.755572,5.73118285928413 50.7304278585398)' 14814,4972,5062,232745.802729813,f,f,,'LINESTRING(4.04528166772434 51.9611233898246,2.41561900000001 51.702353,0.794192405058928 51.4189824547604)' 5558,4975,7427,45665.1050240866,f,t,'"MW"=>"None", "TSO"=>"None", "oid"=>"32502", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>" ", "symbol"=>"DC-Line", "country"=>"UK", "t9_code"=>" BE-UK-01", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"1", "oneCircuit"=>"0", "CreatedDate"=>"None", "DeletedDate"=>"None", "ModifiedDate"=>"1.555407949e+12", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(1.92947399999999 51.251601,1.27623412238205 51.2327009391635)' 
14826,4977,4983,52725.5506558225,f,f,,'LINESTRING(1.75051314494826 50.9186901861196,1.43508900000001 50.970535,1.02353536683349 51.0370060560335)' @@ -33,16 +33,16 @@ link_id,bus0,bus1,length,underground,under_construction,tags,geometry 5571,5743,7074,89346.6337548304,f,f,'"MW"=>"None", "TSO"=>"None", "oid"=>"32517", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>"HelWin1", "symbol"=>"DC-Line", "country"=>"DE", "t9_code"=>"0", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"0", "oneCircuit"=>"0", "CreatedDate"=>"None", "DeletedDate"=>"None", "ModifiedDate"=>"1.545224101e+12", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(8.12610708224912 54.310749538123,8.238373 54.256401,9.32699442549698 53.9319562532009)' 5567,5744,5787,139209.866527364,f,f,'"MW"=>"None", "TSO"=>"None", "oid"=>"32512", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>"DolWin1", "symbol"=>"DC-Line", "country"=>"DE", "t9_code"=>"0", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"0", "oneCircuit"=>"0", "CreatedDate"=>"None", "DeletedDate"=>"None", "ModifiedDate"=>"1.545224147e+12", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(6.84493115764205 53.880869,6.909027 53.880869,7.116394 53.835512,7.36358600000001 53.396432,7.32101399999999 53.112163,7.33612100000001 52.893992,7.16075117704058 52.8485079587114)' 5570,5745,8272,99066.5793764307,f,f,'"MW"=>"None", "TSO"=>"None", "oid"=>"32515", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>"DolWin3", "symbol"=>"DC-Line", "country"=>"DE", "t9_code"=>"0", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"0", "oneCircuit"=>"0", "CreatedDate"=>"None", "DeletedDate"=>"None", "ModifiedDate"=>"1.545224133e+12", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(6.84423599483409 53.8134043878533,6.71127300000001 53.693454,6.65634200000001 53.59821,6.73461900000001 53.55581,7.112274 53.45126,7.05596900000001 
53.340713,7.237244 53.26932,7.223511 53.18135,7.223511 53.1805270078955)' -14803,5751,5803,280301.445474794,f,t,,'LINESTRING(6.75668661933496 53.437616158174,6.838989 53.664171,6.96258499999999 53.785238,7.34298700000001 53.882488,7.80029300000001 54.517096,8.20678699999999 55.297102,8.86005375885099 55.4336013425692)' +14803,5751,5803,280301.445474794,f,f,,'LINESTRING(6.75668661933496 53.437616158174,6.838989 53.664171,6.96258499999999 53.785238,7.34298700000001 53.882488,7.80029300000001 54.517096,8.20678699999999 55.297102,8.86005375885099 55.4336013425692)' 14821,5749,6363,575352.425009444,f,f,,'LINESTRING(6.83036734046461 53.4374933986115,6.253967 53.645452,6.33636499999999 55.776573,6.34597800000001 56.029855,6.34597800000001 56.030622,6.43661500000001 58.130121,6.90176957000565 58.2653404287817)' 5568,5768,5787,131420.09609615,f,f,'"MW"=>"None", "TSO"=>"None", "oid"=>"32513", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>"DolWin2", "symbol"=>"DC-Line", "country"=>"DE", "t9_code"=>"0", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"0", "oneCircuit"=>"0", "CreatedDate"=>"None", "DeletedDate"=>"None", "ModifiedDate"=>"1.545224159e+12", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(7.11083415172816 53.9630966319811,7.07107499999999 53.80795,7.301788 53.39807,7.267456 53.110514,7.29354899999999 52.907246,7.16070024970726 52.8485606886388)' 12932,5770,5773,6905.52230262641,f,t,,'LINESTRING(7.15460523215685 53.4027398808691,7.24823000000001 53.375956)' -14848,5858,6358,574884.998052791,f,t,,'LINESTRING(6.81690675921544 58.6338502746805,6.63024900000001 58.249559,6.78268399999999 57.579197,7.17544599999999 56.532986,7.17407200000001 56.5345,7.46521000000001 55.776573,7.46521000000001 55.776573,7.64099100000001 55.312736,8.458099 54.316523,9.394684 53.934262)' +14848,5858,6358,574884.998052791,f,f,,'LINESTRING(6.81690675921544 58.6338502746805,6.63024900000001 58.249559,6.78268399999999 57.579197,7.17544599999999 
56.532986,7.17407200000001 56.5345,7.46521000000001 55.776573,7.46521000000001 55.776573,7.64099100000001 55.312736,8.458099 54.316523,9.394684 53.934262)' 5581,5893,6072,59184.4227659405,f,f,'"MW"=>"None", "TSO"=>"None", "oid"=>"32527", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>" ", "symbol"=>"DC-Line", "country"=>"UK", "t9_code"=>"222.1.2", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"1", "oneCircuit"=>"0", "CreatedDate"=>"None", "DeletedDate"=>"None", "ModifiedDate"=>"None", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(-4.94702447012386 55.0727948492206,-5.137482 55.042188,-5.62500000000001 54.890036,-5.631866 54.887667,-5.7332134509551 54.813550429852)' 5580,5893,6072,58741.4601812995,f,f,'"MW"=>"None", "TSO"=>"None", "oid"=>"32526", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>" ", "symbol"=>"DC-Line", "country"=>"UK", "t9_code"=>"222.1.1", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"1", "oneCircuit"=>"0", "CreatedDate"=>"None", "DeletedDate"=>"None", "ModifiedDate"=>"None", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(-4.94689333475508 55.0726735779237,-5.045471 55.009914,-5.59616099999999 54.840245,-5.62500000000001 54.834709,-5.73306677066227 54.8134313531551)' 8009,5897,5936,363085.503577327,f,f,'"MW"=>"None", "TSO"=>"None", "oid"=>"70191", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>"Western HVDC link", "symbol"=>"DC-Line", "country"=>"UK", "t9_code"=>"None", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"0", "oneCircuit"=>"0", "CreatedDate"=>"1.514994622e+12", "DeletedDate"=>"None", "ModifiedDate"=>"1.51499467e+12", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(-3.18595885129092 53.213699479605,-3.158569 53.308724,-3.40988200000001 53.511735,-4.081421 53.803084,-5.158081 54.013418,-5.28442399999999 54.866334,-5.177307 55.345546,-4.88616899999999 55.586883,-4.8806877889882 
55.7044245716822)' 14815,5937,6086,242400.41935291,f,f,,'LINESTRING(-3.12293971810515 53.2087645354697,-3.13934300000001 53.266034,-3.368683 53.377594,-5.18280000000001 53.495399,-5.62500000000001 53.519084,-5.62500000000001 53.519084,-6.101532 53.503568,-6.61057668606004 53.483977180569)' -14804,5949,6684,695432.776022422,f,t,,'LINESTRING(6.64773945778347 59.5995729910866,6.483307 59.539192,6.374817 59.538495,6.24847399999999 59.510636,6.196289 59.448566,5.898285 59.321981,5.64697299999999 59.234284,5.62500000000001 59.223042,4.81338500000001 58.813742,2.03384400000001 57.374679,0 56.170023,-0.650940000000012 55.776573,-1.55838055228731 55.2221613174321)' +14804,5949,6684,695432.776022422,f,f,,'LINESTRING(6.64773945778347 59.5995729910866,6.483307 59.539192,6.374817 59.538495,6.24847399999999 59.510636,6.196289 59.448566,5.898285 59.321981,5.64697299999999 59.234284,5.62500000000001 59.223042,4.81338500000001 58.813742,2.03384400000001 57.374679,0 56.170023,-0.650940000000012 55.776573,-1.55838055228731 55.2221613174321)' 5635,6300,6348,93313.2906756649,f,f,'"MW"=>"None", "TSO"=>"None", "oid"=>"32585", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>"150", "symbol"=>"DC-Line", "country"=>"SE", "t9_code"=>"0", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"0", "oneCircuit"=>"0", "CreatedDate"=>"None", "DeletedDate"=>"None", "ModifiedDate"=>"None", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(18.2272491895352 57.5711315582343,17.274628 57.645401,16.875 57.674052,16.6818074486274 57.692364166947)' 14819,6311,6416,122337.134741418,f,f,,'LINESTRING(10.2163282994747 57.1311139024238,10.567474 57.20771,10.737762 57.192832,10.972595 57.230016,11.25 57.33171,11.532898 57.436081,11.867981 57.556366,12.0227165657676 57.561507168045)' 14809,6311,6416,122935.90852816,f,f,,'LINESTRING(10.2163571716117 57.1310010356663,10.366974 57.123569,10.578461 57.16678,10.740509 57.15263,11.001434 57.197296,11.174469 57.255281,11.25 
57.282754,11.56723 57.399104,12.0227887239052 57.5613889668514)' From 210ef806794331b9b8193d10897854ed19618cfa Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Fri, 9 Aug 2024 17:40:02 +0200 Subject: [PATCH 083/100] Updated link 8394 and parameter_corrections: Continuation of North-Sea-Link. --- data/entsoegridkit/links.csv | 2 +- data/parameter_corrections.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/data/entsoegridkit/links.csv b/data/entsoegridkit/links.csv index 4a94c32bb..abcaf0cc1 100644 --- a/data/entsoegridkit/links.csv +++ b/data/entsoegridkit/links.csv @@ -58,6 +58,6 @@ link_id,bus0,bus1,length,underground,under_construction,tags,geometry 14818,6586,6618,257364.279393886,f,f,,'LINESTRING(21.3559064590049 61.0800030227353,21.303864 61.005076,20.946808 60.801394,18.153534 60.501202,18.007965 60.483615,17.171631 60.503906,17.0593630437863 60.5503864910584)' 14817,6589,6618,197128.229552834,f,f,,'LINESTRING(21.3557421230034 61.0800501553429,20.902863 60.846249,18.224945 60.556604,18.0193872312079 60.533018071939)' 14812,6620,6623,140169.735736189,f,f,,'LINESTRING(22.3045576957813 60.4368452717433,21.404114 60.329667,19.8472351583549 60.129935739173)' -8394,6684,6696,21158.5735245602,f,t,'"MW"=>"None", "TSO"=>"None", "oid"=>"89791", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>"None", "symbol"=>"DC-Line", "country"=>"NO", "t9_code"=>"None", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"0", "oneCircuit"=>"0", "CreatedDate"=>"1.518010133e+12", "DeletedDate"=>"None", "ModifiedDate"=>"None", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(6.64851407057135 59.5996162767494,6.99238592942864 59.5246589234811)' +8394,6684,6696,21158.5735245602,f,f,'"MW"=>"None", "TSO"=>"None", "oid"=>"89791", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>"None", "symbol"=>"DC-Line", "country"=>"NO", "t9_code"=>"None", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"0", 
"oneCircuit"=>"0", "CreatedDate"=>"1.518010133e+12", "DeletedDate"=>"None", "ModifiedDate"=>"None", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(6.64851407057135 59.5996162767494,6.99238592942864 59.5246589234811)' 5569,5787,8272,38561.1931761179,f,t,'"MW"=>"None", "TSO"=>"None", "oid"=>"32514", "ext1"=>"None", "EIC_2"=>"None", "EIC_3"=>"None", "EIC_4"=>"None", "text_"=>"DolWin 3", "symbol"=>"DC-Line", "country"=>"DE", "t9_code"=>"0", "visible"=>"1", "EIC_code"=>"None", "tie_line"=>"0", "oneCircuit"=>"0", "CreatedDate"=>"None", "DeletedDate"=>"None", "ModifiedDate"=>"1.489072219e+12", "Internalcomments"=>"None", "visible_on_printed"=>"1"','LINESTRING(7.223511 53.1805270078955,7.223511 53.179704,7.21527100000001 53.121229,7.24273699999999 52.932086,7.16056753068224 52.8486333236236)' 14813,7053,7430,192856.020480538,f,f,,'LINESTRING(10.8823542109264 53.948125809387,11.25 54.061,11.657867 54.186548,12.208557 54.386955,12.236023 54.402946,12.43515 54.541003,12.602692 54.684153,12.745514 54.844199,12.744141 54.842618,12.87735 54.979978,12.947388 55.077581,12.9299984288384 55.0630403498842)' diff --git a/data/parameter_corrections.yaml b/data/parameter_corrections.yaml index df15738af..3d19bed8d 100644 --- a/data/parameter_corrections.yaml +++ b/data/parameter_corrections.yaml @@ -15,6 +15,7 @@ Link: "115000": 1200 # Caithness Moray HVDC index: "14804": 1400 # North-Sea link (NSN Link) + "8394": 1400 # North-Sea Link (NSN Link) continuation "14822": 700 # NO-DK Skagerrak 4 "14827": 440 # NO-DK Skagerrak 3 "14810": 500 # NO-DK Skagerrak 1-2 From e33edfe3796fcb12c0c66bb944fd1054cb65e0eb Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Wed, 14 Aug 2024 14:13:57 +0200 Subject: [PATCH 084/100] Major update: fix simplify_network, fix Corsica, updated build_osm_network to include lines overpassing nodes. 
--- scripts/base_network.py | 14 +- scripts/build_osm_network.py | 315 +++++++++++++++++++++++++++++++---- scripts/clean_osm_data.py | 35 +++- scripts/simplify_network.py | 46 +++++ 4 files changed, 366 insertions(+), 44 deletions(-) diff --git a/scripts/base_network.py b/scripts/base_network.py index 254cb9053..f8ce9faf2 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -166,11 +166,13 @@ def _load_buses_from_eg(eg_buses, europe_shape, config): v_nom_max = max(config["lines"]["types"].keys()) # Quick fix: - buses_with_v_nom_to_keep_b = (v_nom_min <= buses.v_nom) & ( - buses.v_nom <= v_nom_max - ) | buses.v_nom.isnull() + buses_with_v_nom_to_keep_b = ( + (v_nom_min <= buses.v_nom) & (buses.v_nom <= v_nom_max) + | (buses.v_nom.isnull()) + | (buses.carrier == "DC") + ) - logger.info(f"Removing buses outside of range {v_nom_min} - {v_nom_max} V") + logger.info(f"Removing buses outside of range AC {v_nom_min} - {v_nom_max} V") return pd.DataFrame(buses.loc[buses_in_europe_b & buses_with_v_nom_to_keep_b]) @@ -536,7 +538,9 @@ def _set_electrical_parameters_converters(converters, config): converters["p_max_pu"] = p_max_pu converters["p_min_pu"] = -p_max_pu - converters["p_nom"] = 2000 + # if column "p_nom" does not exist, set to 2000 + if "p_nom" not in converters: + converters["p_nom"] = 2000 # Converters are combined with links converters["under_construction"] = False diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index b96c43321..795712067 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -5,6 +5,7 @@ import logging import os +import string import geopandas as gpd import numpy as np @@ -12,7 +13,7 @@ from _benchmark import memory_logger from _helpers import configure_logging, set_scenario_config from shapely.geometry import LineString, Point -from shapely.ops import linemerge, split +from shapely.ops import linemerge, nearest_points, split from tqdm import tqdm logger = 
logging.getLogger(__name__) @@ -451,62 +452,170 @@ def get_transformers(buses, lines): return df_transformers -def get_converters(buses): +# def get_converters(buses): +# """ +# Function to create fake converter lines that connect buses of the same +# station_id of different polarities. +# """ + +# df_converters = [] + +# for g_name, g_value in buses.sort_values("voltage", ascending=True).groupby( +# by="station_id" +# ): +# # note: by construction there cannot be more that two buses with the same station_id and same voltage +# n_voltages = len(g_value) + +# # A converter stations should have both AC and DC parts +# if g_value["dc"].any() & ~g_value["dc"].all(): +# dc_voltage = g_value[g_value.dc]["voltage"].values + +# for u in dc_voltage: +# id_0 = g_value[g_value["dc"] & g_value["voltage"].isin([u])].index[0] + +# ac_voltages = g_value[~g_value.dc]["voltage"] +# # A converter is added between a DC nodes and AC one with the closest voltage +# id_1 = ac_voltages.sub(u).abs().idxmin() + +# geom_conv = LineString( +# [g_value.geometry.loc[id_0], g_value.geometry.loc[id_1]] +# ) + +# # check if bus is a dclink boundary point, only then add converter +# df_converters.append( +# [ +# f"convert_{g_name}_{id_0}", # "line_id" +# g_value["bus_id"].loc[id_0], # "bus0" +# g_value["bus_id"].loc[id_1], # "bus1" +# False, # "underground" +# False, # "under_construction" +# g_value.country.loc[id_0], # "country" +# geom_conv, # "geometry" +# ] +# ) + +# # name of the columns +# conv_columns = [ +# "converter_id", +# "bus0", +# "bus1", +# "underground", +# "under_construction", +# "country", +# "geometry", +# ] + +# df_converters = gpd.GeoDataFrame(df_converters, columns=conv_columns).reset_index() + +# return df_converters + + +def _find_closest_bus(row, buses, distance_crs, tol=5000): """ - Function to create fake converter lines that connect buses of the same - station_id of different polarities. 
+ Find the closest bus to a given bus based on geographical distance and + country. + + Parameters: + - row: The bus_id of the bus to find the closest bus for. + - buses: A GeoDataFrame containing information about all the buses. + - distance_crs: The coordinate reference system to use for distance calculations. + - tol: The tolerance distance within which a bus is considered closest (default: 5000). + Returns: + - closest_bus_id: The bus_id of the closest bus, or None if no bus is found within the distance and same country. """ + gdf_buses = buses.copy() + gdf_buses = gdf_buses.to_crs(distance_crs) + # Get the geometry of the bus with bus_id = link_bus_id + bus = gdf_buses[gdf_buses["bus_id"] == row] + bus_geom = bus.geometry.values[0] - df_converters = [] + gdf_buses_filtered = gdf_buses[gdf_buses["dc"] == False] - for g_name, g_value in buses.sort_values("voltage", ascending=True).groupby( - by="station_id" - ): - # note: by construction there cannot be more that two buses with the same station_id and same voltage - n_voltages = len(g_value) + # Find the closest point in the filtered buses + nearest_geom = nearest_points(bus_geom, gdf_buses_filtered.union_all())[1] - # A converter stations should have both AC and DC parts - if g_value["dc"].any() & ~g_value["dc"].all(): - dc_voltage = g_value[g_value.dc]["voltage"].values + # Get the bus_id of the closest bus + closest_bus = gdf_buses_filtered.loc[gdf_buses["geometry"] == nearest_geom] - for u in dc_voltage: - id_0 = g_value[g_value["dc"] & g_value["voltage"].isin([u])].index[0] + # check if closest_bus_id is within the distance + within_distance = ( + closest_bus.to_crs(distance_crs).distance(bus.to_crs(distance_crs), align=False) + ).values[0] <= tol - ac_voltages = g_value[~g_value.dc]["voltage"] - # A converter is added between a DC nodes and AC one with the closest voltage - id_1 = ac_voltages.sub(u).abs().idxmin() + in_same_country = closest_bus.country.values[0] == bus.country.values[0] - geom_conv = 
LineString( - [g_value.geometry.loc[id_0], g_value.geometry.loc[id_1]] - ) + if within_distance and in_same_country: + closest_bus_id = closest_bus.bus_id.values[0] + else: + closest_bus_id = None - # check if bus is a dclink boundary point, only then add converter - df_converters.append( - [ - f"convert_{g_name}_{id_0}", # "line_id" - g_value["bus_id"].loc[id_0], # "bus0" - g_value["bus_id"].loc[id_1], # "bus1" - False, # "underground" - False, # "under_construction" - g_value.country.loc[id_0], # "country" - geom_conv, # "geometry" - ] - ) + return closest_bus_id + + +def _get_converters(buses, links, distance_crs, tol): + """ + Get the converters for the given buses and links. Connecting link endings + to closest AC bus. + + Parameters: + - buses (pandas.DataFrame): DataFrame containing information about buses. + - links (pandas.DataFrame): DataFrame containing information about links. + Returns: + - gdf_converters (geopandas.GeoDataFrame): GeoDataFrame containing information about converters. + """ + converters = [] + for idx, row in links.iterrows(): + for conv in range(2): + link_end = row[f"bus{conv}"] + # HVDC Gotland is connected to 130 kV grid, closest HVAC bus is further away + + closest_bus = _find_closest_bus(link_end, buses, distance_crs, tol=40000) + + if closest_bus is None: + continue + + converter_id = f"converter/{row['link_id']}_{conv}" + logger.info( + f"Added converter #{conv+1}/2 for link {row['link_id']}:{converter_id}." 
+ ) + + # Create the converter + converters.append( + [ + converter_id, # "line_id" + link_end, # "bus0" + closest_bus, # "bus1" + row["p_nom"], # "p_nom" + False, # "underground" + False, # "under_construction" + buses[buses["bus_id"] == closest_bus].country.values[ + 0 + ], # "country" + LineString( + [ + buses[buses["bus_id"] == link_end].geometry.values[0], + buses[buses["bus_id"] == closest_bus].geometry.values[0], + ] + ), # "geometry" + ] + ) - # name of the columns conv_columns = [ "converter_id", "bus0", "bus1", + "p_nom", "underground", "under_construction", "country", "geometry", ] - df_converters = gpd.GeoDataFrame(df_converters, columns=conv_columns).reset_index() + gdf_converters = gpd.GeoDataFrame( + converters, columns=conv_columns, crs=geo_crs + ).reset_index() - return df_converters + return gdf_converters def connect_stations_same_station_id(lines, buses): @@ -669,6 +778,133 @@ def merge_stations_lines_by_station_id_and_voltage( return lines, links, buses +def _split_linestring_by_point(linestring, points): + """ + Function to split a linestring geometry by multiple inner points. + + Parameters + ---------- + lstring : LineString + Linestring of the line to be split + points : list + List of points to split the linestring + + Return + ------ + list_lines : list + List of linestring to split the line + """ + + list_linestrings = [linestring] + + for p in points: + # execute split to all lines and store results + temp_list = [split(l, p) for l in list_linestrings] + # nest all geometries + list_linestrings = [lstring for tval in temp_list for lstring in tval.geoms] + + return list_linestrings + + +def fix_overpassing_lines(lines, buses, distance_crs, tol=1): + """ + Fix overpassing lines by splitting them at nodes within a given tolerance, + to include the buses being overpassed. + + Parameters: + - lines (GeoDataFrame): The lines to be fixed. + - buses (GeoDataFrame): The buses representing nodes. 
+ - distance_crs (str): The coordinate reference system (CRS) for distance calculations. + - tol (float): The tolerance distance in meters for determining if a bus is within a line. + Returns: + - lines (GeoDataFrame): The fixed lines. + - buses (GeoDataFrame): The buses representing nodes. + """ + + lines_to_add = [] # list of lines to be added + lines_to_split = [] # list of lines that have been split + + lines_epsgmod = lines.to_crs(distance_crs) + buses_epsgmod = buses.to_crs(distance_crs) + + # set tqdm options for substation ids + tqdm_kwargs_substation_ids = dict( + ascii=False, + unit=" lines", + total=lines.shape[0], + desc="Verify lines overpassing nodes ", + ) + + for l in tqdm(lines.index, **tqdm_kwargs_substation_ids): + # bus indices being within tolerance from the line + bus_in_tol_epsg = buses_epsgmod[ + buses_epsgmod.geometry.distance(lines_epsgmod.geometry.loc[l]) <= tol + ] + + # exclude endings of the lines + bus_in_tol_epsg = bus_in_tol_epsg[ + ( + ( + bus_in_tol_epsg.geometry.distance( + lines_epsgmod.geometry.loc[l].boundary.geoms[0] + ) + > tol + ) + | ( + bus_in_tol_epsg.geometry.distance( + lines_epsgmod.geometry.loc[l].boundary.geoms[1] + ) + > tol + ) + ) + ] + + if not bus_in_tol_epsg.empty: + # add index of line to split + lines_to_split.append(l) + + buses_locs = buses.geometry.loc[bus_in_tol_epsg.index] + + # get new line geometries + new_geometries = _split_linestring_by_point(lines.geometry[l], buses_locs) + n_geoms = len(new_geometries) + + # create temporary copies of the line + df_append = gpd.GeoDataFrame([lines.loc[l]] * n_geoms) + # update geometries + df_append["geometry"] = new_geometries + # update name of the line if there are multiple line segments + df_append["line_id"] = [ + str(df_append["line_id"].iloc[0]) + + (f"-{letter}" if n_geoms > 1 else "") + for letter in string.ascii_lowercase[:n_geoms] + ] + + lines_to_add.append(df_append) + + if not lines_to_add: + return lines, buses + + df_to_add = 
gpd.GeoDataFrame(pd.concat(lines_to_add, ignore_index=True)) + df_to_add.set_crs(lines.crs, inplace=True) + df_to_add.set_index(lines.index[-1] + df_to_add.index, inplace=True) + + # update length + df_to_add["length"] = df_to_add.to_crs(distance_crs).geometry.length + + # update line endings + df_to_add = line_endings_to_bus_conversion(df_to_add) + + # remove original lines + lines.drop(lines_to_split, inplace=True) + + lines = df_to_add if lines.empty else pd.concat([lines, df_to_add]) + + lines = gpd.GeoDataFrame(lines.reset_index(drop=True), crs=lines.crs) + + return lines, buses + + def build_network( inputs, outputs, @@ -741,6 +977,11 @@ def build_network( lines = line_endings_to_bus_conversion(lines) links = line_endings_to_bus_conversion(links) + logger.info( + "Fixing lines overpassing nodes: Connecting nodes and splittling lines." + ) + lines, buses = fix_overpassing_lines(lines, buses, distance_crs, tol=1) + # METHOD to merge buses with same voltage and within tolerance tol = snakemake.config["electricity_network"]["osm_group_tolerance_buses"] logger.info(f"Aggregating close substations: Enabled with tolerance {tol} m") @@ -759,7 +1000,7 @@ def build_network( transformers = get_transformers(buses, lines) # get converters: currently modelled as links connecting buses with different polarity - converters = get_converters(buses) + converters = _get_converters(buses, links, distance_crs, tol) logger.info("Saving outputs") diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 9992dba6d..bf9d1c4ab 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -802,7 +802,7 @@ def _filter_by_voltage(df, min_voltage=200000): # Keep numeric strings list_voltages = list_voltages[np.vectorize(str.isnumeric)(list_voltages)] list_voltages = list_voltages.astype(int) - list_voltages = list_voltages[list_voltages >= int(min_voltage_ac)] + list_voltages = list_voltages[list_voltages >= int(min_voltage)] list_voltages = 
list_voltages.astype(str) bool_voltages = df["voltage"].apply(_check_voltage, list_voltages=list_voltages) @@ -1670,6 +1670,35 @@ def _extend_lines_to_substations(gdf_lines, gdf_substations_polygon): return gdf_lines +# Function to bridge gaps between all lines +def _bridge_lines(lines): + bridged_lines = [] + for i in range(len(lines) - 1): + bridged_lines.append(lines[i]) + + # Get the endpoints of the current line and the startpoints of the next line + end_points = [lines[i].coords[-1], lines[i].coords[0]] + start_points = [lines[i + 1].coords[0], lines[i + 1].coords[-1]] + + # Find the closest pair of points between the two LineStrings + min_distance = float("inf") + closest_pair = None + + for end_point in end_points: + for start_point in start_points: + distance = LineString([end_point, start_point]).length + if distance < min_distance: + min_distance = distance + closest_pair = (end_point, start_point) + + # Create a bridge between the closest points + bridge = LineString(closest_pair) + bridged_lines.append(bridge) + + bridged_lines.append(lines[-1]) + return bridged_lines + + if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake @@ -1780,7 +1809,9 @@ def _extend_lines_to_substations(gdf_lines, gdf_substations_polygon): df_links.loc[:, "geometry"] = df_links.apply(_create_single_link, axis=1) df_links = _finalise_links(df_links) - gdf_links = gpd.GeoDataFrame(df_links, geometry="geometry", crs=crs) + gdf_links = gpd.GeoDataFrame(df_links, geometry="geometry", crs=crs).set_index( + "link_id" + ) # Add line endings to substations path_country_shapes = snakemake.input.country_shapes diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 036ca0815..651e8ea29 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -306,14 +306,24 @@ def split_links(nodes): seen = set() + # Corsica substation + node_corsica = find_closest_bus( + n, + x=9.44802, + y=42.52842, + tol=2000, # 
Tolerance needed to only return the bus if the region is actually modelled + ) + # Supernodes are endpoints of links, identified by having lass then two neighbours or being an AC Bus # An example for the latter is if two different links are connected to the same AC bus. + # Manually keep Corsica substation as a supernode supernodes = { m for m in nodes if ( (len(G.adj[m]) < 2 or (set(G.adj[m]) - nodes)) or (n.buses.loc[m, "carrier"] == "AC") + or (m == node_corsica) ) } @@ -530,6 +540,42 @@ def cluster( return clustering.network, clustering.busmap +def find_closest_bus(n, x, y, tol=2000): + """ + Find the index of the closest bus to the given coordinates within a specified tolerance. + Parameters: + n (pypsa.Network): The network object. + x (float): The x-coordinate (longitude) of the target location. + y (float): The y-coordinate (latitude) of the target location. + tol (float): The distance tolerance in meters. Default is 2000 meters. + + Returns: + int: The index of the closest bus to the target location within the tolerance. + Returns None if no bus is within the tolerance. 
+ """ + # Conversion factors + meters_per_degree_lat = 111139 # Meters per degree of latitude + meters_per_degree_lon = 111139 * np.cos( + np.radians(y) + ) # Meters per degree of longitude at the given latitude + + x0 = np.array(n.buses.x) + y0 = np.array(n.buses.y) + + # Calculate distances in meters + dist = np.sqrt( + ((x - x0) * meters_per_degree_lon) ** 2 + + ((y - y0) * meters_per_degree_lat) ** 2 + ) + + # Find the closest bus within the tolerance + min_dist = dist.min() + if min_dist <= tol: + return n.buses.index[dist.argmin()] + else: + return None + + if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake From b143f49d651fe15e31c72872073fa96f87f84826 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Wed, 14 Aug 2024 18:17:30 +0200 Subject: [PATCH 085/100] remove config backup --- config/config_backup.yaml | 1213 ------------------------------------- 1 file changed, 1213 deletions(-) delete mode 100644 config/config_backup.yaml diff --git a/config/config_backup.yaml b/config/config_backup.yaml deleted file mode 100644 index 9ebeea351..000000000 --- a/config/config_backup.yaml +++ /dev/null @@ -1,1213 +0,0 @@ -# SPDX-FileCopyrightText: : 2017-2024 The PyPSA-Eur Authors -# -# SPDX-License-Identifier: CC0-1.0 - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#top-level-configuration -version: 0.11.0 -tutorial: false - -logging: - level: INFO - format: '%(levelname)s:%(name)s:%(message)s' - -private: - keys: - entsoe_api: - -remote: - ssh: z1 - path: ~/scratch/projects/pypsa-eur - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#run -run: - prefix: "" - name: "europe-ua-md-gridkit-custom" - scenarios: - enable: false - file: config/scenarios.yaml - disable_progressbar: false - shared_resources: - policy: false - exclude: [] - shared_cutouts: true - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#foresight -foresight: overnight - -# docs in 
https://pypsa-eur.readthedocs.io/en/latest/configuration.html#scenario -# Wildcard docs in https://pypsa-eur.readthedocs.io/en/latest/wildcards.html -scenario: - simpl: - - '' - ll: - - v1.0 - clusters: - - 320 - opts: - - '' - sector_opts: - - '' - planning_horizons: - # - 2020 - - 2030 - # - 2040 - # - 2050 - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#countries -countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK', 'UA', 'MD'] - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#snapshots -snapshots: - start: '2013-01-01' - end: '2014-01-01' - inclusive: 'left' - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#enable -enable: - retrieve: auto - prepare_links_p_nom: false - retrieve_databundle: true - retrieve_cost_data: true - build_cutout: false - retrieve_cutout: true - custom_busmap: true - drop_leap_day: true - - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#co2-budget -co2_budget: - 2020: 0.701 - 2025: 0.524 - 2030: 0.297 - 2035: 0.150 - 2040: 0.071 - 2045: 0.032 - 2050: 0.000 - -electricity_network: - base_network: gridkit # Options: gridkit, osm-prebuilt, osm-raw (built from scratch using OSM data, takes longer) - osm_group_tolerance_buses: 5000 # unit: meters, default 5000 - Buses within this distance are grouped together - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity -electricity: - voltages: [200., 220., 300., 380., 400., 500., 750.] 
- gaslimit_enable: false - gaslimit: false - co2limit_enable: false - co2limit: 7.75e+7 - co2base: 1.487e+9 - - operational_reserve: - activate: false - epsilon_load: 0.02 - epsilon_vres: 0.02 - contingency: 4000 - - max_hours: - battery: 6 - H2: 168 - - extendable_carriers: - Generator: [solar, solar-hsat, onwind, offwind-ac, offwind-dc, offwind-float, OCGT, CCGT] - StorageUnit: [] # battery, H2 - Store: [battery, H2] - Link: [] # H2 pipeline - - powerplants_filter: (DateOut >= 2023 or DateOut != DateOut) and not (Country == 'Germany' and Fueltype == 'Nuclear') - custom_powerplants: false - everywhere_powerplants: [] - - conventional_carriers: [nuclear, oil, OCGT, CCGT, coal, lignite, geothermal, biomass] - renewable_carriers: [solar, onwind, offwind-ac, offwind-dc, hydro] - # renewable_carriers: [solar, solar-hsat, onwind, offwind-ac, offwind-dc, offwind-float, hydro] - - estimate_renewable_capacities: - enable: true - from_opsd: true - year: 2020 - expansion_limit: false - technology_mapping: - Offshore: [offwind-ac, offwind-dc, offwind-float] - Onshore: [onwind] - PV: [solar] - - autarky: - enable: false - by_country: false - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#atlite -atlite: - default_cutout: europe-2013-era5 - nprocesses: 4 - show_progress: false - cutouts: - # use 'base' to determine geographical bounds and time span from config - # base: - # module: era5 - europe-2013-era5: - module: era5 # in priority order - x: [-12., 42.] - y: [33., 72] - dx: 0.3 - dy: 0.3 - time: ['2013', '2013'] - europe-2013-sarah: - module: [sarah, era5] # in priority order - x: [-12., 42.] 
- y: [33., 65] - dx: 0.2 - dy: 0.2 - time: ['2013', '2013'] - sarah_interpolate: false - sarah_dir: - features: [influx, temperature] - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#renewable -renewable: - onwind: - cutout: europe-2013-era5 - resource: - method: wind - turbine: Vestas_V112_3MW - add_cutout_windspeed: true - capacity_per_sqkm: 3 - # correction_factor: 0.93 - corine: - grid_codes: [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32] - distance: 1000 - distance_grid_codes: [1, 2, 3, 4, 5, 6] - luisa: false - # grid_codes: [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242] - # distance: 1000 - # distance_grid_codes: [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242] - natura: true - excluder_resolution: 100 - clip_p_max_pu: 1.e-2 - offwind-ac: - cutout: europe-2013-era5 - resource: - method: wind - turbine: NREL_ReferenceTurbine_2020ATB_5.5MW - add_cutout_windspeed: true - capacity_per_sqkm: 2 - correction_factor: 0.8855 - corine: [44, 255] - luisa: false # [0, 5230] - natura: true - ship_threshold: 400 - max_depth: 60 - max_shore_distance: 30000 - excluder_resolution: 200 - clip_p_max_pu: 1.e-2 - offwind-dc: - cutout: europe-2013-era5 - resource: - method: wind - turbine: NREL_ReferenceTurbine_2020ATB_5.5MW - add_cutout_windspeed: true - capacity_per_sqkm: 2 - correction_factor: 0.8855 - corine: [44, 255] - luisa: false # [0, 5230] - natura: true - ship_threshold: 400 - max_depth: 60 - min_shore_distance: 30000 - excluder_resolution: 200 - clip_p_max_pu: 1.e-2 - offwind-float: - cutout: europe-2013-era5 - resource: - method: wind - turbine: NREL_ReferenceTurbine_5MW_offshore - # ScholzPhd Tab 4.3.1: 10MW/km^2 - capacity_per_sqkm: 2 - correction_factor: 0.8855 - # proxy for wake losses - # from 10.1016/j.energy.2018.08.153 - # until done more rigorously in #153 - corine: [44, 255] - natura: true - ship_threshold: 400 - excluder_resolution: 200 - min_depth: 60 - 
max_depth: 1000 - clip_p_max_pu: 1.e-2 - solar: - cutout: europe-2013-sarah - resource: - method: pv - panel: CSi - orientation: - slope: 35. - azimuth: 180. - capacity_per_sqkm: 5.1 - # correction_factor: 0.854337 - corine: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 26, 31, 32] - luisa: false # [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242, 1310, 1320, 1330, 1410, 1421, 1422, 2110, 2120, 2130, 2210, 2220, 2230, 2310, 2410, 2420, 3210, 3320, 3330] - natura: true - excluder_resolution: 100 - clip_p_max_pu: 1.e-2 - solar-hsat: - cutout: europe-2013-sarah - resource: - method: pv - panel: CSi - orientation: - slope: 35. - azimuth: 180. - tracking: horizontal - capacity_per_sqkm: 4.43 # 15% higher land usage acc. to NREL - corine: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 26, 31, 32] - luisa: false # [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242, 1310, 1320, 1330, 1410, 1421, 1422, 2110, 2120, 2130, 2210, 2220, 2230, 2310, 2410, 2420, 3210, 3320, 3330] - natura: true - excluder_resolution: 100 - clip_p_max_pu: 1.e-2 - hydro: - cutout: europe-2013-era5 - carriers: [ror, PHS, hydro] - PHS_max_hours: 6 - hydro_max_hours: "energy_capacity_totals_by_country" # one of energy_capacity_totals_by_country, estimate_by_large_installations or a float - flatten_dispatch: false - flatten_dispatch_buffer: 0.2 - clip_min_inflow: 1.0 - eia_norm_year: false - eia_correct_by_capacity: false - eia_approximate_missing: false - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#conventional -conventional: - unit_commitment: false - dynamic_fuel_price: false - nuclear: - p_max_pu: "data/nuclear_p_max_pu.csv" # float of file name - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#lines -lines: - types: - 200.0: Al/St 240/40 2-bundle 220.0 - 220.0: Al/St 240/40 2-bundle 220.0 - 300.0: Al/St 240/40 3-bundle 300.0 - 380.0: Al/St 240/40 4-bundle 380.0 - 
400.0: Al/St 240/40 4-bundle 380.0 - 500.0: Al/St 240/40 4-bundle 380.0 - 750.0: Al/St 560/50 4-bundle 750.0 - s_max_pu: 0.7 - s_nom_max: .inf - max_extension: 20000 #MW - length_factor: 1.25 - reconnect_crimea: true - under_construction: 'keep' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity - dynamic_line_rating: - activate: false - cutout: europe-2013-era5 - correction_factor: 0.95 - max_voltage_difference: false - max_line_rating: false - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#links -links: - p_max_pu: 1.0 - p_nom_max: .inf - max_extension: 30000 #MW - include_tyndp: true - under_construction: 'zero' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#transformers -transformers: - x: 0.1 - s_nom: 2000. - type: '' - -# docs-load in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#load -load: - interpolate_limit: 3 - time_shift_for_large_gaps: 1w - manual_adjustments: true # false - scaling_factor: 1.0 - fixed_year: false # false or year (e.g. 2013) - supplement_synthetic: true - -# docs -# TODO: PyPSA-Eur merge issue in prepare_sector_network.py -# regulate what components with which carriers are kept from PyPSA-Eur; -# some technologies are removed because they are implemented differently -# (e.g. 
battery or H2 storage) or have different year-dependent costs -# in PyPSA-Eur-Sec -pypsa_eur: - Bus: - - AC - Link: - - DC - Generator: - - onwind - - offwind-ac - - offwind-dc - - offwind-float - - solar-hsat - - solar - - ror - - nuclear - StorageUnit: - - PHS - - hydro - Store: [] - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#energy -energy: - energy_totals_year: 2019 - base_emissions_year: 1990 - emissions: CO2 - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#biomass -biomass: - year: 2030 - scenario: ENS_Med - classes: - solid biomass: - - Agricultural waste - - Fuelwood residues - - Secondary Forestry residues - woodchips - - Sawdust - - Residues from landscape care - - Municipal waste - not included: - - Sugar from sugar beet - - Rape seed - - "Sunflower, soya seed " - - Bioethanol barley, wheat, grain maize, oats, other cereals and rye - - Miscanthus, switchgrass, RCG - - Willow - - Poplar - - FuelwoodRW - - C&P_RW - biogas: - - Manure solid, liquid - - Sludge - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#solar-thermal -solar_thermal: - clearsky_model: simple # should be "simple" or "enhanced"? - orientation: - slope: 45. - azimuth: 180. 
- cutout: default - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#existing-capacities -existing_capacities: - grouping_years_power: [1920, 1950, 1955, 1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020, 2025] - grouping_years_heat: [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2019] # heat grouping years >= baseyear will be ignored - threshold_capacity: 10 - default_heating_lifetime: 20 - conventional_carriers: - - lignite - - coal - - oil - - uranium - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#sector -sector: - transport: true - heating: true - biomass: true - industry: true - agriculture: true - district_heating: - potential: 0.6 - progress: - 2020: 0.0 - 2025: 0.15 - 2030: 0.3 - 2035: 0.45 - 2040: 0.6 - 2045: 0.8 - 2050: 1.0 - district_heating_loss: 0.15 - cluster_heat_buses: true - heat_demand_cutout: default - bev_dsm_restriction_value: 0.75 - bev_dsm_restriction_time: 7 - transport_heating_deadband_upper: 20. - transport_heating_deadband_lower: 15. 
- ICE_lower_degree_factor: 0.375 - ICE_upper_degree_factor: 1.6 - EV_lower_degree_factor: 0.98 - EV_upper_degree_factor: 0.63 - bev_dsm: true - bev_availability: 0.5 - bev_energy: 0.05 - bev_charge_efficiency: 0.9 - bev_charge_rate: 0.011 - bev_avail_max: 0.95 - bev_avail_mean: 0.8 - v2g: true - land_transport_fuel_cell_share: - 2020: 0 - 2025: 0 - 2030: 0 - 2035: 0 - 2040: 0 - 2045: 0 - 2050: 0 - land_transport_electric_share: - 2020: 0 - 2025: 0.15 - 2030: 0.3 - 2035: 0.45 - 2040: 0.7 - 2045: 0.85 - 2050: 1 - land_transport_ice_share: - 2020: 1 - 2025: 0.85 - 2030: 0.7 - 2035: 0.55 - 2040: 0.3 - 2045: 0.15 - 2050: 0 - transport_electric_efficiency: 53.19 # 1 MWh_el = 53.19*100 km - transport_fuel_cell_efficiency: 30.003 # 1 MWh_H2 = 30.003*100 km - transport_ice_efficiency: 16.0712 # 1 MWh_oil = 16.0712 * 100 km - agriculture_machinery_electric_share: 0 - agriculture_machinery_oil_share: 1 - agriculture_machinery_fuel_efficiency: 0.7 - agriculture_machinery_electric_efficiency: 0.3 - MWh_MeOH_per_MWh_H2: 0.8787 - MWh_MeOH_per_tCO2: 4.0321 - MWh_MeOH_per_MWh_e: 3.6907 - shipping_hydrogen_liquefaction: false - shipping_hydrogen_share: - 2020: 0 - 2025: 0 - 2030: 0 - 2035: 0 - 2040: 0 - 2045: 0 - 2050: 0 - shipping_methanol_share: - 2020: 0 - 2025: 0.15 - 2030: 0.3 - 2035: 0.5 - 2040: 0.7 - 2045: 0.85 - 2050: 1 - shipping_oil_share: - 2020: 1 - 2025: 0.85 - 2030: 0.7 - 2035: 0.5 - 2040: 0.3 - 2045: 0.15 - 2050: 0 - shipping_methanol_efficiency: 0.46 - shipping_oil_efficiency: 0.40 - aviation_demand_factor: 1. - HVC_demand_factor: 1. - time_dep_hp_cop: true - heat_pump_sink_T: 55. 
- reduce_space_heat_exogenously: true - reduce_space_heat_exogenously_factor: - 2020: 0.10 # this results in a space heat demand reduction of 10% - 2025: 0.09 # first heat demand increases compared to 2020 because of larger floor area per capita - 2030: 0.09 - 2035: 0.11 - 2040: 0.16 - 2045: 0.21 - 2050: 0.29 - retrofitting: - retro_endogen: false - cost_factor: 1.0 - interest_rate: 0.04 - annualise_cost: true - tax_weighting: false - construction_index: true - tes: true - tes_tau: - decentral: 3 - central: 180 - boilers: true - resistive_heaters: true - oil_boilers: false - biomass_boiler: true - overdimension_individual_heating: 1.1 #to cover demand peaks bigger than data - chp: true - micro_chp: false - solar_thermal: true - solar_cf_correction: 0.788457 # = >>> 1/1.2683 - marginal_cost_storage: 0. #1e-4 - methanation: true - coal_cc: false - dac: true - co2_vent: false - central_heat_vent: false - allam_cycle: false - hydrogen_fuel_cell: true - hydrogen_turbine: false - SMR: true - SMR_cc: true - regional_methanol_demand: false - regional_oil_demand: false - regional_coal_demand: false - regional_co2_sequestration_potential: - enable: false - attribute: - - conservative estimate Mt - - conservative estimate GAS Mt - - conservative estimate OIL Mt - - conservative estimate aquifer Mt - include_onshore: false - min_size: 3 - max_size: 25 - years_of_storage: 25 - co2_sequestration_potential: 200 - co2_sequestration_cost: 10 - co2_sequestration_lifetime: 50 - co2_spatial: false - co2network: false - co2_network_cost_factor: 1 - cc_fraction: 0.9 - hydrogen_underground_storage: true - hydrogen_underground_storage_locations: - # - onshore # more than 50 km from sea - - nearshore # within 50 km of sea - # - offshore - ammonia: false - min_part_load_fischer_tropsch: 0.5 - min_part_load_methanolisation: 0.3 - min_part_load_methanation: 0.3 - use_fischer_tropsch_waste_heat: 0.25 - use_haber_bosch_waste_heat: 0.25 - use_methanolisation_waste_heat: 0.25 - 
use_methanation_waste_heat: 0.25 - use_fuel_cell_waste_heat: 0.25 - use_electrolysis_waste_heat: 0.25 - electricity_transmission_grid: true - electricity_distribution_grid: true - electricity_distribution_grid_cost_factor: 1.0 - electricity_grid_connection: true - transmission_efficiency: - DC: - efficiency_static: 0.98 - efficiency_per_1000km: 0.977 - H2 pipeline: - efficiency_per_1000km: 1 # 0.982 - compression_per_1000km: 0.018 - gas pipeline: - efficiency_per_1000km: 1 #0.977 - compression_per_1000km: 0.01 - electricity distribution grid: - efficiency_static: 0.97 - H2_network: true - gas_network: false - H2_retrofit: false - H2_retrofit_capacity_per_CH4: 0.6 - gas_network_connectivity_upgrade: 1 - gas_distribution_grid: true - gas_distribution_grid_cost_factor: 1.0 - biomass_spatial: false - biomass_transport: false - biogas_upgrading_cc: false - conventional_generation: - OCGT: gas - biomass_to_liquid: false - biosng: false - limit_max_growth: - enable: false - # allowing 30% larger than max historic growth - factor: 1.3 - max_growth: # unit GW - onwind: 16 # onshore max grow so far 16 GW in Europe https://www.iea.org/reports/renewables-2020/wind - solar: 28 # solar max grow so far 28 GW in Europe https://www.iea.org/reports/renewables-2020/solar-pv - offwind-ac: 35 # offshore max grow so far 3.5 GW in Europe https://windeurope.org/about-wind/statistics/offshore/european-offshore-wind-industry-key-trends-statistics-2019/ - offwind-dc: 35 - max_relative_growth: - onwind: 3 - solar: 3 - offwind-ac: 3 - offwind-dc: 3 - enhanced_geothermal: - enable: false - flexible: true - max_hours: 240 - max_boost: 0.25 - var_cf: true - sustainability_factor: 0.0025 - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#industry -industry: - St_primary_fraction: - 2020: 0.6 - 2025: 0.55 - 2030: 0.5 - 2035: 0.45 - 2040: 0.4 - 2045: 0.35 - 2050: 0.3 - DRI_fraction: - 2020: 0 - 2025: 0 - 2030: 0.05 - 2035: 0.2 - 2040: 0.4 - 2045: 0.7 - 2050: 1 - H2_DRI: 1.7 - 
elec_DRI: 0.322 - Al_primary_fraction: - 2020: 0.4 - 2025: 0.375 - 2030: 0.35 - 2035: 0.325 - 2040: 0.3 - 2045: 0.25 - 2050: 0.2 - MWh_NH3_per_tNH3: 5.166 - MWh_CH4_per_tNH3_SMR: 10.8 - MWh_elec_per_tNH3_SMR: 0.7 - MWh_H2_per_tNH3_electrolysis: 5.93 - MWh_elec_per_tNH3_electrolysis: 0.2473 - MWh_NH3_per_MWh_H2_cracker: 1.46 # https://github.com/euronion/trace/blob/44a5ff8401762edbef80eff9cfe5a47c8d3c8be4/data/efficiencies.csv - NH3_process_emissions: 24.5 - petrochemical_process_emissions: 25.5 - #HVC primary/recycling based on values used in Neumann et al https://doi.org/10.1016/j.joule.2023.06.016, linearly interpolated between 2020 and 2050 - #2020 recycling rates based on Agora https://static.agora-energiewende.de/fileadmin/Projekte/2021/2021_02_EU_CEAP/A-EW_254_Mobilising-circular-economy_study_WEB.pdf - #fractions refer to the total primary HVC production in 2020 - #assumes 6.7 Mtplastics produced from recycling in 2020 - HVC_primary_fraction: - 2020: 1.0 - 2025: 0.9 - 2030: 0.8 - 2035: 0.7 - 2040: 0.6 - 2045: 0.5 - 2050: 0.4 - HVC_mechanical_recycling_fraction: - 2020: 0.12 - 2025: 0.15 - 2030: 0.18 - 2035: 0.21 - 2040: 0.24 - 2045: 0.27 - 2050: 0.30 - HVC_chemical_recycling_fraction: - 2020: 0.0 - 2025: 0.0 - 2030: 0.04 - 2035: 0.08 - 2040: 0.12 - 2045: 0.16 - 2050: 0.20 - HVC_environment_sequestration_fraction: 0. - waste_to_energy: false - waste_to_energy_cc: false - sector_ratios_fraction_future: - 2020: 0.0 - 2025: 0.1 - 2030: 0.3 - 2035: 0.5 - 2040: 0.7 - 2045: 0.9 - 2050: 1.0 - basic_chemicals_without_NH3_production_today: 69. #Mt/a, = 86 Mtethylene-equiv - 17 MtNH3 - HVC_production_today: 52. 
- MWh_elec_per_tHVC_mechanical_recycling: 0.547 - MWh_elec_per_tHVC_chemical_recycling: 6.9 - chlorine_production_today: 9.58 - MWh_elec_per_tCl: 3.6 - MWh_H2_per_tCl: -0.9372 - methanol_production_today: 1.5 - MWh_elec_per_tMeOH: 0.167 - MWh_CH4_per_tMeOH: 10.25 - MWh_MeOH_per_tMeOH: 5.528 - hotmaps_locate_missing: false - reference_year: 2015 - - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#costs -costs: - year: 2030 - version: v0.9.0 - social_discountrate: 0.02 - fill_values: - FOM: 0 - VOM: 0 - efficiency: 1 - fuel: 0 - investment: 0 - lifetime: 25 - "CO2 intensity": 0 - "discount rate": 0.07 - # Marginal and capital costs can be overwritten - # capital_cost: - # onwind: 500 - marginal_cost: - solar: 0.01 - onwind: 0.015 - offwind: 0.015 - hydro: 0. - H2: 0. - electrolysis: 0. - fuel cell: 0. - battery: 0. - battery inverter: 0. - emission_prices: - enable: true - co2: 100. - co2_monthly_prices: false - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#clustering -clustering: - focus_weights: false - simplify_network: - to_substations: false - algorithm: kmeans # choose from: [hac, kmeans] - feature: solar+onwind-time - exclude_carriers: [] - remove_stubs: true - remove_stubs_across_borders: true - cluster_network: - algorithm: kmeans - feature: solar+onwind-time - exclude_carriers: [] - consider_efficiency_classes: false - aggregation_strategies: - generators: - committable: any - ramp_limit_up: max - ramp_limit_down: max - temporal: - resolution_elec: 25H - resolution_sector: 25H - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#adjustments -adjustments: - electricity: false - sector: false - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#solving -solving: - #tmpdir: "path/to/tmp" - options: - clip_p_max_pu: 1.e-2 - load_shedding: false - noisy_costs: true - skip_iterations: true - rolling_horizon: false - seed: 123 - custom_extra_functionality: 
"../data/custom_extra_functionality.py" - # io_api: "direct" # Increases performance but only supported for the highs and gurobi solvers - # options that go into the optimize function - track_iterations: false - min_iterations: 2 - max_iterations: 3 - transmission_losses: 2 - linearized_unit_commitment: true - horizon: 365 - post_discretization: - enable: false - line_unit_size: 1700 - line_threshold: 0.3 - link_unit_size: - DC: 2000 - H2 pipeline: 1200 - gas pipeline: 1500 - link_threshold: - DC: 0.3 - H2 pipeline: 0.3 - gas pipeline: 0.3 - - agg_p_nom_limits: - agg_offwind: false - include_existing: false - file: data/agg_p_nom_minmax.csv - - constraints: - CCL: false - EQ: false - BAU: false - SAFE: false - - solver: - name: gurobi - options: gurobi-default - - solver_options: - highs-default: - # refer to https://ergo-code.github.io/HiGHS/dev/options/definitions/ - threads: 4 - solver: "ipm" - run_crossover: "off" - small_matrix_value: 1e-6 - large_matrix_value: 1e9 - primal_feasibility_tolerance: 1e-5 - dual_feasibility_tolerance: 1e-5 - ipm_optimality_tolerance: 1e-4 - parallel: "on" - random_seed: 123 - gurobi-default: - threads: 4 - method: 2 # barrier - crossover: 0 - BarConvTol: 1.e-6 - Seed: 123 - AggFill: 0 - PreDual: 0 - GURO_PAR_BARDENSETHRESH: 200 - gurobi-numeric-focus: - NumericFocus: 3 # Favour numeric stability over speed - method: 2 # barrier - crossover: 0 # do not use crossover - BarHomogeneous: 1 # Use homogeneous barrier if standard does not converge - BarConvTol: 1.e-5 - FeasibilityTol: 1.e-4 - OptimalityTol: 1.e-4 - ObjScale: -0.5 - threads: 8 - Seed: 123 - gurobi-fallback: # Use gurobi defaults - crossover: 0 - method: 2 # barrier - BarHomogeneous: 1 # Use homogeneous barrier if standard does not converge - BarConvTol: 1.e-5 - FeasibilityTol: 1.e-5 - OptimalityTol: 1.e-5 - Seed: 123 - threads: 8 - cplex-default: - threads: 4 - lpmethod: 4 # barrier - solutiontype: 2 # non basic solution, ie no crossover - barrier.convergetol: 1.e-5 - 
feasopt.tolerance: 1.e-6 - copt-default: - Threads: 8 - LpMethod: 2 - Crossover: 0 - cbc-default: {} # Used in CI - glpk-default: {} # Used in CI - - mem_mb: 30000 #memory in MB; 20 GB enough for 50+B+I+H2; 100 GB for 181+B+I+H2 - runtime: 6h #runtime in humanfriendly style https://humanfriendly.readthedocs.io/en/latest/ - - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#plotting -plotting: - map: - boundaries: [-11, 30, 34, 71] - color_geomap: - ocean: white - land: white - projection: - name: "EqualEarth" - # See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html for alternatives, for example: - # name: "LambertConformal" - # central_longitude: 10. - # central_latitude: 50. - # standard_parallels: [35, 65] - eu_node_location: - x: -5.5 - y: 46. - costs_max: 1000 - costs_threshold: 1 - energy_max: 20000 - energy_min: -20000 - energy_threshold: 50. - - nice_names: - OCGT: "Open-Cycle Gas" - CCGT: "Combined-Cycle Gas" - offwind-ac: "Offshore Wind (AC)" - offwind-dc: "Offshore Wind (DC)" - offwind-float: "Offshore Wind (Floating)" - onwind: "Onshore Wind" - solar: "Solar" - PHS: "Pumped Hydro Storage" - hydro: "Reservoir & Dam" - battery: "Battery Storage" - H2: "Hydrogen Storage" - lines: "Transmission Lines" - ror: "Run of River" - load: "Load Shedding" - ac: "AC" - dc: "DC" - - tech_colors: - # wind - onwind: "#235ebc" - onshore wind: "#235ebc" - offwind: "#6895dd" - offshore wind: "#6895dd" - offwind-ac: "#6895dd" - offshore wind (AC): "#6895dd" - offshore wind ac: "#6895dd" - offwind-dc: "#74c6f2" - offshore wind (DC): "#74c6f2" - offshore wind dc: "#74c6f2" - offwind-float: "#b5e2fa" - offshore wind (Float): "#b5e2fa" - offshore wind float: "#b5e2fa" - # water - hydro: '#298c81' - hydro reservoir: '#298c81' - ror: '#3dbfb0' - run of river: '#3dbfb0' - hydroelectricity: '#298c81' - PHS: '#51dbcc' - hydro+PHS: "#08ad97" - # solar - solar: "#f9d002" - solar PV: "#f9d002" - solar-hsat: "#fdb915" - solar thermal: 
'#ffbf2b' - residential rural solar thermal: '#f1c069' - services rural solar thermal: '#eabf61' - residential urban decentral solar thermal: '#e5bc5a' - services urban decentral solar thermal: '#dfb953' - urban central solar thermal: '#d7b24c' - solar rooftop: '#ffea80' - # gas - OCGT: '#e0986c' - OCGT marginal: '#e0986c' - OCGT-heat: '#e0986c' - gas boiler: '#db6a25' - gas boilers: '#db6a25' - gas boiler marginal: '#db6a25' - residential rural gas boiler: '#d4722e' - residential urban decentral gas boiler: '#cb7a36' - services rural gas boiler: '#c4813f' - services urban decentral gas boiler: '#ba8947' - urban central gas boiler: '#b0904f' - gas: '#e05b09' - fossil gas: '#e05b09' - natural gas: '#e05b09' - biogas to gas: '#e36311' - biogas to gas CC: '#e51245' - CCGT: '#a85522' - CCGT marginal: '#a85522' - allam: '#B98F76' - gas for industry co2 to atmosphere: '#692e0a' - gas for industry co2 to stored: '#8a3400' - gas for industry: '#853403' - gas for industry CC: '#692e0a' - gas pipeline: '#ebbca0' - gas pipeline new: '#a87c62' - # oil - oil: '#c9c9c9' - imported oil: '#a3a3a3' - oil boiler: '#adadad' - residential rural oil boiler: '#a9a9a9' - services rural oil boiler: '#a5a5a5' - residential urban decentral oil boiler: '#a1a1a1' - urban central oil boiler: '#9d9d9d' - services urban decentral oil boiler: '#999999' - agriculture machinery oil: '#949494' - shipping oil: "#808080" - land transport oil: '#afafaf' - # nuclear - Nuclear: '#ff8c00' - Nuclear marginal: '#ff8c00' - nuclear: '#ff8c00' - uranium: '#ff8c00' - # coal - Coal: '#545454' - coal: '#545454' - Coal marginal: '#545454' - coal for industry: '#343434' - solid: '#545454' - Lignite: '#826837' - lignite: '#826837' - Lignite marginal: '#826837' - # biomass - biogas: '#e3d37d' - biomass: '#baa741' - solid biomass: '#baa741' - solid biomass transport: '#baa741' - solid biomass for industry: '#7a6d26' - solid biomass for industry CC: '#47411c' - solid biomass for industry co2 from atmosphere: '#736412' 
- solid biomass for industry co2 to stored: '#47411c' - urban central solid biomass CHP: '#9d9042' - urban central solid biomass CHP CC: '#6c5d28' - biomass boiler: '#8A9A5B' - residential rural biomass boiler: '#a1a066' - residential urban decentral biomass boiler: '#b0b87b' - services rural biomass boiler: '#c6cf98' - services urban decentral biomass boiler: '#dde5b5' - biomass to liquid: '#32CD32' - BioSNG: '#123456' - # power transmission - lines: '#6c9459' - transmission lines: '#6c9459' - electricity distribution grid: '#97ad8c' - low voltage: '#97ad8c' - # electricity demand - Electric load: '#110d63' - electric demand: '#110d63' - electricity: '#110d63' - industry electricity: '#2d2a66' - industry new electricity: '#2d2a66' - agriculture electricity: '#494778' - # battery + EVs - battery: '#ace37f' - battery storage: '#ace37f' - battery charger: '#88a75b' - battery discharger: '#5d4e29' - home battery: '#80c944' - home battery storage: '#80c944' - home battery charger: '#5e8032' - home battery discharger: '#3c5221' - BEV charger: '#baf238' - V2G: '#e5ffa8' - land transport EV: '#baf238' - land transport demand: '#38baf2' - Li ion: '#baf238' - # hot water storage - water tanks: '#e69487' - residential rural water tanks: '#f7b7a3' - services rural water tanks: '#f3afa3' - residential urban decentral water tanks: '#f2b2a3' - services urban decentral water tanks: '#f1b4a4' - urban central water tanks: '#e9977d' - hot water storage: '#e69487' - hot water charging: '#e8998b' - urban central water tanks charger: '#b57a67' - residential rural water tanks charger: '#b4887c' - residential urban decentral water tanks charger: '#b39995' - services rural water tanks charger: '#b3abb0' - services urban decentral water tanks charger: '#b3becc' - hot water discharging: '#e99c8e' - urban central water tanks discharger: '#b9816e' - residential rural water tanks discharger: '#ba9685' - residential urban decentral water tanks discharger: '#baac9e' - services rural water tanks 
discharger: '#bbc2b8' - services urban decentral water tanks discharger: '#bdd8d3' - # heat demand - Heat load: '#cc1f1f' - heat: '#cc1f1f' - heat vent: '#aa3344' - heat demand: '#cc1f1f' - rural heat: '#ff5c5c' - residential rural heat: '#ff7c7c' - services rural heat: '#ff9c9c' - central heat: '#cc1f1f' - urban central heat: '#d15959' - urban central heat vent: '#a74747' - decentral heat: '#750606' - residential urban decentral heat: '#a33c3c' - services urban decentral heat: '#cc1f1f' - low-temperature heat for industry: '#8f2727' - process heat: '#ff0000' - agriculture heat: '#d9a5a5' - # heat supply - heat pumps: '#2fb537' - heat pump: '#2fb537' - air heat pump: '#36eb41' - residential urban decentral air heat pump: '#48f74f' - services urban decentral air heat pump: '#5af95d' - services rural air heat pump: '#5af95d' - urban central air heat pump: '#6cfb6b' - ground heat pump: '#2fb537' - residential rural ground heat pump: '#48f74f' - residential rural air heat pump: '#48f74f' - services rural ground heat pump: '#5af95d' - Ambient: '#98eb9d' - CHP: '#8a5751' - urban central gas CHP: '#8d5e56' - CHP CC: '#634643' - urban central gas CHP CC: '#6e4e4c' - CHP heat: '#8a5751' - CHP electric: '#8a5751' - district heating: '#e8beac' - resistive heater: '#d8f9b8' - residential rural resistive heater: '#bef5b5' - residential urban decentral resistive heater: '#b2f1a9' - services rural resistive heater: '#a5ed9d' - services urban decentral resistive heater: '#98e991' - urban central resistive heater: '#8cdf85' - retrofitting: '#8487e8' - building retrofitting: '#8487e8' - # hydrogen - H2 for industry: "#f073da" - H2 for shipping: "#ebaee0" - H2: '#bf13a0' - hydrogen: '#bf13a0' - retrofitted H2 boiler: '#e5a0d9' - SMR: '#870c71' - SMR CC: '#4f1745' - H2 liquefaction: '#d647bd' - hydrogen storage: '#bf13a0' - H2 Store: '#bf13a0' - H2 storage: '#bf13a0' - land transport fuel cell: '#6b3161' - H2 pipeline: '#f081dc' - H2 pipeline retrofitted: '#ba99b5' - H2 Fuel Cell: 
'#c251ae' - H2 fuel cell: '#c251ae' - H2 turbine: '#991f83' - H2 Electrolysis: '#ff29d9' - H2 electrolysis: '#ff29d9' - # ammonia - NH3: '#46caf0' - ammonia: '#46caf0' - ammonia store: '#00ace0' - ammonia cracker: '#87d0e6' - Haber-Bosch: '#076987' - # syngas - Sabatier: '#9850ad' - methanation: '#c44ce6' - methane: '#c44ce6' - # synfuels - Fischer-Tropsch: '#25c49a' - liquid: '#25c49a' - kerosene for aviation: '#a1ffe6' - naphtha for industry: '#57ebc4' - methanolisation: '#83d6d5' - methanol: '#468c8b' - shipping methanol: '#468c8b' - industry methanol: '#468c8b' - # co2 - CC: '#f29dae' - CCS: '#f29dae' - CO2 sequestration: '#f29dae' - DAC: '#ff5270' - co2 stored: '#f2385a' - co2 sequestered: '#f2682f' - co2: '#f29dae' - co2 vent: '#ffd4dc' - CO2 pipeline: '#f5627f' - # emissions - process emissions CC: '#000000' - process emissions: '#222222' - process emissions to stored: '#444444' - process emissions to atmosphere: '#888888' - oil emissions: '#aaaaaa' - shipping oil emissions: "#555555" - shipping methanol emissions: '#666666' - land transport oil emissions: '#777777' - agriculture machinery oil emissions: '#333333' - # other - shipping: '#03a2ff' - power-to-heat: '#2fb537' - power-to-gas: '#c44ce6' - power-to-H2: '#ff29d9' - power-to-liquid: '#25c49a' - gas-to-power/heat: '#ee8340' - waste: '#e3d37d' - other: '#000000' - geothermal: '#ba91b1' - geothermal heat: '#ba91b1' - geothermal district heat: '#d19D00' - geothermal organic rankine cycle: '#ffbf00' - AC: "#70af1d" - AC-AC: "#70af1d" - AC line: "#70af1d" - links: "#8a1caf" - HVDC links: "#8a1caf" - DC: "#8a1caf" - DC-DC: "#8a1caf" - DC link: "#8a1caf" - load: "#dd2e23" - waste CHP: '#e3d37d' - waste CHP CC: '#e3d3ff' - HVC to air: 'k' From f87eec5dece86146f5837a8cc66f00d81a828a8e Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Thu, 15 Aug 2024 17:52:55 +0200 Subject: [PATCH 086/100] Bug fix: Carrier type of all supernodes corrected to 'AC' --- .sync-send | 3 + Snakefile | 2 +- config/config_backuo.yaml | 
1259 +++++++++++++++++++++++++++++++++++ scripts/simplify_network.py | 30 +- 4 files changed, 1281 insertions(+), 13 deletions(-) create mode 100644 config/config_backuo.yaml diff --git a/.sync-send b/.sync-send index 483c7a999..6fc8cb4c0 100644 --- a/.sync-send +++ b/.sync-send @@ -9,3 +9,6 @@ config/test envs matplotlibrc Snakefile +data/eez/ +data/naturalearth/ +resources/europe-nuts2-gridkit/ diff --git a/Snakefile b/Snakefile index 56a704dec..c45c7e58d 100644 --- a/Snakefile +++ b/Snakefile @@ -135,6 +135,6 @@ rule sync: shell: """ rsync -uvarh --ignore-missing-args --files-from=.sync-send . {params.cluster} - rsync -uvarh --no-g {params.cluster}/resources . || echo "No resources directory, skipping rsync" + # rsync -uvarh --no-g {params.cluster}/resources . || echo "No resources directory, skipping rsync" rsync -uvarh --no-g {params.cluster}/results . || echo "No results directory, skipping rsync" """ diff --git a/config/config_backuo.yaml b/config/config_backuo.yaml new file mode 100644 index 000000000..f45ea5be0 --- /dev/null +++ b/config/config_backuo.yaml @@ -0,0 +1,1259 @@ +# SPDX-FileCopyrightText: : 2017-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: CC0-1.0 + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#top-level-configuration +version: 0.11.0 +tutorial: false + +logging: + level: INFO + format: '%(levelname)s:%(name)s:%(message)s' + +private: + keys: + entsoe_api: + +remote: + ssh: zecm + path: ~/scratch/projects/pypsa-eur + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#run +run: + prefix: "" + name: "europe-nuts2-gridkit" + scenarios: + enable: false + file: config/scenarios.yaml + disable_progressbar: false + shared_resources: + policy: false + exclude: [] + shared_cutouts: true + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#foresight +foresight: overnight + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#scenario +# Wildcard docs 
in https://pypsa-eur.readthedocs.io/en/latest/wildcards.html +scenario: + simpl: + - '' + ll: + - v1.0 + clusters: + - 318 + opts: + - '' + sector_opts: + - '' + planning_horizons: + # - 2020 + - 2030 + # - 2040 + # - 2050 + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#countries +countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK', 'UA', 'MD'] + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#snapshots +snapshots: + start: "2013-01-01" + end: "2014-01-01" + inclusive: 'left' + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#enable +enable: + retrieve: auto + prepare_links_p_nom: false + retrieve_databundle: true + retrieve_cost_data: true + build_cutout: false + retrieve_cutout: true + custom_busmap: true + drop_leap_day: true + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#co2-budget +co2_budget: + 2020: 0.701 + 2025: 0.524 + 2030: 0.297 + 2035: 0.150 + 2040: 0.071 + 2045: 0.032 + 2050: 0.000 + +electricity_network: + base_network: gridkit # Options: gridkit, osm-prebuilt, osm-raw (built from scratch using OSM data, takes longer) + osm_group_tolerance_buses: 5000 # unit: meters, default 5000 - Buses within this distance are grouped together + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#lines +lines: + types: # Specify voltages (keys) and line types (values) for lines + 200.: "Al/St 240/40 2-bundle 200.0" + 220.: "Al/St 240/40 2-bundle 220.0" + 300.: "Al/St 240/40 3-bundle 300.0" + 380.: "Al/St 240/40 4-bundle 380.0" + 500.: "Al/St 240/40 4-bundle 380.0" + 750.: "Al/St 560/50 4-bundle 750.0" + s_max_pu: 0.7 + s_nom_max: .inf + max_extension: 20000 #MW + length_factor: 1.25 + reconnect_crimea: true # Only needed for 'gridkit' base_network, in OSM, the lines are already connected + 
under_construction: 'keep' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity + dynamic_line_rating: + activate: false + cutout: europe-2013-sarah3-era5 + correction_factor: 0.95 + max_voltage_difference: false + max_line_rating: false + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#links +links: + p_max_pu: 1.0 + p_nom_max: .inf + max_extension: 30000 #MW + include_tyndp: false + under_construction: 'zero' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#transformers +transformers: + x: 0.1 + s_nom: 2000. + type: '' + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity +electricity: + gaslimit_enable: false + gaslimit: false + co2limit_enable: false + co2limit: 7.75e+7 + co2base: 1.487e+9 + + operational_reserve: + activate: false + epsilon_load: 0.02 + epsilon_vres: 0.02 + contingency: 4000 + + max_hours: + battery: 6 + H2: 168 + + extendable_carriers: + Generator: [solar, solar-hsat, onwind, offwind-ac, offwind-dc, offwind-float, OCGT, CCGT] + StorageUnit: [] # battery, H2 + Store: [battery, H2] + Link: [] # H2 pipeline + + powerplants_filter: (DateOut >= 2023 or DateOut != DateOut) and not (Country == 'Germany' and Fueltype == 'Nuclear') + custom_powerplants: false + everywhere_powerplants: [] + + conventional_carriers: [nuclear, oil, OCGT, CCGT, coal, lignite, geothermal, biomass] + renewable_carriers: [solar, onwind, offwind-ac, offwind-dc, hydro] + + estimate_renewable_capacities: + enable: true + from_opsd: true + year: 2020 + expansion_limit: false + technology_mapping: + Offshore: [offwind-ac, offwind-dc, offwind-float] + Onshore: [onwind] + PV: [solar] + + autarky: + enable: false + by_country: false + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#atlite +atlite: + default_cutout: europe-2013-sarah3-era5 + nprocesses: 4 + show_progress: false 
+ cutouts: + # use 'base' to determine geographical bounds and time span from config + # base: + # module: era5 + europe-2013-sarah3-era5: + module: [sarah, era5] # in priority order + x: [-12., 42.] + y: [33., 72.] + dx: 0.3 + dy: 0.3 + time: ['2013', '2013'] + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#renewable +renewable: + onwind: + cutout: europe-2013-sarah3-era5 + resource: + method: wind + turbine: Vestas_V112_3MW + smooth: true + add_cutout_windspeed: true + capacity_per_sqkm: 3 + # correction_factor: 0.93 + corine: + grid_codes: [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32] + distance: 1000 + distance_grid_codes: [1, 2, 3, 4, 5, 6] + luisa: false + # grid_codes: [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242] + # distance: 1000 + # distance_grid_codes: [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242] + natura: true + excluder_resolution: 100 + clip_p_max_pu: 1.e-2 + offwind-ac: + cutout: europe-2013-sarah3-era5 + resource: + method: wind + turbine: NREL_ReferenceTurbine_2020ATB_5.5MW + smooth: true + add_cutout_windspeed: true + capacity_per_sqkm: 2 + correction_factor: 0.8855 + corine: [44, 255] + luisa: false # [0, 5230] + natura: true + ship_threshold: 400 + max_depth: 60 + max_shore_distance: 30000 + excluder_resolution: 200 + clip_p_max_pu: 1.e-2 + offwind-dc: + cutout: europe-2013-sarah3-era5 + resource: + method: wind + turbine: NREL_ReferenceTurbine_2020ATB_5.5MW + smooth: true + add_cutout_windspeed: true + capacity_per_sqkm: 2 + correction_factor: 0.8855 + corine: [44, 255] + luisa: false # [0, 5230] + natura: true + ship_threshold: 400 + max_depth: 60 + min_shore_distance: 30000 + excluder_resolution: 200 + clip_p_max_pu: 1.e-2 + offwind-float: + cutout: europe-2013-sarah3-era5 + resource: + method: wind + turbine: NREL_ReferenceTurbine_5MW_offshore + smooth: true + add_cutout_windspeed: true + # ScholzPhd Tab 4.3.1: 10MW/km^2 + 
capacity_per_sqkm: 2 + correction_factor: 0.8855 + # proxy for wake losses + # from 10.1016/j.energy.2018.08.153 + # until done more rigorously in #153 + corine: [44, 255] + natura: true + ship_threshold: 400 + excluder_resolution: 200 + min_depth: 60 + max_depth: 1000 + clip_p_max_pu: 1.e-2 + solar: + cutout: europe-2013-sarah3-era5 + resource: + method: pv + panel: CSi + orientation: + slope: 35. + azimuth: 180. + capacity_per_sqkm: 5.1 + # correction_factor: 0.854337 + corine: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 26, 31, 32] + luisa: false # [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242, 1310, 1320, 1330, 1410, 1421, 1422, 2110, 2120, 2130, 2210, 2220, 2230, 2310, 2410, 2420, 3210, 3320, 3330] + natura: true + excluder_resolution: 100 + clip_p_max_pu: 1.e-2 + solar-hsat: + cutout: europe-2013-sarah3-era5 + resource: + method: pv + panel: CSi + orientation: + slope: 35. + azimuth: 180. + tracking: horizontal + capacity_per_sqkm: 4.43 # 15% higher land usage acc. 
to NREL + corine: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 26, 31, 32] + luisa: false # [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242, 1310, 1320, 1330, 1410, 1421, 1422, 2110, 2120, 2130, 2210, 2220, 2230, 2310, 2410, 2420, 3210, 3320, 3330] + natura: true + excluder_resolution: 100 + clip_p_max_pu: 1.e-2 + hydro: + cutout: europe-2013-sarah3-era5 + carriers: [ror, PHS, hydro] + PHS_max_hours: 6 + hydro_max_hours: "energy_capacity_totals_by_country" # one of energy_capacity_totals_by_country, estimate_by_large_installations or a float + flatten_dispatch: false + flatten_dispatch_buffer: 0.2 + clip_min_inflow: 1.0 + eia_norm_year: false + eia_correct_by_capacity: false + eia_approximate_missing: false + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#conventional +conventional: + unit_commitment: false + dynamic_fuel_price: false + nuclear: + p_max_pu: "data/nuclear_p_max_pu.csv" # float of file name + +# docs-load in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#load +load: + interpolate_limit: 3 + time_shift_for_large_gaps: 1w + manual_adjustments: true # false + scaling_factor: 1.0 + fixed_year: false # false or year (e.g. 2013) + supplement_synthetic: true + +# docs +# TODO: PyPSA-Eur merge issue in prepare_sector_network.py +# regulate what components with which carriers are kept from PyPSA-Eur; +# some technologies are removed because they are implemented differently +# (e.g. 
battery or H2 storage) or have different year-dependent costs +# in PyPSA-Eur-Sec +pypsa_eur: + Bus: + - AC + Link: + - DC + Generator: + - onwind + - offwind-ac + - offwind-dc + - offwind-float + - solar-hsat + - solar + - ror + - nuclear + StorageUnit: + - PHS + - hydro + Store: [] + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#energy +energy: + energy_totals_year: 2019 + base_emissions_year: 1990 + emissions: CO2 + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#biomass +biomass: + year: 2030 + scenario: ENS_Med + classes: + solid biomass: + - Agricultural waste + - Fuelwood residues + - Secondary Forestry residues - woodchips + - Sawdust + - Residues from landscape care + not included: + - Sugar from sugar beet + - Rape seed + - "Sunflower, soya seed " + - Bioethanol barley, wheat, grain maize, oats, other cereals and rye + - Miscanthus, switchgrass, RCG + - Willow + - Poplar + - FuelwoodRW + - C&P_RW + biogas: + - Manure solid, liquid + - Sludge + municipal solid waste: + - Municipal waste + share_unsustainable_use_retained: + 2020: 1 + 2025: 0.66 + 2030: 0.33 + 2035: 0 + 2040: 0 + 2045: 0 + 2050: 0 + share_sustainable_potential_available: + 2020: 0 + 2025: 0.33 + 2030: 0.66 + 2035: 1 + 2040: 1 + 2045: 1 + 2050: 1 + + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#solar-thermal +solar_thermal: + clearsky_model: simple # should be "simple" or "enhanced"? + orientation: + slope: 45. + azimuth: 180. 
+ cutout: default + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#existing-capacities +existing_capacities: + grouping_years_power: [1920, 1950, 1955, 1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020, 2025] + grouping_years_heat: [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2019] # heat grouping years >= baseyear will be ignored + threshold_capacity: 10 + default_heating_lifetime: 20 + conventional_carriers: + - lignite + - coal + - oil + - uranium + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#sector +sector: + transport: true + heating: true + biomass: true + industry: true + agriculture: true + fossil_fuels: true + district_heating: + potential: 0.6 + progress: + 2020: 0.0 + 2025: 0.15 + 2030: 0.3 + 2035: 0.45 + 2040: 0.6 + 2045: 0.8 + 2050: 1.0 + district_heating_loss: 0.15 + forward_temperature: 90 #C + return_temperature: 50 #C + heat_source_cooling: 6 #K + heat_pump_cop_approximation: + refrigerant: ammonia + heat_exchanger_pinch_point_temperature_difference: 5 #K + isentropic_compressor_efficiency: 0.8 + heat_loss: 0.0 + heat_pump_sources: + urban central: + - air + urban decentral: + - air + rural: + - air + - ground + cluster_heat_buses: true + heat_demand_cutout: default + bev_dsm_restriction_value: 0.75 + bev_dsm_restriction_time: 7 + transport_heating_deadband_upper: 20. + transport_heating_deadband_lower: 15. 
+ ICE_lower_degree_factor: 0.375 + ICE_upper_degree_factor: 1.6 + EV_lower_degree_factor: 0.98 + EV_upper_degree_factor: 0.63 + bev_dsm: true + bev_availability: 0.5 + bev_energy: 0.05 + bev_charge_efficiency: 0.9 + bev_charge_rate: 0.011 + bev_avail_max: 0.95 + bev_avail_mean: 0.8 + v2g: true + land_transport_fuel_cell_share: + 2020: 0 + 2025: 0 + 2030: 0 + 2035: 0 + 2040: 0 + 2045: 0 + 2050: 0 + land_transport_electric_share: + 2020: 0 + 2025: 0.15 + 2030: 0.3 + 2035: 0.45 + 2040: 0.7 + 2045: 0.85 + 2050: 1 + land_transport_ice_share: + 2020: 1 + 2025: 0.85 + 2030: 0.7 + 2035: 0.55 + 2040: 0.3 + 2045: 0.15 + 2050: 0 + transport_electric_efficiency: 53.19 # 1 MWh_el = 53.19*100 km + transport_fuel_cell_efficiency: 30.003 # 1 MWh_H2 = 30.003*100 km + transport_ice_efficiency: 16.0712 # 1 MWh_oil = 16.0712 * 100 km + agriculture_machinery_electric_share: 0 + agriculture_machinery_oil_share: 1 + agriculture_machinery_fuel_efficiency: 0.7 + agriculture_machinery_electric_efficiency: 0.3 + MWh_MeOH_per_MWh_H2: 0.8787 + MWh_MeOH_per_tCO2: 4.0321 + MWh_MeOH_per_MWh_e: 3.6907 + shipping_hydrogen_liquefaction: false + shipping_hydrogen_share: + 2020: 0 + 2025: 0 + 2030: 0 + 2035: 0 + 2040: 0 + 2045: 0 + 2050: 0 + shipping_methanol_share: + 2020: 0 + 2025: 0.15 + 2030: 0.3 + 2035: 0.5 + 2040: 0.7 + 2045: 0.85 + 2050: 1 + shipping_oil_share: + 2020: 1 + 2025: 0.85 + 2030: 0.7 + 2035: 0.5 + 2040: 0.3 + 2045: 0.15 + 2050: 0 + shipping_methanol_efficiency: 0.46 + shipping_oil_efficiency: 0.40 + aviation_demand_factor: 1. + HVC_demand_factor: 1. + time_dep_hp_cop: true + heat_pump_sink_T_individual_heating: 55. 
+ reduce_space_heat_exogenously: true + reduce_space_heat_exogenously_factor: + 2020: 0.10 # this results in a space heat demand reduction of 10% + 2025: 0.09 # first heat demand increases compared to 2020 because of larger floor area per capita + 2030: 0.09 + 2035: 0.11 + 2040: 0.16 + 2045: 0.21 + 2050: 0.29 + retrofitting: + retro_endogen: false + cost_factor: 1.0 + interest_rate: 0.04 + annualise_cost: true + tax_weighting: false + construction_index: true + tes: true + tes_tau: + decentral: 3 + central: 180 + boilers: true + resistive_heaters: true + oil_boilers: false + biomass_boiler: true + overdimension_individual_heating: 1.1 #to cover demand peaks bigger than data + chp: true + micro_chp: false + solar_thermal: true + solar_cf_correction: 0.788457 # = >>> 1/1.2683 + marginal_cost_storage: 0. #1e-4 + methanation: true + coal_cc: false + dac: true + co2_vent: false + central_heat_vent: false + allam_cycle: false + hydrogen_fuel_cell: true + hydrogen_turbine: false + SMR: true + SMR_cc: true + regional_methanol_demand: false + regional_oil_demand: false + regional_coal_demand: false + regional_co2_sequestration_potential: + enable: false + attribute: + - conservative estimate Mt + - conservative estimate GAS Mt + - conservative estimate OIL Mt + - conservative estimate aquifer Mt + include_onshore: false + min_size: 3 + max_size: 25 + years_of_storage: 25 + co2_sequestration_potential: 200 + co2_sequestration_cost: 10 + co2_sequestration_lifetime: 50 + co2_spatial: false + co2network: false + co2_network_cost_factor: 1 + cc_fraction: 0.9 + hydrogen_underground_storage: true + hydrogen_underground_storage_locations: + # - onshore # more than 50 km from sea + - nearshore # within 50 km of sea + # - offshore + ammonia: false + min_part_load_fischer_tropsch: 0.5 + min_part_load_methanolisation: 0.3 + min_part_load_methanation: 0.3 + use_fischer_tropsch_waste_heat: 0.25 + use_haber_bosch_waste_heat: 0.25 + use_methanolisation_waste_heat: 0.25 + 
use_methanation_waste_heat: 0.25 + use_fuel_cell_waste_heat: 0.25 + use_electrolysis_waste_heat: 0.25 + electricity_transmission_grid: true + electricity_distribution_grid: true + electricity_distribution_grid_cost_factor: 1.0 + electricity_grid_connection: true + transmission_efficiency: + DC: + efficiency_static: 0.98 + efficiency_per_1000km: 0.977 + H2 pipeline: + efficiency_per_1000km: 1 # 0.982 + compression_per_1000km: 0.018 + gas pipeline: + efficiency_per_1000km: 1 #0.977 + compression_per_1000km: 0.01 + electricity distribution grid: + efficiency_static: 0.97 + H2_network: true + gas_network: false + H2_retrofit: false + H2_retrofit_capacity_per_CH4: 0.6 + gas_network_connectivity_upgrade: 1 + gas_distribution_grid: true + gas_distribution_grid_cost_factor: 1.0 + biomass_spatial: false + biomass_transport: false + biogas_upgrading_cc: false + conventional_generation: + OCGT: gas + biomass_to_liquid: false + electrobiofuels: false + biosng: false + municipal_solid_waste: false + limit_max_growth: + enable: false + # allowing 30% larger than max historic growth + factor: 1.3 + max_growth: # unit GW + onwind: 16 # onshore max grow so far 16 GW in Europe https://www.iea.org/reports/renewables-2020/wind + solar: 28 # solar max grow so far 28 GW in Europe https://www.iea.org/reports/renewables-2020/solar-pv + offwind-ac: 35 # offshore max grow so far 3.5 GW in Europe https://windeurope.org/about-wind/statistics/offshore/european-offshore-wind-industry-key-trends-statistics-2019/ + offwind-dc: 35 + max_relative_growth: + onwind: 3 + solar: 3 + offwind-ac: 3 + offwind-dc: 3 + enhanced_geothermal: + enable: false + flexible: true + max_hours: 240 + max_boost: 0.25 + var_cf: true + sustainability_factor: 0.0025 + solid_biomass_import: + enable: false + price: 54 #EUR/MWh + max_amount: 1390 # TWh + upstream_emissions_factor: .1 #share of solid biomass CO2 emissions at full combustion + + +# docs in 
https://pypsa-eur.readthedocs.io/en/latest/configuration.html#industry +industry: + St_primary_fraction: + 2020: 0.6 + 2025: 0.55 + 2030: 0.5 + 2035: 0.45 + 2040: 0.4 + 2045: 0.35 + 2050: 0.3 + DRI_fraction: + 2020: 0 + 2025: 0 + 2030: 0.05 + 2035: 0.2 + 2040: 0.4 + 2045: 0.7 + 2050: 1 + H2_DRI: 1.7 + elec_DRI: 0.322 + Al_primary_fraction: + 2020: 0.4 + 2025: 0.375 + 2030: 0.35 + 2035: 0.325 + 2040: 0.3 + 2045: 0.25 + 2050: 0.2 + MWh_NH3_per_tNH3: 5.166 + MWh_CH4_per_tNH3_SMR: 10.8 + MWh_elec_per_tNH3_SMR: 0.7 + MWh_H2_per_tNH3_electrolysis: 5.93 + MWh_elec_per_tNH3_electrolysis: 0.2473 + MWh_NH3_per_MWh_H2_cracker: 1.46 # https://github.com/euronion/trace/blob/44a5ff8401762edbef80eff9cfe5a47c8d3c8be4/data/efficiencies.csv + NH3_process_emissions: 24.5 + petrochemical_process_emissions: 25.5 + #HVC primary/recycling based on values used in Neumann et al https://doi.org/10.1016/j.joule.2023.06.016, linearly interpolated between 2020 and 2050 + #2020 recycling rates based on Agora https://static.agora-energiewende.de/fileadmin/Projekte/2021/2021_02_EU_CEAP/A-EW_254_Mobilising-circular-economy_study_WEB.pdf + #fractions refer to the total primary HVC production in 2020 + #assumes 6.7 Mtplastics produced from recycling in 2020 + HVC_primary_fraction: + 2020: 1.0 + 2025: 0.9 + 2030: 0.8 + 2035: 0.7 + 2040: 0.6 + 2045: 0.5 + 2050: 0.4 + HVC_mechanical_recycling_fraction: + 2020: 0.12 + 2025: 0.15 + 2030: 0.18 + 2035: 0.21 + 2040: 0.24 + 2045: 0.27 + 2050: 0.30 + HVC_chemical_recycling_fraction: + 2020: 0.0 + 2025: 0.0 + 2030: 0.04 + 2035: 0.08 + 2040: 0.12 + 2045: 0.16 + 2050: 0.20 + HVC_environment_sequestration_fraction: 0. + waste_to_energy: false + waste_to_energy_cc: false + sector_ratios_fraction_future: + 2020: 0.0 + 2025: 0.1 + 2030: 0.3 + 2035: 0.5 + 2040: 0.7 + 2045: 0.9 + 2050: 1.0 + basic_chemicals_without_NH3_production_today: 69. #Mt/a, = 86 Mtethylene-equiv - 17 MtNH3 + HVC_production_today: 52. 
+ MWh_elec_per_tHVC_mechanical_recycling: 0.547 + MWh_elec_per_tHVC_chemical_recycling: 6.9 + chlorine_production_today: 9.58 + MWh_elec_per_tCl: 3.6 + MWh_H2_per_tCl: -0.9372 + methanol_production_today: 1.5 + MWh_elec_per_tMeOH: 0.167 + MWh_CH4_per_tMeOH: 10.25 + MWh_MeOH_per_tMeOH: 5.528 + hotmaps_locate_missing: false + reference_year: 2019 + + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#costs +costs: + year: 2030 + version: v0.9.1 + social_discountrate: 0.02 + fill_values: + FOM: 0 + VOM: 0 + efficiency: 1 + fuel: 0 + investment: 0 + lifetime: 25 + "CO2 intensity": 0 + "discount rate": 0.07 + # Marginal and capital costs can be overwritten + # capital_cost: + # onwind: 500 + marginal_cost: + solar: 0.01 + onwind: 0.015 + offwind: 0.015 + hydro: 0. + H2: 0. + electrolysis: 0. + fuel cell: 0. + battery: 0. + battery inverter: 0. + emission_prices: + enable: true + co2: 100. + co2_monthly_prices: false + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#clustering +clustering: + focus_weights: false + simplify_network: + to_substations: false + algorithm: kmeans # choose from: [hac, kmeans] + feature: solar+onwind-time + exclude_carriers: [] + remove_stubs: true + remove_stubs_across_borders: true + cluster_network: + algorithm: kmeans + feature: solar+onwind-time + exclude_carriers: [] + consider_efficiency_classes: false + aggregation_strategies: + generators: + committable: any + ramp_limit_up: max + ramp_limit_down: max + temporal: + resolution_elec: 1H + resolution_sector: 1H + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#adjustments +adjustments: + electricity: false + sector: false + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#solving +solving: + #tmpdir: "path/to/tmp" + options: + clip_p_max_pu: 1.e-2 + load_shedding: false + curtailment_mode: false + noisy_costs: true + skip_iterations: true + rolling_horizon: false + seed: 123 + 
custom_extra_functionality: "../data/custom_extra_functionality.py" + # io_api: "direct" # Increases performance but only supported for the highs and gurobi solvers + # options that go into the optimize function + track_iterations: false + min_iterations: 2 + max_iterations: 3 + transmission_losses: 2 + linearized_unit_commitment: true + horizon: 365 + post_discretization: + enable: false + line_unit_size: 1700 + line_threshold: 0.3 + link_unit_size: + DC: 2000 + H2 pipeline: 1200 + gas pipeline: 1500 + link_threshold: + DC: 0.3 + H2 pipeline: 0.3 + gas pipeline: 0.3 + + agg_p_nom_limits: + agg_offwind: false + include_existing: false + file: data/agg_p_nom_minmax.csv + + constraints: + CCL: false + EQ: false + BAU: false + SAFE: false + + solver: + name: gurobi + options: gurobi-default + + solver_options: + highs-default: + # refer to https://ergo-code.github.io/HiGHS/dev/options/definitions/ + threads: 1 + solver: "ipm" + run_crossover: "off" + small_matrix_value: 1e-6 + large_matrix_value: 1e9 + primal_feasibility_tolerance: 1e-5 + dual_feasibility_tolerance: 1e-5 + ipm_optimality_tolerance: 1e-4 + parallel: "on" + random_seed: 123 + gurobi-default: + threads: 8 + method: 2 # barrier + crossover: 0 + BarConvTol: 1.e-6 + Seed: 123 + AggFill: 0 + PreDual: 0 + GURO_PAR_BARDENSETHRESH: 200 + gurobi-numeric-focus: + NumericFocus: 3 # Favour numeric stability over speed + method: 2 # barrier + crossover: 0 # do not use crossover + BarHomogeneous: 1 # Use homogeneous barrier if standard does not converge + BarConvTol: 1.e-5 + FeasibilityTol: 1.e-4 + OptimalityTol: 1.e-4 + ObjScale: -0.5 + threads: 8 + Seed: 123 + gurobi-fallback: # Use gurobi defaults + crossover: 0 + method: 2 # barrier + BarHomogeneous: 1 # Use homogeneous barrier if standard does not converge + BarConvTol: 1.e-5 + FeasibilityTol: 1.e-5 + OptimalityTol: 1.e-5 + Seed: 123 + threads: 8 + cplex-default: + threads: 4 + lpmethod: 4 # barrier + solutiontype: 2 # non basic solution, ie no crossover + 
barrier.convergetol: 1.e-5 + feasopt.tolerance: 1.e-6 + copt-default: + Threads: 8 + LpMethod: 2 + Crossover: 0 + RelGap: 1.e-6 + Dualize: 0 + copt-gpu: + LpMethod: 6 + GPUMode: 1 + PDLPTol: 1.e-5 + Crossover: 0 + cbc-default: {} # Used in CI + glpk-default: {} # Used in CI + + mem_mb: 140000 #memory in MB; 20 GB enough for 50+B+I+H2; 100 GB for 181+B+I+H2 + runtime: 60h #runtime in humanfriendly style https://humanfriendly.readthedocs.io/en/latest/ + + +# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#plotting +plotting: + map: + boundaries: [-11, 30, 34, 71] + color_geomap: + ocean: white + land: white + projection: + name: "EqualEarth" + # See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html for alternatives, for example: + # name: "LambertConformal" + # central_longitude: 10. + # central_latitude: 50. + # standard_parallels: [35, 65] + eu_node_location: + x: -5.5 + y: 46. + costs_max: 1000 + costs_threshold: 1 + energy_max: 20000 + energy_min: -20000 + energy_threshold: 50. 
+ + nice_names: + OCGT: "Open-Cycle Gas" + CCGT: "Combined-Cycle Gas" + offwind-ac: "Offshore Wind (AC)" + offwind-dc: "Offshore Wind (DC)" + offwind-float: "Offshore Wind (Floating)" + onwind: "Onshore Wind" + solar: "Solar" + PHS: "Pumped Hydro Storage" + hydro: "Reservoir & Dam" + battery: "Battery Storage" + H2: "Hydrogen Storage" + lines: "Transmission Lines" + ror: "Run of River" + load: "Load Shedding" + ac: "AC" + dc: "DC" + + tech_colors: + # wind + onwind: "#235ebc" + onshore wind: "#235ebc" + offwind: "#6895dd" + offshore wind: "#6895dd" + offwind-ac: "#6895dd" + offshore wind (AC): "#6895dd" + offshore wind ac: "#6895dd" + offwind-dc: "#74c6f2" + offshore wind (DC): "#74c6f2" + offshore wind dc: "#74c6f2" + offwind-float: "#b5e2fa" + offshore wind (Float): "#b5e2fa" + offshore wind float: "#b5e2fa" + # water + hydro: '#298c81' + hydro reservoir: '#298c81' + ror: '#3dbfb0' + run of river: '#3dbfb0' + hydroelectricity: '#298c81' + PHS: '#51dbcc' + hydro+PHS: "#08ad97" + # solar + solar: "#f9d002" + solar PV: "#f9d002" + solar-hsat: "#fdb915" + solar thermal: '#ffbf2b' + residential rural solar thermal: '#f1c069' + services rural solar thermal: '#eabf61' + residential urban decentral solar thermal: '#e5bc5a' + services urban decentral solar thermal: '#dfb953' + urban central solar thermal: '#d7b24c' + solar rooftop: '#ffea80' + # gas + OCGT: '#e0986c' + OCGT marginal: '#e0986c' + OCGT-heat: '#e0986c' + gas boiler: '#db6a25' + gas boilers: '#db6a25' + gas boiler marginal: '#db6a25' + residential rural gas boiler: '#d4722e' + residential urban decentral gas boiler: '#cb7a36' + services rural gas boiler: '#c4813f' + services urban decentral gas boiler: '#ba8947' + urban central gas boiler: '#b0904f' + gas: '#e05b09' + fossil gas: '#e05b09' + natural gas: '#e05b09' + biogas to gas: '#e36311' + biogas to gas CC: '#e51245' + CCGT: '#a85522' + CCGT marginal: '#a85522' + allam: '#B98F76' + gas for industry co2 to atmosphere: '#692e0a' + gas for industry co2 to 
stored: '#8a3400' + gas for industry: '#853403' + gas for industry CC: '#692e0a' + gas pipeline: '#ebbca0' + gas pipeline new: '#a87c62' + # oil + oil: '#c9c9c9' + imported oil: '#a3a3a3' + oil boiler: '#adadad' + residential rural oil boiler: '#a9a9a9' + services rural oil boiler: '#a5a5a5' + residential urban decentral oil boiler: '#a1a1a1' + urban central oil boiler: '#9d9d9d' + services urban decentral oil boiler: '#999999' + agriculture machinery oil: '#949494' + shipping oil: "#808080" + land transport oil: '#afafaf' + # nuclear + Nuclear: '#ff8c00' + Nuclear marginal: '#ff8c00' + nuclear: '#ff8c00' + uranium: '#ff8c00' + # coal + Coal: '#545454' + coal: '#545454' + Coal marginal: '#545454' + coal for industry: '#343434' + solid: '#545454' + Lignite: '#826837' + lignite: '#826837' + Lignite marginal: '#826837' + # biomass + biogas: '#e3d37d' + biomass: '#baa741' + solid biomass: '#baa741' + municipal solid waste: '#91ba41' + solid biomass import: '#d5ca8d' + solid biomass transport: '#baa741' + solid biomass for industry: '#7a6d26' + solid biomass for industry CC: '#47411c' + solid biomass for industry co2 from atmosphere: '#736412' + solid biomass for industry co2 to stored: '#47411c' + urban central solid biomass CHP: '#9d9042' + urban central solid biomass CHP CC: '#6c5d28' + biomass boiler: '#8A9A5B' + residential rural biomass boiler: '#a1a066' + residential urban decentral biomass boiler: '#b0b87b' + services rural biomass boiler: '#c6cf98' + services urban decentral biomass boiler: '#dde5b5' + biomass to liquid: '#32CD32' + unsustainable bioliquids: '#32CD32' + electrobiofuels: 'red' + BioSNG: '#123456' + # power transmission + lines: '#6c9459' + transmission lines: '#6c9459' + electricity distribution grid: '#97ad8c' + low voltage: '#97ad8c' + # electricity demand + Electric load: '#110d63' + electric demand: '#110d63' + electricity: '#110d63' + industry electricity: '#2d2a66' + industry new electricity: '#2d2a66' + agriculture electricity: '#494778' 
+ # battery + EVs + battery: '#ace37f' + battery storage: '#ace37f' + battery charger: '#88a75b' + battery discharger: '#5d4e29' + home battery: '#80c944' + home battery storage: '#80c944' + home battery charger: '#5e8032' + home battery discharger: '#3c5221' + BEV charger: '#baf238' + V2G: '#e5ffa8' + land transport EV: '#baf238' + land transport demand: '#38baf2' + EV battery: '#baf238' + # hot water storage + water tanks: '#e69487' + residential rural water tanks: '#f7b7a3' + services rural water tanks: '#f3afa3' + residential urban decentral water tanks: '#f2b2a3' + services urban decentral water tanks: '#f1b4a4' + urban central water tanks: '#e9977d' + hot water storage: '#e69487' + hot water charging: '#e8998b' + urban central water tanks charger: '#b57a67' + residential rural water tanks charger: '#b4887c' + residential urban decentral water tanks charger: '#b39995' + services rural water tanks charger: '#b3abb0' + services urban decentral water tanks charger: '#b3becc' + hot water discharging: '#e99c8e' + urban central water tanks discharger: '#b9816e' + residential rural water tanks discharger: '#ba9685' + residential urban decentral water tanks discharger: '#baac9e' + services rural water tanks discharger: '#bbc2b8' + services urban decentral water tanks discharger: '#bdd8d3' + # heat demand + Heat load: '#cc1f1f' + heat: '#cc1f1f' + heat vent: '#aa3344' + heat demand: '#cc1f1f' + rural heat: '#ff5c5c' + residential rural heat: '#ff7c7c' + services rural heat: '#ff9c9c' + central heat: '#cc1f1f' + urban central heat: '#d15959' + urban central heat vent: '#a74747' + decentral heat: '#750606' + residential urban decentral heat: '#a33c3c' + services urban decentral heat: '#cc1f1f' + low-temperature heat for industry: '#8f2727' + process heat: '#ff0000' + agriculture heat: '#d9a5a5' + # heat supply + heat pumps: '#2fb537' + heat pump: '#2fb537' + air heat pump: '#36eb41' + residential urban decentral air heat pump: '#48f74f' + services urban decentral air 
heat pump: '#5af95d' + services rural air heat pump: '#5af95d' + urban central air heat pump: '#6cfb6b' + ground heat pump: '#2fb537' + residential rural ground heat pump: '#48f74f' + residential rural air heat pump: '#48f74f' + services rural ground heat pump: '#5af95d' + Ambient: '#98eb9d' + CHP: '#8a5751' + urban central gas CHP: '#8d5e56' + CHP CC: '#634643' + urban central gas CHP CC: '#6e4e4c' + CHP heat: '#8a5751' + CHP electric: '#8a5751' + district heating: '#e8beac' + resistive heater: '#d8f9b8' + residential rural resistive heater: '#bef5b5' + residential urban decentral resistive heater: '#b2f1a9' + services rural resistive heater: '#a5ed9d' + services urban decentral resistive heater: '#98e991' + urban central resistive heater: '#8cdf85' + retrofitting: '#8487e8' + building retrofitting: '#8487e8' + # hydrogen + H2 for industry: "#f073da" + H2 for shipping: "#ebaee0" + H2: '#bf13a0' + hydrogen: '#bf13a0' + retrofitted H2 boiler: '#e5a0d9' + SMR: '#870c71' + SMR CC: '#4f1745' + H2 liquefaction: '#d647bd' + hydrogen storage: '#bf13a0' + H2 Store: '#bf13a0' + H2 storage: '#bf13a0' + land transport fuel cell: '#6b3161' + H2 pipeline: '#f081dc' + H2 pipeline retrofitted: '#ba99b5' + H2 Fuel Cell: '#c251ae' + H2 fuel cell: '#c251ae' + H2 turbine: '#991f83' + H2 Electrolysis: '#ff29d9' + H2 electrolysis: '#ff29d9' + # ammonia + NH3: '#46caf0' + ammonia: '#46caf0' + ammonia store: '#00ace0' + ammonia cracker: '#87d0e6' + Haber-Bosch: '#076987' + # syngas + Sabatier: '#9850ad' + methanation: '#c44ce6' + methane: '#c44ce6' + # synfuels + Fischer-Tropsch: '#25c49a' + liquid: '#25c49a' + kerosene for aviation: '#a1ffe6' + naphtha for industry: '#57ebc4' + methanolisation: '#83d6d5' + methanol: '#468c8b' + shipping methanol: '#468c8b' + industry methanol: '#468c8b' + # co2 + CC: '#f29dae' + CCS: '#f29dae' + CO2 sequestration: '#f29dae' + DAC: '#ff5270' + co2 stored: '#f2385a' + co2 sequestered: '#f2682f' + co2: '#f29dae' + co2 vent: '#ffd4dc' + CO2 pipeline: 
'#f5627f' + # emissions + process emissions CC: '#000000' + process emissions: '#222222' + process emissions to stored: '#444444' + process emissions to atmosphere: '#888888' + oil emissions: '#aaaaaa' + shipping oil emissions: "#555555" + shipping methanol emissions: '#666666' + land transport oil emissions: '#777777' + agriculture machinery oil emissions: '#333333' + # other + shipping: '#03a2ff' + power-to-heat: '#2fb537' + power-to-gas: '#c44ce6' + power-to-H2: '#ff29d9' + power-to-liquid: '#25c49a' + gas-to-power/heat: '#ee8340' + waste: '#e3d37d' + other: '#000000' + geothermal: '#ba91b1' + geothermal heat: '#ba91b1' + geothermal district heat: '#d19D00' + geothermal organic rankine cycle: '#ffbf00' + AC: "#70af1d" + AC-AC: "#70af1d" + AC line: "#70af1d" + links: "#8a1caf" + HVDC links: "#8a1caf" + DC: "#8a1caf" + DC-DC: "#8a1caf" + DC link: "#8a1caf" + load: "#dd2e23" + waste CHP: '#e3d37d' + waste CHP CC: '#e3d3ff' + HVC to air: 'k' diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 651e8ea29..a2c32b61d 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -301,29 +301,20 @@ def simplify_links( # Only span graph over the DC link components G = n.graph(branch_components=["Link"]) - def split_links(nodes): + def split_links(nodes, added_supernodes=None): nodes = frozenset(nodes) seen = set() - # Corsica substation - node_corsica = find_closest_bus( - n, - x=9.44802, - y=42.52842, - tol=2000, # Tolerance needed to only return the bus if the region is actually modelled - ) - # Supernodes are endpoints of links, identified by having lass then two neighbours or being an AC Bus # An example for the latter is if two different links are connected to the same AC bus. 
- # Manually keep Corsica substation as a supernode supernodes = { m for m in nodes if ( (len(G.adj[m]) < 2 or (set(G.adj[m]) - nodes)) or (n.buses.loc[m, "carrier"] == "AC") - or (m == node_corsica) + or (m in added_supernodes) ) } @@ -360,8 +351,20 @@ def split_links(nodes): 0.0, index=n.buses.index, columns=list(connection_costs_per_link) ) + node_corsica = find_closest_bus( + n, + x=9.44802, + y=42.52842, + tol=2000, # Tolerance needed to only return the bus if the region is actually modelled + ) + + added_supernodes = [] + added_supernodes.append(node_corsica) + for lbl in labels.value_counts().loc[lambda s: s > 2].index: - for b, buses, links in split_links(labels.index[labels == lbl]): + for b, buses, links in split_links( + labels.index[labels == lbl], added_supernodes + ): if len(buses) <= 2: continue @@ -422,6 +425,9 @@ def split_links(nodes): logger.debug("Collecting all components using the busmap") + # Change carrier type of all added super_nodes to "AC" + n.buses.loc[added_supernodes, "carrier"] = "AC" + _aggregate_and_move_components( n, busmap, From e097fc4ce12efb39bc35644776e99148415c7b10 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Thu, 15 Aug 2024 18:10:51 +0200 Subject: [PATCH 087/100] Bug fix: Carrier type of all supernodes corrected to 'AC' --- scripts/simplify_network.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index a2c32b61d..119445c42 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -359,7 +359,8 @@ def split_links(nodes, added_supernodes=None): ) added_supernodes = [] - added_supernodes.append(node_corsica) + if node_corsica is not None: + added_supernodes.append(node_corsica) for lbl in labels.value_counts().loc[lambda s: s > 2].index: for b, buses, links in split_links( From 864321e3c88bb7e12f4a9915a0cf4d95a32681e1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> 
Date: Mon, 19 Aug 2024 08:50:40 +0000 Subject: [PATCH 088/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/build_industry_sector_ratios.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/build_industry_sector_ratios.py b/scripts/build_industry_sector_ratios.py index 530ac910f..c2438f915 100644 --- a/scripts/build_industry_sector_ratios.py +++ b/scripts/build_industry_sector_ratios.py @@ -445,7 +445,9 @@ def chemicals_industry(): # subtract ammonia energy demand (in ktNH3/a) ammonia = pd.read_csv(snakemake.input.ammonia_production, index_col=0) - ammonia_total = ammonia.loc[ammonia.index.intersection(eu27), str(max(2018, year))].sum() + ammonia_total = ammonia.loc[ + ammonia.index.intersection(eu27), str(max(2018, year)) + ].sum() df.loc["methane", sector] -= ammonia_total * params["MWh_CH4_per_tNH3_SMR"] df.loc["elec", sector] -= ammonia_total * params["MWh_elec_per_tNH3_SMR"] From 9b663452914e888f10db29af6e5dd21543092514 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Mon, 19 Aug 2024 12:18:25 +0200 Subject: [PATCH 089/100] Updated rules and base_network for compatibility with TYNDP projects. 
--- rules/build_electricity.smk | 5 ----- scripts/base_network.py | 5 ----- 2 files changed, 10 deletions(-) diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 184523013..b0de316eb 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -118,11 +118,6 @@ rule base_network: if config_provider("electricity_network", "base_network")(w) == "gridkit" else [] ), - links_tyndp=lambda w: ( - "data/links_tyndp.csv" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else [] - ), country_shapes=resources("country_shapes.geojson"), offshore_shapes=resources("offshore_shapes.geojson"), europe_shape=resources("europe_shape.geojson"), diff --git a/scripts/base_network.py b/scripts/base_network.py index 49ef72812..38c949e6e 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -829,11 +829,6 @@ def base_network( else: raise ValueError("base_network must be either 'gridkit' or 'osm'") - if config["links"].get("include_tyndp") & ( - config["electricity_network"].get("base_network") == "gridkit" - ): - buses, links = _add_links_from_tyndp(buses, links, links_tyndp, europe_shape) - if config["electricity_network"].get("base_network") == "gridkit": converters = _load_converters_from_eg(buses, eg_converters) elif "osm" in config["electricity_network"].get("base_network"): From 412acd88f2b9cc4144d9e6c56790b8d4254369b2 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Mon, 19 Aug 2024 15:54:48 +0200 Subject: [PATCH 090/100] Updated Zenodo repository and prebuilt network to include 150 kV HVDC connections. 
--- rules/retrieve.smk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rules/retrieve.smk b/rules/retrieve.smk index 1cf53f785..371100d6e 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -415,14 +415,14 @@ if config["enable"]["retrieve"] and ( rule retrieve_osm_prebuilt: input: - buses=storage("https://zenodo.org/records/12799202/files/buses.csv"), + buses=storage("https://zenodo.org/records/13342577/files/buses.csv"), converters=storage( - "https://zenodo.org/records/12799202/files/converters.csv" + "https://zenodo.org/records/13342577/files/converters.csv" ), - lines=storage("https://zenodo.org/records/12799202/files/lines.csv"), - links=storage("https://zenodo.org/records/12799202/files/links.csv"), + lines=storage("https://zenodo.org/records/13342577/files/lines.csv"), + links=storage("https://zenodo.org/records/13342577/files/links.csv"), transformers=storage( - "https://zenodo.org/records/12799202/files/transformers.csv" + "https://zenodo.org/records/13342577/files/transformers.csv" ), output: buses="data/osm/prebuilt/buses.csv", From f1526fb00ab4e7fbf54f09a8d0f224e4ea515cc0 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Tue, 20 Aug 2024 11:20:43 +0200 Subject: [PATCH 091/100] Removed outdated config backup. 
--- config/config_backuo.yaml | 1259 ------------------------------------- 1 file changed, 1259 deletions(-) delete mode 100644 config/config_backuo.yaml diff --git a/config/config_backuo.yaml b/config/config_backuo.yaml deleted file mode 100644 index f45ea5be0..000000000 --- a/config/config_backuo.yaml +++ /dev/null @@ -1,1259 +0,0 @@ -# SPDX-FileCopyrightText: : 2017-2024 The PyPSA-Eur Authors -# -# SPDX-License-Identifier: CC0-1.0 - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#top-level-configuration -version: 0.11.0 -tutorial: false - -logging: - level: INFO - format: '%(levelname)s:%(name)s:%(message)s' - -private: - keys: - entsoe_api: - -remote: - ssh: zecm - path: ~/scratch/projects/pypsa-eur - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#run -run: - prefix: "" - name: "europe-nuts2-gridkit" - scenarios: - enable: false - file: config/scenarios.yaml - disable_progressbar: false - shared_resources: - policy: false - exclude: [] - shared_cutouts: true - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#foresight -foresight: overnight - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#scenario -# Wildcard docs in https://pypsa-eur.readthedocs.io/en/latest/wildcards.html -scenario: - simpl: - - '' - ll: - - v1.0 - clusters: - - 318 - opts: - - '' - sector_opts: - - '' - planning_horizons: - # - 2020 - - 2030 - # - 2040 - # - 2050 - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#countries -countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK', 'UA', 'MD'] - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#snapshots -snapshots: - start: "2013-01-01" - end: "2014-01-01" - inclusive: 'left' - -# docs in 
https://pypsa-eur.readthedocs.io/en/latest/configuration.html#enable -enable: - retrieve: auto - prepare_links_p_nom: false - retrieve_databundle: true - retrieve_cost_data: true - build_cutout: false - retrieve_cutout: true - custom_busmap: true - drop_leap_day: true - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#co2-budget -co2_budget: - 2020: 0.701 - 2025: 0.524 - 2030: 0.297 - 2035: 0.150 - 2040: 0.071 - 2045: 0.032 - 2050: 0.000 - -electricity_network: - base_network: gridkit # Options: gridkit, osm-prebuilt, osm-raw (built from scratch using OSM data, takes longer) - osm_group_tolerance_buses: 5000 # unit: meters, default 5000 - Buses within this distance are grouped together - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#lines -lines: - types: # Specify voltages (keys) and line types (values) for lines - 200.: "Al/St 240/40 2-bundle 200.0" - 220.: "Al/St 240/40 2-bundle 220.0" - 300.: "Al/St 240/40 3-bundle 300.0" - 380.: "Al/St 240/40 4-bundle 380.0" - 500.: "Al/St 240/40 4-bundle 380.0" - 750.: "Al/St 560/50 4-bundle 750.0" - s_max_pu: 0.7 - s_nom_max: .inf - max_extension: 20000 #MW - length_factor: 1.25 - reconnect_crimea: true # Only needed for 'gridkit' base_network, in OSM, the lines are already connected - under_construction: 'keep' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity - dynamic_line_rating: - activate: false - cutout: europe-2013-sarah3-era5 - correction_factor: 0.95 - max_voltage_difference: false - max_line_rating: false - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#links -links: - p_max_pu: 1.0 - p_nom_max: .inf - max_extension: 30000 #MW - include_tyndp: false - under_construction: 'zero' # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#transformers -transformers: - x: 0.1 - s_nom: 2000. 
- type: '' - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity -electricity: - gaslimit_enable: false - gaslimit: false - co2limit_enable: false - co2limit: 7.75e+7 - co2base: 1.487e+9 - - operational_reserve: - activate: false - epsilon_load: 0.02 - epsilon_vres: 0.02 - contingency: 4000 - - max_hours: - battery: 6 - H2: 168 - - extendable_carriers: - Generator: [solar, solar-hsat, onwind, offwind-ac, offwind-dc, offwind-float, OCGT, CCGT] - StorageUnit: [] # battery, H2 - Store: [battery, H2] - Link: [] # H2 pipeline - - powerplants_filter: (DateOut >= 2023 or DateOut != DateOut) and not (Country == 'Germany' and Fueltype == 'Nuclear') - custom_powerplants: false - everywhere_powerplants: [] - - conventional_carriers: [nuclear, oil, OCGT, CCGT, coal, lignite, geothermal, biomass] - renewable_carriers: [solar, onwind, offwind-ac, offwind-dc, hydro] - - estimate_renewable_capacities: - enable: true - from_opsd: true - year: 2020 - expansion_limit: false - technology_mapping: - Offshore: [offwind-ac, offwind-dc, offwind-float] - Onshore: [onwind] - PV: [solar] - - autarky: - enable: false - by_country: false - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#atlite -atlite: - default_cutout: europe-2013-sarah3-era5 - nprocesses: 4 - show_progress: false - cutouts: - # use 'base' to determine geographical bounds and time span from config - # base: - # module: era5 - europe-2013-sarah3-era5: - module: [sarah, era5] # in priority order - x: [-12., 42.] - y: [33., 72.] 
- dx: 0.3 - dy: 0.3 - time: ['2013', '2013'] - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#renewable -renewable: - onwind: - cutout: europe-2013-sarah3-era5 - resource: - method: wind - turbine: Vestas_V112_3MW - smooth: true - add_cutout_windspeed: true - capacity_per_sqkm: 3 - # correction_factor: 0.93 - corine: - grid_codes: [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32] - distance: 1000 - distance_grid_codes: [1, 2, 3, 4, 5, 6] - luisa: false - # grid_codes: [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242] - # distance: 1000 - # distance_grid_codes: [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242] - natura: true - excluder_resolution: 100 - clip_p_max_pu: 1.e-2 - offwind-ac: - cutout: europe-2013-sarah3-era5 - resource: - method: wind - turbine: NREL_ReferenceTurbine_2020ATB_5.5MW - smooth: true - add_cutout_windspeed: true - capacity_per_sqkm: 2 - correction_factor: 0.8855 - corine: [44, 255] - luisa: false # [0, 5230] - natura: true - ship_threshold: 400 - max_depth: 60 - max_shore_distance: 30000 - excluder_resolution: 200 - clip_p_max_pu: 1.e-2 - offwind-dc: - cutout: europe-2013-sarah3-era5 - resource: - method: wind - turbine: NREL_ReferenceTurbine_2020ATB_5.5MW - smooth: true - add_cutout_windspeed: true - capacity_per_sqkm: 2 - correction_factor: 0.8855 - corine: [44, 255] - luisa: false # [0, 5230] - natura: true - ship_threshold: 400 - max_depth: 60 - min_shore_distance: 30000 - excluder_resolution: 200 - clip_p_max_pu: 1.e-2 - offwind-float: - cutout: europe-2013-sarah3-era5 - resource: - method: wind - turbine: NREL_ReferenceTurbine_5MW_offshore - smooth: true - add_cutout_windspeed: true - # ScholzPhd Tab 4.3.1: 10MW/km^2 - capacity_per_sqkm: 2 - correction_factor: 0.8855 - # proxy for wake losses - # from 10.1016/j.energy.2018.08.153 - # until done more rigorously in #153 - corine: [44, 255] - natura: true - ship_threshold: 400 - excluder_resolution: 
200 - min_depth: 60 - max_depth: 1000 - clip_p_max_pu: 1.e-2 - solar: - cutout: europe-2013-sarah3-era5 - resource: - method: pv - panel: CSi - orientation: - slope: 35. - azimuth: 180. - capacity_per_sqkm: 5.1 - # correction_factor: 0.854337 - corine: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 26, 31, 32] - luisa: false # [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242, 1310, 1320, 1330, 1410, 1421, 1422, 2110, 2120, 2130, 2210, 2220, 2230, 2310, 2410, 2420, 3210, 3320, 3330] - natura: true - excluder_resolution: 100 - clip_p_max_pu: 1.e-2 - solar-hsat: - cutout: europe-2013-sarah3-era5 - resource: - method: pv - panel: CSi - orientation: - slope: 35. - azimuth: 180. - tracking: horizontal - capacity_per_sqkm: 4.43 # 15% higher land usage acc. to NREL - corine: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 26, 31, 32] - luisa: false # [1111, 1121, 1122, 1123, 1130, 1210, 1221, 1222, 1230, 1241, 1242, 1310, 1320, 1330, 1410, 1421, 1422, 2110, 2120, 2130, 2210, 2220, 2230, 2310, 2410, 2420, 3210, 3320, 3330] - natura: true - excluder_resolution: 100 - clip_p_max_pu: 1.e-2 - hydro: - cutout: europe-2013-sarah3-era5 - carriers: [ror, PHS, hydro] - PHS_max_hours: 6 - hydro_max_hours: "energy_capacity_totals_by_country" # one of energy_capacity_totals_by_country, estimate_by_large_installations or a float - flatten_dispatch: false - flatten_dispatch_buffer: 0.2 - clip_min_inflow: 1.0 - eia_norm_year: false - eia_correct_by_capacity: false - eia_approximate_missing: false - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#conventional -conventional: - unit_commitment: false - dynamic_fuel_price: false - nuclear: - p_max_pu: "data/nuclear_p_max_pu.csv" # float of file name - -# docs-load in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#load -load: - interpolate_limit: 3 - time_shift_for_large_gaps: 1w - manual_adjustments: true # false - scaling_factor: 1.0 - 
fixed_year: false # false or year (e.g. 2013) - supplement_synthetic: true - -# docs -# TODO: PyPSA-Eur merge issue in prepare_sector_network.py -# regulate what components with which carriers are kept from PyPSA-Eur; -# some technologies are removed because they are implemented differently -# (e.g. battery or H2 storage) or have different year-dependent costs -# in PyPSA-Eur-Sec -pypsa_eur: - Bus: - - AC - Link: - - DC - Generator: - - onwind - - offwind-ac - - offwind-dc - - offwind-float - - solar-hsat - - solar - - ror - - nuclear - StorageUnit: - - PHS - - hydro - Store: [] - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#energy -energy: - energy_totals_year: 2019 - base_emissions_year: 1990 - emissions: CO2 - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#biomass -biomass: - year: 2030 - scenario: ENS_Med - classes: - solid biomass: - - Agricultural waste - - Fuelwood residues - - Secondary Forestry residues - woodchips - - Sawdust - - Residues from landscape care - not included: - - Sugar from sugar beet - - Rape seed - - "Sunflower, soya seed " - - Bioethanol barley, wheat, grain maize, oats, other cereals and rye - - Miscanthus, switchgrass, RCG - - Willow - - Poplar - - FuelwoodRW - - C&P_RW - biogas: - - Manure solid, liquid - - Sludge - municipal solid waste: - - Municipal waste - share_unsustainable_use_retained: - 2020: 1 - 2025: 0.66 - 2030: 0.33 - 2035: 0 - 2040: 0 - 2045: 0 - 2050: 0 - share_sustainable_potential_available: - 2020: 0 - 2025: 0.33 - 2030: 0.66 - 2035: 1 - 2040: 1 - 2045: 1 - 2050: 1 - - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#solar-thermal -solar_thermal: - clearsky_model: simple # should be "simple" or "enhanced"? - orientation: - slope: 45. - azimuth: 180. 
- cutout: default - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#existing-capacities -existing_capacities: - grouping_years_power: [1920, 1950, 1955, 1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020, 2025] - grouping_years_heat: [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2019] # heat grouping years >= baseyear will be ignored - threshold_capacity: 10 - default_heating_lifetime: 20 - conventional_carriers: - - lignite - - coal - - oil - - uranium - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#sector -sector: - transport: true - heating: true - biomass: true - industry: true - agriculture: true - fossil_fuels: true - district_heating: - potential: 0.6 - progress: - 2020: 0.0 - 2025: 0.15 - 2030: 0.3 - 2035: 0.45 - 2040: 0.6 - 2045: 0.8 - 2050: 1.0 - district_heating_loss: 0.15 - forward_temperature: 90 #C - return_temperature: 50 #C - heat_source_cooling: 6 #K - heat_pump_cop_approximation: - refrigerant: ammonia - heat_exchanger_pinch_point_temperature_difference: 5 #K - isentropic_compressor_efficiency: 0.8 - heat_loss: 0.0 - heat_pump_sources: - urban central: - - air - urban decentral: - - air - rural: - - air - - ground - cluster_heat_buses: true - heat_demand_cutout: default - bev_dsm_restriction_value: 0.75 - bev_dsm_restriction_time: 7 - transport_heating_deadband_upper: 20. - transport_heating_deadband_lower: 15. 
- ICE_lower_degree_factor: 0.375 - ICE_upper_degree_factor: 1.6 - EV_lower_degree_factor: 0.98 - EV_upper_degree_factor: 0.63 - bev_dsm: true - bev_availability: 0.5 - bev_energy: 0.05 - bev_charge_efficiency: 0.9 - bev_charge_rate: 0.011 - bev_avail_max: 0.95 - bev_avail_mean: 0.8 - v2g: true - land_transport_fuel_cell_share: - 2020: 0 - 2025: 0 - 2030: 0 - 2035: 0 - 2040: 0 - 2045: 0 - 2050: 0 - land_transport_electric_share: - 2020: 0 - 2025: 0.15 - 2030: 0.3 - 2035: 0.45 - 2040: 0.7 - 2045: 0.85 - 2050: 1 - land_transport_ice_share: - 2020: 1 - 2025: 0.85 - 2030: 0.7 - 2035: 0.55 - 2040: 0.3 - 2045: 0.15 - 2050: 0 - transport_electric_efficiency: 53.19 # 1 MWh_el = 53.19*100 km - transport_fuel_cell_efficiency: 30.003 # 1 MWh_H2 = 30.003*100 km - transport_ice_efficiency: 16.0712 # 1 MWh_oil = 16.0712 * 100 km - agriculture_machinery_electric_share: 0 - agriculture_machinery_oil_share: 1 - agriculture_machinery_fuel_efficiency: 0.7 - agriculture_machinery_electric_efficiency: 0.3 - MWh_MeOH_per_MWh_H2: 0.8787 - MWh_MeOH_per_tCO2: 4.0321 - MWh_MeOH_per_MWh_e: 3.6907 - shipping_hydrogen_liquefaction: false - shipping_hydrogen_share: - 2020: 0 - 2025: 0 - 2030: 0 - 2035: 0 - 2040: 0 - 2045: 0 - 2050: 0 - shipping_methanol_share: - 2020: 0 - 2025: 0.15 - 2030: 0.3 - 2035: 0.5 - 2040: 0.7 - 2045: 0.85 - 2050: 1 - shipping_oil_share: - 2020: 1 - 2025: 0.85 - 2030: 0.7 - 2035: 0.5 - 2040: 0.3 - 2045: 0.15 - 2050: 0 - shipping_methanol_efficiency: 0.46 - shipping_oil_efficiency: 0.40 - aviation_demand_factor: 1. - HVC_demand_factor: 1. - time_dep_hp_cop: true - heat_pump_sink_T_individual_heating: 55. 
- reduce_space_heat_exogenously: true - reduce_space_heat_exogenously_factor: - 2020: 0.10 # this results in a space heat demand reduction of 10% - 2025: 0.09 # first heat demand increases compared to 2020 because of larger floor area per capita - 2030: 0.09 - 2035: 0.11 - 2040: 0.16 - 2045: 0.21 - 2050: 0.29 - retrofitting: - retro_endogen: false - cost_factor: 1.0 - interest_rate: 0.04 - annualise_cost: true - tax_weighting: false - construction_index: true - tes: true - tes_tau: - decentral: 3 - central: 180 - boilers: true - resistive_heaters: true - oil_boilers: false - biomass_boiler: true - overdimension_individual_heating: 1.1 #to cover demand peaks bigger than data - chp: true - micro_chp: false - solar_thermal: true - solar_cf_correction: 0.788457 # = >>> 1/1.2683 - marginal_cost_storage: 0. #1e-4 - methanation: true - coal_cc: false - dac: true - co2_vent: false - central_heat_vent: false - allam_cycle: false - hydrogen_fuel_cell: true - hydrogen_turbine: false - SMR: true - SMR_cc: true - regional_methanol_demand: false - regional_oil_demand: false - regional_coal_demand: false - regional_co2_sequestration_potential: - enable: false - attribute: - - conservative estimate Mt - - conservative estimate GAS Mt - - conservative estimate OIL Mt - - conservative estimate aquifer Mt - include_onshore: false - min_size: 3 - max_size: 25 - years_of_storage: 25 - co2_sequestration_potential: 200 - co2_sequestration_cost: 10 - co2_sequestration_lifetime: 50 - co2_spatial: false - co2network: false - co2_network_cost_factor: 1 - cc_fraction: 0.9 - hydrogen_underground_storage: true - hydrogen_underground_storage_locations: - # - onshore # more than 50 km from sea - - nearshore # within 50 km of sea - # - offshore - ammonia: false - min_part_load_fischer_tropsch: 0.5 - min_part_load_methanolisation: 0.3 - min_part_load_methanation: 0.3 - use_fischer_tropsch_waste_heat: 0.25 - use_haber_bosch_waste_heat: 0.25 - use_methanolisation_waste_heat: 0.25 - 
use_methanation_waste_heat: 0.25 - use_fuel_cell_waste_heat: 0.25 - use_electrolysis_waste_heat: 0.25 - electricity_transmission_grid: true - electricity_distribution_grid: true - electricity_distribution_grid_cost_factor: 1.0 - electricity_grid_connection: true - transmission_efficiency: - DC: - efficiency_static: 0.98 - efficiency_per_1000km: 0.977 - H2 pipeline: - efficiency_per_1000km: 1 # 0.982 - compression_per_1000km: 0.018 - gas pipeline: - efficiency_per_1000km: 1 #0.977 - compression_per_1000km: 0.01 - electricity distribution grid: - efficiency_static: 0.97 - H2_network: true - gas_network: false - H2_retrofit: false - H2_retrofit_capacity_per_CH4: 0.6 - gas_network_connectivity_upgrade: 1 - gas_distribution_grid: true - gas_distribution_grid_cost_factor: 1.0 - biomass_spatial: false - biomass_transport: false - biogas_upgrading_cc: false - conventional_generation: - OCGT: gas - biomass_to_liquid: false - electrobiofuels: false - biosng: false - municipal_solid_waste: false - limit_max_growth: - enable: false - # allowing 30% larger than max historic growth - factor: 1.3 - max_growth: # unit GW - onwind: 16 # onshore max grow so far 16 GW in Europe https://www.iea.org/reports/renewables-2020/wind - solar: 28 # solar max grow so far 28 GW in Europe https://www.iea.org/reports/renewables-2020/solar-pv - offwind-ac: 35 # offshore max grow so far 3.5 GW in Europe https://windeurope.org/about-wind/statistics/offshore/european-offshore-wind-industry-key-trends-statistics-2019/ - offwind-dc: 35 - max_relative_growth: - onwind: 3 - solar: 3 - offwind-ac: 3 - offwind-dc: 3 - enhanced_geothermal: - enable: false - flexible: true - max_hours: 240 - max_boost: 0.25 - var_cf: true - sustainability_factor: 0.0025 - solid_biomass_import: - enable: false - price: 54 #EUR/MWh - max_amount: 1390 # TWh - upstream_emissions_factor: .1 #share of solid biomass CO2 emissions at full combustion - - -# docs in 
https://pypsa-eur.readthedocs.io/en/latest/configuration.html#industry -industry: - St_primary_fraction: - 2020: 0.6 - 2025: 0.55 - 2030: 0.5 - 2035: 0.45 - 2040: 0.4 - 2045: 0.35 - 2050: 0.3 - DRI_fraction: - 2020: 0 - 2025: 0 - 2030: 0.05 - 2035: 0.2 - 2040: 0.4 - 2045: 0.7 - 2050: 1 - H2_DRI: 1.7 - elec_DRI: 0.322 - Al_primary_fraction: - 2020: 0.4 - 2025: 0.375 - 2030: 0.35 - 2035: 0.325 - 2040: 0.3 - 2045: 0.25 - 2050: 0.2 - MWh_NH3_per_tNH3: 5.166 - MWh_CH4_per_tNH3_SMR: 10.8 - MWh_elec_per_tNH3_SMR: 0.7 - MWh_H2_per_tNH3_electrolysis: 5.93 - MWh_elec_per_tNH3_electrolysis: 0.2473 - MWh_NH3_per_MWh_H2_cracker: 1.46 # https://github.com/euronion/trace/blob/44a5ff8401762edbef80eff9cfe5a47c8d3c8be4/data/efficiencies.csv - NH3_process_emissions: 24.5 - petrochemical_process_emissions: 25.5 - #HVC primary/recycling based on values used in Neumann et al https://doi.org/10.1016/j.joule.2023.06.016, linearly interpolated between 2020 and 2050 - #2020 recycling rates based on Agora https://static.agora-energiewende.de/fileadmin/Projekte/2021/2021_02_EU_CEAP/A-EW_254_Mobilising-circular-economy_study_WEB.pdf - #fractions refer to the total primary HVC production in 2020 - #assumes 6.7 Mtplastics produced from recycling in 2020 - HVC_primary_fraction: - 2020: 1.0 - 2025: 0.9 - 2030: 0.8 - 2035: 0.7 - 2040: 0.6 - 2045: 0.5 - 2050: 0.4 - HVC_mechanical_recycling_fraction: - 2020: 0.12 - 2025: 0.15 - 2030: 0.18 - 2035: 0.21 - 2040: 0.24 - 2045: 0.27 - 2050: 0.30 - HVC_chemical_recycling_fraction: - 2020: 0.0 - 2025: 0.0 - 2030: 0.04 - 2035: 0.08 - 2040: 0.12 - 2045: 0.16 - 2050: 0.20 - HVC_environment_sequestration_fraction: 0. - waste_to_energy: false - waste_to_energy_cc: false - sector_ratios_fraction_future: - 2020: 0.0 - 2025: 0.1 - 2030: 0.3 - 2035: 0.5 - 2040: 0.7 - 2045: 0.9 - 2050: 1.0 - basic_chemicals_without_NH3_production_today: 69. #Mt/a, = 86 Mtethylene-equiv - 17 MtNH3 - HVC_production_today: 52. 
- MWh_elec_per_tHVC_mechanical_recycling: 0.547 - MWh_elec_per_tHVC_chemical_recycling: 6.9 - chlorine_production_today: 9.58 - MWh_elec_per_tCl: 3.6 - MWh_H2_per_tCl: -0.9372 - methanol_production_today: 1.5 - MWh_elec_per_tMeOH: 0.167 - MWh_CH4_per_tMeOH: 10.25 - MWh_MeOH_per_tMeOH: 5.528 - hotmaps_locate_missing: false - reference_year: 2019 - - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#costs -costs: - year: 2030 - version: v0.9.1 - social_discountrate: 0.02 - fill_values: - FOM: 0 - VOM: 0 - efficiency: 1 - fuel: 0 - investment: 0 - lifetime: 25 - "CO2 intensity": 0 - "discount rate": 0.07 - # Marginal and capital costs can be overwritten - # capital_cost: - # onwind: 500 - marginal_cost: - solar: 0.01 - onwind: 0.015 - offwind: 0.015 - hydro: 0. - H2: 0. - electrolysis: 0. - fuel cell: 0. - battery: 0. - battery inverter: 0. - emission_prices: - enable: true - co2: 100. - co2_monthly_prices: false - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#clustering -clustering: - focus_weights: false - simplify_network: - to_substations: false - algorithm: kmeans # choose from: [hac, kmeans] - feature: solar+onwind-time - exclude_carriers: [] - remove_stubs: true - remove_stubs_across_borders: true - cluster_network: - algorithm: kmeans - feature: solar+onwind-time - exclude_carriers: [] - consider_efficiency_classes: false - aggregation_strategies: - generators: - committable: any - ramp_limit_up: max - ramp_limit_down: max - temporal: - resolution_elec: 1H - resolution_sector: 1H - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#adjustments -adjustments: - electricity: false - sector: false - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#solving -solving: - #tmpdir: "path/to/tmp" - options: - clip_p_max_pu: 1.e-2 - load_shedding: false - curtailment_mode: false - noisy_costs: true - skip_iterations: true - rolling_horizon: false - seed: 123 - 
custom_extra_functionality: "../data/custom_extra_functionality.py" - # io_api: "direct" # Increases performance but only supported for the highs and gurobi solvers - # options that go into the optimize function - track_iterations: false - min_iterations: 2 - max_iterations: 3 - transmission_losses: 2 - linearized_unit_commitment: true - horizon: 365 - post_discretization: - enable: false - line_unit_size: 1700 - line_threshold: 0.3 - link_unit_size: - DC: 2000 - H2 pipeline: 1200 - gas pipeline: 1500 - link_threshold: - DC: 0.3 - H2 pipeline: 0.3 - gas pipeline: 0.3 - - agg_p_nom_limits: - agg_offwind: false - include_existing: false - file: data/agg_p_nom_minmax.csv - - constraints: - CCL: false - EQ: false - BAU: false - SAFE: false - - solver: - name: gurobi - options: gurobi-default - - solver_options: - highs-default: - # refer to https://ergo-code.github.io/HiGHS/dev/options/definitions/ - threads: 1 - solver: "ipm" - run_crossover: "off" - small_matrix_value: 1e-6 - large_matrix_value: 1e9 - primal_feasibility_tolerance: 1e-5 - dual_feasibility_tolerance: 1e-5 - ipm_optimality_tolerance: 1e-4 - parallel: "on" - random_seed: 123 - gurobi-default: - threads: 8 - method: 2 # barrier - crossover: 0 - BarConvTol: 1.e-6 - Seed: 123 - AggFill: 0 - PreDual: 0 - GURO_PAR_BARDENSETHRESH: 200 - gurobi-numeric-focus: - NumericFocus: 3 # Favour numeric stability over speed - method: 2 # barrier - crossover: 0 # do not use crossover - BarHomogeneous: 1 # Use homogeneous barrier if standard does not converge - BarConvTol: 1.e-5 - FeasibilityTol: 1.e-4 - OptimalityTol: 1.e-4 - ObjScale: -0.5 - threads: 8 - Seed: 123 - gurobi-fallback: # Use gurobi defaults - crossover: 0 - method: 2 # barrier - BarHomogeneous: 1 # Use homogeneous barrier if standard does not converge - BarConvTol: 1.e-5 - FeasibilityTol: 1.e-5 - OptimalityTol: 1.e-5 - Seed: 123 - threads: 8 - cplex-default: - threads: 4 - lpmethod: 4 # barrier - solutiontype: 2 # non basic solution, ie no crossover - 
barrier.convergetol: 1.e-5 - feasopt.tolerance: 1.e-6 - copt-default: - Threads: 8 - LpMethod: 2 - Crossover: 0 - RelGap: 1.e-6 - Dualize: 0 - copt-gpu: - LpMethod: 6 - GPUMode: 1 - PDLPTol: 1.e-5 - Crossover: 0 - cbc-default: {} # Used in CI - glpk-default: {} # Used in CI - - mem_mb: 140000 #memory in MB; 20 GB enough for 50+B+I+H2; 100 GB for 181+B+I+H2 - runtime: 60h #runtime in humanfriendly style https://humanfriendly.readthedocs.io/en/latest/ - - -# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#plotting -plotting: - map: - boundaries: [-11, 30, 34, 71] - color_geomap: - ocean: white - land: white - projection: - name: "EqualEarth" - # See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html for alternatives, for example: - # name: "LambertConformal" - # central_longitude: 10. - # central_latitude: 50. - # standard_parallels: [35, 65] - eu_node_location: - x: -5.5 - y: 46. - costs_max: 1000 - costs_threshold: 1 - energy_max: 20000 - energy_min: -20000 - energy_threshold: 50. 
- - nice_names: - OCGT: "Open-Cycle Gas" - CCGT: "Combined-Cycle Gas" - offwind-ac: "Offshore Wind (AC)" - offwind-dc: "Offshore Wind (DC)" - offwind-float: "Offshore Wind (Floating)" - onwind: "Onshore Wind" - solar: "Solar" - PHS: "Pumped Hydro Storage" - hydro: "Reservoir & Dam" - battery: "Battery Storage" - H2: "Hydrogen Storage" - lines: "Transmission Lines" - ror: "Run of River" - load: "Load Shedding" - ac: "AC" - dc: "DC" - - tech_colors: - # wind - onwind: "#235ebc" - onshore wind: "#235ebc" - offwind: "#6895dd" - offshore wind: "#6895dd" - offwind-ac: "#6895dd" - offshore wind (AC): "#6895dd" - offshore wind ac: "#6895dd" - offwind-dc: "#74c6f2" - offshore wind (DC): "#74c6f2" - offshore wind dc: "#74c6f2" - offwind-float: "#b5e2fa" - offshore wind (Float): "#b5e2fa" - offshore wind float: "#b5e2fa" - # water - hydro: '#298c81' - hydro reservoir: '#298c81' - ror: '#3dbfb0' - run of river: '#3dbfb0' - hydroelectricity: '#298c81' - PHS: '#51dbcc' - hydro+PHS: "#08ad97" - # solar - solar: "#f9d002" - solar PV: "#f9d002" - solar-hsat: "#fdb915" - solar thermal: '#ffbf2b' - residential rural solar thermal: '#f1c069' - services rural solar thermal: '#eabf61' - residential urban decentral solar thermal: '#e5bc5a' - services urban decentral solar thermal: '#dfb953' - urban central solar thermal: '#d7b24c' - solar rooftop: '#ffea80' - # gas - OCGT: '#e0986c' - OCGT marginal: '#e0986c' - OCGT-heat: '#e0986c' - gas boiler: '#db6a25' - gas boilers: '#db6a25' - gas boiler marginal: '#db6a25' - residential rural gas boiler: '#d4722e' - residential urban decentral gas boiler: '#cb7a36' - services rural gas boiler: '#c4813f' - services urban decentral gas boiler: '#ba8947' - urban central gas boiler: '#b0904f' - gas: '#e05b09' - fossil gas: '#e05b09' - natural gas: '#e05b09' - biogas to gas: '#e36311' - biogas to gas CC: '#e51245' - CCGT: '#a85522' - CCGT marginal: '#a85522' - allam: '#B98F76' - gas for industry co2 to atmosphere: '#692e0a' - gas for industry co2 to 
stored: '#8a3400' - gas for industry: '#853403' - gas for industry CC: '#692e0a' - gas pipeline: '#ebbca0' - gas pipeline new: '#a87c62' - # oil - oil: '#c9c9c9' - imported oil: '#a3a3a3' - oil boiler: '#adadad' - residential rural oil boiler: '#a9a9a9' - services rural oil boiler: '#a5a5a5' - residential urban decentral oil boiler: '#a1a1a1' - urban central oil boiler: '#9d9d9d' - services urban decentral oil boiler: '#999999' - agriculture machinery oil: '#949494' - shipping oil: "#808080" - land transport oil: '#afafaf' - # nuclear - Nuclear: '#ff8c00' - Nuclear marginal: '#ff8c00' - nuclear: '#ff8c00' - uranium: '#ff8c00' - # coal - Coal: '#545454' - coal: '#545454' - Coal marginal: '#545454' - coal for industry: '#343434' - solid: '#545454' - Lignite: '#826837' - lignite: '#826837' - Lignite marginal: '#826837' - # biomass - biogas: '#e3d37d' - biomass: '#baa741' - solid biomass: '#baa741' - municipal solid waste: '#91ba41' - solid biomass import: '#d5ca8d' - solid biomass transport: '#baa741' - solid biomass for industry: '#7a6d26' - solid biomass for industry CC: '#47411c' - solid biomass for industry co2 from atmosphere: '#736412' - solid biomass for industry co2 to stored: '#47411c' - urban central solid biomass CHP: '#9d9042' - urban central solid biomass CHP CC: '#6c5d28' - biomass boiler: '#8A9A5B' - residential rural biomass boiler: '#a1a066' - residential urban decentral biomass boiler: '#b0b87b' - services rural biomass boiler: '#c6cf98' - services urban decentral biomass boiler: '#dde5b5' - biomass to liquid: '#32CD32' - unsustainable bioliquids: '#32CD32' - electrobiofuels: 'red' - BioSNG: '#123456' - # power transmission - lines: '#6c9459' - transmission lines: '#6c9459' - electricity distribution grid: '#97ad8c' - low voltage: '#97ad8c' - # electricity demand - Electric load: '#110d63' - electric demand: '#110d63' - electricity: '#110d63' - industry electricity: '#2d2a66' - industry new electricity: '#2d2a66' - agriculture electricity: '#494778' 
- # battery + EVs - battery: '#ace37f' - battery storage: '#ace37f' - battery charger: '#88a75b' - battery discharger: '#5d4e29' - home battery: '#80c944' - home battery storage: '#80c944' - home battery charger: '#5e8032' - home battery discharger: '#3c5221' - BEV charger: '#baf238' - V2G: '#e5ffa8' - land transport EV: '#baf238' - land transport demand: '#38baf2' - EV battery: '#baf238' - # hot water storage - water tanks: '#e69487' - residential rural water tanks: '#f7b7a3' - services rural water tanks: '#f3afa3' - residential urban decentral water tanks: '#f2b2a3' - services urban decentral water tanks: '#f1b4a4' - urban central water tanks: '#e9977d' - hot water storage: '#e69487' - hot water charging: '#e8998b' - urban central water tanks charger: '#b57a67' - residential rural water tanks charger: '#b4887c' - residential urban decentral water tanks charger: '#b39995' - services rural water tanks charger: '#b3abb0' - services urban decentral water tanks charger: '#b3becc' - hot water discharging: '#e99c8e' - urban central water tanks discharger: '#b9816e' - residential rural water tanks discharger: '#ba9685' - residential urban decentral water tanks discharger: '#baac9e' - services rural water tanks discharger: '#bbc2b8' - services urban decentral water tanks discharger: '#bdd8d3' - # heat demand - Heat load: '#cc1f1f' - heat: '#cc1f1f' - heat vent: '#aa3344' - heat demand: '#cc1f1f' - rural heat: '#ff5c5c' - residential rural heat: '#ff7c7c' - services rural heat: '#ff9c9c' - central heat: '#cc1f1f' - urban central heat: '#d15959' - urban central heat vent: '#a74747' - decentral heat: '#750606' - residential urban decentral heat: '#a33c3c' - services urban decentral heat: '#cc1f1f' - low-temperature heat for industry: '#8f2727' - process heat: '#ff0000' - agriculture heat: '#d9a5a5' - # heat supply - heat pumps: '#2fb537' - heat pump: '#2fb537' - air heat pump: '#36eb41' - residential urban decentral air heat pump: '#48f74f' - services urban decentral air 
heat pump: '#5af95d' - services rural air heat pump: '#5af95d' - urban central air heat pump: '#6cfb6b' - ground heat pump: '#2fb537' - residential rural ground heat pump: '#48f74f' - residential rural air heat pump: '#48f74f' - services rural ground heat pump: '#5af95d' - Ambient: '#98eb9d' - CHP: '#8a5751' - urban central gas CHP: '#8d5e56' - CHP CC: '#634643' - urban central gas CHP CC: '#6e4e4c' - CHP heat: '#8a5751' - CHP electric: '#8a5751' - district heating: '#e8beac' - resistive heater: '#d8f9b8' - residential rural resistive heater: '#bef5b5' - residential urban decentral resistive heater: '#b2f1a9' - services rural resistive heater: '#a5ed9d' - services urban decentral resistive heater: '#98e991' - urban central resistive heater: '#8cdf85' - retrofitting: '#8487e8' - building retrofitting: '#8487e8' - # hydrogen - H2 for industry: "#f073da" - H2 for shipping: "#ebaee0" - H2: '#bf13a0' - hydrogen: '#bf13a0' - retrofitted H2 boiler: '#e5a0d9' - SMR: '#870c71' - SMR CC: '#4f1745' - H2 liquefaction: '#d647bd' - hydrogen storage: '#bf13a0' - H2 Store: '#bf13a0' - H2 storage: '#bf13a0' - land transport fuel cell: '#6b3161' - H2 pipeline: '#f081dc' - H2 pipeline retrofitted: '#ba99b5' - H2 Fuel Cell: '#c251ae' - H2 fuel cell: '#c251ae' - H2 turbine: '#991f83' - H2 Electrolysis: '#ff29d9' - H2 electrolysis: '#ff29d9' - # ammonia - NH3: '#46caf0' - ammonia: '#46caf0' - ammonia store: '#00ace0' - ammonia cracker: '#87d0e6' - Haber-Bosch: '#076987' - # syngas - Sabatier: '#9850ad' - methanation: '#c44ce6' - methane: '#c44ce6' - # synfuels - Fischer-Tropsch: '#25c49a' - liquid: '#25c49a' - kerosene for aviation: '#a1ffe6' - naphtha for industry: '#57ebc4' - methanolisation: '#83d6d5' - methanol: '#468c8b' - shipping methanol: '#468c8b' - industry methanol: '#468c8b' - # co2 - CC: '#f29dae' - CCS: '#f29dae' - CO2 sequestration: '#f29dae' - DAC: '#ff5270' - co2 stored: '#f2385a' - co2 sequestered: '#f2682f' - co2: '#f29dae' - co2 vent: '#ffd4dc' - CO2 pipeline: 
'#f5627f' - # emissions - process emissions CC: '#000000' - process emissions: '#222222' - process emissions to stored: '#444444' - process emissions to atmosphere: '#888888' - oil emissions: '#aaaaaa' - shipping oil emissions: "#555555" - shipping methanol emissions: '#666666' - land transport oil emissions: '#777777' - agriculture machinery oil emissions: '#333333' - # other - shipping: '#03a2ff' - power-to-heat: '#2fb537' - power-to-gas: '#c44ce6' - power-to-H2: '#ff29d9' - power-to-liquid: '#25c49a' - gas-to-power/heat: '#ee8340' - waste: '#e3d37d' - other: '#000000' - geothermal: '#ba91b1' - geothermal heat: '#ba91b1' - geothermal district heat: '#d19D00' - geothermal organic rankine cycle: '#ffbf00' - AC: "#70af1d" - AC-AC: "#70af1d" - AC line: "#70af1d" - links: "#8a1caf" - HVDC links: "#8a1caf" - DC: "#8a1caf" - DC-DC: "#8a1caf" - DC link: "#8a1caf" - load: "#dd2e23" - waste CHP: '#e3d37d' - waste CHP CC: '#e3d3ff' - HVC to air: 'k' From 7be521704e094ddc061513136cd7c2ed87c3c379 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Wed, 21 Aug 2024 16:35:07 +0200 Subject: [PATCH 092/100] Implemented all comments from PR #1079. Cleaned up OSM implementation. 
--- .sync-send | 3 - Snakefile | 3 +- config/config.default.yaml | 7 +- doc/configtables/electricity.csv | 3 +- doc/configtables/electricity_network.csv | 3 - doc/release_notes.rst | 4 +- rules/build_electricity.smk | 100 +-- rules/development.smk | 36 +- rules/retrieve.smk | 61 +- scripts/_helpers.py | 21 - scripts/add_electricity.py | 7 +- scripts/base_network.py | 296 ++++----- scripts/build_osm_network.py | 737 +++++------------------ scripts/clean_osm_data.py | 62 +- scripts/prepare_osm_network_release.py | 114 ++-- scripts/retrieve_gdp_uamd.py | 34 -- 16 files changed, 429 insertions(+), 1062 deletions(-) delete mode 100644 doc/configtables/electricity_network.csv delete mode 100644 scripts/retrieve_gdp_uamd.py diff --git a/.sync-send b/.sync-send index 6fc8cb4c0..483c7a999 100644 --- a/.sync-send +++ b/.sync-send @@ -9,6 +9,3 @@ config/test envs matplotlibrc Snakefile -data/eez/ -data/naturalearth/ -resources/europe-nuts2-gridkit/ diff --git a/Snakefile b/Snakefile index c45c7e58d..eb99437bf 100644 --- a/Snakefile +++ b/Snakefile @@ -135,6 +135,7 @@ rule sync: shell: """ rsync -uvarh --ignore-missing-args --files-from=.sync-send . {params.cluster} - # rsync -uvarh --no-g {params.cluster}/resources . || echo "No resources directory, skipping rsync" + rsync -uvarh --no-g {params.cluster}/resources . || echo "No resources directory, skipping rsync" rsync -uvarh --no-g {params.cluster}/results . || echo "No results directory, skipping rsync" + rsync -uvarh --no-g {params.cluster}/logs . 
|| echo "No logs directory, skipping rsync" """ diff --git a/config/config.default.yaml b/config/config.default.yaml index e229e1969..4067246ee 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -84,12 +84,10 @@ co2_budget: 2045: 0.032 2050: 0.000 -electricity_network: - base_network: gridkit # Options: gridkit, osm-prebuilt, osm-raw (built from scratch using OSM data, takes longer) - osm_group_tolerance_buses: 5000 # unit: meters, default 5000 - Buses within this distance are grouped together - # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity electricity: + voltages: [200., 300., 380., 500., 750.] + base_network: entsoegridkit gaslimit_enable: false gaslimit: false co2limit_enable: false @@ -278,6 +276,7 @@ conventional: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#lines lines: types: + 200.: "Al/St 240/40 2-bundle 200.0" 220.: "Al/St 240/40 2-bundle 220.0" 300.: "Al/St 240/40 3-bundle 300.0" 380.: "Al/St 240/40 4-bundle 380.0" diff --git a/doc/configtables/electricity.csv b/doc/configtables/electricity.csv index ee733660c..9bad7bfc6 100644 --- a/doc/configtables/electricity.csv +++ b/doc/configtables/electricity.csv @@ -1,5 +1,6 @@ ,Unit,Values,Description -voltages,kV,"Any subset of {220., 300., 380.}",Voltage levels to consider +voltages,kV,"Any subset of {200., 220., 300., 380., 500., 750.}",Voltage levels to consider +base_network, --, "Any value in {'entsoegridkit', 'osm-prebuilt', 'osm-raw}", "Specify the underlying base network, i.e. GridKit (based on ENTSO-E web map extract, OpenStreetMap (OSM) prebuilt or raw (built from raw OSM data), takes longer." gaslimit_enable,bool,true or false,Add an overall absolute gas limit configured in ``electricity: gaslimit``. gaslimit,MWhth,float or false,Global gas usage limit co2limit_enable,bool,true or false,Add an overall absolute carbon-dioxide emissions limit configured in ``electricity: co2limit`` in :mod:`prepare_network`. 
**Warning:** This option should currently only be used with electricity-only networks, not for sector-coupled networks.. diff --git a/doc/configtables/electricity_network.csv b/doc/configtables/electricity_network.csv deleted file mode 100644 index f7a51ef1f..000000000 --- a/doc/configtables/electricity_network.csv +++ /dev/null @@ -1,3 +0,0 @@ -,Unit,Values,Description -base_network, --, "Any value in {'gridkit', 'osm-prebuilt', 'osm-raw}", "Specify the underlying base network, i.e. GridKit (based on ENTSO-E web map extract, OpenStreetMap (OSM) prebuilt or raw (built from raw OSM data), takes longer." -osm_group_tolerance_buses, meters, float, "Specifies the radius in which substations shall be clustered to a single bus. Default recommendation: 5000 (meters)" diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 759af7765..4add46f82 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -73,9 +73,7 @@ Upcoming Release * Enable parallelism in :mod:`determine_availability_matrix_MD_UA.py` and remove plots. This requires the use of temporary files. -* Added new feature that to base the electricity network on OpenStreetMap (OSM data) (PR https://github.com/PyPSA/pypsa-eur/pull/1079). Note that a heuristics based cleaning process is used for lines and links where electrical parameters are incomplete, missing, or ambiguous. Through ``electricity_network["base_network"]``, the base network can be set to "gridkit" (original default setting), "osm-prebuilt" (which downloads the latest prebuilt snapshot based on OSM data from Zenodo), or "osm-raw" which retrieves (once) and cleans the raw OSM data and subsequently builds the network. Note that this process may take a few minutes. - -* Voltage settings have been aggregated and are now directly read from the line type dictionary. Instead of ``electricity["voltages"]``, scripts have been updated to refer to ``lines["types"].keys()``. 
+* Added new major feature to create the base_network from OpenStreetMap (OSM) data (PR https://github.com/PyPSA/pypsa-eur/pull/1079). Note that a heuristics based cleaning process is used for lines and links where electrical parameters are incomplete, missing, or ambiguous. Through ``electricity["base_network"]``, the base network can be set to "entsoegridkit" (original default setting, deprecated soon), "osm-prebuilt" (which downloads the latest prebuilt snapshot based on OSM data from Zenodo), or "osm-raw" which retrieves (once) and cleans the raw OSM data and subsequently builds the network. Note that this process may take a few minutes. * Updated pre-built `weather data cutouts `__. These are now merged cutouts with diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index b0de316eb..06730bcf6 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -50,6 +50,19 @@ rule build_powerplants: "../scripts/build_powerplants.py" +def input_base_network(w): + base_network = config_provider("electricity", "base_network")(w) + components = {"buses", "lines", "links", "converters", "transformers"} + if base_network == "osm-raw": + inputs = {c: resources(f"osm/pre-base/{c}.csv") for c in components} + else: + inputs = {c: f"data/{base_network}/{c}.csv" for c in components} + if base_network == "entsoegridkit": + inputs["parameter_corrections"] = "data/parameter_corrections.yaml" + inputs["links_p_nom"] = "data/links_p_nom.csv" + return inputs + + rule base_network: params: countries=config_provider("countries"), @@ -58,66 +71,7 @@ rule base_network: lines=config_provider("lines"), transformers=config_provider("transformers"), input: - eg_buses=lambda w: ( - "data/entsoegridkit/buses.csv" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else ( - "data/osm/prebuilt/buses.csv" - if config_provider("electricity_network", "base_network")(w) - == "osm-prebuilt" - else resources("osm/pre-base/buses.csv") - ) - 
), - eg_lines=lambda w: ( - "data/entsoegridkit/lines.csv" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else ( - "data/osm/prebuilt/lines.csv" - if config_provider("electricity_network", "base_network")(w) - == "osm-prebuilt" - else resources("osm/pre-base/lines.csv") - ) - ), - eg_links=lambda w: ( - "data/entsoegridkit/links.csv" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else ( - "data/osm/prebuilt/links.csv" - if config_provider("electricity_network", "base_network")(w) - == "osm-prebuilt" - else resources("osm/pre-base/links.csv") - ) - ), - eg_converters=lambda w: ( - "data/entsoegridkit/converters.csv" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else ( - "data/osm/prebuilt/converters.csv" - if config_provider("electricity_network", "base_network")(w) - == "osm-prebuilt" - else resources("osm/pre-base/converters.csv") - ) - ), - eg_transformers=lambda w: ( - "data/entsoegridkit/transformers.csv" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else ( - "data/osm/prebuilt/transformers.csv" - if config_provider("electricity_network", "base_network")(w) - == "osm-prebuilt" - else resources("osm/pre-base/transformers.csv") - ) - ), - parameter_corrections=lambda w: ( - "data/parameter_corrections.yaml" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else [] - ), - links_p_nom=lambda w: ( - "data/links_p_nom.csv" - if config_provider("electricity_network", "base_network")(w) == "gridkit" - else [] - ), + unpack(input_base_network), country_shapes=resources("country_shapes.geojson"), offshore_shapes=resources("offshore_shapes.geojson"), europe_shape=resources("europe_shape.geojson"), @@ -684,28 +638,28 @@ rule prepare_network: "../scripts/prepare_network.py" -if config["electricity_network"]["base_network"] == "osm-raw": +if config["electricity"]["base_network"] == "osm-raw": rule clean_osm_data: input: 
cables_way=expand( - "data/osm/raw/{country}/cables_way.json", + "data/osm-raw/{country}/cables_way.json", country=config_provider("countries"), ), lines_way=expand( - "data/osm/raw/{country}/lines_way.json", + "data/osm-raw/{country}/lines_way.json", country=config_provider("countries"), ), links_relation=expand( - "data/osm/raw/{country}/links_relation.json", + "data/osm-raw/{country}/links_relation.json", country=config_provider("countries"), ), substations_way=expand( - "data/osm/raw/{country}/substations_way.json", + "data/osm-raw/{country}/substations_way.json", country=config_provider("countries"), ), substations_relation=expand( - "data/osm/raw/{country}/substations_relation.json", + "data/osm-raw/{country}/substations_relation.json", country=config_provider("countries"), ), offshore_shapes=resources("offshore_shapes.geojson"), @@ -717,11 +671,18 @@ if config["electricity_network"]["base_network"] == "osm-raw": links=resources("osm/clean/links.geojson"), log: logs("clean_osm_data.log"), + benchmark: + benchmarks("clean_osm_data") + threads: 1 + resources: + mem_mb=4000, + conda: + "../envs/environment.yaml" script: "../scripts/clean_osm_data.py" -if config["electricity_network"]["base_network"] == "osm-raw": +if config["electricity"]["base_network"] == "osm-raw": rule build_osm_network: input: @@ -744,5 +705,10 @@ if config["electricity_network"]["base_network"] == "osm-raw": logs("build_osm_network.log"), benchmark: benchmarks("build_osm_network") + threads: 1 + resources: + mem_mb=4000, + conda: + "../envs/environment.yaml" script: "../scripts/build_osm_network.py" diff --git a/rules/development.smk b/rules/development.smk index 24c46a159..0386e38e8 100644 --- a/rules/development.smk +++ b/rules/development.smk @@ -2,19 +2,25 @@ # # SPDX-License-Identifier: MIT +if config["electricity"]["base_network"] == "osm-raw": -rule prepare_osm_network_release: - input: - base_network=resources("networks/base.nc"), - output: - 
buses=resources("osm/release/buses.csv"), - converters=resources("osm/release/converters.csv"), - lines=resources("osm/release/lines.csv"), - links=resources("osm/release/links.csv"), - transformers=resources("osm/release/transformers.csv"), - log: - logs("prepare_osm_network_release.log"), - benchmark: - benchmarks("prepare_osm_network_release") - script: - "../scripts/prepare_osm_network_release.py" + rule prepare_osm_network_release: + input: + base_network=resources("networks/base.nc"), + output: + buses=resources("osm/release/buses.csv"), + converters=resources("osm/release/converters.csv"), + lines=resources("osm/release/lines.csv"), + links=resources("osm/release/links.csv"), + transformers=resources("osm/release/transformers.csv"), + log: + logs("prepare_osm_network_release.log"), + benchmark: + benchmarks("prepare_osm_network_release") + threads: 1 + resources: + mem_mb=1000, + conda: + "../envs/environment.yaml" + script: + "../scripts/prepare_osm_network_release.py" diff --git a/rules/retrieve.smk b/rules/retrieve.smk index 371100d6e..75ab5a375 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -392,25 +392,8 @@ if config["enable"]["retrieve"]: "../scripts/retrieve_monthly_fuel_prices.py" -if config["enable"]["retrieve"] and {"UA", "MD"}.intersection(config["countries"]): - - rule retrieve_gdp_uamd: - output: - gdp_non_nuts3="data/GDP_per_capita_PPP_1990_2015_v2.nc", - pop_non_nuts3="data/ppp_2013_1km_Aggregated.tif", - log: - "logs/retrieve_gdp_uamd.log", - resources: - mem_mb=5000, - retries: 2 - conda: - "../envs/retrieve.yaml" - script: - "../scripts/retrieve_gdp_uamd.py" - - if config["enable"]["retrieve"] and ( - config["electricity_network"]["base_network"] == "osm-prebuilt" + config["electricity"]["base_network"] == "osm-prebuilt" ): rule retrieve_osm_prebuilt: @@ -425,65 +408,67 @@ if config["enable"]["retrieve"] and ( "https://zenodo.org/records/13342577/files/transformers.csv" ), output: - buses="data/osm/prebuilt/buses.csv", - 
converters="data/osm/prebuilt/converters.csv", - lines="data/osm/prebuilt/lines.csv", - links="data/osm/prebuilt/links.csv", - transformers="data/osm/prebuilt/transformers.csv", + buses="data/osm-prebuilt/buses.csv", + converters="data/osm-prebuilt/converters.csv", + lines="data/osm-prebuilt/lines.csv", + links="data/osm-prebuilt/links.csv", + transformers="data/osm-prebuilt/transformers.csv", log: "logs/retrieve_osm_prebuilt.log", + threads: 1 resources: mem_mb=500, retries: 2 run: for key in input.keys(): move(input[key], output[key]) + validate_checksum(output[key], input[key]) if config["enable"]["retrieve"] and ( - config["electricity_network"]["base_network"] == "osm-raw" + config["electricity"]["base_network"] == "osm-raw" ): rule retrieve_osm_data: output: - cables_way="data/osm/raw/{country}/cables_way.json", - lines_way="data/osm/raw/{country}/lines_way.json", - links_relation="data/osm/raw/{country}/links_relation.json", - substations_way="data/osm/raw/{country}/substations_way.json", - substations_relation="data/osm/raw/{country}/substations_relation.json", + cables_way="data/osm-raw/{country}/cables_way.json", + lines_way="data/osm-raw/{country}/lines_way.json", + links_relation="data/osm-raw/{country}/links_relation.json", + substations_way="data/osm-raw/{country}/substations_way.json", + substations_relation="data/osm-raw/{country}/substations_relation.json", log: "logs/retrieve_osm_data_{country}.log", - resources: - cores=2, - threads=1, + threads: 1 + conda: + "../envs/retrieve.yaml" script: "../scripts/retrieve_osm_data.py" if config["enable"]["retrieve"] and ( - config["electricity_network"]["base_network"] == "osm-raw" + config["electricity"]["base_network"] == "osm-raw" ): rule retrieve_osm_data_all: input: expand( - "data/osm/raw/{country}/cables_way.json", + "data/osm-raw/{country}/cables_way.json", country=config_provider("countries"), ), expand( - "data/osm/raw/{country}/lines_way.json", + "data/osm-raw/{country}/lines_way.json", 
country=config_provider("countries"), ), expand( - "data/osm/raw/{country}/links_relation.json", + "data/osm-raw/{country}/links_relation.json", country=config_provider("countries"), ), expand( - "data/osm/raw/{country}/substations_way.json", + "data/osm-raw/{country}/substations_way.json", country=config_provider("countries"), ), expand( - "data/osm/raw/{country}/substations_relation.json", + "data/osm-raw/{country}/substations_relation.json", country=config_provider("countries"), ), diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 537b8c4f7..a3b77c1c0 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -370,27 +370,6 @@ def update_to(b=1, bsize=1, tsize=None): urllib.request.urlretrieve(url, file, reporthook=update_to) -def retrieve_file(url, destination): - """ - Downloads a file from a specified URL to a local destination using custom - headers that mimic a Firefox browser request. - - This function is useful for overcoming 'HTTP Error 403: Forbidden' - issues, which often occur when the server requires more typical - browser-like headers for access. 
- """ - - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" - } - response = requests.get(url, headers=headers) - response.raise_for_status() - - with open(destination, "wb") as f: - f.write(response.content) - logger.info(f"File downloaded and saved as {destination}") - - def mock_snakemake( rulename, root_dir=None, diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index 2aef27a99..076eb84ed 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -324,7 +324,7 @@ def upsample(cntry, group, gdp_pop_non_nuts3): # relative factors 0.6 and 0.4 have been determined from a linear # regression on the country to continent load data factors = normed(0.6 * normed(gdp_n) + 0.4 * normed(pop_n)) - if cntry in ["UA", "MD"] and gdp_pop_non_nuts3 is not None: + if cntry in ["UA", "MD"]: # overwrite factor because nuts3 provides no data for UA+MD gdp_pop_non_nuts3 = gpd.read_file(gdp_pop_non_nuts3).set_index("Bus") gdp_pop_non_nuts3 = gdp_pop_non_nuts3.loc[ @@ -847,11 +847,6 @@ def add_transmission_projects(n, transmission_projects): ) ppl = load_powerplants(snakemake.input.powerplants) - if "gdp_pop_non_nuts3" in snakemake.input.keys(): - gdp_pop_non_nuts3 = snakemake.input.gdp_pop_non_nuts3 - else: - gdp_pop_non_nuts3 = None - attach_load( n, snakemake.input.regions, diff --git a/scripts/base_network.py b/scripts/base_network.py index 2c45f1f0d..5c98129b1 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -6,8 +6,9 @@ # coding: utf-8 """ Creates the network topology from a `ENTSO-E map extract. - -`_ (March 2022) as a PyPSA +`_ (March 2022) +or `OpenStreetMap data `_ (Aug 2024) +as a PyPSA network. 
Relevant Settings @@ -134,10 +135,10 @@ def _find_closest_links(links, new_links, distance_upper_bound=1.5): ) -def _load_buses_from_eg(eg_buses, europe_shape, config): +def _load_buses(buses, europe_shape, config): buses = ( pd.read_csv( - eg_buses, + buses, quotechar="'", true_values=["t"], false_values=["f"], @@ -160,23 +161,24 @@ def _load_buses_from_eg(eg_buses, europe_shape, config): lambda p: europe_shape_prepped.contains(Point(p)), axis=1 ) - v_nom_min = min(config["lines"]["types"].keys()) - v_nom_max = max(config["lines"]["types"].keys()) + v_nom_min = min(config["electricity"]["voltages"]) + v_nom_max = max(config["electricity"]["voltages"]) - # Quick fix: buses_with_v_nom_to_keep_b = ( (v_nom_min <= buses.v_nom) & (buses.v_nom <= v_nom_max) | (buses.v_nom.isnull()) - | (buses.carrier == "DC") + | ( + buses.carrier == "DC" + ) # Keeping all DC buses from the input dataset independent of voltage (e.g. 150 kV connections) ) logger.info(f"Removing buses outside of range AC {v_nom_min} - {v_nom_max} V") return pd.DataFrame(buses.loc[buses_in_europe_b & buses_with_v_nom_to_keep_b]) -def _load_transformers_from_eg(buses, eg_transformers): +def _load_transformers(buses, transformers): transformers = pd.read_csv( - eg_transformers, + transformers, quotechar="'", true_values=["t"], false_values=["f"], @@ -188,9 +190,9 @@ def _load_transformers_from_eg(buses, eg_transformers): return transformers -def _load_converters_from_eg(buses, eg_converters): +def _load_converters_from_eg(buses, converters): converters = pd.read_csv( - eg_converters, + converters, quotechar="'", true_values=["t"], false_values=["f"], @@ -204,9 +206,9 @@ def _load_converters_from_eg(buses, eg_converters): return converters -def _load_converters_from_osm(buses, eg_converters): +def _load_converters_from_osm(buses, converters): converters = pd.read_csv( - eg_converters, + converters, quotechar="'", true_values=["t"], false_values=["f"], @@ -220,9 +222,9 @@ def _load_converters_from_osm(buses, 
eg_converters): return converters -def _load_links_from_eg(buses, eg_links): +def _load_links_from_eg(buses, links): links = pd.read_csv( - eg_links, + links, quotechar="'", true_values=["t"], false_values=["f"], @@ -231,7 +233,7 @@ def _load_links_from_eg(buses, eg_links): links["length"] /= 1e3 - # Skagerrak Link is connected to 132kV bus which is removed in _load_buses_from_eg. + # Skagerrak Link is connected to 132kV bus which is removed in _load_buses. # Connect to neighboring 380kV bus links.loc[links.bus1 == "6396", "bus1"] = "6398" @@ -243,9 +245,9 @@ def _load_links_from_eg(buses, eg_links): return links -def _load_links_from_osm(buses, eg_links): +def _load_links_from_osm(buses, links): links = pd.read_csv( - eg_links, + links, quotechar="'", true_values=["t"], false_values=["f"], @@ -268,116 +270,10 @@ def _load_links_from_osm(buses, eg_links): return links -def _add_links_from_tyndp(buses, links, links_tyndp, europe_shape): - links_tyndp = pd.read_csv(links_tyndp) - - # remove all links from list which lie outside all of the desired countries - europe_shape = gpd.read_file(europe_shape).loc[0, "geometry"] - europe_shape_prepped = shapely.prepared.prep(europe_shape) - x1y1_in_europe_b = links_tyndp[["x1", "y1"]].apply( - lambda p: europe_shape_prepped.contains(Point(p)), axis=1 - ) - x2y2_in_europe_b = links_tyndp[["x2", "y2"]].apply( - lambda p: europe_shape_prepped.contains(Point(p)), axis=1 - ) - is_within_covered_countries_b = x1y1_in_europe_b & x2y2_in_europe_b - - if not is_within_covered_countries_b.all(): - logger.info( - "TYNDP links outside of the covered area (skipping): " - + ", ".join(links_tyndp.loc[~is_within_covered_countries_b, "Name"]) - ) - - links_tyndp = links_tyndp.loc[is_within_covered_countries_b] - if links_tyndp.empty: - return buses, links - - has_replaces_b = links_tyndp.replaces.notnull() - oids = dict(Bus=_get_oid(buses), Link=_get_oid(links)) - keep_b = dict( - Bus=pd.Series(True, index=buses.index), Link=pd.Series(True, 
index=links.index) - ) - for reps in links_tyndp.loc[has_replaces_b, "replaces"]: - for comps in reps.split(":"): - oids_to_remove = comps.split(".") - c = oids_to_remove.pop(0) - keep_b[c] &= ~oids[c].isin(oids_to_remove) - buses = buses.loc[keep_b["Bus"]] - links = links.loc[keep_b["Link"]] - - links_tyndp["j"] = _find_closest_links( - links, links_tyndp, distance_upper_bound=0.20 - ) - # Corresponds approximately to 20km tolerances - - if links_tyndp["j"].notnull().any(): - logger.info( - "TYNDP links already in the dataset (skipping): " - + ", ".join(links_tyndp.loc[links_tyndp["j"].notnull(), "Name"]) - ) - links_tyndp = links_tyndp.loc[links_tyndp["j"].isnull()] - if links_tyndp.empty: - return buses, links - - tree_buses = buses.query("carrier=='AC'") - tree = KDTree(tree_buses[["x", "y"]]) - _, ind0 = tree.query(links_tyndp[["x1", "y1"]]) - ind0_b = ind0 < len(tree_buses) - links_tyndp.loc[ind0_b, "bus0"] = tree_buses.index[ind0[ind0_b]] - - _, ind1 = tree.query(links_tyndp[["x2", "y2"]]) - ind1_b = ind1 < len(tree_buses) - links_tyndp.loc[ind1_b, "bus1"] = tree_buses.index[ind1[ind1_b]] - - links_tyndp_located_b = ( - links_tyndp["bus0"].notnull() & links_tyndp["bus1"].notnull() - ) - if not links_tyndp_located_b.all(): - logger.warning( - "Did not find connected buses for TYNDP links (skipping): " - + ", ".join(links_tyndp.loc[~links_tyndp_located_b, "Name"]) - ) - links_tyndp = links_tyndp.loc[links_tyndp_located_b] - - logger.info("Adding the following TYNDP links: " + ", ".join(links_tyndp["Name"])) - - links_tyndp = links_tyndp[["bus0", "bus1"]].assign( - carrier="DC", - p_nom=links_tyndp["Power (MW)"], - length=links_tyndp["Length (given) (km)"].fillna( - links_tyndp["Length (distance*1.2) (km)"] - ), - under_construction=True, - underground=False, - geometry=( - links_tyndp[["x1", "y1", "x2", "y2"]].apply( - lambda s: str(LineString([[s.x1, s.y1], [s.x2, s.y2]])), axis=1 - ) - ), - tags=( - '"name"=>"' - + links_tyndp["Name"] - + '", ' - + 
'"ref"=>"' - + links_tyndp["Ref"] - + '", ' - + '"status"=>"' - + links_tyndp["status"] - + '"' - ), - ) - - links_tyndp.index = "T" + links_tyndp.index.astype(str) - - links = pd.concat([links, links_tyndp], sort=True) - - return buses, links - - -def _load_lines_from_eg(buses, eg_lines): +def _load_lines(buses, lines): lines = ( pd.read_csv( - eg_lines, + lines, quotechar="'", true_values=["t"], false_values=["f"], @@ -395,7 +291,7 @@ def _load_lines_from_eg(buses, eg_lines): lines["length"] /= 1e3 - lines["carrier"] = "AC" # TODO pypsa-eur check + lines["carrier"] = "AC" lines = _remove_dangling_branches(lines, buses) return lines @@ -446,7 +342,7 @@ def _reconnect_crimea(lines): def _set_electrical_parameters_lines_eg(lines, config): - v_noms = list(config["lines"]["types"].keys()) + v_noms = config["electricity"]["voltages"] linetypes = config["lines"]["types"] for v_nom in v_noms: @@ -462,7 +358,7 @@ def _set_electrical_parameters_lines_osm(lines, config): lines["type"] = [] return lines - v_noms = list(config["lines"]["types"].keys()) + v_noms = config["electricity"]["voltages"] linetypes = _get_linetypes_config(config["lines"]["types"], v_noms) lines["carrier"] = "AC" @@ -807,11 +703,11 @@ def _set_shapes(n, country_shapes, offshore_shapes): def base_network( - eg_buses, - eg_converters, - eg_transformers, - eg_lines, - eg_links, + buses, + converters, + transformers, + lines, + links, links_p_nom, europe_shape, country_shapes, @@ -820,57 +716,58 @@ def base_network( config, ): - buses = _load_buses_from_eg(eg_buses, europe_shape, config) - - if config["electricity_network"].get("base_network") == "gridkit": - links = _load_links_from_eg(buses, eg_links) - elif "osm" in config["electricity_network"].get("base_network"): - links = _load_links_from_osm(buses, eg_links) - else: - raise ValueError("base_network must be either 'gridkit' or 'osm'") + base_network = config["electricity"].get("base_network") + assert base_network in { + "entsoegridkit", + 
"osm-raw", + "osm-prebuilt", + }, f"base_network must be either 'entsoegridkit', 'osm-raw' or 'osm-prebuilt', but got '{base_network}'" + if base_network == "entsoegridkit": + warnings.warn( + "The 'entsoegridkit' base network is deprecated and will be removed in future versions. Please use 'osm-raw' or 'osm-prebuilt' instead.", + DeprecationWarning, + ) - if config["electricity_network"].get("base_network") == "gridkit": - converters = _load_converters_from_eg(buses, eg_converters) - elif "osm" in config["electricity_network"].get("base_network"): - converters = _load_converters_from_osm(buses, eg_converters) + logger.info(f"Creating base network using {base_network}.") - transformers = _load_transformers_from_eg(buses, eg_transformers) + buses = _load_buses(buses, europe_shape, config) + transformers = _load_transformers(buses, transformers) + lines = _load_lines(buses, lines) - lines = _load_lines_from_eg(buses, eg_lines) + if base_network == "entsoegridkit": + links = _load_links_from_eg(buses, links) + converters = _load_converters_from_eg(buses, converters) - if ( - (config["electricity_network"].get("base_network") == "gridkit") - & (config["lines"].get("reconnect_crimea", True)) - & ("UA" in config["countries"]) - ): - lines = _reconnect_crimea(lines) + # Optionally reconnect Crimea + if (config["lines"].get("reconnect_crimea", True)) & ( + "UA" in config["countries"] + ): + lines = _reconnect_crimea(lines) - if config["electricity_network"].get("base_network") == "gridkit": + # Set electrical parameters of lines and links lines = _set_electrical_parameters_lines_eg(lines, config) links = _set_electrical_parameters_links_eg(links, config, links_p_nom) - elif "osm" in config["electricity_network"].get("base_network"): + elif base_network in {"osm-prebuilt", "osm-raw"}: + links = _load_links_from_osm(buses, links) + converters = _load_converters_from_osm(buses, converters) + + # Set electrical parameters of lines and links lines = 
_set_electrical_parameters_lines_osm(lines, config) links = _set_electrical_parameters_links_osm(links, config) else: - raise ValueError("base_network must be either 'gridkit' or 'osm'") + raise ValueError( + "base_network must be either 'entsoegridkit', 'osm-raw', or 'osm-prebuilt'" + ) + # Set electrical parameters of transformers and converters transformers = _set_electrical_parameters_transformers(transformers, config) converters = _set_electrical_parameters_converters(converters, config) n = pypsa.Network() - - if config["electricity_network"].get("base_network") == "gridkit": - n.name = "PyPSA-Eur (GridKit)" - elif "osm" in config["electricity_network"].get("base_network"): - n.name = "PyPSA-Eur (OSM)" - else: - raise ValueError("base_network must be either 'gridkit' or 'osm'") + n.name = f"PyPSA-Eur ({base_network})" time = get_snapshots(snakemake.params.snapshots, snakemake.params.drop_leap_day) n.set_snapshots(time) - n.madd( - "Carrier", ["AC", "DC"] - ) # TODO: fix hard code and check if AC/DC truly exist n.import_components_from_dataframe(buses, "Bus") n.import_components_from_dataframe(lines, "Line") @@ -879,15 +776,13 @@ def base_network( n.import_components_from_dataframe(converters, "Link") _set_lines_s_nom_from_linetypes(n) - if config["electricity_network"].get("base_network") == "gridkit": + if config["electricity"].get("base_network") == "gridkit": _apply_parameter_corrections(n, parameter_corrections) - # TODO: what about this? n = _remove_unconnected_components(n) _set_countries_and_substations(n, config, country_shapes, offshore_shapes) - # TODO pypsa-eur add this _set_links_underwater_fraction(n, offshore_shapes) _replace_b2b_converter_at_country_border_by_link(n) @@ -896,9 +791,12 @@ def base_network( _set_shapes(n, country_shapes, offshore_shapes) - logger.info( - f"Base network created using {config['electricity_network'].get('base_network')}." 
- ) + # Add carriers if they are present in buses.carriers + carriers_in_buses = set(n.buses.carrier.dropna().unique()) + carriers = carriers_in_buses.intersection({"AC", "DC"}) + + if carriers: + n.madd("Carrier", carriers) return n @@ -1066,25 +964,47 @@ def append_bus_shapes(n, shapes, type): configure_logging(snakemake) set_scenario_config(snakemake) + countries = snakemake.params.countries + + buses = snakemake.input.buses + converters = snakemake.input.converters + transformers = snakemake.input.transformers + lines = snakemake.input.lines + links = snakemake.input.links + europe_shape = snakemake.input.europe_shape + country_shapes = snakemake.input.country_shapes + offshore_shapes = snakemake.input.offshore_shapes + config = snakemake.config + + if "links_p_nom" in snakemake.input.keys(): + links_p_nom = snakemake.input.links_p_nom + else: + links_p_nom = None + + if "parameter_corrections" in snakemake.input.keys(): + parameter_corrections = snakemake.input.parameter_corrections + else: + parameter_corrections = None + n = base_network( - snakemake.input.eg_buses, - snakemake.input.eg_converters, - snakemake.input.eg_transformers, - snakemake.input.eg_lines, - snakemake.input.eg_links, - snakemake.input.links_p_nom, - snakemake.input.europe_shape, - snakemake.input.country_shapes, - snakemake.input.offshore_shapes, - snakemake.input.parameter_corrections, - snakemake.config, + buses, + converters, + transformers, + lines, + links, + links_p_nom, + europe_shape, + country_shapes, + offshore_shapes, + parameter_corrections, + config, ) onshore_regions, offshore_regions, shapes, offshore_shapes = build_bus_shapes( n, - snakemake.input.country_shapes, - snakemake.input.offshore_shapes, - snakemake.params.countries, + country_shapes, + offshore_shapes, + countries, ) shapes.to_file(snakemake.output.regions_onshore) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 795712067..889105754 100644 --- a/scripts/build_osm_network.py +++ 
b/scripts/build_osm_network.py @@ -10,7 +10,6 @@ import geopandas as gpd import numpy as np import pandas as pd -from _benchmark import memory_logger from _helpers import configure_logging, set_scenario_config from shapely.geometry import LineString, Point from shapely.ops import linemerge, nearest_points, split @@ -18,45 +17,39 @@ logger = logging.getLogger(__name__) -# list of recognised nan values (NA and na excluded as may be confused with Namibia 2-letter country code) -NA_VALUES = ["NULL", "", "N/A", "NAN", "NaN", "nan", "Nan", "n/a", "null"] - -def read_csv_nafix(file, **kwargs): - "Function to open a csv as pandas file and standardize the na value" - if "keep_default_na" not in kwargs: - kwargs["keep_default_na"] = False - if "na_values" not in kwargs: - kwargs["na_values"] = NA_VALUES - - if os.stat(file).st_size > 0: - return pd.read_csv(file, **kwargs) - else: - return pd.DataFrame() - - -def save_to_geojson(df, fn): - """ - Save a (Geo)DataFrame to a GeoJSON file. - - Parameters: - - df: The (Geo)DataFrame to be saved. - - fn: The filename (including the path) of the output GeoJSON file. 
- - Returns: - None - """ - if os.path.exists(fn): - os.unlink(fn) # remove file if it exists - - # save file if the (Geo)DataFrame is non-empty - if df.empty: - # create empty file to avoid issues with snakemake - with open(fn, "w") as fp: - pass - else: - # save file - df.to_file(fn, driver="GeoJSON") +GEO_CRS = "EPSG:4326" +DISTANCE_CRS = "EPSG:3035" +BUS_TOL = ( + 5000 # unit: meters, default 5000 - Buses within this distance are grouped together +) +LINES_COLUMNS = [ + "bus0", + "bus1", + "voltage", + "circuits", + "length", + "underground", + "under_construction", + "geometry", +] +LINKS_COLUMNS = [ + "bus0", + "bus1", + "voltage", + "p_nom", + "length", + "under_construction", + "geometry", +] +TRANSFORMERS_COLUMNS = [ + "bus0", + "bus1", + "voltage_bus0", + "voltage_bus1", + "country", + "geometry", +] def read_geojson(fn, cols=[], dtype=None, crs="EPSG:4326"): @@ -88,35 +81,6 @@ def read_geojson(fn, cols=[], dtype=None, crs="EPSG:4326"): return df -def to_csv_nafix(df, path, **kwargs): - """ - Write a pandas DataFrame to a CSV file with NA values replaced. - - Parameters: - - df: pandas DataFrame - The DataFrame to be written to the CSV file. - - path: str - The file path where the CSV file will be saved. - - **kwargs: keyword arguments - Additional arguments to be passed to the `to_csv` function of pandas. - - Returns: - - None - - If the DataFrame is not empty or does not have empty columns, it will be - written to the CSV file with NA values replaced by the first value in the - `NA_VALUES` list. If the DataFrame is empty or has empty columns, an empty - file will be created at the specified path. - """ - if "na_rep" in kwargs: - del kwargs["na_rep"] - if not df.empty or not df.columns.empty: - return df.to_csv(path, **kwargs, na_rep=NA_VALUES[0]) - else: - with open(path, "w") as fp: - pass - - def line_endings_to_bus_conversion(lines): """ Converts line endings to bus connections. 
@@ -252,19 +216,13 @@ def set_lines_ids(lines, buses, distance_crs): distance_bus0 = busesepsg.geometry.loc[bus0_id].distance( row.geometry.boundary.geoms[0] ) - if distance_bus0 > 0.0: + + if distance_bus0 > 0: # the line does not start in the node, thus modify the linestring - lines.loc[i, "geometry"] = linemerge( - [ - LineString( - [ - buses.geometry.loc[bus0_id], - lines.geometry.loc[i].boundary.geoms[0], - ] - ), - lines.geometry.loc[i], - ] - ) + line_start_point = lines.geometry.loc[i].boundary.geoms[0] + new_segment = LineString([buses.geometry.loc[bus0_id], line_start_point]) + modified_line = linemerge([new_segment, lines.geometry.loc[i]]) + lines.loc[i, "geometry"] = modified_line # find the closest node of the bus1 of the line bus1_id = buses_sel.geometry.distance(row.geometry.boundary.geoms[1]).idxmin() @@ -274,19 +232,13 @@ def set_lines_ids(lines, buses, distance_crs): distance_bus1 = busesepsg.geometry.loc[bus1_id].distance( row.geometry.boundary.geoms[1] ) - if distance_bus1 > 0.0: - # the line does not end in the node, thus modify the linestring - lines.loc[i, "geometry"] = linemerge( - [ - lines.geometry.loc[i], - LineString( - [ - lines.geometry.loc[i].boundary.geoms[1], - buses.geometry.loc[bus1_id], - ] - ), - ] - ) + + if distance_bus1 > 0: + # the line does not start in the node, thus modify the linestring + line_end_point = lines.geometry.loc[i].boundary.geoms[1] + new_segment = LineString([line_end_point, buses.geometry.loc[bus1_id]]) + modified_line = linemerge([lines.geometry.loc[i], new_segment]) + lines.loc[i, "geometry"] = modified_line return lines, buses @@ -322,27 +274,23 @@ def merge_stations_same_station_id( lon_bus = np.round(station_point_x + v_it * delta_lon, precision) lat_bus = np.round(station_point_y + v_it * delta_lat, precision) + bus_data = [ + n_buses, # "bus_id" + g_name, # "station_id" + v_name[0], # "voltage" + bus_row["dc"].all(), # "dc" + "|".join(bus_row["symbol"].unique()), # "symbol" + 
bus_row["under_construction"].any(), # "under_construction" + "|".join(bus_row["tag_substation"].unique()), # "tag_substation" + bus_row["tag_area"].sum(), # "tag_area" + lon_bus, # "lon" + lat_bus, # "lat" + bus_row["country"].iloc[0], # "country" + Point(lon_bus, lat_bus), # "geometry" + ] + # add the bus - buses_clean.append( - [ - n_buses, # "bus_id" - g_name, # "station_id" - v_name[0], # "voltage" - bus_row["dc"].all(), # "dc" - "|".join(bus_row["symbol"].unique()), # "symbol" - bus_row["under_construction"].any(), # "under_construction" - "|".join(bus_row["tag_substation"].unique()), # "tag_substation" - bus_row["tag_area"].sum(), # "tag_area" - lon_bus, # "lon" - lat_bus, # "lat" - bus_row["country"].iloc[0], # "country", - # is_dclink_boundary_point, # check if new bus was formed of at least one DC link boundary point - Point( - lon_bus, - lat_bus, - ), # "geometry" - ] - ) + buses_clean.append(bus_data) # increase counters v_it += 1 @@ -404,8 +352,8 @@ def get_transformers(buses, lines): df_transformers = [] # Transformers should be added between AC buses only - # TODO pypsa-eur: Fix this! 
instead of tilde use != buses_ac = buses[buses["dc"] != True] + for g_name, g_value in buses_ac.sort_values("voltage", ascending=True).groupby( by="station_id" ): @@ -413,26 +361,26 @@ def get_transformers(buses, lines): n_voltages = len(g_value) if n_voltages > 1: - for id in range(0, n_voltages - 1): + for id in range(n_voltages - 1): # when g_value has more than one node, it means that there are multiple voltages for the same bus - geom_trans = LineString( + transformer_geometry = LineString( [g_value.geometry.iloc[id], g_value.geometry.iloc[id + 1]] ) - df_transformers.append( - [ - f"transf_{g_name}_{id}", # "line_id" - g_value["bus_id"].iloc[id], # "bus0" - g_value["bus_id"].iloc[id + 1], # "bus1" - g_value.voltage.iloc[id], # "voltage_bus0" - g_value.voltage.iloc[id + 1], # "voltage_bus0" - g_value.country.iloc[id], # "country" - geom_trans, # "geometry" - ] - ) - # TODO pypsa-eur: fix bug in pypsa-earth, where the id column is wrongly named "line_id" instead of "transformer_id + transformer_data = [ + f"transf_{g_name}_{id}", # "line_id" + g_value["bus_id"].iloc[id], # "bus0" + g_value["bus_id"].iloc[id + 1], # "bus1" + g_value.voltage.iloc[id], # "voltage_bus0" + g_value.voltage.iloc[id + 1], # "voltage_bus0" + g_value.country.iloc[id], # "country" + transformer_geometry, # "geometry" + ] + + df_transformers.append(transformer_data) + # name of the columns - trasf_columns = [ + transformers_columns = [ "transformer_id", "bus0", "bus1", @@ -442,72 +390,18 @@ def get_transformers(buses, lines): "geometry", ] - df_transformers = gpd.GeoDataFrame(df_transformers, columns=trasf_columns) + df_transformers = gpd.GeoDataFrame(df_transformers, columns=transformers_columns) if not df_transformers.empty: init_index = 0 if lines.empty else lines.index[-1] + 1 df_transformers.set_index(init_index + df_transformers.index, inplace=True) # update line endings df_transformers = line_endings_to_bus_conversion(df_transformers) + df_transformers.drop(columns=["bounds", 
"bus_0_coors", "bus_1_coors"], inplace=True) - return df_transformers - - -# def get_converters(buses): -# """ -# Function to create fake converter lines that connect buses of the same -# station_id of different polarities. -# """ - -# df_converters = [] - -# for g_name, g_value in buses.sort_values("voltage", ascending=True).groupby( -# by="station_id" -# ): -# # note: by construction there cannot be more that two buses with the same station_id and same voltage -# n_voltages = len(g_value) - -# # A converter stations should have both AC and DC parts -# if g_value["dc"].any() & ~g_value["dc"].all(): -# dc_voltage = g_value[g_value.dc]["voltage"].values - -# for u in dc_voltage: -# id_0 = g_value[g_value["dc"] & g_value["voltage"].isin([u])].index[0] - -# ac_voltages = g_value[~g_value.dc]["voltage"] -# # A converter is added between a DC nodes and AC one with the closest voltage -# id_1 = ac_voltages.sub(u).abs().idxmin() - -# geom_conv = LineString( -# [g_value.geometry.loc[id_0], g_value.geometry.loc[id_1]] -# ) - -# # check if bus is a dclink boundary point, only then add converter -# df_converters.append( -# [ -# f"convert_{g_name}_{id_0}", # "line_id" -# g_value["bus_id"].loc[id_0], # "bus0" -# g_value["bus_id"].loc[id_1], # "bus1" -# False, # "underground" -# False, # "under_construction" -# g_value.country.loc[id_0], # "country" -# geom_conv, # "geometry" -# ] -# ) - -# # name of the columns -# conv_columns = [ -# "converter_id", -# "bus0", -# "bus1", -# "underground", -# "under_construction", -# "country", -# "geometry", -# ] + gdf_transformers = gpd.GeoDataFrame(df_transformers) + gdf_transformers.crs = GEO_CRS -# df_converters = gpd.GeoDataFrame(df_converters, columns=conv_columns).reset_index() - -# return df_converters + return gdf_transformers def _find_closest_bus(row, buses, distance_crs, tol=5000): @@ -552,7 +446,7 @@ def _find_closest_bus(row, buses, distance_crs, tol=5000): return closest_bus_id -def _get_converters(buses, links, distance_crs, 
tol): +def _get_converters(buses, links, distance_crs): """ Get the converters for the given buses and links. Connecting link endings to closest AC bus. @@ -575,30 +469,30 @@ def _get_converters(buses, links, distance_crs, tol): continue converter_id = f"converter/{row['link_id']}_{conv}" + converter_geometry = LineString( + [ + buses[buses["bus_id"] == link_end].geometry.values[0], + buses[buses["bus_id"] == closest_bus].geometry.values[0], + ] + ) + logger.info( f"Added converter #{conv+1}/2 for link {row['link_id']}:{converter_id}." ) + converter_data = [ + converter_id, # "line_id" + link_end, # "bus0" + closest_bus, # "bus1" + row["p_nom"], # "p_nom" + False, # "underground" + False, # "under_construction" + buses[buses["bus_id"] == closest_bus].country.values[0], # "country" + converter_geometry, # "geometry" + ] + # Create the converter - converters.append( - [ - converter_id, # "line_id" - link_end, # "bus0" - closest_bus, # "bus1" - row["p_nom"], # "p_nom" - False, # "underground" - False, # "under_construction" - buses[buses["bus_id"] == closest_bus].country.values[ - 0 - ], # "country" - LineString( - [ - buses[buses["bus_id"] == link_end].geometry.values[0], - buses[buses["bus_id"] == closest_bus].geometry.values[0], - ] - ), # "geometry" - ] - ) + converters.append(converter_data) conv_columns = [ "converter_id", @@ -612,7 +506,7 @@ def _get_converters(buses, links, distance_crs, tol): ] gdf_converters = gpd.GeoDataFrame( - converters, columns=conv_columns, crs=geo_crs + converters, columns=conv_columns, crs=GEO_CRS ).reset_index() return gdf_converters @@ -634,39 +528,37 @@ def connect_stations_same_station_id(lines, buses): if len(buses_station_id) > 1: for b_it in range(1, len(buses_station_id)): - add_lines.append( + line_geometry = LineString( [ - f"link{buses_station_id}_{b_it}", # "line_id" - buses_station_id.index[0], # "bus0" - buses_station_id.index[b_it], # "bus1" - 400000, # "voltage" - 1, # "circuits" - 0.0, # "length" - False, # 
"underground" - False, # "under_construction" - "transmission", # "tag_type" - ac_freq, # "tag_frequency" - buses_station_id.country.iloc[0], # "country" - LineString( - [ - buses_station_id.geometry.iloc[0], - buses_station_id.geometry.iloc[b_it], - ] - ), # "geometry" - LineString( - [ - buses_station_id.geometry.iloc[0], - buses_station_id.geometry.iloc[b_it], - ] - ).bounds, # "bounds" - buses_station_id.geometry.iloc[0], # "bus_0_coors" - buses_station_id.geometry.iloc[b_it], # "bus_1_coors" - buses_station_id.lon.iloc[0], # "bus0_lon" - buses_station_id.lat.iloc[0], # "bus0_lat" - buses_station_id.lon.iloc[b_it], # "bus1_lon" - buses_station_id.lat.iloc[b_it], # "bus1_lat" + buses_station_id.geometry.iloc[0], + buses_station_id.geometry.iloc[b_it], ] ) + line_bounds = line_geometry.bounds + + line_data = [ + f"link{buses_station_id}_{b_it}", # "line_id" + buses_station_id.index[0], # "bus0" + buses_station_id.index[b_it], # "bus1" + 400000, # "voltage" + 1, # "circuits" + 0.0, # "length" + False, # "underground" + False, # "under_construction" + "transmission", # "tag_type" + ac_freq, # "tag_frequency" + buses_station_id.country.iloc[0], # "country" + line_geometry, # "geometry" + line_bounds, # "bounds" + buses_station_id.geometry.iloc[0], # "bus_0_coors" + buses_station_id.geometry.iloc[b_it], # "bus_1_coors" + buses_station_id.lon.iloc[0], # "bus0_lon" + buses_station_id.lat.iloc[0], # "bus0_lat" + buses_station_id.lon.iloc[b_it], # "bus1_lon" + buses_station_id.lat.iloc[b_it], # "bus1_lat" + ] + + add_lines.append(line_data) # name of the columns add_lines_columns = [ @@ -733,12 +625,10 @@ def merge_stations_lines_by_station_id_and_voltage( logger.info(" - Setting substation ids with tolerance of %.2f m" % (tol)) - # TODO pypsa-eur: Add this fix to pypsa-earth: Buses should not be clustered geographically if they are different # bus types (AC != DC) buses_ac = buses[buses["dc"] == False].reset_index() buses_dc = buses[buses["dc"] == True].reset_index() - 
# set substation ids # set_substations_ids(buses, distance_crs, tol=tol) set_substations_ids(buses_ac, distance_crs, tol=tol) set_substations_ids(buses_dc, distance_crs, tol=tol) @@ -772,8 +662,6 @@ def merge_stations_lines_by_station_id_and_voltage( # reset index lines.reset_index(drop=True, inplace=True) links.reset_index(drop=True, inplace=True) - # if len(links) > 0: - # links.reset_index(drop=True, inplace=True) return lines, links, buses @@ -841,23 +729,16 @@ def fix_overpassing_lines(lines, buses, distance_crs, tol=1): buses_epsgmod.geometry.distance(lines_epsgmod.geometry.loc[l]) <= tol ] + # Get boundary points + endpoint0 = lines_epsgmod.geometry.loc[l].boundary.geoms[0] + endpoint1 = lines_epsgmod.geometry.loc[l].boundary.geoms[1] + + # Calculate distances + dist_to_ep0 = bus_in_tol_epsg.geometry.distance(endpoint0) + dist_to_ep1 = bus_in_tol_epsg.geometry.distance(endpoint1) + # exclude endings of the lines - bus_in_tol_epsg = bus_in_tol_epsg[ - ( - ( - bus_in_tol_epsg.geometry.distance( - lines_epsgmod.geometry.loc[l].boundary.geoms[0] - ) - > tol - ) - | ( - bus_in_tol_epsg.geometry.distance( - lines_epsgmod.geometry.loc[l].boundary.geoms[1] - ) - > tol - ) - ) - ] + bus_in_tol_epsg = bus_in_tol_epsg[(dist_to_ep0 > tol) | (dist_to_ep1 > tol)] if not bus_in_tol_epsg.empty: # add index of line to split @@ -905,74 +786,12 @@ def fix_overpassing_lines(lines, buses, distance_crs, tol=1): return lines, buses -def build_network( - inputs, - outputs, - geo_crs, - distance_crs, -): - osm_clean_columns = { - "substation": { - "bus_id": "object", - "station_id": "float", - "voltage": "float", - "dc": "bool", - "symbol": "object", - "under_construction": "bool", - "tag_substation": "str", - "tag_area": "str", - "lon": "float", - "lat": "float", - "country": "str", - "geometry": "object", - "tag_source": "str", - }, - "line": { - "line_id": "object", - "bus0": "object", - "bus1": "object", - "voltage": "float", - "circuits": "float", - "length": "float", - 
"underground": "bool", - "under_construction": "bool", - "tag_type": "str", - "tag_frequency": "float", - "dc": "bool", - "country": "object", - "geometry": "object", - }, - "link": { - "link_id": "object", - "bus0": "object", - "bus1": "object", - "voltage": "float", - "length": "float", - "under_construction": "bool", - "dc": "bool", - "country": "object", - "geometry": "object", - }, - } +def build_network(inputs, outputs): logger.info("Reading input data.") - buses = read_geojson( - inputs["substations"], - osm_clean_columns["substation"].keys(), - dtype=osm_clean_columns["substation"], - ) - - lines = read_geojson( - inputs["lines"], - osm_clean_columns["line"].keys(), - dtype=osm_clean_columns["line"], - ) - - links = read_geojson( - inputs["links"], - osm_clean_columns["link"].keys(), - dtype=osm_clean_columns["link"], - ) + buses = gpd.read_file(inputs["substations"]) + lines = gpd.read_file(inputs["lines"]) + links = gpd.read_file(inputs["links"]) lines = line_endings_to_bus_conversion(lines) links = line_endings_to_bus_conversion(links) @@ -980,14 +799,13 @@ def build_network( logger.info( "Fixing lines overpassing nodes: Connecting nodes and splittling lines." 
) - lines, buses = fix_overpassing_lines(lines, buses, distance_crs, tol=1) + lines, buses = fix_overpassing_lines(lines, buses, DISTANCE_CRS, tol=1) - # METHOD to merge buses with same voltage and within tolerance - tol = snakemake.config["electricity_network"]["osm_group_tolerance_buses"] - logger.info(f"Aggregating close substations: Enabled with tolerance {tol} m") + # Merge buses with same voltage and within tolerance + logger.info(f"Aggregating close substations: Enabled with tolerance {BUS_TOL} m") lines, links, buses = merge_stations_lines_by_station_id_and_voltage( - lines, links, buses, distance_crs, tol=tol + lines, links, buses, DISTANCE_CRS, BUS_TOL ) # Recalculate lengths of lines @@ -995,19 +813,11 @@ def build_network( lines["length"] = lines.to_crs(utm).length links["length"] = links.to_crs(utm).length - # TODO pypsa-eur: check if needed for updated links scripts - # get transformers: modelled as lines connecting buses with different voltage transformers = get_transformers(buses, lines) - - # get converters: currently modelled as links connecting buses with different polarity - converters = _get_converters(buses, links, distance_crs, tol) + converters = _get_converters(buses, links, DISTANCE_CRS) logger.info("Saving outputs") - # create clean directory if not already exist - if not os.path.exists(outputs["lines"]): - os.makedirs(os.path.dirname(outputs["lines"]), exist_ok=True) - ### Convert output to pypsa-eur friendly format # Rename "substation" in buses["symbol"] to "Substation" buses["symbol"] = buses["symbol"].replace({"substation": "Substation"}) @@ -1037,232 +847,31 @@ def build_network( buses = buses.replace({True: "t", False: "f"}) # Change column orders - cols_lines = [ - "bus0", - "bus1", - "voltage", - "circuits", - "length", - "underground", - "under_construction", - "geometry", - ] - - lines = lines[cols_lines] - - cols_links = [ - "bus0", - "bus1", - "voltage", - "p_nom", - "length", - "under_construction", - "geometry", - ] - + 
lines = lines[LINES_COLUMNS] if not links.empty: - links = links[cols_links] - - cols_transformers = [ - "bus0", - "bus1", - "voltage_bus0", - "voltage_bus1", - "country", - "geometry", - ] - - transformers = transformers[cols_transformers] - - if links.empty: # create empty dataframe with cols_links as columns - links = pd.DataFrame(columns=["link_id"] + cols_links) + links = links[LINKS_COLUMNS] + else: + links = pd.DataFrame(columns=["link_id"] + LINKS_COLUMNS) links.set_index("link_id", inplace=True) + transformers = transformers[TRANSFORMERS_COLUMNS] - to_csv_nafix(lines, outputs["lines"], quotechar="'") # Generate CSV - to_csv_nafix(links, outputs["links"], quotechar="'") # Generate CSV - to_csv_nafix(converters, outputs["converters"], quotechar="'") # Generate CSV - to_csv_nafix(transformers, outputs["transformers"], quotechar="'") # Generate CSV + # Export to csv for base_network + buses.to_csv(outputs["substations"], quotechar="'") + lines.to_csv(outputs["lines"], quotechar="'") + links.to_csv(outputs["links"], quotechar="'") + converters.to_csv(outputs["converters"], quotechar="'") + transformers.to_csv(outputs["transformers"], quotechar="'") # Export to GeoJSON for quick validations - save_to_geojson( - gpd.GeoDataFrame(lines), - outputs["lines_geojson"], - ) - save_to_geojson( - gpd.GeoDataFrame(links), - outputs["links_geojson"], - ) - save_to_geojson( - gpd.GeoDataFrame(converters, geometry="geometry", crs=geo_crs), - outputs["converters_geojson"], - ) - save_to_geojson( - gpd.GeoDataFrame(transformers, geometry="geometry", crs=geo_crs), - outputs["transformers_geojson"], - ) - - # create clean directory if not already exist - if not os.path.exists(outputs["substations"]): - os.makedirs(os.path.dirname(outputs["substations"]), exist_ok=True) - # Generate CSV - to_csv_nafix(buses, outputs["substations"], quotechar="'") - save_to_geojson( - gpd.GeoDataFrame(buses, geometry="geometry", crs=geo_crs), - outputs["substations_geojson"], - ) + 
buses.to_file(outputs["substations_geojson"]) + lines.to_file(outputs["lines_geojson"]) + links.to_file(outputs["links_geojson"]) + converters.to_file(outputs["converters_geojson"]) + transformers.to_file(outputs["transformers_geojson"]) return None -# Function to check if two lines are connected -def are_lines_connected(line1, line2): - """ - Check if two lines are connected. - - Parameters: - line1 (dict): A dictionary representing the first line. - line2 (dict): A dictionary representing the second line. - - Returns: - tuple: A tuple of boolean values indicating the connection status between - the lines. - - The tuple contains four elements: - - True if the first line's bus_0_coors is almost equal to the second line's - bus_0_coors, False otherwise. - - True if the first line's bus_0_coors is almost equal to the second line's - bus_1_coors, False otherwise. - - True if the first line's bus_1_coors is almost equal to the second line's - bus_0_coors, False otherwise. - - True if the first line's bus_1_coors is almost equal to the second line's - bus_1_coors, False otherwise. - """ - return ( - are_almost_equal(line1["bus_0_coors"], line2["bus_0_coors"]), - are_almost_equal(line1["bus_0_coors"], line2["bus_1_coors"]), - are_almost_equal(line1["bus_1_coors"], line2["bus_0_coors"]), - are_almost_equal(line1["bus_1_coors"], line2["bus_1_coors"]), - ) - - -def _dfs(adj_matrix, visited, current_vertex, path): - """ - Perform a depth-first search (DFS) on a graph represented by an adjacency - matrix. - - Parameters: - - adj_matrix (list of lists): The adjacency matrix representing the graph. - - visited (list of bool): A list to keep track of visited vertices. - - current_vertex (int): The current vertex being visited. - - path (list): The path of vertices visited so far. - - Returns: - - path (list): The path of vertices visited during the DFS. 
- """ - visited[current_vertex] = True - path.append(current_vertex) - for neighbor in range(len(adj_matrix)): - if adj_matrix[current_vertex][neighbor] == 1 and not visited[neighbor]: - _dfs(adj_matrix, visited, neighbor, path) - return path - - -# Returns all connected paths as a vector -def find_paths(adj_matrix): - """ - Find all paths in a graph represented by an adjacency matrix. - - Parameters: - - adj_matrix (list of lists): The adjacency matrix representing the graph. - - Returns: - - paths (list of lists): A list of lists, where each inner list represents - a path in the graph. - """ - visited = [False] * len(adj_matrix) - paths = [] - for vertex in range(len(adj_matrix)): - if not visited[vertex]: - path = _dfs(adj_matrix, visited, vertex, []) - if path: - paths.append(path) - return paths - - -def are_almost_equal(point1, point2, tolerance=1e-6): - """ - Check if two Shapely points are almost equal with a given tolerance. - - Args: - point1 (Point): First Shapely point. - point2 (Point): Second Shapely point. - tolerance (float): Tolerance for coordinate deviation. - - Returns: - bool: True if the points are almost equal, False otherwise. - """ - return abs(point1.x - point2.x) < tolerance and abs(point1.y - point2.y) < tolerance - - -def merge_linestrings(gdf): - """ - Merge LineStrings in a GeoDataFrame wherever the endpoints match. - - Parameters: - gdf (GeoDataFrame): A GeoDataFrame containing LineString geometries. - - Returns: - GeoDataFrame: A GeoDataFrame with merged LineString geometries. 
- """ - gdf = gdf.copy() - if len(gdf) == 1: - return gdf - - lines = list(gdf.geometry) - merged_lines = [] - while lines: - line = lines.pop(0) - merged_line = line - i = 0 - while i < len(lines): - if are_almost_equal( - Point(merged_line.coords[-1]), Point(lines[i].coords[0]) - ): - merged_line = LineString( - list(merged_line.coords) + list(lines.pop(i).coords[1:]) - ) - i = 0 # Restart the scan after merging - elif are_almost_equal( - Point(merged_line.coords[0]), Point(lines[i].coords[-1]) - ): - merged_line = LineString( - list(lines.pop(i).coords)[:-1] + list(merged_line.coords) - ) - i = 0 # Restart the scan after merging - elif are_almost_equal( - Point(merged_line.coords[-1]), Point(lines[i].coords[-1]) - ): - merged_line = LineString( - list(merged_line.coords) + list(lines.pop(i).coords[::-1])[1:] - ) - i = 0 # Restart the scan after merging - elif are_almost_equal( - Point(merged_line.coords[0]), Point(lines[i].coords[0]) - ): - merged_line = LineString( - list(lines.pop(i).coords[::-1])[:-1] + list(merged_line.coords) - ) - i = 0 # Restart the scan after merging - else: - i += 1 - merged_lines.append(merged_line) - no_coordinates = [len(merged_lines[i].coords) for i in range(len(merged_lines))] - max_index = np.argmax(no_coordinates) - merged_lines = [merged_lines[max_index]] - - return gpd.GeoDataFrame(geometry=merged_lines, crs=gdf.crs) - - if __name__ == "__main__": # Detect running outside of snakemake and mock snakemake for testing if "snakemake" not in globals(): @@ -1273,20 +882,6 @@ def merge_linestrings(gdf): configure_logging(snakemake) set_scenario_config(snakemake) - # load default crs - geo_crs = "EPSG:4326" - distance_crs = "EPSG:3035" - countries = snakemake.config["countries"] - with memory_logger( - filename=getattr(snakemake.log, "memory", None), interval=30.0 - ) as mem: - build_network( - snakemake.input, - snakemake.output, - geo_crs, - distance_crs, - ) - - logger.info(f"Maximum memory usage: {mem.mem_usage}") + 
build_network(snakemake.input, snakemake.output) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index bf9d1c4ab..7f42ee2c3 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -3,26 +3,14 @@ # # SPDX-License-Identifier: MIT """ -This script is used to clean OpenStreetMap (OSM) data for the PyPSA-Eur -project. +This script is used to clean OpenStreetMap (OSM) data for creating a PyPSA-Eur +ready network. The script performs various cleaning operations on the OSM data, including: - Cleaning voltage, circuits, cables, wires, and frequency columns - Splitting semicolon-separated cells into new rows - Distributing values to circuits based on the number of splits - Adding line endings to substations based on line data - -The cleaned data is then written to an output file. - -Usage: - python clean_osm_data.py - -Arguments: - output_file (str): The path to the output file where the cleaned data will - be written. - -Example: - python clean_osm_data.py cleaned_data.csv """ import json @@ -1001,16 +989,13 @@ def _clean_lines(df_lines, list_voltages): # Clean those values where multiple circuit values are present, divided by # semicolon - bool_cables = ( - (df_lines["circuits"].apply(lambda x: len(x.split(";")) > 1)) - & ( - df_lines.apply( - lambda row: len(row["circuits"].split(";")) == row["split_elements"], - axis=1, - ) - ) - & (df_lines["cleaned"] == False) + has_multiple_circuits = df_lines["circuits"].apply(lambda x: len(x.split(";")) > 1) + circuits_match_split_elements = df_lines.apply( + lambda row: len(row["circuits"].split(";")) == row["split_elements"], + axis=1, ) + is_not_cleaned = df_lines["cleaned"] == False + bool_cables = has_multiple_circuits & circuits_match_split_elements & is_not_cleaned df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables].apply( lambda row: str(row["circuits"].split(";")[int(row["id"].split("-")[-1]) - 1]), @@ -1023,16 +1008,13 @@ def _clean_lines(df_lines, list_voltages): # Clean 
those values where multiple cables values are present, divided by # semicolon - bool_cables = ( - (df_lines["cables"].apply(lambda x: len(x.split(";")) > 1)) - & ( - df_lines.apply( - lambda row: len(row["cables"].split(";")) == row["split_elements"], - axis=1, - ) - ) - & (df_lines["cleaned"] == False) + has_multiple_cables = df_lines["cables"].apply(lambda x: len(x.split(";")) > 1) + cables_match_split_elements = df_lines.apply( + lambda row: len(row["cables"].split(";")) == row["split_elements"], + axis=1, ) + is_not_cleaned = df_lines["cleaned"] == False + bool_cables = has_multiple_cables & cables_match_split_elements & is_not_cleaned df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables].apply( lambda row: str( @@ -1713,8 +1695,6 @@ def _bridge_lines(lines): min_voltage_ac = 200000 # [unit: V] Minimum voltage value to filter AC lines. min_voltage_dc = 150000 # [unit: V] Minimum voltage value to filter DC links. - lines_to_drop = [""] - logger.info("---") logger.info("SUBSTATIONS") # Input @@ -1732,11 +1712,9 @@ def _bridge_lines(lines): df_substations["frequency"] = _clean_frequency(df_substations["frequency"]) df_substations = _clean_substations(df_substations, list_voltages) df_substations = _create_substations_geometry(df_substations) + # Merge touching polygons df_substations = _merge_touching_polygons(df_substations) - # df_substations["polygon"] = df_substations["polygon"].apply( - # lambda x: x.convex_hull - # ) df_substations = _create_substations_centroid(df_substations) df_substations = _finalise_substations(df_substations) @@ -1778,12 +1756,6 @@ def _bridge_lines(lines): df_lines = _create_lines_geometry(df_lines) df_lines = _finalise_lines(df_lines) - # Dropping specific lines, manually - if lines_to_drop in df_lines["line_id"].values: - df_lines.drop( - df_lines[df_lines["line_id"].isin(lines_to_drop)].index, inplace=True - ) - # Create GeoDataFrame gdf_lines = gpd.GeoDataFrame(df_lines, geometry="geometry", crs=crs) gdf_lines = 
_remove_lines_within_substations(gdf_lines, gdf_substations_polygon) @@ -1838,10 +1810,6 @@ def _bridge_lines(lines): df_substations.drop(columns=["polygon"]), geometry="geometry", crs=crs ) - # Export GeoDataFrames to GeoJSON in specified output paths - parentfolder = os.path.dirname(snakemake.output.substations) - if not os.path.exists(parentfolder): - os.makedirs(parentfolder) output_substations_polygon = snakemake.output["substations_polygon"] output_substations = snakemake.output["substations"] output_lines = snakemake.output["lines"] diff --git a/scripts/prepare_osm_network_release.py b/scripts/prepare_osm_network_release.py index b33009e0b..13b287816 100644 --- a/scripts/prepare_osm_network_release.py +++ b/scripts/prepare_osm_network_release.py @@ -13,6 +13,55 @@ logger = logging.getLogger(__name__) +BUSES_COLUMNS = [ + "bus_id", + "voltage", + "dc", + "symbol", + "under_construction", + "x", + "y", + "country", + "geometry", +] +LINES_COLUMNS = [ + "line_id", + "bus0", + "bus1", + "voltage", + "circuits", + "length", + "underground", + "under_construction", + "geometry", +] +LINKS_COLUMNS = [ + "link_id", + "bus0", + "bus1", + "voltage", + "p_nom", + "length", + "underground", + "under_construction", + "geometry", +] +TRANSFORMERS_COLUMNS = [ + "transformer_id", + "bus0", + "bus1", + "voltage_bus0", + "voltage_bus1", + "geometry", +] +CONVERTERS_COLUMNS = [ + "converter_id", + "bus0", + "bus1", + "geometry", +] + + def export_clean_csv(df, columns, output_file): """ Export a cleaned DataFrame to a CSV file. 
@@ -37,9 +86,6 @@ def export_clean_csv(df, columns, output_file): if "converter_id" in columns: rename_dict["Link"] = "converter_id" - # Create the directory if it doesn't exist - os.makedirs(os.path.dirname(output_file), exist_ok=True) - df.reset_index().rename(columns=rename_dict).loc[:, columns].replace( {True: "t", False: "f"} ).to_csv(output_file, index=False, quotechar="'") @@ -56,58 +102,6 @@ def export_clean_csv(df, columns, output_file): configure_logging(snakemake) set_scenario_config(snakemake) - buses_columns = [ - "bus_id", - "voltage", - "dc", - "symbol", - "under_construction", - "x", - "y", - "country", - "geometry", - ] - - lines_columns = [ - "line_id", - "bus0", - "bus1", - "voltage", - "circuits", - "length", - "underground", - "under_construction", - "geometry", - ] - - links_columns = [ - "link_id", - "bus0", - "bus1", - "voltage", - "p_nom", - "length", - "underground", - "under_construction", - "geometry", - ] - - transformers_columns = [ - "transformer_id", - "bus0", - "bus1", - "voltage_bus0", - "voltage_bus1", - "geometry", - ] - - converters_columns = [ - "converter_id", - "bus0", - "bus1", - "geometry", - ] - network = pypsa.Network(snakemake.input.base_network) network.buses["dc"] = network.buses.pop("carrier").map({"DC": "t", "AC": "f"}) @@ -116,18 +110,18 @@ def export_clean_csv(df, columns, output_file): # Export to clean csv for release logger.info(f"Exporting {len(network.buses)} buses to %s", snakemake.output.buses) - export_clean_csv(network.buses, buses_columns, snakemake.output.buses) + export_clean_csv(network.buses, BUSES_COLUMNS, snakemake.output.buses) logger.info( f"Exporting {len(network.transformers)} transformers to %s", snakemake.output.transformers, ) export_clean_csv( - network.transformers, transformers_columns, snakemake.output.transformers + network.transformers, TRANSFORMERS_COLUMNS, snakemake.output.transformers ) logger.info(f"Exporting {len(network.lines)} lines to %s", snakemake.output.lines) - 
export_clean_csv(network.lines, lines_columns, snakemake.output.lines) + export_clean_csv(network.lines, LINES_COLUMNS, snakemake.output.lines) # Boolean that specifies if link element is a converter is_converter = network.links.index.str.startswith("conv") == True @@ -137,7 +131,7 @@ def export_clean_csv(df, columns, output_file): snakemake.output.links, ) export_clean_csv( - network.links[~is_converter], links_columns, snakemake.output.links + network.links[~is_converter], LINKS_COLUMNS, snakemake.output.links ) logger.info( @@ -145,7 +139,7 @@ def export_clean_csv(df, columns, output_file): snakemake.output.converters, ) export_clean_csv( - network.links[is_converter], converters_columns, snakemake.output.converters + network.links[is_converter], CONVERTERS_COLUMNS, snakemake.output.converters ) logger.info("Export of OSM network for release complete.") diff --git a/scripts/retrieve_gdp_uamd.py b/scripts/retrieve_gdp_uamd.py deleted file mode 100644 index 780f2ea65..000000000 --- a/scripts/retrieve_gdp_uamd.py +++ /dev/null @@ -1,34 +0,0 @@ -# -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: : 2023-2024 The PyPSA-Eur Authors -# -# SPDX-License-Identifier: MIT -""" -Retrieve monthly fuel prices from Destatis. 
-""" - -import logging -from pathlib import Path - -from _helpers import configure_logging, retrieve_file, set_scenario_config - -logger = logging.getLogger(__name__) - - -if __name__ == "__main__": - if "snakemake" not in globals(): - from _helpers import mock_snakemake - - snakemake = mock_snakemake("retrieve_gdp_uamd") - configure_logging(snakemake) - set_scenario_config(snakemake) - -dict_urls = dict( - { - "gdp_non_nuts3": "https://datadryad.org/stash/downloads/file_stream/241947", - "pop_non_nuts3": "https://github.com/ecohealthalliance/sars_cov_risk/releases/download/v2.0.1/ppp_2020_1km_Aggregated.tif", - } -) - -# Download and validate each dataset -for key, path in snakemake.output.items(): - retrieve_file(dict_urls[key], path) From 48c2cae37d378481a2d526d35ee135407893f602 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Wed, 21 Aug 2024 17:10:47 +0200 Subject: [PATCH 093/100] Bug fix: Added all voltages, 200 kV-750 kV, to default config. --- config/config.default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.default.yaml b/config/config.default.yaml index 39017dfa5..0fd6e9869 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -86,7 +86,7 @@ co2_budget: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity electricity: - voltages: [200., 300., 380., 500., 750.] + voltages: [200., 220., 300., 380., 500., 750.] base_network: entsoegridkit gaslimit_enable: false gaslimit: false From 89f09554e51397df70d3d2ba407915786edfe91a Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Wed, 21 Aug 2024 22:21:44 +0200 Subject: [PATCH 094/100] Cleaning and bugfixes. 
--- config/config.default.yaml | 2 +- rules/build_electricity.smk | 36 +++++++++++++------------- rules/development.smk | 10 +++---- scripts/build_osm_network.py | 7 ++++- scripts/clean_osm_data.py | 2 ++ scripts/prepare_osm_network_release.py | 1 + 6 files changed, 33 insertions(+), 25 deletions(-) diff --git a/config/config.default.yaml b/config/config.default.yaml index 0fd6e9869..86689ea72 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -42,7 +42,7 @@ scenario: ll: - vopt clusters: - - 38 + - 41 - 128 - 256 opts: diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 06730bcf6..4446ef765 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -54,7 +54,7 @@ def input_base_network(w): base_network = config_provider("electricity", "base_network")(w) components = {"buses", "lines", "links", "converters", "transformers"} if base_network == "osm-raw": - inputs = {c: resources(f"osm/pre-base/{c}.csv") for c in components} + inputs = {c: resources(f"osm-raw/build/{c}.csv") for c in components} else: inputs = {c: f"data/{base_network}/{c}.csv" for c in components} if base_network == "entsoegridkit": @@ -665,10 +665,10 @@ if config["electricity"]["base_network"] == "osm-raw": offshore_shapes=resources("offshore_shapes.geojson"), country_shapes=resources("country_shapes.geojson"), output: - substations=resources("osm/clean/substations.geojson"), - substations_polygon=resources("osm/clean/substations_polygon.geojson"), - lines=resources("osm/clean/lines.geojson"), - links=resources("osm/clean/links.geojson"), + substations=resources("osm-raw/clean/substations.geojson"), + substations_polygon=resources("osm-raw/clean/substations_polygon.geojson"), + lines=resources("osm-raw/clean/lines.geojson"), + links=resources("osm-raw/clean/links.geojson"), log: logs("clean_osm_data.log"), benchmark: @@ -686,21 +686,21 @@ if config["electricity"]["base_network"] == "osm-raw": rule build_osm_network: input: - 
substations=resources("osm/clean/substations.geojson"), - lines=resources("osm/clean/lines.geojson"), - links=resources("osm/clean/links.geojson"), + substations=resources("osm-raw/clean/substations.geojson"), + lines=resources("osm-raw/clean/lines.geojson"), + links=resources("osm-raw/clean/links.geojson"), country_shapes=resources("country_shapes.geojson"), output: - lines=resources("osm/pre-base/lines.csv"), - links=resources("osm/pre-base/links.csv"), - converters=resources("osm/pre-base/converters.csv"), - transformers=resources("osm/pre-base/transformers.csv"), - substations=resources("osm/pre-base/buses.csv"), - lines_geojson=resources("osm/pre-base/lines.geojson"), - links_geojson=resources("osm/pre-base/links.geojson"), - converters_geojson=resources("osm/pre-base/converters.geojson"), - transformers_geojson=resources("osm/pre-base/transformers.geojson"), - substations_geojson=resources("osm/pre-base/buses.geojson"), + lines=resources("osm-raw/build/lines.csv"), + links=resources("osm-raw/build/links.csv"), + converters=resources("osm-raw/build/converters.csv"), + transformers=resources("osm-raw/build/transformers.csv"), + substations=resources("osm-raw/build/buses.csv"), + lines_geojson=resources("osm-raw/build/geojson/lines.geojson"), + links_geojson=resources("osm-raw/build/geojson/links.geojson"), + converters_geojson=resources("osm-raw/build/geojson/converters.geojson"), + transformers_geojson=resources("osm-raw/build/geojson/transformers.geojson"), + substations_geojson=resources("osm-raw/build/geojson/buses.geojson"), log: logs("build_osm_network.log"), benchmark: diff --git a/rules/development.smk b/rules/development.smk index 0386e38e8..465490258 100644 --- a/rules/development.smk +++ b/rules/development.smk @@ -8,11 +8,11 @@ if config["electricity"]["base_network"] == "osm-raw": input: base_network=resources("networks/base.nc"), output: - buses=resources("osm/release/buses.csv"), - converters=resources("osm/release/converters.csv"), - 
lines=resources("osm/release/lines.csv"), - links=resources("osm/release/links.csv"), - transformers=resources("osm/release/transformers.csv"), + buses=resources("osm-raw/release/buses.csv"), + converters=resources("osm-raw/release/converters.csv"), + lines=resources("osm-raw/release/lines.csv"), + links=resources("osm-raw/release/links.csv"), + transformers=resources("osm-raw/release/transformers.csv"), log: logs("prepare_osm_network_release.log"), benchmark: diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 889105754..6d55f8ee6 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -39,6 +39,7 @@ "voltage", "p_nom", "length", + "underground", "under_construction", "geometry", ] @@ -484,6 +485,7 @@ def _get_converters(buses, links, distance_crs): converter_id, # "line_id" link_end, # "bus0" closest_bus, # "bus1" + row["voltage"], # "voltage" row["p_nom"], # "p_nom" False, # "underground" False, # "under_construction" @@ -498,6 +500,7 @@ def _get_converters(buses, links, distance_crs): "converter_id", "bus0", "bus1", + "voltage", "p_nom", "underground", "under_construction", @@ -802,7 +805,7 @@ def build_network(inputs, outputs): lines, buses = fix_overpassing_lines(lines, buses, DISTANCE_CRS, tol=1) # Merge buses with same voltage and within tolerance - logger.info(f"Aggregating close substations: Enabled with tolerance {BUS_TOL} m") + logger.info(f"Aggregating close substations with a tolerance of {BUS_TOL} m") lines, links, buses = merge_stations_lines_by_station_id_and_voltage( lines, links, buses, DISTANCE_CRS, BUS_TOL @@ -834,6 +837,8 @@ def build_network(inputs, outputs): lines["voltage"] = lines["voltage"] / 1000 if not links.empty: links["voltage"] = links["voltage"] / 1000 + if not converters.empty: + converters["voltage"] = converters["voltage"] / 1000 transformers["voltage_bus0"], transformers["voltage_bus1"] = ( transformers["voltage_bus0"] / 1000, transformers["voltage_bus1"] / 1000, diff --git 
a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 7f42ee2c3..4377f84af 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -1295,6 +1295,7 @@ def _finalise_links(df_links): df_links["bus0"] = None df_links["bus1"] = None df_links["length"] = None + df_links["underground"] = True df_links["under_construction"] = False df_links["dc"] = True @@ -1307,6 +1308,7 @@ def _finalise_links(df_links): "bus0", "bus1", "length", + "underground", "under_construction", "dc", "country", diff --git a/scripts/prepare_osm_network_release.py b/scripts/prepare_osm_network_release.py index 13b287816..ac6b25354 100644 --- a/scripts/prepare_osm_network_release.py +++ b/scripts/prepare_osm_network_release.py @@ -58,6 +58,7 @@ "converter_id", "bus0", "bus1", + "voltage", "geometry", ] From 07a7a6476600d0dd23d3e5cc957b70132959a4f9 Mon Sep 17 00:00:00 2001 From: bobbyxng Date: Thu, 22 Aug 2024 11:25:07 +0200 Subject: [PATCH 095/100] Updated Zenodo repository to https://zenodo.org/records/13358976. Added converter voltages, 'underground' property for DC lines/cables, and included Konti-Skan HVDC (DK-SE). 
Added compatibility with https://github.com/PyPSA/pypsa-eur/pull/1079 and https://github.com/PyPSA/pypsa-eur/pull/1085 --- rules/retrieve.smk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rules/retrieve.smk b/rules/retrieve.smk index 75ab5a375..c30696ccf 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -398,14 +398,14 @@ if config["enable"]["retrieve"] and ( rule retrieve_osm_prebuilt: input: - buses=storage("https://zenodo.org/records/13342577/files/buses.csv"), + buses=storage("https://zenodo.org/records/13358976/files/buses.csv"), converters=storage( - "https://zenodo.org/records/13342577/files/converters.csv" + "https://zenodo.org/records/13358976/files/converters.csv" ), - lines=storage("https://zenodo.org/records/13342577/files/lines.csv"), - links=storage("https://zenodo.org/records/13342577/files/links.csv"), + lines=storage("https://zenodo.org/records/13358976/files/lines.csv"), + links=storage("https://zenodo.org/records/13358976/files/links.csv"), transformers=storage( - "https://zenodo.org/records/13342577/files/transformers.csv" + "https://zenodo.org/records/13358976/files/transformers.csv" ), output: buses="data/osm-prebuilt/buses.csv", From 6be527f66d4889ec1c4f234e68af8724d63b30e9 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Thu, 22 Aug 2024 11:53:17 +0200 Subject: [PATCH 096/100] Apply suggestions from code review --- scripts/base_network.py | 2 +- scripts/build_osm_network.py | 27 --------------------------- scripts/clean_osm_data.py | 26 -------------------------- 3 files changed, 1 insertion(+), 54 deletions(-) diff --git a/scripts/base_network.py b/scripts/base_network.py index 5c98129b1..afb66387e 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -776,7 +776,7 @@ def base_network( n.import_components_from_dataframe(converters, "Link") _set_lines_s_nom_from_linetypes(n) - if config["electricity"].get("base_network") == "gridkit": + if config["electricity"].get("base_network") 
== "entsoegridkit": _apply_parameter_corrections(n, parameter_corrections) n = _remove_unconnected_components(n) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 6d55f8ee6..fb2c0ea15 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -53,33 +53,6 @@ ] -def read_geojson(fn, cols=[], dtype=None, crs="EPSG:4326"): - """ - Function to read a geojson file fn. When the file is empty, then an empty - GeoDataFrame is returned having columns cols, the specified crs and the - columns specified by the dtype dictionary it not none. - - Parameters: - ------------ - fn : str - Path to the file to read - cols : list - List of columns of the GeoDataFrame - dtype : dict - Dictionary of the type of the object by column - crs : str - CRS of the GeoDataFrame - """ - # if the file is non-zero, read the geodataframe and return it - if os.path.getsize(fn) > 0: - return gpd.read_file(fn) - else: - # else return an empty GeoDataFrame - df = gpd.GeoDataFrame(columns=cols, geometry=[], crs=crs) - if isinstance(dtype, dict): - for k, v in dtype.items(): - df[k] = df[k].astype(v) - return df def line_endings_to_bus_conversion(lines): diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 4377f84af..8669d0af5 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -1655,32 +1655,6 @@ def _extend_lines_to_substations(gdf_lines, gdf_substations_polygon): # Function to bridge gaps between all lines -def _bridge_lines(lines): - bridged_lines = [] - for i in range(len(lines) - 1): - bridged_lines.append(lines[i]) - - # Get the endpoints of the current line and the startpoints of the next line - end_points = [lines[i].coords[-1], lines[i].coords[0]] - start_points = [lines[i + 1].coords[0], lines[i + 1].coords[-1]] - - # Find the closest pair of points between the two LineStrings - min_distance = float("inf") - closest_pair = None - - for end_point in end_points: - for start_point in start_points: - 
distance = LineString([end_point, start_point]).length - if distance < min_distance: - min_distance = distance - closest_pair = (end_point, start_point) - - # Create a bridge between the closest points - bridge = LineString(closest_pair) - bridged_lines.append(bridge) - - bridged_lines.append(lines[-1]) - return bridged_lines if __name__ == "__main__": From c7cfd45c816cfe1a7f6bcd23a43c6d93a23eedfd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 22 Aug 2024 09:53:36 +0000 Subject: [PATCH 097/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/build_osm_network.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index fb2c0ea15..83461a98d 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -53,8 +53,6 @@ ] - - def line_endings_to_bus_conversion(lines): """ Converts line endings to bus connections. 
From 259b4ec7c9da2fd372b1f53f392987bc06af09a7 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Thu, 22 Aug 2024 14:47:57 +0200 Subject: [PATCH 098/100] simplify_network: handle complicated transformer topologies --- scripts/simplify_network.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 5407748fd..2b19a2b01 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -132,8 +132,8 @@ def simplify_network_to_380(n, linetype_380): trafo_map = pd.Series(n.transformers.bus1.values, n.transformers.bus0.values) trafo_map = trafo_map[~trafo_map.index.duplicated(keep="first")] - several_trafo_b = trafo_map.isin(trafo_map.index) - trafo_map[several_trafo_b] = trafo_map[several_trafo_b].map(trafo_map) + while (several_trafo_b := trafo_map.isin(trafo_map.index)).any(): + trafo_map[several_trafo_b] = trafo_map[several_trafo_b].map(trafo_map) missing_buses_i = n.buses.index.difference(trafo_map.index) missing = pd.Series(missing_buses_i, missing_buses_i) trafo_map = pd.concat([trafo_map, missing]) @@ -632,7 +632,7 @@ def find_closest_bus(n, x, y, tol=2000): aggregation_strategies=params.aggregation_strategies, ) busmaps.append(stub_map) - +' if params.simplify_network["to_substations"]: n, substation_map = aggregate_to_substations(n, params.aggregation_strategies) busmaps.append(substation_map) @@ -696,3 +696,4 @@ def find_closest_bus(n, x, y, tol=2000): n.meta = dict(snakemake.config, **dict(wildcards=dict(snakemake.wildcards))) n.export_to_netcdf(snakemake.output.network) +' \ No newline at end of file From e1d4b49b0a487a2e3d0d76c372499c489a9b8ea3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:58:26 +0000 Subject: [PATCH 099/100] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/simplify_network.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 2b19a2b01..116f155bf 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -696,4 +696,4 @@ def find_closest_bus(n, x, y, tol=2000): n.meta = dict(snakemake.config, **dict(wildcards=dict(snakemake.wildcards))) n.export_to_netcdf(snakemake.output.network) -' \ No newline at end of file +' From cd9855fd1905ba79488414f7635a9a756934f7fa Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Thu, 22 Aug 2024 14:59:13 +0200 Subject: [PATCH 100/100] syntax fix --- scripts/simplify_network.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 2b19a2b01..f8d2ed87e 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -632,7 +632,7 @@ def find_closest_bus(n, x, y, tol=2000): aggregation_strategies=params.aggregation_strategies, ) busmaps.append(stub_map) -' + if params.simplify_network["to_substations"]: n, substation_map = aggregate_to_substations(n, params.aggregation_strategies) busmaps.append(substation_map) @@ -695,5 +695,4 @@ def find_closest_bus(n, x, y, tol=2000): append_bus_shapes(n, clustered_regions, type=which.split("_")[1]) n.meta = dict(snakemake.config, **dict(wildcards=dict(snakemake.wildcards))) - n.export_to_netcdf(snakemake.output.network) -' \ No newline at end of file + n.export_to_netcdf(snakemake.output.network) \ No newline at end of file