From 154ab4ac1aa6559f2de131b57db12d84accd4d72 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Wed, 12 May 2021 11:05:13 +0200 Subject: [PATCH 01/49] Add basic support for variable mappings --- esmvalcore/_recipe.py | 8 ++++++++ esmvalcore/cmor/_fixes/fix.py | 22 ++++++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 3e094c9ed4..40fee37aa8 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -21,6 +21,7 @@ from ._provenance import TrackedFile, get_recipe_provenance from ._recipe_checks import RecipeError from ._task import DiagnosticTask, TaskSet +from .cmor._fixes.fix import get_variable_mappings from .cmor.check import CheckLevels from .cmor.table import CMOR_TABLES from .preprocessor import ( @@ -1075,6 +1076,12 @@ def expand_tag(variable, input_tag): return expanded + def _add_project_variable_mappings(self, variable): + mappings = get_variable_mappings(variable["project"], + variable["dataset"]) + mapping = mappings[variable["mip"]][variable["short_name"]] + _augment(variable, mapping) + def _initialize_variables(self, raw_variable, raw_datasets): """Define variables for all datasets.""" variables = [] @@ -1110,6 +1117,7 @@ def _initialize_variables(self, raw_variable, raw_datasets): if 'fx' not in raw_variable.get('mip', ''): required_keys.update({'start_year', 'end_year'}) for variable in variables: + self._add_project_variable_mappings(variable) if 'institute' not in variable: institute = get_institutes(variable) if institute: diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 4595c3914f..86b7923cdd 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -2,15 +2,30 @@ import importlib import inspect import os +from functools import lru_cache +from pathlib import Path + +import yaml + +from esmvalcore._config._config import CFG from ..table import CMOR_TABLES +@lru_cache +def 
get_variable_mappings(project, dataset): + DEFAULT_PATH = (Path(__file__).parents[0] / project / + f"{dataset}-mappings.yml") + mapping_path = CFG.get(project, {}).get("mapping_path", DEFAULT_PATH) + with open(mapping_path, "r") as mapping_file: + return yaml.safe_load(mapping_file) + + class Fix: """ Base class for dataset fixes. """ - def __init__(self, vardef): + def __init__(self, vardef, var_mapping): """Initialize fix object. Parameters @@ -20,6 +35,7 @@ def __init__(self, vardef): """ self.vardef = vardef + self.var_mapping = var_mapping def fix_file(self, filepath, output_dir): """ @@ -150,6 +166,8 @@ def get_fixes(project, dataset, mip, short_name): """ cmor_table = CMOR_TABLES[project] vardef = cmor_table.get_variable(mip, short_name) + mapping = get_variable_mappings(project, dataset) + var_mapping = mapping.get(mip, {}).get(short_name, None) project = project.replace('-', '_').lower() dataset = dataset.replace('-', '_').lower() @@ -164,7 +182,7 @@ def get_fixes(project, dataset, mip, short_name): classes = dict((name.lower(), value) for name, value in classes) for fix_name in (short_name, mip.lower(), 'allvars'): try: - fixes.append(classes[fix_name](vardef)) + fixes.append(classes[fix_name](vardef, var_mapping)) except KeyError: pass except ImportError: From 363e75ccdc38c755dcbd5b5f0aa5c4247527cc78 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Fri, 14 May 2021 15:15:12 +0200 Subject: [PATCH 02/49] Move get_variable_mappings to _config --- esmvalcore/_config/__init__.py | 2 ++ esmvalcore/_config/_config.py | 10 ++++++++++ esmvalcore/_recipe.py | 9 +++++++-- esmvalcore/cmor/_fixes/fix.py | 16 +--------------- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/esmvalcore/_config/__init__.py b/esmvalcore/_config/__init__.py index 382237b990..c36e230d85 100644 --- a/esmvalcore/_config/__init__.py +++ b/esmvalcore/_config/__init__.py @@ -3,6 +3,7 @@ get_activity, get_institutes, get_project_config, + get_variable_mappings, 
load_config_developer, read_config_developer_file, read_config_user_file, @@ -14,6 +15,7 @@ 'read_config_user_file', 'read_config_developer_file', 'load_config_developer', + 'get_variable_mappings', 'get_project_config', 'get_institutes', 'get_activity', diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index 12e1abe8c8..87484fdc98 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -3,6 +3,7 @@ import logging import os import warnings +from functools import lru_cache from pathlib import Path import yaml @@ -14,6 +15,15 @@ CFG = {} +@lru_cache +def get_variable_mappings(project, dataset): + DEFAULT_PATH = (Path(__file__).parents[0] / project + / f"{dataset}-mappings.yml") + mapping_path = CFG.get(project, {}).get("mapping_path", DEFAULT_PATH) + with open(mapping_path, "r") as mapping_file: + return yaml.safe_load(mapping_file) + + def read_config_user_file(config_file, folder_name, options=None): """Read config user file and store settings in a dictionary.""" if not config_file: diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 40fee37aa8..ecae38f2cc 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -12,7 +12,13 @@ from . import __version__ from . 
import _recipe_checks as check -from ._config import TAGS, get_activity, get_institutes, get_project_config +from ._config import ( + TAGS, + get_activity, + get_institutes, + get_project_config, + get_variable_mappings, +) from ._data_finder import ( get_input_filelist, get_output_file, @@ -21,7 +27,6 @@ from ._provenance import TrackedFile, get_recipe_provenance from ._recipe_checks import RecipeError from ._task import DiagnosticTask, TaskSet -from .cmor._fixes.fix import get_variable_mappings from .cmor.check import CheckLevels from .cmor.table import CMOR_TABLES from .preprocessor import ( diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 86b7923cdd..100bb9684d 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -2,25 +2,11 @@ import importlib import inspect import os -from functools import lru_cache -from pathlib import Path - -import yaml - -from esmvalcore._config._config import CFG +from esmvalcore._config import get_variable_mappings from ..table import CMOR_TABLES -@lru_cache -def get_variable_mappings(project, dataset): - DEFAULT_PATH = (Path(__file__).parents[0] / project / - f"{dataset}-mappings.yml") - mapping_path = CFG.get(project, {}).get("mapping_path", DEFAULT_PATH) - with open(mapping_path, "r") as mapping_file: - return yaml.safe_load(mapping_file) - - class Fix: """ Base class for dataset fixes. 
From 889173e14922a0b23ea1d000e15be2a085aeb215 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Fri, 14 May 2021 15:41:13 +0200 Subject: [PATCH 03/49] Add handling of mip and short_name to get_variable_mappings --- esmvalcore/_config/_config.py | 8 +++++++- esmvalcore/_recipe.py | 7 ++++--- esmvalcore/cmor/_fixes/fix.py | 3 +-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index 87484fdc98..210bbe638c 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -16,7 +16,7 @@ @lru_cache -def get_variable_mappings(project, dataset): +def _get_project_mappings(project, dataset): DEFAULT_PATH = (Path(__file__).parents[0] / project / f"{dataset}-mappings.yml") mapping_path = CFG.get(project, {}).get("mapping_path", DEFAULT_PATH) @@ -24,6 +24,12 @@ def get_variable_mappings(project, dataset): return yaml.safe_load(mapping_file) +@lru_cache +def get_variable_mappings(project, dataset, mip, short_name): + project_mappings = _get_project_mappings(project, dataset) + return project_mappings.get(mip, {}).get(short_name, None) + + def read_config_user_file(config_file, folder_name, options=None): """Read config user file and store settings in a dictionary.""" if not config_file: diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index ecae38f2cc..2eade95977 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1083,9 +1083,10 @@ def expand_tag(variable, input_tag): def _add_project_variable_mappings(self, variable): mappings = get_variable_mappings(variable["project"], - variable["dataset"]) - mapping = mappings[variable["mip"]][variable["short_name"]] - _augment(variable, mapping) + variable["dataset"], + variable["mip"], + variable["short_name"]) + _augment(variable, mappings) def _initialize_variables(self, raw_variable, raw_datasets): """Define variables for all datasets.""" diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py 
index 100bb9684d..b1157268b4 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -152,8 +152,7 @@ def get_fixes(project, dataset, mip, short_name): """ cmor_table = CMOR_TABLES[project] vardef = cmor_table.get_variable(mip, short_name) - mapping = get_variable_mappings(project, dataset) - var_mapping = mapping.get(mip, {}).get(short_name, None) + var_mapping = get_variable_mappings(project, dataset, mip, short_name) project = project.replace('-', '_').lower() dataset = dataset.replace('-', '_').lower() From 204bd13f6a317d73eab0961f114a7142db5d6762 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Sun, 16 May 2021 21:57:56 +0200 Subject: [PATCH 04/49] Move to new directory layout with importlib_resources --- esmvalcore/_config/_config.py | 20 +++++++++++++++----- setup.py | 1 + 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index 210bbe638c..10abda49a3 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -14,14 +14,24 @@ CFG = {} +try: + from importlib.resources import files as importlib_files +except ImportError: + from importlib_resources import files as importlib_files + @lru_cache def _get_project_mappings(project, dataset): - DEFAULT_PATH = (Path(__file__).parents[0] / project - / f"{dataset}-mappings.yml") - mapping_path = CFG.get(project, {}).get("mapping_path", DEFAULT_PATH) - with open(mapping_path, "r") as mapping_file: - return yaml.safe_load(mapping_file) + config = {} + SEARCH_PATHS = [importlib_files(__package__) / "mappings"] + for search_path in SEARCH_PATHS: + config_files = search_path.glob(f"{project}-*.yml") + for config_file in sorted(config_files): + with config_file.open() as f: + config_piece = yaml.safe_load(f) + if config_piece: + config.update(config_piece) + return config @lru_cache diff --git a/setup.py b/setup.py index 38e6f48ccc..939b3c9484 100755 --- a/setup.py +++ b/setup.py @@ -34,6 +34,7 @@ 
'dask[array]', 'fiona', 'fire', + "importlib_resources;python_version<'3.9'", 'jinja2', 'nc-time-axis', # needed by iris.plot 'netCDF4', From 1b5fbd157e08051276309767a00359f268e148ad Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Sun, 16 May 2021 22:20:48 +0200 Subject: [PATCH 05/49] Introduce deep_update functionality --- esmvalcore/_config/_config.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index 10abda49a3..a61d90eb8d 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -1,4 +1,5 @@ """Functions dealing with config-user.yml / config-developer.yml.""" +import collections.abc import datetime import logging import os @@ -20,6 +21,15 @@ from importlib_resources import files as importlib_files +def deep_update(dictionary, update): + for k, v in update.items(): + if isinstance(v, collections.abc.Mapping): + dictionary[k] = deep_update(dictionary.get(k, {}), v) + else: + dictionary[k] = v + return dictionary + + @lru_cache def _get_project_mappings(project, dataset): config = {} @@ -30,7 +40,7 @@ def _get_project_mappings(project, dataset): with config_file.open() as f: config_piece = yaml.safe_load(f) if config_piece: - config.update(config_piece) + deep_update(config, config_piece) return config From 4cbb17bba4c5e5b1509f70a2f46b074af9fc2d14 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Sun, 16 May 2021 22:21:11 +0200 Subject: [PATCH 06/49] Fix dataset handling --- esmvalcore/_config/_config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index a61d90eb8d..40a0c3795e 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -31,7 +31,7 @@ def deep_update(dictionary, update): @lru_cache -def _get_project_mappings(project, dataset): +def _get_project_mappings(project): config = {} SEARCH_PATHS = 
[importlib_files(__package__) / "mappings"] for search_path in SEARCH_PATHS: @@ -46,8 +46,8 @@ def _get_project_mappings(project, dataset): @lru_cache def get_variable_mappings(project, dataset, mip, short_name): - project_mappings = _get_project_mappings(project, dataset) - return project_mappings.get(mip, {}).get(short_name, None) + project_mappings = _get_project_mappings(project) + return project_mappings.get(dataset, {}).get(mip, {}).get(short_name, None) def read_config_user_file(config_file, folder_name, options=None): From 90ba75f731fec117769cc5209ffb6397210531e1 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Sun, 16 May 2021 22:24:59 +0200 Subject: [PATCH 07/49] Use lowercase for project in filename --- esmvalcore/_config/_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index 40a0c3795e..0eddc891da 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -35,7 +35,7 @@ def _get_project_mappings(project): config = {} SEARCH_PATHS = [importlib_files(__package__) / "mappings"] for search_path in SEARCH_PATHS: - config_files = search_path.glob(f"{project}-*.yml") + config_files = search_path.glob(f"{project.lower()}-*.yml") for config_file in sorted(config_files): with config_file.open() as f: config_piece = yaml.safe_load(f) From 0cf7ad7fe2d6eb768fa6f915f14fa98454c0605d Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Sun, 16 May 2021 22:51:14 +0200 Subject: [PATCH 08/49] Allow for empty var_mapping to support existing fixes --- esmvalcore/cmor/_fixes/fix.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index b1157268b4..82db07858e 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -11,7 +11,7 @@ class Fix: """ Base class for dataset fixes. 
""" - def __init__(self, vardef, var_mapping): + def __init__(self, vardef, var_mapping=None): """Initialize fix object. Parameters @@ -21,6 +21,8 @@ def __init__(self, vardef, var_mapping): """ self.vardef = vardef + if var_mapping is None: + var_mapping = {} self.var_mapping = var_mapping def fix_file(self, filepath, output_dir): From 3d5cbd8e589e9ca10615a5f166b884fd8cb56f19 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 17 May 2021 09:38:42 +0200 Subject: [PATCH 09/49] Return empty dict instead of None to signal "no mappings" --- esmvalcore/_config/_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index 0eddc891da..d73d8a42c9 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -47,7 +47,7 @@ def _get_project_mappings(project): @lru_cache def get_variable_mappings(project, dataset, mip, short_name): project_mappings = _get_project_mappings(project) - return project_mappings.get(dataset, {}).get(mip, {}).get(short_name, None) + return project_mappings.get(dataset, {}).get(mip, {}).get(short_name, {}) def read_config_user_file(config_file, folder_name, options=None): From 97b0243d57ec3e95f2177b3a945eb8d26573c56e Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 17 May 2021 11:32:21 +0200 Subject: [PATCH 10/49] Change conditional import to work around mypy bug python/mypy#1153 https://github.com/python/mypy/issues/1153 --- esmvalcore/_config/_config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index d73d8a42c9..acf83b7f80 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -3,6 +3,7 @@ import datetime import logging import os +import sys import warnings from functools import lru_cache from pathlib import Path @@ -15,9 +16,9 @@ CFG = {} -try: +if sys.version_info[:2] >= (3, 9): from importlib.resources import 
files as importlib_files -except ImportError: +else: from importlib_resources import files as importlib_files From 3df52f5a8778a086d33f19da26f0bb5eb0dbbe54 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 17 May 2021 12:07:26 +0200 Subject: [PATCH 11/49] Add importlib_resources to doc requirements --- doc/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/requirements.txt b/doc/requirements.txt index a948ebe1d0..1a1e74592b 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,6 +1,7 @@ autodocsumm dask[array] fiona +importlib_resources jinja2 netCDF4 numpy From 160e359ed9b69ab5a74981092bd71300b96d34d7 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 17 May 2021 15:38:46 +0200 Subject: [PATCH 12/49] Improve code quality --- esmvalcore/_config/_config.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index acf83b7f80..16ae1fefa4 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -17,31 +17,35 @@ CFG = {} if sys.version_info[:2] >= (3, 9): + # pylint: disable=no-name-in-module from importlib.resources import files as importlib_files else: from importlib_resources import files as importlib_files -def deep_update(dictionary, update): - for k, v in update.items(): - if isinstance(v, collections.abc.Mapping): - dictionary[k] = deep_update(dictionary.get(k, {}), v) +def _deep_update(dictionary, update): + for key, value in update.items(): + if isinstance(value, collections.abc.Mapping): + dictionary[key] = _deep_update(dictionary.get(key, {}), value) else: - dictionary[k] = v + dictionary[key] = value return dictionary @lru_cache def _get_project_mappings(project): config = {} - SEARCH_PATHS = [importlib_files(__package__) / "mappings"] - for search_path in SEARCH_PATHS: - config_files = search_path.glob(f"{project.lower()}-*.yml") - for config_file in sorted(config_files): - with 
config_file.open() as f: - config_piece = yaml.safe_load(f) + config_paths = [ + importlib_files("esmvalcore._config"), + ] + for config_path in config_paths: + search_path = config_path / "mappings" + config_file_paths = search_path.glob(f"{project.lower()}-*.yml") + for config_file_path in sorted(config_file_paths): + with config_file_path.open() as config_file: + config_piece = yaml.safe_load(config_file) if config_piece: - deep_update(config, config_piece) + _deep_update(config, config_piece) return config From 69d730111d14f752ca3b50c377ac682ca58138b0 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 17 May 2021 15:38:59 +0200 Subject: [PATCH 13/49] Add user config directory --- esmvalcore/_config/_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index 16ae1fefa4..e989c57bac 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -37,6 +37,7 @@ def _get_project_mappings(project): config = {} config_paths = [ importlib_files("esmvalcore._config"), + Path.home() / ".esmvaltool", ] for config_path in config_paths: search_path = config_path / "mappings" From c68aaf249445c13a4182c43741f176a80e2b28eb Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 17 May 2021 15:51:05 +0200 Subject: [PATCH 14/49] Move project variable mappings handling out of Recipe class --- esmvalcore/_recipe.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 2eade95977..ac987a8381 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -99,6 +99,14 @@ def _add_cmor_info(variable, override=False): check.variable(variable, required_keys=cmor_keys) +def _add_project_variable_mappings(variable): + mappings = get_variable_mappings(variable["project"], + variable["dataset"], + variable["mip"], + variable["short_name"]) + _augment(variable, mappings) + + def 
_special_name_to_dataset(variable, special_name): """Convert special names to dataset names.""" if special_name in ('reference_dataset', 'alternative_dataset'): @@ -1081,13 +1089,6 @@ def expand_tag(variable, input_tag): return expanded - def _add_project_variable_mappings(self, variable): - mappings = get_variable_mappings(variable["project"], - variable["dataset"], - variable["mip"], - variable["short_name"]) - _augment(variable, mappings) - def _initialize_variables(self, raw_variable, raw_datasets): """Define variables for all datasets.""" variables = [] @@ -1123,7 +1124,7 @@ def _initialize_variables(self, raw_variable, raw_datasets): if 'fx' not in raw_variable.get('mip', ''): required_keys.update({'start_year', 'end_year'}) for variable in variables: - self._add_project_variable_mappings(variable) + _add_project_variable_mappings(variable) if 'institute' not in variable: institute = get_institutes(variable) if institute: From 037422e5088d1e6babebafbf793844d47c2786a5 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 17 May 2021 15:51:16 +0200 Subject: [PATCH 15/49] Add rudimentary docstring --- esmvalcore/_config/_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index e989c57bac..6745a753b8 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -52,6 +52,7 @@ def _get_project_mappings(project): @lru_cache def get_variable_mappings(project, dataset, mip, short_name): + """Read configuration files with additional variable information.""" project_mappings = _get_project_mappings(project) return project_mappings.get(dataset, {}).get(mip, {}).get(short_name, {}) From b842a0b005943e8e799b3e53696b8ee978360968 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Mon, 17 May 2021 20:27:14 +0200 Subject: [PATCH 16/49] Use variable details instead of variable mappings for better terminology --- esmvalcore/_config/__init__.py | 4 ++-- 
esmvalcore/_config/_config.py | 10 +++++----- esmvalcore/_recipe.py | 16 ++++++++-------- esmvalcore/cmor/_fixes/fix.py | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/esmvalcore/_config/__init__.py b/esmvalcore/_config/__init__.py index c36e230d85..ae1828aaa9 100644 --- a/esmvalcore/_config/__init__.py +++ b/esmvalcore/_config/__init__.py @@ -3,7 +3,7 @@ get_activity, get_institutes, get_project_config, - get_variable_mappings, + get_variable_details, load_config_developer, read_config_developer_file, read_config_user_file, @@ -15,7 +15,7 @@ 'read_config_user_file', 'read_config_developer_file', 'load_config_developer', - 'get_variable_mappings', + 'get_variable_details', 'get_project_config', 'get_institutes', 'get_activity', diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index 6745a753b8..953b0ed130 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -33,14 +33,14 @@ def _deep_update(dictionary, update): @lru_cache -def _get_project_mappings(project): +def _get_variable_details_for_project(project): config = {} config_paths = [ importlib_files("esmvalcore._config"), Path.home() / ".esmvaltool", ] for config_path in config_paths: - search_path = config_path / "mappings" + search_path = config_path / "variable_details" config_file_paths = search_path.glob(f"{project.lower()}-*.yml") for config_file_path in sorted(config_file_paths): with config_file_path.open() as config_file: @@ -51,10 +51,10 @@ def _get_project_mappings(project): @lru_cache -def get_variable_mappings(project, dataset, mip, short_name): +def get_variable_details(project, dataset, mip, short_name): """Read configuration files with additional variable information.""" - project_mappings = _get_project_mappings(project) - return project_mappings.get(dataset, {}).get(mip, {}).get(short_name, {}) + project_details = _get_variable_details_for_project(project) + return project_details.get(dataset, {}).get(mip, 
{}).get(short_name, {}) def read_config_user_file(config_file, folder_name, options=None): diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index ac987a8381..99bd6a4441 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -17,7 +17,7 @@ get_activity, get_institutes, get_project_config, - get_variable_mappings, + get_variable_details, ) from ._data_finder import ( get_input_filelist, @@ -99,12 +99,12 @@ def _add_cmor_info(variable, override=False): check.variable(variable, required_keys=cmor_keys) -def _add_project_variable_mappings(variable): - mappings = get_variable_mappings(variable["project"], - variable["dataset"], - variable["mip"], - variable["short_name"]) - _augment(variable, mappings) +def _add_project_variable_details(variable): + details = get_variable_details(variable["project"], + variable["dataset"], + variable["mip"], + variable["short_name"]) + _augment(variable, details) def _special_name_to_dataset(variable, special_name): @@ -1124,7 +1124,7 @@ def _initialize_variables(self, raw_variable, raw_datasets): if 'fx' not in raw_variable.get('mip', ''): required_keys.update({'start_year', 'end_year'}) for variable in variables: - _add_project_variable_mappings(variable) + _add_project_variable_details(variable) if 'institute' not in variable: institute = get_institutes(variable) if institute: diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 82db07858e..bbdb9f8482 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -3,7 +3,7 @@ import inspect import os -from esmvalcore._config import get_variable_mappings +from esmvalcore._config import get_variable_details from ..table import CMOR_TABLES @@ -154,7 +154,7 @@ def get_fixes(project, dataset, mip, short_name): """ cmor_table = CMOR_TABLES[project] vardef = cmor_table.get_variable(mip, short_name) - var_mapping = get_variable_mappings(project, dataset, mip, short_name) + var_mapping = get_variable_details(project, dataset, 
mip, short_name) project = project.replace('-', '_').lower() dataset = dataset.replace('-', '_').lower() From 66a448036a0a2a527373c2ee98f644e760e7d823 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Tue, 1 Jun 2021 10:52:16 +0200 Subject: [PATCH 17/49] Address renaming and logging suggestions --- esmvalcore/_config/__init__.py | 4 ++-- esmvalcore/_config/_config.py | 7 ++++--- esmvalcore/_recipe.py | 10 +++++----- esmvalcore/cmor/_fixes/fix.py | 4 ++-- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_config/__init__.py b/esmvalcore/_config/__init__.py index ae1828aaa9..1b7357ac1c 100644 --- a/esmvalcore/_config/__init__.py +++ b/esmvalcore/_config/__init__.py @@ -3,7 +3,7 @@ get_activity, get_institutes, get_project_config, - get_variable_details, + get_extra_facets, load_config_developer, read_config_developer_file, read_config_user_file, @@ -15,7 +15,7 @@ 'read_config_user_file', 'read_config_developer_file', 'load_config_developer', - 'get_variable_details', + 'get_extra_facets', 'get_project_config', 'get_institutes', 'get_activity', diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index 953b0ed130..379a1ae383 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -33,7 +33,7 @@ def _deep_update(dictionary, update): @lru_cache -def _get_variable_details_for_project(project): +def _load_extra_facets(project): config = {} config_paths = [ importlib_files("esmvalcore._config"), @@ -43,6 +43,7 @@ def _get_variable_details_for_project(project): search_path = config_path / "variable_details" config_file_paths = search_path.glob(f"{project.lower()}-*.yml") for config_file_path in sorted(config_file_paths): + logger.info("Loading extra facets from %s", config_file_path) with config_file_path.open() as config_file: config_piece = yaml.safe_load(config_file) if config_piece: @@ -51,9 +52,9 @@ def _get_variable_details_for_project(project): @lru_cache -def get_variable_details(project, 
dataset, mip, short_name): +def get_extra_facets(project, dataset, mip, short_name): """Read configuration files with additional variable information.""" - project_details = _get_variable_details_for_project(project) + project_details = _load_extra_facets(project) return project_details.get(dataset, {}).get(mip, {}).get(short_name, {}) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 99bd6a4441..f89a45be68 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -17,7 +17,7 @@ get_activity, get_institutes, get_project_config, - get_variable_details, + get_extra_facets, ) from ._data_finder import ( get_input_filelist, @@ -100,10 +100,10 @@ def _add_cmor_info(variable, override=False): def _add_project_variable_details(variable): - details = get_variable_details(variable["project"], - variable["dataset"], - variable["mip"], - variable["short_name"]) + details = get_extra_facets(variable["project"], + variable["dataset"], + variable["mip"], + variable["short_name"]) _augment(variable, details) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index bbdb9f8482..42a1240353 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -3,7 +3,7 @@ import inspect import os -from esmvalcore._config import get_variable_details +from esmvalcore._config import get_extra_facets from ..table import CMOR_TABLES @@ -154,7 +154,7 @@ def get_fixes(project, dataset, mip, short_name): """ cmor_table = CMOR_TABLES[project] vardef = cmor_table.get_variable(mip, short_name) - var_mapping = get_variable_details(project, dataset, mip, short_name) + var_mapping = get_extra_facets(project, dataset, mip, short_name) project = project.replace('-', '_').lower() dataset = dataset.replace('-', '_').lower() From 449daa6c6b0ad3649ac9c3b907655f56073d53a9 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Tue, 1 Jun 2021 14:29:55 +0200 Subject: [PATCH 18/49] Pass extra_facets through recipe to allow for easy customization --- 
esmvalcore/_recipe.py | 13 +++++++------ esmvalcore/cmor/_fixes/fix.py | 12 ++++-------- esmvalcore/cmor/fix.py | 18 ++++++++++++------ 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index f89a45be68..bce16df803 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -15,9 +15,9 @@ from ._config import ( TAGS, get_activity, + get_extra_facets, get_institutes, get_project_config, - get_extra_facets, ) from ._data_finder import ( get_input_filelist, @@ -100,11 +100,9 @@ def _add_cmor_info(variable, override=False): def _add_project_variable_details(variable): - details = get_extra_facets(variable["project"], - variable["dataset"], - variable["mip"], - variable["short_name"]) - _augment(variable, details) + extra_facets = get_extra_facets(variable["project"], variable["dataset"], + variable["mip"], variable["short_name"]) + _augment(variable, extra_facets) def _special_name_to_dataset(variable, special_name): @@ -281,6 +279,9 @@ def _get_default_settings(variable, config_user, derive=False): 'short_name': variable['short_name'], 'mip': variable['mip'], } + extra_facets = get_extra_facets(variable['project'], variable['dataset'], + variable['mip'], variable['short_name']) + fix.update(extra_facets) # File fixes fix_dir = os.path.splitext(variable['filename'])[0] + '_fixed' settings['fix_file'] = dict(fix) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 42a1240353..4da40474ba 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -3,7 +3,6 @@ import inspect import os -from esmvalcore._config import get_extra_facets from ..table import CMOR_TABLES @@ -11,7 +10,7 @@ class Fix: """ Base class for dataset fixes. """ - def __init__(self, vardef, var_mapping=None): + def __init__(self, vardef, **extra_facets): """Initialize fix object. 
Parameters @@ -21,9 +20,7 @@ def __init__(self, vardef, var_mapping=None): """ self.vardef = vardef - if var_mapping is None: - var_mapping = {} - self.var_mapping = var_mapping + self.extra_facets = extra_facets def fix_file(self, filepath, output_dir): """ @@ -125,7 +122,7 @@ def __ne__(self, other): return not self.__eq__(other) @staticmethod - def get_fixes(project, dataset, mip, short_name): + def get_fixes(project, dataset, mip, short_name, **extra_facets): """ Get the fixes that must be applied for a given dataset. @@ -154,7 +151,6 @@ def get_fixes(project, dataset, mip, short_name): """ cmor_table = CMOR_TABLES[project] vardef = cmor_table.get_variable(mip, short_name) - var_mapping = get_extra_facets(project, dataset, mip, short_name) project = project.replace('-', '_').lower() dataset = dataset.replace('-', '_').lower() @@ -169,7 +165,7 @@ def get_fixes(project, dataset, mip, short_name): classes = dict((name.lower(), value) for name, value in classes) for fix_name in (short_name, mip.lower(), 'allvars'): try: - fixes.append(classes[fix_name](vardef, var_mapping)) + fixes.append(classes[fix_name](vardef, **extra_facets)) except KeyError: pass except ImportError: diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index 23cc98e4f9..6d79e7ae1a 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -15,7 +15,8 @@ logger = logging.getLogger(__name__) -def fix_file(file, short_name, project, dataset, mip, output_dir): +def fix_file(file, short_name, project, dataset, mip, output_dir, + **extra_facets): """Fix files before ESMValTool can load them. 
This fixes are only for issues that prevent iris from loading the cube or @@ -42,7 +43,8 @@ def fix_file(file, short_name, project, dataset, mip, output_dir): for fix in Fix.get_fixes(project=project, dataset=dataset, mip=mip, - short_name=short_name): + short_name=short_name, + **extra_facets): file = fix.fix_file(file, output_dir) return file @@ -53,7 +55,8 @@ def fix_metadata(cubes, dataset, mip, frequency=None, - check_level=CheckLevels.DEFAULT): + check_level=CheckLevels.DEFAULT, + **extra_facets): """Fix cube metadata if fixes are required and check it anyway. This method collects all the relevant fixes for a given variable, applies @@ -92,7 +95,8 @@ def fix_metadata(cubes, fixes = Fix.get_fixes(project=project, dataset=dataset, mip=mip, - short_name=short_name) + short_name=short_name, + **extra_facets) fixed_cubes = [] by_file = defaultdict(list) for cube in cubes: @@ -147,7 +151,8 @@ def fix_data(cube, dataset, mip, frequency=None, - check_level=CheckLevels.DEFAULT): + check_level=CheckLevels.DEFAULT, + **extra_facets): """Fix cube data if fixes add present and check it anyway. This method assumes that metadata is already fixed and checked. 
@@ -185,7 +190,8 @@ def fix_data(cube, for fix in Fix.get_fixes(project=project, dataset=dataset, mip=mip, - short_name=short_name): + short_name=short_name, + **extra_facets): cube = fix.fix_data(cube) checker = _get_cmor_checker(frequency=frequency, table=project, From 801c97b6fb9ac62e9c698db31f688411ec3e08d1 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Tue, 1 Jun 2021 14:37:25 +0200 Subject: [PATCH 19/49] Pre-commit changes --- esmvalcore/_recipe.py | 41 +++++++++++++++++++---------------- esmvalcore/cmor/_fixes/fix.py | 30 ++++++++----------------- 2 files changed, 31 insertions(+), 40 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index bce16df803..3e5e47c32a 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -149,6 +149,7 @@ def _update_target_levels(variable, variables, settings, config_user): else: variable_data = _get_dataset_info(dataset, variables) filename = _dataset_to_file(variable_data, config_user) + fix_dir = f"{os.path.splitext(variable_data['filename'])[0]}_fixed" settings['extract_levels']['levels'] = get_reference_levels( filename=filename, project=variable_data['project'], @@ -156,8 +157,7 @@ def _update_target_levels(variable, variables, settings, config_user): short_name=variable_data['short_name'], mip=variable_data['mip'], frequency=variable_data['frequency'], - fix_dir=os.path.splitext(variable_data['filename'])[0] + - '_fixed', + fix_dir=fix_dir, ) @@ -364,14 +364,12 @@ def _search_fx_mip(tables, found_mip, variable, fx_info, config_user): found_mip = True fx_info['mip'] = mip fx_info = _add_fxvar_keys(fx_info, variable) - logger.debug( - "For fx variable '%s', found table '%s'", - fx_info['short_name'], mip) + logger.debug("For fx variable '%s', found table '%s'", + fx_info['short_name'], mip) fx_files = _get_input_files(fx_info, config_user)[0] if fx_files: - logger.debug( - "Found fx variables '%s':\n%s", - fx_info['short_name'], pformat(fx_files)) + logger.debug("Found fx variables 
'%s':\n%s", + fx_info['short_name'], pformat(fx_files)) return found_mip, fx_info, fx_files @@ -384,17 +382,17 @@ def _get_fx_files(variable, fx_info, config_user): try: get_project_config(var_project) except ValueError: - raise RecipeError( - f"Requested fx variable '{fx_info['short_name']}' " - f"with parent variable '{variable}' does not have " - f"a '{var_project}' project in config-developer.") + raise RecipeError(f"Requested fx variable '{fx_info['short_name']}' " + f"with parent variable '{variable}' does not have " + f"a '{var_project}' project in config-developer.") project_tables = CMOR_TABLES[var_project].tables # force only the mip declared by user found_mip = False if not fx_info['mip']: - found_mip, fx_info, fx_files = _search_fx_mip( - project_tables, found_mip, variable, fx_info, config_user) + found_mip, fx_info, fx_files = _search_fx_mip(project_tables, + found_mip, variable, + fx_info, config_user) else: fx_cmor = project_tables[fx_info['mip']].get(fx_info['short_name']) if fx_cmor: @@ -410,8 +408,8 @@ def _get_fx_files(variable, fx_info, config_user): # flag a warning if not fx_files: - logger.warning( - "Missing data for fx variable '%s'", fx_info['short_name']) + logger.warning("Missing data for fx variable '%s'", + fx_info['short_name']) # allow for empty lists corrected for by NE masks if fx_files: @@ -463,7 +461,10 @@ def _update_fx_files(step_name, settings, variable, config_user, fx_vars): def _fx_list_to_dict(fx_vars): - """Convert fx list to dictionary. To be deprecated at some point.""" + """Convert fx list to dictionary. + + To be deprecated at some point. 
+ """ user_fx_vars = {} for fx_var in fx_vars: if isinstance(fx_var, dict): @@ -476,6 +477,7 @@ def _fx_list_to_dict(fx_vars): def _update_fx_settings(settings, variable, config_user): """Update fx settings depending on the needed method.""" + # get fx variables either from user defined attribute or fixed def _get_fx_vars_from_attribute(step_settings, step_name): user_fx_vars = step_settings.get('fx_variables') @@ -1027,12 +1029,13 @@ def _initialize_diagnostics(self, raw_diagnostics, raw_datasets): for name, raw_diagnostic in raw_diagnostics.items(): diagnostic = {} diagnostic['name'] = name + additional_datasets = raw_diagnostic.get('additional_datasets', []) + datasets = (raw_datasets + additional_datasets) diagnostic['preprocessor_output'] = \ self._initialize_preprocessor_output( name, raw_diagnostic.get('variables', {}), - raw_datasets + - raw_diagnostic.get('additional_datasets', [])) + datasets) variable_names = tuple(raw_diagnostic.get('variables', {})) diagnostic['scripts'] = self._initialize_scripts( name, raw_diagnostic.get('scripts'), variable_names) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 4da40474ba..95d437bc26 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -1,4 +1,4 @@ -"""Contains the base class for dataset fixes""" +"""Contains the base class for dataset fixes.""" import importlib import inspect import os @@ -7,9 +7,7 @@ class Fix: - """ - Base class for dataset fixes. - """ + """Base class for dataset fixes.""" def __init__(self, vardef, **extra_facets): """Initialize fix object. @@ -17,14 +15,12 @@ def __init__(self, vardef, **extra_facets): ---------- vardef: str CMOR table entry - """ self.vardef = vardef self.extra_facets = extra_facets def fix_file(self, filepath, output_dir): - """ - Apply fixes to the files prior to creating the cube. + """Apply fixes to the files prior to creating the cube. 
Should be used only to fix errors that prevent loading or can not be fixed in the cube (i.e. those related with missing_value @@ -35,7 +31,7 @@ def fix_file(self, filepath, output_dir): filepath: str file to fix output_dir: str - path to the folder to store the fixe files, if required + path to the folder to store the fixed files, if required Returns ------- @@ -43,13 +39,11 @@ def fix_file(self, filepath, output_dir): Path to the corrected file. It can be different from the original filepath if a fix has been applied, but if not it should be the original filepath - """ return filepath def fix_metadata(self, cubes): - """ - Apply fixes to the metadata of the cube. + """Apply fixes to the metadata of the cube. Changes applied here must not require data loading. @@ -64,13 +58,11 @@ def fix_metadata(self, cubes): ------- iris.cube.CubeList Fixed cubes. They can be different instances. - """ return cubes def get_cube_from_list(self, cubes, short_name=None): - """ - Get a cube from the list with a given short name. + """Get a cube from the list with a given short name. Parameters ---------- @@ -97,8 +89,7 @@ def get_cube_from_list(self, cubes, short_name=None): raise Exception('Cube for variable "{}" not found'.format(short_name)) def fix_data(self, cube): - """ - Apply fixes to the data of the cube. + """Apply fixes to the data of the cube. These fixes should be applied before checking the data. @@ -111,7 +102,6 @@ def fix_data(self, cube): ------- iris.cube.Cube Fixed cube. It can be a difference instance. - """ return cube @@ -123,8 +113,7 @@ def __ne__(self, other): @staticmethod def get_fixes(project, dataset, mip, short_name, **extra_facets): - """ - Get the fixes that must be applied for a given dataset. + """Get the fixes that must be applied for a given dataset. 
It will look for them at the module esmvalcore.cmor._fixes.PROJECT in the file DATASET, and get @@ -174,8 +163,7 @@ def get_fixes(project, dataset, mip, short_name, **extra_facets): @staticmethod def get_fixed_filepath(output_dir, filepath): - """ - Get the filepath for the fixed file + """Get the filepath for the fixed file. Parameters ---------- From 0624c7d6ac35701616f78764c8999ced30eeef90 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Tue, 1 Jun 2021 15:58:05 +0200 Subject: [PATCH 20/49] Pass extra_facets also to fx variables --- esmvalcore/_recipe.py | 1 + esmvalcore/preprocessor/_ancillary_vars.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 3e5e47c32a..4ac94721e8 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -335,6 +335,7 @@ def _get_default_settings(variable, config_user, derive=False): 'fx_variables': {}, 'check_level': config_user.get('check_level', CheckLevels.DEFAULT) } + settings['add_fx_variables'].update(extra_facets) settings['remove_fx_variables'] = {} return settings diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py index 53a53d529a..8b3f5ef54c 100644 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -26,7 +26,7 @@ def _load_fx(var_cube, fx_info, check_level): loaded_cube = fix_metadata(loaded_cube, short_name=short_name, project=project, dataset=dataset, mip=mip, frequency=freq, - check_level=check_level) + check_level=check_level, **extra_facets) fx_cubes.append(loaded_cube[0]) fx_cube = concatenate(fx_cubes) @@ -40,7 +40,7 @@ def _load_fx(var_cube, fx_info, check_level): fx_cube = fix_data(fx_cube, short_name=short_name, project=project, dataset=dataset, mip=mip, frequency=freq, - check_level=check_level) + check_level=check_level, **extra_facets) fx_cube = cmor_check_data(fx_cube, cmor_table=project, mip=mip, short_name=fx_cube.var_name, 
frequency=freq, @@ -141,7 +141,7 @@ def add_ancillary_variable(cube, fx_cube): fx_cube.var_name, cube.var_name) -def add_fx_variables(cube, fx_variables, check_level): +def add_fx_variables(cube, fx_variables, check_level, **extra_facets): """ Load requested fx files, check with CMOR standards and add the fx variables as cell measures or ancillary variables in From 3ff737635e863dd27ccf36f4c8e9a9ad18049cd6 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Tue, 1 Jun 2021 15:58:47 +0200 Subject: [PATCH 21/49] Pre-commit changes --- esmvalcore/preprocessor/_ancillary_vars.py | 64 ++++++++++++---------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py index 8b3f5ef54c..96cf11041a 100644 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -1,13 +1,13 @@ """Preprocessor functions for ancillary variables and cell measures.""" import logging -import iris import dask.array as da +import iris -from esmvalcore.preprocessor._io import load, concatenate_callback, concatenate -from esmvalcore.cmor.fix import fix_metadata, fix_data -from esmvalcore.cmor.check import cmor_check_metadata, cmor_check_data +from esmvalcore.cmor.check import cmor_check_data, cmor_check_metadata +from esmvalcore.cmor.fix import fix_data, fix_metadata +from esmvalcore.preprocessor._io import concatenate, concatenate_callback, load logger = logging.getLogger(__name__) @@ -23,10 +23,14 @@ def _load_fx(var_cube, fx_info, check_level): dataset = fx_info['dataset'] mip = fx_info['mip'] freq = fx_info['frequency'] - loaded_cube = fix_metadata(loaded_cube, short_name=short_name, - project=project, dataset=dataset, - mip=mip, frequency=freq, - check_level=check_level, **extra_facets) + loaded_cube = fix_metadata(loaded_cube, + short_name=short_name, + project=project, + dataset=dataset, + mip=mip, + frequency=freq, + check_level=check_level, + **extra_facets) 
fx_cubes.append(loaded_cube[0]) fx_cube = concatenate(fx_cubes) @@ -38,12 +42,20 @@ def _load_fx(var_cube, fx_info, check_level): short_name=short_name, frequency=freq, check_level=check_level) - fx_cube = fix_data(fx_cube, short_name=short_name, project=project, - dataset=dataset, mip=mip, frequency=freq, - check_level=check_level, **extra_facets) - - fx_cube = cmor_check_data(fx_cube, cmor_table=project, mip=mip, - short_name=fx_cube.var_name, frequency=freq, + fx_cube = fix_data(fx_cube, + short_name=short_name, + project=project, + dataset=dataset, + mip=mip, + frequency=freq, + check_level=check_level, + **extra_facets) + + fx_cube = cmor_check_data(fx_cube, + cmor_table=project, + mip=mip, + short_name=fx_cube.var_name, + frequency=freq, check_level=check_level) return fx_cube @@ -61,9 +73,8 @@ def _is_fx_broadcastable(fx_cube, cube): def add_cell_measure(cube, fx_cube, measure): - """ - Broadcast fx_cube and add it as a cell_measure in - the cube containing the data. + """Broadcast fx_cube and add it as a cell_measure in the cube containing + the data. Parameters ---------- @@ -102,14 +113,13 @@ def add_cell_measure(cube, fx_cube, measure): var_name=fx_cube.var_name, attributes=fx_cube.attributes) cube.add_cell_measure(measure, range(0, measure.ndim)) - logger.debug('Added %s as cell measure in cube of %s.', - fx_cube.var_name, cube.var_name) + logger.debug('Added %s as cell measure in cube of %s.', fx_cube.var_name, + cube.var_name) def add_ancillary_variable(cube, fx_cube): - """ - Broadcast fx_cube and add it as an ancillary_variable in - the cube containing the data. + """Broadcast fx_cube and add it as an ancillary_variable in the cube + containing the data. 
Parameters ---------- @@ -142,10 +152,9 @@ def add_ancillary_variable(cube, fx_cube): def add_fx_variables(cube, fx_variables, check_level, **extra_facets): - """ - Load requested fx files, check with CMOR standards and add the - fx variables as cell measures or ancillary variables in - the cube containing the data. + """Load requested fx files, check with CMOR standards and add the fx + variables as cell measures or ancillary variables in the cube containing + the data. Parameters ---------- @@ -189,8 +198,7 @@ def add_fx_variables(cube, fx_variables, check_level, **extra_facets): def remove_fx_variables(cube): - """ - Remove fx variables present as cell measures or ancillary variables in + """Remove fx variables present as cell measures or ancillary variables in the cube containing the data. Parameters From 19bd7fb78c71dd80461d39c92557b611f940bcb2 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Thu, 3 Jun 2021 14:11:44 +0200 Subject: [PATCH 22/49] Rename for consistency --- esmvalcore/_recipe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 4ac94721e8..abe5cf2425 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -99,7 +99,7 @@ def _add_cmor_info(variable, override=False): check.variable(variable, required_keys=cmor_keys) -def _add_project_variable_details(variable): +def _add_extra_facets(variable): extra_facets = get_extra_facets(variable["project"], variable["dataset"], variable["mip"], variable["short_name"]) _augment(variable, extra_facets) @@ -1129,7 +1129,7 @@ def _initialize_variables(self, raw_variable, raw_datasets): if 'fx' not in raw_variable.get('mip', ''): required_keys.update({'start_year', 'end_year'}) for variable in variables: - _add_project_variable_details(variable) + _add_extra_facets(variable) if 'institute' not in variable: institute = get_institutes(variable) if institute: From 857b726c146608da419fef8ee27b653dbc6cb571 Mon Sep 17 00:00:00 2001 
From: Klaus Zimmermann Date: Thu, 3 Jun 2021 14:12:08 +0200 Subject: [PATCH 23/49] Pre-commit changes --- esmvalcore/_recipe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index abe5cf2425..979d5d7dac 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1060,8 +1060,8 @@ def _initialize_datasets(raw_datasets): @staticmethod def _expand_tag(variables, input_tag): - """ - Expand tags such as ensemble members or stardates to multiple datasets. + """Expand tags such as ensemble members or stardates to multiple + datasets. Expansion only supports ensembles defined as strings, not lists. """ From 8037ec674d845e5b4bb5038d6b4b9ea60790de82 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Thu, 3 Jun 2021 15:18:47 +0200 Subject: [PATCH 24/49] Add extra_facets_dir option to config_user.yml --- esmvalcore/_config/_config.py | 21 ++++++++++++++------- esmvalcore/_recipe.py | 10 ++++++---- esmvalcore/config-user.yml | 2 ++ 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index 379a1ae383..53680ca796 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -33,15 +33,15 @@ def _deep_update(dictionary, update): @lru_cache -def _load_extra_facets(project): +def _load_extra_facets(project, extra_facets_dir): config = {} config_paths = [ - importlib_files("esmvalcore._config"), - Path.home() / ".esmvaltool", + importlib_files("esmvalcore._config") / "extra_facets", + Path.home() / ".esmvaltool" / "extra_facets", ] + config_paths.extend([Path(p) for p in extra_facets_dir]) for config_path in config_paths: - search_path = config_path / "variable_details" - config_file_paths = search_path.glob(f"{project.lower()}-*.yml") + config_file_paths = config_path.glob(f"{project.lower()}-*.yml") for config_file_path in sorted(config_file_paths): logger.info("Loading extra facets from %s", config_file_path) 
with config_file_path.open() as config_file: @@ -52,9 +52,9 @@ def _load_extra_facets(project): @lru_cache -def get_extra_facets(project, dataset, mip, short_name): +def get_extra_facets(project, dataset, mip, short_name, extra_facets_dir): """Read configuration files with additional variable information.""" - project_details = _load_extra_facets(project) + project_details = _load_extra_facets(project, extra_facets_dir) return project_details.get(dataset, {}).get(mip, {}).get(short_name, {}) @@ -105,6 +105,7 @@ def read_config_user_file(config_file, folder_name, options=None): 'output_file_type': 'png', 'output_dir': 'esmvaltool_output', 'auxiliary_data_dir': 'auxiliary_data', + 'extra_facets_dir': tuple(), 'save_intermediary_cubes': False, 'remove_preproc_dir': True, 'max_parallel_tasks': None, @@ -127,6 +128,12 @@ def read_config_user_file(config_file, folder_name, options=None): cfg['output_dir'] = _normalize_path(cfg['output_dir']) cfg['auxiliary_data_dir'] = _normalize_path(cfg['auxiliary_data_dir']) + if isinstance(cfg['extra_facets_dir'], str): + cfg['extra_facets_dir'] = (_normalize_path(cfg['extra_facets_dir']), ) + else: + cfg['extra_facets_dir'] = tuple( + _normalize_path(p) for p in cfg['extra_facets_dir']) + cfg['config_developer_file'] = _normalize_path( cfg['config_developer_file']) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 979d5d7dac..84eb544b65 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -99,9 +99,10 @@ def _add_cmor_info(variable, override=False): check.variable(variable, required_keys=cmor_keys) -def _add_extra_facets(variable): +def _add_extra_facets(variable, extra_facets_dir): extra_facets = get_extra_facets(variable["project"], variable["dataset"], - variable["mip"], variable["short_name"]) + variable["mip"], variable["short_name"], + extra_facets_dir) _augment(variable, extra_facets) @@ -280,7 +281,8 @@ def _get_default_settings(variable, config_user, derive=False): 'mip': variable['mip'], } 
extra_facets = get_extra_facets(variable['project'], variable['dataset'], - variable['mip'], variable['short_name']) + variable['mip'], variable['short_name'], + config_user['extra_facets_dir']) fix.update(extra_facets) # File fixes fix_dir = os.path.splitext(variable['filename'])[0] + '_fixed' @@ -1129,7 +1131,7 @@ def _initialize_variables(self, raw_variable, raw_datasets): if 'fx' not in raw_variable.get('mip', ''): required_keys.update({'start_year', 'end_year'}) for variable in variables: - _add_extra_facets(variable) + _add_extra_facets(variable, self._cfg['extra_facets_dir']) if 'institute' not in variable: institute = get_institutes(variable) if institute: diff --git a/esmvalcore/config-user.yml b/esmvalcore/config-user.yml index 257b358b06..a8bb639f9e 100644 --- a/esmvalcore/config-user.yml +++ b/esmvalcore/config-user.yml @@ -12,6 +12,8 @@ output_file_type: png output_dir: ./esmvaltool_output # Auxiliary data directory (used for some additional datasets) auxiliary_data_dir: ./auxiliary_data +# Extra facets directory +extra_facets_dir: [] # Use netCDF compression true/[false] compress_netcdf: false # Save intermediary cubes in the preprocessor true/[false] From b26362c0841ebcf1a678764e63921cb77177f0a1 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Thu, 3 Jun 2021 15:32:04 +0200 Subject: [PATCH 25/49] Add validator for new config option to experimental interface --- .../experimental/config/_config_validators.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/esmvalcore/experimental/config/_config_validators.py b/esmvalcore/experimental/config/_config_validators.py index d83a26f331..7725bffd52 100644 --- a/esmvalcore/experimental/config/_config_validators.py +++ b/esmvalcore/experimental/config/_config_validators.py @@ -111,6 +111,59 @@ def func(inp): return func +@lru_cache() +def _tuplify_validator(scalar_validator, + allow_stringlist=False, + *, + n_items=None, + docstring=None): + """Apply the validator to a list.""" + def 
func(inp): + if isinstance(inp, str): + try: + inp = tuple( + scalar_validator(val.strip()) for val in inp.split(',') + if val.strip()) + except Exception: + if allow_stringlist: + # Sometimes, a list of colors might be a single string + # of single-letter colornames. So give that a shot. + inp = tuple( + scalar_validator(val.strip()) for val in inp + if val.strip()) + else: + raise + # Allow any ordered sequence type -- generators, np.ndarray, pd.Series + # -- but not sets, whose iteration order is non-deterministic. + elif isinstance(inp, + Iterable) and not isinstance(inp, (set, frozenset)): + # The condition on this tuple comprehension will preserve the + # behavior of filtering out any empty strings (behavior was + # from the original validate_stringlist()), while allowing + # any non-string/text scalar values such as numbers and arrays. + inp = tuple( + scalar_validator(val) for val in inp + if not isinstance(val, str) or val) + else: + raise ValidationError( + f"Expected str or other non-set iterable, but got {inp}") + if n_items is not None and len(inp) != n_items: + raise ValidationError(f"Expected {n_items} values, " + f"but there are {len(inp)} values in {inp}") + return inp + + try: + func.__name__ = "{}tuple".format(scalar_validator.__name__) + except AttributeError: # class instance. + func.__name__ = "{}Tuple".format(type(scalar_validator).__name__) + func.__qualname__ = func.__qualname__.rsplit(".", + 1)[0] + "." 
+ func.__name__ + if docstring is not None: + docstring = scalar_validator.__doc__ + func.__doc__ = docstring + return func + + def validate_bool(value, allow_none=False): """Check if the value can be evaluate as a boolean.""" if (value is None) and allow_none: @@ -166,6 +219,9 @@ def chained(value): validate_pathlist = _listify_validator(validate_path, docstring='Return a list of paths.') +validate_pathtuple = _tuplify_validator(validate_path, + docstring='Return a tuple of paths.') + validate_int_positive = _chain_validator(validate_int, validate_positive) validate_int_positive_or_none = _make_type_validator(validate_int_positive, allow_none=True) @@ -259,6 +315,7 @@ def deprecate(func, variable, version: str = None): 'exit_on_warning': validate_bool, 'output_dir': validate_path, 'auxiliary_data_dir': validate_path, + 'extra_facets_dir': validate_pathtuple, 'compress_netcdf': validate_bool, 'save_intermediary_cubes': validate_bool, 'remove_preproc_dir': validate_bool, From 2016c8bc62494f9fa20b1a3ab8e2c040fcdad8e9 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Thu, 3 Jun 2021 15:46:45 +0200 Subject: [PATCH 26/49] Add mapping_key to get_cube_from_list for fixes --- esmvalcore/cmor/_fixes/fix.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 95d437bc26..5a432802d7 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -61,7 +61,7 @@ def fix_metadata(self, cubes): """ return cubes - def get_cube_from_list(self, cubes, short_name=None): + def get_cube_from_list(self, cubes, short_name=None, mapping_key=None): """Get a cube from the list with a given short name. 
Parameters @@ -83,6 +83,7 @@ def get_cube_from_list(self, cubes, short_name=None): """ if short_name is None: short_name = self.vardef.short_name + short_name = self.extra_facets.get(mapping_key, short_name) for cube in cubes: if cube.var_name == short_name: return cube From f8db25e9f85f8a0217edefb89aadcc149fe12d5e Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Fri, 4 Jun 2021 11:10:19 +0200 Subject: [PATCH 27/49] Simplify generation of tuple validator --- .../experimental/config/_config_validators.py | 76 +++---------------- 1 file changed, 11 insertions(+), 65 deletions(-) diff --git a/esmvalcore/experimental/config/_config_validators.py b/esmvalcore/experimental/config/_config_validators.py index 7725bffd52..4c1492c42f 100644 --- a/esmvalcore/experimental/config/_config_validators.py +++ b/esmvalcore/experimental/config/_config_validators.py @@ -60,75 +60,20 @@ def _listify_validator(scalar_validator, allow_stringlist=False, *, n_items=None, - docstring=None): + docstring=None, + return_type=list): """Apply the validator to a list.""" def func(inp): if isinstance(inp, str): try: - inp = [ - scalar_validator(val.strip()) for val in inp.split(',') - if val.strip() - ] - except Exception: - if allow_stringlist: - # Sometimes, a list of colors might be a single string - # of single-letter colornames. So give that a shot. - inp = [ - scalar_validator(val.strip()) for val in inp - if val.strip() - ] - else: - raise - # Allow any ordered sequence type -- generators, np.ndarray, pd.Series - # -- but not sets, whose iteration order is non-deterministic. - elif isinstance(inp, - Iterable) and not isinstance(inp, (set, frozenset)): - # The condition on this list comprehension will preserve the - # behavior of filtering out any empty strings (behavior was - # from the original validate_stringlist()), while allowing - # any non-string/text scalar values such as numbers and arrays. 
- inp = [ - scalar_validator(val) for val in inp - if not isinstance(val, str) or val - ] - else: - raise ValidationError( - f"Expected str or other non-set iterable, but got {inp}") - if n_items is not None and len(inp) != n_items: - raise ValidationError(f"Expected {n_items} values, " - f"but there are {len(inp)} values in {inp}") - return inp - - try: - func.__name__ = "{}list".format(scalar_validator.__name__) - except AttributeError: # class instance. - func.__name__ = "{}List".format(type(scalar_validator).__name__) - func.__qualname__ = func.__qualname__.rsplit(".", - 1)[0] + "." + func.__name__ - if docstring is not None: - docstring = scalar_validator.__doc__ - func.__doc__ = docstring - return func - - -@lru_cache() -def _tuplify_validator(scalar_validator, - allow_stringlist=False, - *, - n_items=None, - docstring=None): - """Apply the validator to a list.""" - def func(inp): - if isinstance(inp, str): - try: - inp = tuple( + inp = return_type( scalar_validator(val.strip()) for val in inp.split(',') if val.strip()) except Exception: if allow_stringlist: # Sometimes, a list of colors might be a single string # of single-letter colornames. So give that a shot. - inp = tuple( + inp = return_type( scalar_validator(val.strip()) for val in inp if val.strip()) else: @@ -137,11 +82,11 @@ def func(inp): # -- but not sets, whose iteration order is non-deterministic. elif isinstance(inp, Iterable) and not isinstance(inp, (set, frozenset)): - # The condition on this tuple comprehension will preserve the + # The condition on this list comprehension will preserve the # behavior of filtering out any empty strings (behavior was # from the original validate_stringlist()), while allowing # any non-string/text scalar values such as numbers and arrays. 
- inp = tuple( + inp = return_type( scalar_validator(val) for val in inp if not isinstance(val, str) or val) else: @@ -153,9 +98,9 @@ def func(inp): return inp try: - func.__name__ = "{}tuple".format(scalar_validator.__name__) + func.__name__ = "{}list".format(scalar_validator.__name__) except AttributeError: # class instance. - func.__name__ = "{}Tuple".format(type(scalar_validator).__name__) + func.__name__ = "{}List".format(type(scalar_validator).__name__) func.__qualname__ = func.__qualname__.rsplit(".", 1)[0] + "." + func.__name__ if docstring is not None: @@ -219,8 +164,9 @@ def chained(value): validate_pathlist = _listify_validator(validate_path, docstring='Return a list of paths.') -validate_pathtuple = _tuplify_validator(validate_path, - docstring='Return a tuple of paths.') +validate_pathtuple = _listify_validator(validate_path, + docstring='Return a tuple of paths.', + return_type=tuple) validate_int_positive = _chain_validator(validate_int, validate_positive) validate_int_positive_or_none = _make_type_validator(validate_int_positive, From 5cf38c391d089e4c825a587476538dd8b9778ac3 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Fri, 4 Jun 2021 14:50:32 +0200 Subject: [PATCH 28/49] Pass entire variable dict to fix and add_fx_variables instead of only extra_facets --- esmvalcore/_recipe.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 84eb544b65..b8a00e4e4c 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -274,16 +274,7 @@ def _get_default_settings(variable, config_user, derive=False): settings['concatenate'] = {} # Configure fixes - fix = { - 'project': variable['project'], - 'dataset': variable['dataset'], - 'short_name': variable['short_name'], - 'mip': variable['mip'], - } - extra_facets = get_extra_facets(variable['project'], variable['dataset'], - variable['mip'], variable['short_name'], - config_user['extra_facets_dir']) - 
fix.update(extra_facets) + fix = deepcopy(variable) # File fixes fix_dir = os.path.splitext(variable['filename'])[0] + '_fixed' settings['fix_file'] = dict(fix) @@ -337,7 +328,7 @@ def _get_default_settings(variable, config_user, derive=False): 'fx_variables': {}, 'check_level': config_user.get('check_level', CheckLevels.DEFAULT) } - settings['add_fx_variables'].update(extra_facets) + settings['add_fx_variables'].update(variable) settings['remove_fx_variables'] = {} return settings From 1bb84e6b0d883b70918d5715c341d596aa03039e Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Fri, 4 Jun 2021 14:51:12 +0200 Subject: [PATCH 29/49] Don't check for exact argument match if the preprocessor takes *args or **kwargs --- esmvalcore/preprocessor/__init__.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 9fe86d140e..43940f3c4d 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -212,13 +212,14 @@ def check_preprocessor_settings(settings): function = function = globals()[step] argspec = inspect.getfullargspec(function) args = argspec.args[1:] - # Check for invalid arguments - invalid_args = set(settings[step]) - set(args) - if invalid_args: - raise ValueError( - "Invalid argument(s): {} encountered for preprocessor " - "function {}. \nValid arguments are: [{}]".format( - ', '.join(invalid_args), step, ', '.join(args))) + if not (argspec.varargs or argspec.varkw): + # Check for invalid arguments + invalid_args = set(settings[step]) - set(args) + if invalid_args: + raise ValueError( + "Invalid argument(s): {} encountered for preprocessor " + "function {}. 
\nValid arguments are: [{}]".format( + ', '.join(invalid_args), step, ', '.join(args))) # Check for missing arguments defaults = argspec.defaults From 7f601516485bf06ac1cbcda301508fb5a1061b70 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Fri, 4 Jun 2021 14:55:43 +0200 Subject: [PATCH 30/49] Fix recipe tests to check agains new, more comprehensive dicts --- tests/integration/test_recipe.py | 174 +++++++++++++++++++++++++++---- 1 file changed, 155 insertions(+), 19 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 8d2f932e4e..5466d49dce 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -115,35 +115,86 @@ def create_test_file(filename, tracking_id=None): iris.save(cube, filename) -def _get_default_settings_for_chl(fix_dir, save_filename): +def _get_default_settings_for_chl(fix_dir, save_filename, preprocessor): """Get default preprocessor settings for chl.""" + standard_name = ('mass_concentration_of_phytoplankton_' + 'expressed_as_chlorophyll_in_sea_water') defaults = { 'load': { 'callback': concatenate_callback, }, 'concatenate': {}, 'fix_file': { - 'project': 'CMIP5', + 'alias': 'CanESM2', 'dataset': 'CanESM2', - 'short_name': 'chl', + 'diagnostic': 'diagnostic_name', + 'end_year': 2005, + 'ensemble': 'r1i1p1', + 'exp': 'historical', + 'filename': fix_dir.replace('_fixed', '.nc'), + 'frequency': 'yr', + 'institute': ['CCCma'], + 'long_name': 'Total Chlorophyll Mass Concentration', 'mip': 'Oyr', + 'modeling_realm': ['ocnBgchem'], + 'original_short_name': 'chl', 'output_dir': fix_dir, + 'preprocessor': preprocessor, + 'project': 'CMIP5', + 'recipe_dataset_index': 0, + 'short_name': 'chl', + 'standard_name': standard_name, + 'start_year': 2000, + 'units': 'kg m-3', + 'variable_group': 'chl', }, 'fix_data': { 'check_level': CheckLevels.DEFAULT, - 'project': 'CMIP5', + 'alias': 'CanESM2', 'dataset': 'CanESM2', - 'short_name': 'chl', - 'mip': 'Oyr', + 'diagnostic': 
'diagnostic_name', + 'end_year': 2005, + 'ensemble': 'r1i1p1', + 'exp': 'historical', + 'filename': fix_dir.replace('_fixed', '.nc'), 'frequency': 'yr', + 'institute': ['CCCma'], + 'long_name': 'Total Chlorophyll Mass Concentration', + 'mip': 'Oyr', + 'modeling_realm': ['ocnBgchem'], + 'original_short_name': 'chl', + 'preprocessor': preprocessor, + 'project': 'CMIP5', + 'recipe_dataset_index': 0, + 'short_name': 'chl', + 'standard_name': standard_name, + 'start_year': 2000, + 'units': 'kg m-3', + 'variable_group': 'chl', }, 'fix_metadata': { 'check_level': CheckLevels.DEFAULT, - 'project': 'CMIP5', + 'alias': 'CanESM2', 'dataset': 'CanESM2', - 'short_name': 'chl', - 'mip': 'Oyr', + 'diagnostic': 'diagnostic_name', + 'end_year': 2005, + 'ensemble': 'r1i1p1', + 'exp': 'historical', + 'filename': fix_dir.replace('_fixed', '.nc'), 'frequency': 'yr', + 'institute': ['CCCma'], + 'long_name': 'Total Chlorophyll Mass Concentration', + 'mip': 'Oyr', + 'modeling_realm': ['ocnBgchem'], + 'original_short_name': 'chl', + 'preprocessor': preprocessor, + 'project': 'CMIP5', + 'recipe_dataset_index': 0, + 'short_name': 'chl', + 'standard_name': standard_name, + 'start_year': 2000, + 'units': 'kg m-3', + 'variable_group': 'chl', }, 'clip_start_end_year': { 'start_year': 2000, @@ -166,6 +217,27 @@ def _get_default_settings_for_chl(fix_dir, save_filename): 'add_fx_variables': { 'fx_variables': {}, 'check_level': CheckLevels.DEFAULT, + 'alias': 'CanESM2', + 'dataset': 'CanESM2', + 'diagnostic': 'diagnostic_name', + 'end_year': 2005, + 'ensemble': 'r1i1p1', + 'exp': 'historical', + 'filename': fix_dir.replace('_fixed', '.nc'), + 'frequency': 'yr', + 'institute': ['CCCma'], + 'long_name': 'Total Chlorophyll Mass Concentration', + 'mip': 'Oyr', + 'modeling_realm': ['ocnBgchem'], + 'original_short_name': 'chl', + 'preprocessor': preprocessor, + 'project': 'CMIP5', + 'recipe_dataset_index': 0, + 'short_name': 'chl', + 'standard_name': standard_name, + 'start_year': 2000, + 'units': 'kg 
m-3', + 'variable_group': 'chl', }, 'remove_fx_variables': {}, 'cleanup': { @@ -475,7 +547,8 @@ def test_default_preprocessor(tmp_path, patched_datafinder, config_user): fix_dir = os.path.join( preproc_dir, 'CMIP5_CanESM2_Oyr_historical_r1i1p1_chl_2000-2005_fixed') - defaults = _get_default_settings_for_chl(fix_dir, product.filename) + defaults = _get_default_settings_for_chl(fix_dir, product.filename, + 'default') assert product.settings == defaults @@ -515,7 +588,8 @@ def test_default_preprocessor_custom_order(tmp_path, patched_datafinder, fix_dir = os.path.join( preproc_dir, 'CMIP5_CanESM2_Oyr_historical_r1i1p1_chl_2000-2005_fixed') - defaults = _get_default_settings_for_chl(fix_dir, product.filename) + defaults = _get_default_settings_for_chl(fix_dir, product.filename, + 'default_custom_order') assert product.settings == defaults @@ -553,27 +627,70 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): }, 'concatenate': {}, 'fix_file': { - 'project': 'CMIP5', + 'alias': 'CanESM2', 'dataset': 'CanESM2', - 'short_name': 'sftlf', + 'diagnostic': 'diagnostic_name', + 'ensemble': 'r0i0p0', + 'exp': 'historical', + 'filename': fix_dir.replace('_fixed', '.nc'), + 'frequency': 'fx', + 'institute': ['CCCma'], + 'long_name': 'Land Area Fraction', 'mip': 'fx', + 'modeling_realm': ['atmos'], + 'original_short_name': 'sftlf', 'output_dir': fix_dir, + 'preprocessor': 'default', + 'project': 'CMIP5', + 'recipe_dataset_index': 0, + 'short_name': 'sftlf', + 'standard_name': 'land_area_fraction', + 'units': '%', + 'variable_group': 'sftlf' }, 'fix_data': { 'check_level': CheckLevels.DEFAULT, - 'project': 'CMIP5', + 'alias': 'CanESM2', 'dataset': 'CanESM2', - 'short_name': 'sftlf', - 'mip': 'fx', + 'diagnostic': 'diagnostic_name', + 'ensemble': 'r0i0p0', + 'exp': 'historical', + 'filename': fix_dir.replace('_fixed', '.nc'), 'frequency': 'fx', + 'institute': ['CCCma'], + 'long_name': 'Land Area Fraction', + 'mip': 'fx', + 'modeling_realm': ['atmos'], + 
'original_short_name': 'sftlf', + 'preprocessor': 'default', + 'project': 'CMIP5', + 'recipe_dataset_index': 0, + 'short_name': 'sftlf', + 'standard_name': 'land_area_fraction', + 'units': '%', + 'variable_group': 'sftlf' }, 'fix_metadata': { 'check_level': CheckLevels.DEFAULT, - 'project': 'CMIP5', + 'alias': 'CanESM2', 'dataset': 'CanESM2', - 'short_name': 'sftlf', - 'mip': 'fx', + 'diagnostic': 'diagnostic_name', + 'ensemble': 'r0i0p0', + 'exp': 'historical', + 'filename': fix_dir.replace('_fixed', '.nc'), 'frequency': 'fx', + 'institute': ['CCCma'], + 'long_name': 'Land Area Fraction', + 'mip': 'fx', + 'modeling_realm': ['atmos'], + 'original_short_name': 'sftlf', + 'preprocessor': 'default', + 'project': 'CMIP5', + 'recipe_dataset_index': 0, + 'short_name': 'sftlf', + 'standard_name': 'land_area_fraction', + 'units': '%', + 'variable_group': 'sftlf' }, 'cmor_check_metadata': { 'check_level': CheckLevels.DEFAULT, @@ -592,6 +709,25 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'add_fx_variables': { 'fx_variables': {}, 'check_level': CheckLevels.DEFAULT, + 'alias': 'CanESM2', + 'dataset': 'CanESM2', + 'diagnostic': 'diagnostic_name', + 'ensemble': 'r0i0p0', + 'exp': 'historical', + 'filename': fix_dir.replace('_fixed', '.nc'), + 'frequency': 'fx', + 'institute': ['CCCma'], + 'long_name': 'Land Area Fraction', + 'mip': 'fx', + 'modeling_realm': ['atmos'], + 'original_short_name': 'sftlf', + 'preprocessor': 'default', + 'project': 'CMIP5', + 'recipe_dataset_index': 0, + 'short_name': 'sftlf', + 'standard_name': 'land_area_fraction', + 'units': '%', + 'variable_group': 'sftlf' }, 'remove_fx_variables': {}, 'cleanup': { From 384d249e7e4e7af829da55316dc301122eef1525 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Fri, 4 Jun 2021 15:50:58 +0200 Subject: [PATCH 31/49] Remove extra_facets_dir from example config-user.yml file This also fixes the related test for the experimental config system. 
--- esmvalcore/config-user.yml | 2 -- tests/sample_data/experimental/test_run_recipe.py | 10 ++++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/esmvalcore/config-user.yml b/esmvalcore/config-user.yml index a8bb639f9e..257b358b06 100644 --- a/esmvalcore/config-user.yml +++ b/esmvalcore/config-user.yml @@ -12,8 +12,6 @@ output_file_type: png output_dir: ./esmvaltool_output # Auxiliary data directory (used for some additional datasets) auxiliary_data_dir: ./auxiliary_data -# Extra facets directory -extra_facets_dir: [] # Use netCDF compression true/[false] compress_netcdf: false # Save intermediary cubes in the preprocessor true/[false] diff --git a/tests/sample_data/experimental/test_run_recipe.py b/tests/sample_data/experimental/test_run_recipe.py index d5efc210a5..e78eae94d4 100644 --- a/tests/sample_data/experimental/test_run_recipe.py +++ b/tests/sample_data/experimental/test_run_recipe.py @@ -53,7 +53,10 @@ def test_run_recipe(task, recipe, tmp_path): assert isinstance(recipe, Recipe) assert isinstance(recipe._repr_html_(), str) - output = recipe.run(task=task) + session = CFG.start_session(recipe.path.stem) + session['extra_facets_dir'] = [] + + output = recipe.run(task=task, session=session) assert len(output) > 0 assert isinstance(output, RecipeOutput) @@ -85,6 +88,9 @@ def test_run_recipe_diagnostic_failing(recipe, tmp_path): CFG['output_dir'] = tmp_path + session = CFG.start_session(recipe.path.stem) + session['extra_facets_dir'] = [] + with pytest.raises(RecipeError): task = 'example/non-existant' - _ = recipe.run(task) + _ = recipe.run(task, session) From 6dae6ad1d761bb414de37a518c9692ed6c374063 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Fri, 4 Jun 2021 16:39:42 +0200 Subject: [PATCH 32/49] Add basic documentation --- doc/develop/extra_facets.rst | 61 ++++++++++++++++++++++++++++++++++++ doc/develop/index.rst | 1 + 2 files changed, 62 insertions(+) create mode 100644 doc/develop/extra_facets.rst diff --git 
a/doc/develop/extra_facets.rst b/doc/develop/extra_facets.rst new file mode 100644 index 0000000000..5506319f0d --- /dev/null +++ b/doc/develop/extra_facets.rst @@ -0,0 +1,61 @@ +.. _extra_facets: + +************ +Extra Facets +************ + +Sometimes it is useful to provide extra information for the loading of data, +particularly in the case of native model data, or observational or other data, +that generally follows the established standards, but is not part of the big +supported projects like CMIP, CORDEX, obs4MIPs. + +To support this, we provide the extra facets facilities. Facets are the +key-value pairs described in :ref:`Datasets`. Extra facets allows for the +addition of more details per project, dataset, mip table, and variable name. + +More precisely, one can provide this information in an extra yaml file, named +`{project}-something.yml`, where `{project}` corresponds to the project as used +by ESMValTool in :ref:`Datasets` and "something" is arbitrary. + +Format of the extra facets files +================================ +The extra facets are given in a yaml file, whose file name identifies the +project. Inside the file there is a hierarchy of nested dictionaries with the +following levels. At the top there is the `dataset` facet, followed by the `mip` +table, and finally the `short_name`. The leaf dictionary placed here gives the +extra facets that will be made available to data finder and the fix +infrastructure. The following example illustrates the concept. + +.. code-block:: yaml + :caption: Extra facet example file `native6-era5.yml` + + era5: + Amon: + tas: {file_var_name: "t2m", name_in_filename: "2m_temperature"} + + +Location of the extra facets files +================================== +Extra facets files can be placed in several different places. When we use them +to support a particular use-case within the ESMValTool project, they will be +provided in the sub-folder `extra_facets` inside the package +`esmvalcore._config`. 
If they are used from the user side, they can be either +placed in `~/.esmvaltool/extra_facets` or in any other directory of the users +choosing. In that case this directory must be added to the `config-user.yml` +file under the `extra_facets_dir` setting, which can take a single directory or +a list of directories. + +The order in which the directories are searched is + +1. The internal directory `esmvalcore._config/extra_facets` +2. The default user directory `~/.esmvaltool/extra_facets` +3. The custom user directories in the order in which they are given in + `config-user.yml`. + +The extra facets files within each of these directories are processed in +lexicographical order according to their file name. + +In all cases it is allowed to supersede information from earlier files in later +files. This makes it possible for the user to effectively override even internal +default facets, for example to deal with local particularities in the data +handling. diff --git a/doc/develop/index.rst b/doc/develop/index.rst index e10a5143f0..d832933d75 100644 --- a/doc/develop/index.rst +++ b/doc/develop/index.rst @@ -12,3 +12,4 @@ features. Preprocessor function Dataset fix Deriving a variable + Extra facets From 6a66072d65c5eb1fde263f41a2d1ee83bd697f6e Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Fri, 4 Jun 2021 17:41:41 +0200 Subject: [PATCH 33/49] Complete documentation --- doc/develop/extra_facets.rst | 50 ++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/doc/develop/extra_facets.rst b/doc/develop/extra_facets.rst index 5506319f0d..18fec66584 100644 --- a/doc/develop/extra_facets.rst +++ b/doc/develop/extra_facets.rst @@ -26,6 +26,8 @@ table, and finally the `short_name`. The leaf dictionary placed here gives the extra facets that will be made available to data finder and the fix infrastructure. The following example illustrates the concept. +.. _extra-facets-example-1: + .. 
code-block:: yaml :caption: Extra facet example file `native6-era5.yml` @@ -59,3 +61,51 @@ In all cases it is allowed to supersede information from earlier files in later files. This makes it possible for the user to effectively override even internal default facets, for example to deal with local particularities in the data handling. + +Use of extra facets +=================== +For extra facets to be useful, the information that they provide must be +applied. There are fundamentally two places where this comes into play. One is +the datafinder, the other are fixes. + +Use of extra facets in the datafinder +------------------------------------- +Extra facets can be used to locate data files within the datafinder +framework. This is useful to build paths for directory structures and file names +that follow a different system than the established DRS for, e.g. CMIP. +A common application is the location of variables in multi-variable files as +often found in climate models' native output formats. + +Another use case is files that use different names for variables in their +file name than for the netCDF4 variable name. + +To apply the extra facets for this purpose, simply use the corresponding tag in +the applicable DRS inside the `config-developer.yml` file. For example, given +the extra facets in :ref:`extra-facets-example-1`, one might write the +following. + +.. extra-facets-example-2: + +.. code-block:: yaml + :caption: Example drs use in `config-developer.yml` + + native6: + input_file: + default: '{name_in_filename}*.nc' + +The same replacement mechanism can be employed everywhere where tags can be +used, particularly in `input_dir` and `input_file`. + +Use of extra facets in fixes +---------------------------- +In fixes, extra facets can be used to mold data into the form required by the +applicable standard. 
For example, if the input data is part of an observational +product that delivers surface temperature with a variable name of `t2m` inside a +file named `2m_temperature_1950_monthly.nc`, but the same variable is called +`tas` in the applicable standard, a fix can be created that reads the original +variable from the correct file, and provides a renamed variable to the rest of +the processing chain. + +Normally, the applicable standard for variables is CMIP6. + +For more details, refer to existing uses of this feature as examples. From 783298f496e7b2340d03473193998fb3f3318ebc Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 7 Jun 2021 16:26:08 +0200 Subject: [PATCH 34/49] Fix test failing because of coverage upload --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4be508c9e5..9c3e9d626f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -47,6 +47,7 @@ jobs: - coverage-reporter/send_report: coverage-reports: 'test-reports/coverage.xml' project-token: $CODACY_PROJECT_TOKEN + skip: true # skip if project-token is not defined (i.e. 
on a fork) install: # Test installation From 6dd52f80af095aa0cc2ba0dba73b91a88fca9023 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Tue, 8 Jun 2021 14:49:40 +0200 Subject: [PATCH 35/49] Remove dubious caching --- esmvalcore/_config/_config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index 53680ca796..7ae8250788 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -51,7 +51,6 @@ def _load_extra_facets(project, extra_facets_dir): return config -@lru_cache def get_extra_facets(project, dataset, mip, short_name, extra_facets_dir): """Read configuration files with additional variable information.""" project_details = _load_extra_facets(project, extra_facets_dir) From 0a73a18a2360cde086e3e6e6f4b959dbb700d95a Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Tue, 8 Jun 2021 17:49:01 +0200 Subject: [PATCH 36/49] Add docstrings --- esmvalcore/cmor/_fixes/fix.py | 6 ++++++ esmvalcore/cmor/fix.py | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 5a432802d7..e23505e106 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -15,6 +15,9 @@ def __init__(self, vardef, **extra_facets): ---------- vardef: str CMOR table entry + **extra_facets: dict, optional + Extra facets are mainly used for data outside of the big projects + like CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`. """ self.vardef = vardef self.extra_facets = extra_facets @@ -133,6 +136,9 @@ def get_fixes(project, dataset, mip, short_name, **extra_facets): dataset: str mip: str short_name: str + **extra_facets: dict, optional + Extra facets are mainly used for data outside of the big projects + like CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`. 
Returns ------- diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index 6d79e7ae1a..729925d805 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -34,6 +34,9 @@ def fix_file(file, short_name, project, dataset, mip, output_dir, dataset:str output_dir: str Output directory for fixed files + **extra_facets: dict, optional + Extra facets are mainly used for data outside of the big projects like + CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`. Returns ------- @@ -81,6 +84,9 @@ def fix_metadata(cubes, Variable's data frequency, if available check_level: CheckLevels Level of strictness of the checks. Set to default. + **extra_facets: dict, optional + Extra facets are mainly used for data outside of the big projects like + CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`. Returns ------- @@ -176,6 +182,9 @@ def fix_data(cube, Variable's data frequency, if available check_level: CheckLevels Level of strictness of the checks. Set to default. + **extra_facets: dict, optional + Extra facets are mainly used for data outside of the big projects like + CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`. Returns ------- From ccf622e0e2be77ce1e66efb1259e3f070b3412fe Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Tue, 8 Jun 2021 17:53:35 +0200 Subject: [PATCH 37/49] Minor improvements --- doc/develop/extra_facets.rst | 4 ++-- esmvalcore/_config/_config.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/develop/extra_facets.rst b/doc/develop/extra_facets.rst index 18fec66584..d4ad199c2e 100644 --- a/doc/develop/extra_facets.rst +++ b/doc/develop/extra_facets.rst @@ -31,9 +31,9 @@ infrastructure. The following example illustrates the concept. .. 
code-block:: yaml :caption: Extra facet example file `native6-era5.yml` - era5: + ERA5: Amon: - tas: {file_var_name: "t2m", name_in_filename: "2m_temperature"} + tas: {source_var_name: "t2m", cds_var_name: "2m_temperature"} Location of the extra facets files diff --git a/esmvalcore/_config/_config.py b/esmvalcore/_config/_config.py index 7ae8250788..f799200122 100644 --- a/esmvalcore/_config/_config.py +++ b/esmvalcore/_config/_config.py @@ -43,7 +43,7 @@ def _load_extra_facets(project, extra_facets_dir): for config_path in config_paths: config_file_paths = config_path.glob(f"{project.lower()}-*.yml") for config_file_path in sorted(config_file_paths): - logger.info("Loading extra facets from %s", config_file_path) + logger.debug("Loading extra facets from %s", config_file_path) with config_file_path.open() as config_file: config_piece = yaml.safe_load(config_file) if config_piece: From 686c723a140450a205d2010cfdc18813c9deba35 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Tue, 8 Jun 2021 19:26:23 +0200 Subject: [PATCH 38/49] Add basic test for _deep_update --- tests/unit/test_config.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/unit/test_config.py diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py new file mode 100644 index 0000000000..e5f5fde4b0 --- /dev/null +++ b/tests/unit/test_config.py @@ -0,0 +1,17 @@ +import pytest + +from esmvalcore._config._config import _deep_update + +TEST_DEEP_UPDATE = [([{}], {}), ([dict(a=1, b=2), dict(a=3)], dict(a=3, b=2)), + ([ + dict(a=dict(b=1, c=dict(d=2)), e=dict(f=4, g=5)), + dict(a=dict(b=2, c=3)) + ], dict(a=dict(b=2, c=3), e=dict(f=4, g=5)))] + + +@pytest.mark.parametrize('dictionaries, expected_merged', TEST_DEEP_UPDATE) +def test_deep_update(dictionaries, expected_merged): + merged = dictionaries[0] + for update in dictionaries[1:]: + merged = _deep_update(merged, update) + assert expected_merged == merged From 51d7f76fcd48a0804bc8c702fffbf6e1e992dc58 Mon Sep 
17 00:00:00 2001 From: Klaus Zimmermann Date: Tue, 8 Jun 2021 20:13:07 +0200 Subject: [PATCH 39/49] Add basic tests for _load_extra_facets --- .../extra_facets/override/test6-01.yml | 12 +++++++ .../extra_facets/override/test6-02.yml | 6 ++++ .../extra_facets/simple/test6-01.yml | 5 +++ tests/unit/test_config.py | 36 ++++++++++++++++++- 4 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 tests/sample_data/extra_facets/override/test6-01.yml create mode 100644 tests/sample_data/extra_facets/override/test6-02.yml create mode 100644 tests/sample_data/extra_facets/simple/test6-01.yml diff --git a/tests/sample_data/extra_facets/override/test6-01.yml b/tests/sample_data/extra_facets/override/test6-01.yml new file mode 100644 index 0000000000..3f375d1314 --- /dev/null +++ b/tests/sample_data/extra_facets/override/test6-01.yml @@ -0,0 +1,12 @@ +--- +PROJECT1: + Amon: + tas: + source_var_name: "t2m" + cds_var_name: "temperature_2m" + uas: + source_var_name: "u10n" + cds_var_name: "10m_u-component_of_neutral_wind" + vas: + source_var_name: "v10n" + cds_var_name: "10m_v-component_of_neutral_wind" diff --git a/tests/sample_data/extra_facets/override/test6-02.yml b/tests/sample_data/extra_facets/override/test6-02.yml new file mode 100644 index 0000000000..7cf8a552cd --- /dev/null +++ b/tests/sample_data/extra_facets/override/test6-02.yml @@ -0,0 +1,6 @@ +--- +PROJECT1: + Amon: + vas: + source_var_name: "10v" + cds_var_name: "v-component_of_neutral_wind_at_10m" diff --git a/tests/sample_data/extra_facets/simple/test6-01.yml b/tests/sample_data/extra_facets/simple/test6-01.yml new file mode 100644 index 0000000000..d940b8c96b --- /dev/null +++ b/tests/sample_data/extra_facets/simple/test6-01.yml @@ -0,0 +1,5 @@ +--- +PROJECT1: + Amon: + tas: {source_var_name: "2t", cds_var_name: "2m_temperature"} + psl: {source_var_name: "msl", cds_var_name: "mean_sea_level_pressure"} diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index e5f5fde4b0..86c218673e 
100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -1,6 +1,10 @@ import pytest -from esmvalcore._config._config import _deep_update +from esmvalcore._config._config import ( + _deep_update, + _load_extra_facets, + importlib_files, +) TEST_DEEP_UPDATE = [([{}], {}), ([dict(a=1, b=2), dict(a=3)], dict(a=3, b=2)), ([ @@ -15,3 +19,33 @@ def test_deep_update(dictionaries, expected_merged): for update in dictionaries[1:]: merged = _deep_update(merged, update) assert expected_merged == merged + + +BASE_PATH = importlib_files('tests') / 'sample_data' / 'extra_facets' + +TEST_LOAD_EXTRA_FACETS = [ + ('test-nonexistent', tuple(), {}), + ('test-nonexistent', (BASE_PATH / 'simple', ), {}), + ('test6', (BASE_PATH / 'simple', ), + dict(PROJECT1=dict(Amon=dict( + tas=dict(cds_var_name='2m_temperature', source_var_name='2t'), + psl=dict(cds_var_name='mean_sea_level_pressure', + source_var_name='msl'))))), + ('test6', (BASE_PATH / 'simple', BASE_PATH / 'override'), + dict(PROJECT1=dict(Amon=dict( + tas=dict(cds_var_name='temperature_2m', source_var_name='t2m'), + psl=dict(cds_var_name='mean_sea_level_pressure', + source_var_name='msl'), + uas=dict(cds_var_name='10m_u-component_of_neutral_wind', + source_var_name='u10n'), + vas=dict(cds_var_name='v-component_of_neutral_wind_at_10m', + source_var_name='10v'), + )))), +] + + +@pytest.mark.parametrize('project, extra_facets_dir, expected', + TEST_LOAD_EXTRA_FACETS) +def test_load_extra_facets(project, extra_facets_dir, expected): + extra_facets = _load_extra_facets(project, extra_facets_dir) + assert extra_facets == expected From d8a42e3351dcf1a7b5c9546fdd57f00559b68e05 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Tue, 8 Jun 2021 18:07:34 +0200 Subject: [PATCH 40/49] Simplify handling of fx vars --- esmvalcore/_recipe.py | 1 - esmvalcore/preprocessor/_ancillary_vars.py | 28 +++++++--------------- 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py 
index b8a00e4e4c..fcf06502f4 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -328,7 +328,6 @@ def _get_default_settings(variable, config_user, derive=False): 'fx_variables': {}, 'check_level': config_user.get('check_level', CheckLevels.DEFAULT) } - settings['add_fx_variables'].update(variable) settings['remove_fx_variables'] = {} return settings diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py index 96cf11041a..6309d35d43 100644 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -16,21 +16,16 @@ def _load_fx(var_cube, fx_info, check_level): """Load and CMOR-check fx variables.""" fx_cubes = iris.cube.CubeList() + project = fx_info['project'] + mip = fx_info['mip'] + short_name = fx_info['short_name'] + freq = fx_info['frequency'] + for fx_file in fx_info['filename']: loaded_cube = load(fx_file, callback=concatenate_callback) - short_name = fx_info['short_name'] - project = fx_info['project'] - dataset = fx_info['dataset'] - mip = fx_info['mip'] - freq = fx_info['frequency'] loaded_cube = fix_metadata(loaded_cube, - short_name=short_name, - project=project, - dataset=dataset, - mip=mip, - frequency=freq, check_level=check_level, - **extra_facets) + **fx_info) fx_cubes.append(loaded_cube[0]) fx_cube = concatenate(fx_cubes) @@ -42,14 +37,7 @@ def _load_fx(var_cube, fx_info, check_level): short_name=short_name, frequency=freq, check_level=check_level) - fx_cube = fix_data(fx_cube, - short_name=short_name, - project=project, - dataset=dataset, - mip=mip, - frequency=freq, - check_level=check_level, - **extra_facets) + fx_cube = fix_data(fx_cube, check_level=check_level, **fx_info) fx_cube = cmor_check_data(fx_cube, cmor_table=project, @@ -151,7 +139,7 @@ def add_ancillary_variable(cube, fx_cube): fx_cube.var_name, cube.var_name) -def add_fx_variables(cube, fx_variables, check_level, **extra_facets): +def add_fx_variables(cube, fx_variables, 
check_level): """Load requested fx files, check with CMOR standards and add the fx variables as cell measures or ancillary variables in the cube containing the data. From 75adca1a587103a069722788339e891c99c8324e Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Wed, 9 Jun 2021 09:10:27 +0200 Subject: [PATCH 41/49] Fix mypy issues --- tests/unit/test_config.py | 41 +++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 86c218673e..74c87217e4 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -1,3 +1,5 @@ +from pathlib import Path + import pytest from esmvalcore._config._config import ( @@ -21,26 +23,31 @@ def test_deep_update(dictionaries, expected_merged): assert expected_merged == merged -BASE_PATH = importlib_files('tests') / 'sample_data' / 'extra_facets' +BASE_PATH = importlib_files('tests') +BASE_PATH /= Path('sample_data') / Path('extra_facets') # type: ignore TEST_LOAD_EXTRA_FACETS = [ ('test-nonexistent', tuple(), {}), - ('test-nonexistent', (BASE_PATH / 'simple', ), {}), - ('test6', (BASE_PATH / 'simple', ), - dict(PROJECT1=dict(Amon=dict( - tas=dict(cds_var_name='2m_temperature', source_var_name='2t'), - psl=dict(cds_var_name='mean_sea_level_pressure', - source_var_name='msl'))))), - ('test6', (BASE_PATH / 'simple', BASE_PATH / 'override'), - dict(PROJECT1=dict(Amon=dict( - tas=dict(cds_var_name='temperature_2m', source_var_name='t2m'), - psl=dict(cds_var_name='mean_sea_level_pressure', - source_var_name='msl'), - uas=dict(cds_var_name='10m_u-component_of_neutral_wind', - source_var_name='u10n'), - vas=dict(cds_var_name='v-component_of_neutral_wind_at_10m', - source_var_name='10v'), - )))), + ('test-nonexistent', (BASE_PATH / 'simple', ), {}), # type: ignore + ( + 'test6', + (BASE_PATH / 'simple', ), # type: ignore + dict(PROJECT1=dict(Amon=dict( + tas=dict(cds_var_name='2m_temperature', source_var_name='2t'), + 
psl=dict(cds_var_name='mean_sea_level_pressure', + source_var_name='msl'))))), + ( + 'test6', + (BASE_PATH / 'simple', BASE_PATH / 'override'), # type: ignore + dict(PROJECT1=dict(Amon=dict( + tas=dict(cds_var_name='temperature_2m', source_var_name='t2m'), + psl=dict(cds_var_name='mean_sea_level_pressure', + source_var_name='msl'), + uas=dict(cds_var_name='10m_u-component_of_neutral_wind', + source_var_name='u10n'), + vas=dict(cds_var_name='v-component_of_neutral_wind_at_10m', + source_var_name='10v'), + )))), ] From 004cafd9207f264ca82ce4acb981b711c9853e5e Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Wed, 9 Jun 2021 10:04:48 +0200 Subject: [PATCH 42/49] Remove mapping_key --- esmvalcore/cmor/_fixes/fix.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index e23505e106..a0ab47f6f0 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -64,7 +64,7 @@ def fix_metadata(self, cubes): """ return cubes - def get_cube_from_list(self, cubes, short_name=None, mapping_key=None): + def get_cube_from_list(self, cubes, short_name=None): """Get a cube from the list with a given short name. 
Parameters @@ -86,7 +86,6 @@ def get_cube_from_list(self, cubes, short_name=None, mapping_key=None): """ if short_name is None: short_name = self.vardef.short_name - short_name = self.extra_facets.get(mapping_key, short_name) for cube in cubes: if cube.var_name == short_name: return cube From f888f8d063dfcf34b655d9f4733fc9b844d96829 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Wed, 9 Jun 2021 10:05:52 +0200 Subject: [PATCH 43/49] Fix fx preprocessor test --- tests/integration/test_recipe.py | 40 -------------------------------- 1 file changed, 40 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 5466d49dce..dee11ed554 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -217,27 +217,6 @@ def _get_default_settings_for_chl(fix_dir, save_filename, preprocessor): 'add_fx_variables': { 'fx_variables': {}, 'check_level': CheckLevels.DEFAULT, - 'alias': 'CanESM2', - 'dataset': 'CanESM2', - 'diagnostic': 'diagnostic_name', - 'end_year': 2005, - 'ensemble': 'r1i1p1', - 'exp': 'historical', - 'filename': fix_dir.replace('_fixed', '.nc'), - 'frequency': 'yr', - 'institute': ['CCCma'], - 'long_name': 'Total Chlorophyll Mass Concentration', - 'mip': 'Oyr', - 'modeling_realm': ['ocnBgchem'], - 'original_short_name': 'chl', - 'preprocessor': preprocessor, - 'project': 'CMIP5', - 'recipe_dataset_index': 0, - 'short_name': 'chl', - 'standard_name': standard_name, - 'start_year': 2000, - 'units': 'kg m-3', - 'variable_group': 'chl', }, 'remove_fx_variables': {}, 'cleanup': { @@ -709,25 +688,6 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'add_fx_variables': { 'fx_variables': {}, 'check_level': CheckLevels.DEFAULT, - 'alias': 'CanESM2', - 'dataset': 'CanESM2', - 'diagnostic': 'diagnostic_name', - 'ensemble': 'r0i0p0', - 'exp': 'historical', - 'filename': fix_dir.replace('_fixed', '.nc'), - 'frequency': 'fx', - 'institute': ['CCCma'], - 'long_name': 'Land 
Area Fraction', - 'mip': 'fx', - 'modeling_realm': ['atmos'], - 'original_short_name': 'sftlf', - 'preprocessor': 'default', - 'project': 'CMIP5', - 'recipe_dataset_index': 0, - 'short_name': 'sftlf', - 'standard_name': 'land_area_fraction', - 'units': '%', - 'variable_group': 'sftlf' }, 'remove_fx_variables': {}, 'cleanup': { From 79915cdcc8cfacde1c2638ab2ab40c67526420c4 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Wed, 9 Jun 2021 10:14:43 +0200 Subject: [PATCH 44/49] Improve formatting --- tests/unit/test_config.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 74c87217e4..708923890e 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -8,11 +8,14 @@ importlib_files, ) -TEST_DEEP_UPDATE = [([{}], {}), ([dict(a=1, b=2), dict(a=3)], dict(a=3, b=2)), - ([ - dict(a=dict(b=1, c=dict(d=2)), e=dict(f=4, g=5)), - dict(a=dict(b=2, c=3)) - ], dict(a=dict(b=2, c=3), e=dict(f=4, g=5)))] +TEST_DEEP_UPDATE = [ + ([{}], {}), + ([dict(a=1, b=2), dict(a=3)], dict(a=3, b=2)), + ([ + dict(a=dict(b=1, c=dict(d=2)), e=dict(f=4, g=5)), + dict(a=dict(b=2, c=3)), + ], dict(a=dict(b=2, c=3), e=dict(f=4, g=5))), +] @pytest.mark.parametrize('dictionaries, expected_merged', TEST_DEEP_UPDATE) From 731794e5b952fba127df93629f3a253c6bd0c9c1 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Wed, 9 Jun 2021 10:27:37 +0200 Subject: [PATCH 45/49] Moving extra facet documentation to better places --- doc/develop/extra_facets.rst | 111 ----------------------------------- doc/develop/fixing_data.rst | 16 +++++ doc/develop/index.rst | 1 - doc/quickstart/configure.rst | 70 ++++++++++++++++++++++ doc/quickstart/find_data.rst | 30 ++++++++++ 5 files changed, 116 insertions(+), 112 deletions(-) delete mode 100644 doc/develop/extra_facets.rst diff --git a/doc/develop/extra_facets.rst b/doc/develop/extra_facets.rst deleted file mode 100644 index d4ad199c2e..0000000000 --- 
a/doc/develop/extra_facets.rst +++ /dev/null @@ -1,111 +0,0 @@ -.. _extra_facets: - -************ -Extra Facets -************ - -Sometimes it is useful to provide extra information for the loading of data, -particularly in the case of native model data, or observational or other data, -that generally follows the established standards, but is not part of the big -supported projects like CMIP, CORDEX, obs4MIPs. - -To support this, we provide the extra facets facilities. Facets are the -key-value pairs described in :ref:`Datasets`. Extra facets allows for the -addition of more details per project, dataset, mip table, and variable name. - -More precisely, one can provide this information in an extra yaml file, named -`{project}-something.yml`, where `{project}` corresponds to the project as used -by ESMValTool in :ref:`Datasets` and "something" is arbitrary. - -Format of the extra facets files -================================ -The extra facets are given in a yaml file, whose file name identifies the -project. Inside the file there is a hierarchy of nested dictionaries with the -following levels. At the top there is the `dataset` facet, followed by the `mip` -table, and finally the `short_name`. The leaf dictionary placed here gives the -extra facets that will be made available to data finder and the fix -infrastructure. The following example illustrates the concept. - -.. _extra-facets-example-1: - -.. code-block:: yaml - :caption: Extra facet example file `native6-era5.yml` - - ERA5: - Amon: - tas: {source_var_name: "t2m", cds_var_name: "2m_temperature"} - - -Location of the extra facets files -================================== -Extra facets files can be placed in several different places. When we use them -to support a particular use-case within the ESMValTool project, they will be -provided in the sub-folder `extra_facets` inside the package -`esmvalcore._config`. 
If they are used from the user side, they can be either -placed in `~/.esmvaltool/extra_facets` or in any other directory of the users -choosing. In that case this directory must be added to the `config-user.yml` -file under the `extra_facets_dir` setting, which can take a single directory or -a list of directories. - -The order in which the directories are searched is - -1. The internal directory `esmvalcore._config/extra_facets` -2. The default user directory `~/.esmvaltool/extra_facets` -3. The custom user directories in the order in which they are given in - `config-user.yml`. - -The extra facets files within each of these directories are processed in -lexicographical order according to their file name. - -In all cases it is allowed to supersede information from earlier files in later -files. This makes it possible for the user to effectively override even internal -default facets, for example to deal with local particularities in the data -handling. - -Use of extra facets -=================== -For extra facets to be useful, the information that they provide must be -applied. There are fundamentally two places where this comes into play. One is -the datafinder, the other are fixes. - -Use of extra facets in the datafinder -------------------------------------- -Extra facets can be used to locate data files within the datafinder -framework. This is useful to build paths for directory structures and file names -that follow a different system than the established DRS for, e.g. CMIP. -A common application is the location of variables in multi-variable files as -often found in climate models' native output formats. - -Another use case is files that use different names for variables in their -file name than for the netCDF4 variable name. - -To apply the extra facets for this purpose, simply use the corresponding tag in -the applicable DRS inside the `config-developer.yml` file. 
For example, given -the extra facets in :ref:`extra-facets-example-1`, one might write the -following. - -.. extra-facets-example-2: - -.. code-block:: yaml - :caption: Example drs use in `config-developer.yml` - - native6: - input_file: - default: '{name_in_filename}*.nc' - -The same replacement mechanism can be employed everywhere where tags can be -used, particularly in `input_dir` and `input_file`. - -Use of extra facets in fixes ----------------------------- -In fixes, extra facets can be used to mold data into the form required by the -applicable standard. For example, if the input data is part of an observational -product that delivers surface temperature with a variable name of `t2m` inside a -file named `2m_temperature_1950_monthly.nc`, but the same variable is called -`tas` in the applicable standard, a fix can be created that reads the original -variable from the correct file, and provides a renamed variable to the rest of -the processing chain. - -Normally, the applicable standard for variables is CMIP6. - -For more details, refer to existing uses of this feature as examples. diff --git a/doc/develop/fixing_data.rst b/doc/develop/fixing_data.rst index 6dbe5fe96b..46411d27f0 100644 --- a/doc/develop/fixing_data.rst +++ b/doc/develop/fixing_data.rst @@ -353,3 +353,19 @@ For example for monthly data, place the files in the ``/Tier3/MSWEP/latestversio For monthly data (V220), the data must be postfixed with the date, i.e. rename ``global_monthly_050deg.nc`` to ``global_monthly_050deg_197901-201710.nc`` For more info: http://www.gloh2o.org/ + +.. _extra-facets-fixes: + +Use of extra facets in fixes +============================ +In fixes, extra facets can be used to mold data into the form required by the +applicable standard. 
For example, if the input data is part of an observational +product that delivers surface temperature with a variable name of `t2m` inside a +file named `2m_temperature_1950_monthly.nc`, but the same variable is called +`tas` in the applicable standard, a fix can be created that reads the original +variable from the correct file, and provides a renamed variable to the rest of +the processing chain. + +Normally, the applicable standard for variables is CMIP6. + +For more details, refer to existing uses of this feature as examples. diff --git a/doc/develop/index.rst b/doc/develop/index.rst index d832933d75..e10a5143f0 100644 --- a/doc/develop/index.rst +++ b/doc/develop/index.rst @@ -12,4 +12,3 @@ features. Preprocessor function Dataset fix Deriving a variable - Extra facets diff --git a/doc/quickstart/configure.rst b/doc/quickstart/configure.rst index cd8a92eca9..accc7f87f4 100644 --- a/doc/quickstart/configure.rst +++ b/doc/quickstart/configure.rst @@ -320,3 +320,73 @@ following documentation section: These four items here are named people, references and projects listed in the ``config-references.yml`` file. + +.. _extra_facets: + +Extra Facets +============ + +Sometimes it is useful to provide extra information for the loading of data, +particularly in the case of native model data, or observational or other data, +that generally follows the established standards, but is not part of the big +supported projects like CMIP, CORDEX, obs4MIPs. + +To support this, we provide the extra facets facilities. Facets are the +key-value pairs described in :ref:`Datasets`. Extra facets allows for the +addition of more details per project, dataset, mip table, and variable name. + +More precisely, one can provide this information in an extra yaml file, named +`{project}-something.yml`, where `{project}` corresponds to the project as used +by ESMValTool in :ref:`Datasets` and "something" is arbitrary. 
+ +Format of the extra facets files +-------------------------------- +The extra facets are given in a yaml file, whose file name identifies the +project. Inside the file there is a hierarchy of nested dictionaries with the +following levels. At the top there is the `dataset` facet, followed by the `mip` +table, and finally the `short_name`. The leaf dictionary placed here gives the +extra facets that will be made available to data finder and the fix +infrastructure. The following example illustrates the concept. + +.. _extra-facets-example-1: + +.. code-block:: yaml + :caption: Extra facet example file `native6-era5.yml` + + ERA5: + Amon: + tas: {source_var_name: "t2m", cds_var_name: "2m_temperature"} + + +Location of the extra facets files +---------------------------------- +Extra facets files can be placed in several different places. When we use them +to support a particular use-case within the ESMValTool project, they will be +provided in the sub-folder `extra_facets` inside the package +`esmvalcore._config`. If they are used from the user side, they can be either +placed in `~/.esmvaltool/extra_facets` or in any other directory of the user's +choosing. In that case this directory must be added to the `config-user.yml` +file under the `extra_facets_dir` setting, which can take a single directory or +a list of directories. + +The order in which the directories are searched is + +1. The internal directory `esmvalcore._config/extra_facets` +2. The default user directory `~/.esmvaltool/extra_facets` +3. The custom user directories in the order in which they are given in + `config-user.yml`. + +The extra facets files within each of these directories are processed in +lexicographical order according to their file name. + +In all cases it is allowed to supersede information from earlier files in later +files. This makes it possible for the user to effectively override even internal +default facets, for example to deal with local particularities in the data +handling.
+ +Use of extra facets +------------------- +For extra facets to be useful, the information that they provide must be +applied. There are fundamentally two places where this comes into play. One is +:ref:`the datafinder <extra-facets-data-finder>`, the other are +:ref:`fixes <extra-facets-fixes>`. diff --git a/doc/quickstart/find_data.rst b/doc/quickstart/find_data.rst index e2fa0a61bd..809db9965a 100644 --- a/doc/quickstart/find_data.rst +++ b/doc/quickstart/find_data.rst @@ -303,3 +303,33 @@ flexible concatenation between two cubes, depending on the particular setup: Note that two cube concatenation is the base operation of an iterative process of reducing multiple cubes from multiple data segments via cube concatenation ie if there is no time-overlapping data, the cubes concatenation is performed in one step. + +.. _extra-facets-data-finder: + +Use of extra facets in the datafinder +===================================== +Extra facets can be used to locate data files within the datafinder +framework. This is useful to build paths for directory structures and file names +that follow a different system than the established DRS for, e.g. CMIP. +A common application is the location of variables in multi-variable files as +often found in climate models' native output formats. + +Another use case is files that use different names for variables in their +file name than for the netCDF4 variable name. + +To apply the extra facets for this purpose, simply use the corresponding tag in +the applicable DRS inside the `config-developer.yml` file. For example, given +the extra facets in :ref:`extra-facets-example-1`, one might write the +following. + +.. _extra-facets-example-2: + +.. code-block:: yaml + :caption: Example drs use in `config-developer.yml` + + native6: + input_file: + default: '{name_in_filename}*.nc' + +The same replacement mechanism can be employed everywhere where tags can be +used, particularly in `input_dir` and `input_file`.
From 3d362d02d900c846bd04167b730012210367193d Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Wed, 9 Jun 2021 10:32:41 +0200 Subject: [PATCH 46/49] Handle extra_facets as dictionary instead of kwargs where possible --- esmvalcore/cmor/_fixes/fix.py | 10 +++++----- esmvalcore/cmor/fix.py | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index a0ab47f6f0..669867f79d 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -8,14 +8,14 @@ class Fix: """Base class for dataset fixes.""" - def __init__(self, vardef, **extra_facets): + def __init__(self, vardef, extra_facets): """Initialize fix object. Parameters ---------- vardef: str CMOR table entry - **extra_facets: dict, optional + extra_facets: dict, optional Extra facets are mainly used for data outside of the big projects like CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`. """ @@ -115,7 +115,7 @@ def __ne__(self, other): return not self.__eq__(other) @staticmethod - def get_fixes(project, dataset, mip, short_name, **extra_facets): + def get_fixes(project, dataset, mip, short_name, extra_facets): """Get the fixes that must be applied for a given dataset. It will look for them at the module @@ -135,7 +135,7 @@ def get_fixes(project, dataset, mip, short_name, **extra_facets): dataset: str mip: str short_name: str - **extra_facets: dict, optional + extra_facets: dict, optional Extra facets are mainly used for data outside of the big projects like CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`. 
@@ -160,7 +160,7 @@ def get_fixes(project, dataset, mip, short_name, **extra_facets): classes = dict((name.lower(), value) for name, value in classes) for fix_name in (short_name, mip.lower(), 'allvars'): try: - fixes.append(classes[fix_name](vardef, **extra_facets)) + fixes.append(classes[fix_name](vardef, extra_facets)) except KeyError: pass except ImportError: diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index 729925d805..7fb957fd26 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -47,7 +47,7 @@ def fix_file(file, short_name, project, dataset, mip, output_dir, dataset=dataset, mip=mip, short_name=short_name, - **extra_facets): + extra_facets=extra_facets): file = fix.fix_file(file, output_dir) return file @@ -102,7 +102,7 @@ def fix_metadata(cubes, dataset=dataset, mip=mip, short_name=short_name, - **extra_facets) + extra_facets=extra_facets) fixed_cubes = [] by_file = defaultdict(list) for cube in cubes: @@ -200,7 +200,7 @@ def fix_data(cube, dataset=dataset, mip=mip, short_name=short_name, - **extra_facets): + extra_facets=extra_facets): cube = fix.fix_data(cube) checker = _get_cmor_checker(frequency=frequency, table=project, From 99d5cfab955675a2f63b85a3fdfc08458d9ca1eb Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Wed, 9 Jun 2021 11:07:32 +0200 Subject: [PATCH 47/49] Add empty defaults to extra_facets to keep tests working --- esmvalcore/cmor/_fixes/fix.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 669867f79d..ccbfe8a7ea 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -8,7 +8,7 @@ class Fix: """Base class for dataset fixes.""" - def __init__(self, vardef, extra_facets): + def __init__(self, vardef, extra_facets={}): """Initialize fix object. 
Parameters @@ -115,7 +115,7 @@ def __ne__(self, other): return not self.__eq__(other) @staticmethod - def get_fixes(project, dataset, mip, short_name, extra_facets): + def get_fixes(project, dataset, mip, short_name, extra_facets={}): """Get the fixes that must be applied for a given dataset. It will look for them at the module From 78e6e9ef2d326e6426868b6f994338f5da73c7a8 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Wed, 9 Jun 2021 15:32:33 +0200 Subject: [PATCH 48/49] Use better default for extra_facets in method signatures --- esmvalcore/cmor/_fixes/fix.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index ccbfe8a7ea..3e1e9a8a00 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -8,7 +8,7 @@ class Fix: """Base class for dataset fixes.""" - def __init__(self, vardef, extra_facets={}): + def __init__(self, vardef, extra_facets=None): """Initialize fix object. Parameters @@ -20,6 +20,8 @@ def __init__(self, vardef, extra_facets={}): like CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`. """ self.vardef = vardef + if extra_facets is None: + extra_facets = {} self.extra_facets = extra_facets def fix_file(self, filepath, output_dir): @@ -115,7 +117,7 @@ def __ne__(self, other): return not self.__eq__(other) @staticmethod - def get_fixes(project, dataset, mip, short_name, extra_facets={}): + def get_fixes(project, dataset, mip, short_name, extra_facets=None): """Get the fixes that must be applied for a given dataset. 
It will look for them at the module @@ -151,6 +153,9 @@ def get_fixes(project, dataset, mip, short_name, extra_facets={}): dataset = dataset.replace('-', '_').lower() short_name = short_name.replace('-', '_').lower() + if extra_facets is None: + extra_facets = {} + fixes = [] try: fixes_module = importlib.import_module( From 9e2451516866054559e8bf408daa0d575ec0db16 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Wed, 9 Jun 2021 16:12:53 +0200 Subject: [PATCH 49/49] Update documentation with backlinks to main description --- doc/develop/fixing_data.rst | 16 +++++++++------- doc/quickstart/find_data.rst | 4 +++- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/doc/develop/fixing_data.rst b/doc/develop/fixing_data.rst index 46411d27f0..3008863a34 100644 --- a/doc/develop/fixing_data.rst +++ b/doc/develop/fixing_data.rst @@ -358,13 +358,15 @@ For more info: http://www.gloh2o.org/ Use of extra facets in fixes ============================ -In fixes, extra facets can be used to mold data into the form required by the -applicable standard. For example, if the input data is part of an observational -product that delivers surface temperature with a variable name of `t2m` inside a -file named `2m_temperature_1950_monthly.nc`, but the same variable is called -`tas` in the applicable standard, a fix can be created that reads the original -variable from the correct file, and provides a renamed variable to the rest of -the processing chain. +Extra facets are a mechanism to provide additional information for certain kinds +of data. The general approach is described in :ref:`extra_facets`. Here, we +describe how they can be used in fixes to mold data into the form required by +the applicable standard. 
For example, if the input data is part of an +observational product that delivers surface temperature with a variable name of +`t2m` inside a file named `2m_temperature_1950_monthly.nc`, but the same +variable is called `tas` in the applicable standard, a fix can be created that +reads the original variable from the correct file, and provides a renamed +variable to the rest of the processing chain. Normally, the applicable standard for variables is CMIP6. diff --git a/doc/quickstart/find_data.rst b/doc/quickstart/find_data.rst index 809db9965a..05905c04a1 100644 --- a/doc/quickstart/find_data.rst +++ b/doc/quickstart/find_data.rst @@ -308,7 +308,9 @@ cubes concatenation is performed in one step. Use of extra facets in the datafinder ===================================== -Extra facets can be used to locate data files within the datafinder +Extra facets are a mechanism to provide additional information for certain kinds +of data. The general approach is described in :ref:`extra_facets`. Here, we +describe how they can be used to locate data files within the datafinder framework. This is useful to build paths for directory structures and file names that follow a different system than the established DRS for, e.g. CMIP. A common application is the location of variables in multi-variable files as