From fd6d8550f237a7c1e812b6edc1d5dac54aed92bf Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Tue, 12 Sep 2023 22:21:55 +0200 Subject: [PATCH] Release candidate 2.0 (#779) --- .github/workflows/pytest.yml | 1 + RELEASE_NOTES.md | 13 +-- docs/R_tutorials/pyam_R_tutorial.ipynb | 2 +- docs/api/database.rst | 2 +- docs/api/iiasa.rst | 2 +- docs/tutorials.rst | 2 +- .../{iiasa_dbs.ipynb => iiasa.ipynb} | 0 pyam/core.py | 89 +++---------------- pyam/iiasa.py | 17 ++-- pyam/str.py | 4 +- setup.cfg | 10 +-- tests/conftest.py | 13 ++- tests/test_feature_validation.py | 56 +----------- tests/test_tutorials.py | 4 +- 14 files changed, 48 insertions(+), 167 deletions(-) rename docs/tutorials/{iiasa_dbs.ipynb => iiasa.ipynb} (100%) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index d266bbffe..a3a4a79d3 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -19,6 +19,7 @@ jobs: - windows-latest python-version: - '3.10' + - '3.11' fail-fast: false diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 2c1ae36e3..e7d85bcce 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,7 +1,10 @@ -# Next Release +# Release v2.0.0 -The next release must bump the major version number. -Reactivate tests for Python 3.11 once ixmp4 0.3 is released. +## Highlights + +- Use **ixmp4** as dependency for better integration with the IIASA Scenario Explorer database infrastructure +- Refactor the "exclude"-column for validation as own attribute (instead of a column in `meta`) +- Implement a cleaner package structure and reduce exposure of internal methods/modules ## Dependency changes @@ -20,9 +23,9 @@ The column *exclude* of the `meta` indicators was moved to a new attribute `excl All validation methods are refactored such that the argument `exclude_on_fail` changes this new attribute (see PR [#759](https://github.com/IAMconsortium/pyam/pull/759)). -The term "exclude" is now an illegal column name for (timeseries) data and meta tables. +The term "exclude" is now an illegal column for (timeseries) data and meta indicators. When importing an xlsx file created with pyam < 2.0, which has an "exclude" column in -"meta", that column is moved to the new exclude attribute with a log message. +the "meta" sheet, the column is moved to the new `exclude` attribute with a log message. PR [#764](https://github.com/IAMconsortium/pyam/pull/764) implemented a more restrictive approach to exposing pyam-internals at the package level, requiring a more explicit diff --git a/docs/R_tutorials/pyam_R_tutorial.ipynb b/docs/R_tutorials/pyam_R_tutorial.ipynb index 43ffc2236..53a09cd45 100644 --- a/docs/R_tutorials/pyam_R_tutorial.ipynb +++ b/docs/R_tutorials/pyam_R_tutorial.ipynb @@ -518,7 +518,7 @@ "id": "a0eb9b43", "metadata": {}, "source": [ - "See the [pyam-IIASA-database tutorial](https://pyam-iamc.readthedocs.io/en/stable/tutorials/iiasa_dbs.html)\n", + "See the [pyam-IIASA-database tutorial](https://pyam-iamc.readthedocs.io/en/stable/tutorials/iiasa.html)\n", "or the [API documentation](https://pyam-iamc.readthedocs.io/en/stable/api/iiasa.html)\n", "for more information and a complete list of features!" ] diff --git a/docs/api/database.rst b/docs/api/database.rst index 204486a7a..a107dc69f 100644 --- a/docs/api/database.rst +++ b/docs/api/database.rst @@ -13,7 +13,7 @@ See https://software.ece.iiasa.ac.at/ixmp-server for more information. The |pyam| package uses this interface to read timeseries data as well as categorization and quantitative indicators. The data is returned as an :class:`IamDataFrame`. -See `this tutorial <../tutorials/iiasa_dbs.html>`_ for more information. +See `this tutorial <../tutorials/iiasa.html>`_ for more information. .. autofunction:: read_iiasa diff --git a/docs/api/iiasa.rst b/docs/api/iiasa.rst index 94ee1fae4..77ea0ca2a 100644 --- a/docs/api/iiasa.rst +++ b/docs/api/iiasa.rst @@ -46,7 +46,7 @@ Coming soon... The *Scenario Explorer* infrastructure developed by the Scenario Services and Scientific Software team was developed and used for projects from 2018 until 2023. -See `this tutorial <../tutorials/iiasa_dbs.html>`_ for more information. +See `this tutorial <../tutorials/iiasa.html>`_ for more information. .. autoclass:: Connection :members: diff --git a/docs/tutorials.rst b/docs/tutorials.rst index 72c84ae66..5e66d0d89 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -24,7 +24,7 @@ The source code is available in the folder tutorials/unit_conversion.ipynb tutorials/algebraic_operations.ipynb tutorials/quantiles.ipynb - tutorials/iiasa_dbs.ipynb + tutorials/iiasa.ipynb tutorials/unfccc.ipynb tutorials/GAMS_to_pyam.ipynb tutorials/aggregating_downscaling_consistency.ipynb diff --git a/docs/tutorials/iiasa_dbs.ipynb b/docs/tutorials/iiasa.ipynb similarity index 100% rename from docs/tutorials/iiasa_dbs.ipynb rename to docs/tutorials/iiasa.ipynb diff --git a/pyam/core.py b/pyam/core.py index 1ced3f17c..90969c26b 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -181,8 +181,7 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs): self._data, index, self.time_col, self.extra_cols = _data # define `meta` dataframe for categorization & quantitative indicators - _index = make_index(self._data, cols=index) - self.meta = pd.DataFrame(index=_index) + self.meta = pd.DataFrame(index=make_index(self._data, cols=index)) self.exclude = False # if given explicitly, merge meta dataframe after downselecting @@ -1039,52 +1038,10 @@ def require_data( _exclude_on_fail(self, missing_required.droplevel(list(required))) return missing_required.to_frame(index=False) - def require_variable(self, variable, unit=None, year=None, exclude_on_fail=False): - """Check whether all scenarios have a required variable - - Parameters - ---------- - variable : str - Required variable. - unit : str, optional - Name of unit (optional). - year : int or list, optional - Check whether the variable exists for ANY of the years (if a list). - exclude_on_fail : bool, optional - If True, set :attr:`exclude` = True for all scenarios that do not satisfy - the criteria. - - """ - # TODO: deprecated, remove for release >= 2.0 - deprecation_warning("Use `df.require_data()` instead.") - - criteria = {"variable": variable} - if unit: - criteria.update({"unit": unit}) - if year: - criteria.update({"year": year}) - - keep = self._apply_filters(**criteria) - idx = self.meta.index.difference(_meta_idx(self.data[keep])) - - n = len(idx) - if n == 0: - logger.info( - "All scenarios have the required variable `{}`".format(variable) - ) - return - - msg = ( - "{} scenario does not include required variable `{}`" - if n == 1 - else "{} scenarios do not include required variable `{}`" - ) - - if exclude_on_fail: - _exclude_on_fail(self, idx) - - logger.info(msg.format(n, variable)) - return pd.DataFrame(index=idx).reset_index() + def require_variable(self, *args, **kwargs): + """This method is deprecated, use `df.require_data()` instead.""" + # TODO: deprecated, remove for release >= 2.1 + raise DeprecationWarning("Use `df.require_data()` instead.") def validate(self, criteria={}, exclude_on_fail=False): """Validate scenarios using criteria on timeseries values @@ -1901,6 +1858,8 @@ def filter(self, keep=True, inplace=False, **kwargs): msg = "Only yearly data after filtering, time-domain changed to 'year'." logger.info(msg) + ret._data.sort_index(inplace=True) + # downselect `meta` dataframe idx = make_index(ret._data, cols=self.index.names) if len(idx) == 0: @@ -2562,16 +2521,7 @@ def load_meta(self, path, sheet_name="meta", ignore_conflict=False, **kwargs): # merge imported meta indicators self.meta = merge_meta(meta, self.meta, ignore_conflict=ignore_conflict) - def map_regions( - self, - map_col, - agg=None, - copy_col=None, - fname=None, - region_col=None, - remove_duplicates=False, - inplace=False, - ): + def map_regions(self, map_col, **kwargs): # TODO: deprecated, remove for release >= 2.1 raise DeprecationWarning( "This method was removed. Please use `aggregate_region()` instead." @@ -2671,25 +2621,10 @@ def validate(df, criteria={}, exclude_on_fail=False, **kwargs): return vdf -def require_variable( - df, variable, unit=None, year=None, exclude_on_fail=False, **kwargs -): - """Check whether all scenarios have a required variable - - Parameters - ---------- - df : IamDataFrame - args : passed to :meth:`IamDataFrame.require_variable` - kwargs : used for downselecting IamDataFrame - passed to :meth:`IamDataFrame.filter` - """ - fdf = df.filter(**kwargs) - if len(fdf.data) > 0: - vdf = fdf.require_variable( - variable=variable, unit=unit, year=year, exclude_on_fail=exclude_on_fail - ) - df._exclude |= fdf._exclude # update if any excluded - return vdf +def require_variable(*args, **kwargs): + """This method is deprecated, use `df.require_data()` instead.""" + # TODO: deprecated, remove for release >= 2.1 + raise DeprecationWarning("Use `df.require_data()` instead.") def categorize( diff --git a/pyam/iiasa.py b/pyam/iiasa.py index 87387f1f3..3edff2f0d 100644 --- a/pyam/iiasa.py +++ b/pyam/iiasa.py @@ -1,3 +1,4 @@ +from io import StringIO from pathlib import Path import json import logging @@ -242,7 +243,7 @@ def meta_columns(self): url = "/".join([self._base_url, "metadata/types"]) r = requests.get(url, headers=self.auth()) _check_response(r) - return pd.read_json(r.text, orient="records")["name"] + return pd.read_json(StringIO(r.text), orient="records")["name"] def _query_index(self, default_only=True, meta=False, cols=[], **kwargs): # TODO: at present this reads in all data for all scenarios, @@ -255,7 +256,7 @@ def _query_index(self, default_only=True, meta=False, cols=[], **kwargs): _check_response(r) # cast response to dataframe, apply filter by kwargs, and return - runs = pd.read_json(r.text, orient="records") + runs = pd.read_json(StringIO(r.text), orient="records") if runs.empty: logger.warning("No permission to view model(s) or no scenarios exist.") return pd.DataFrame([], columns=META_IDX + ["version", "run_id"] + cols) @@ -360,7 +361,7 @@ def variables(self): url = "/".join([self._base_url, "ts"]) r = requests.get(url, headers=self.auth()) _check_response(r) - df = pd.read_json(r.text, orient="records") + df = pd.read_json(StringIO(r.text), orient="records") return pd.Series(df["variable"].unique(), name="variable") @lru_cache() @@ -382,7 +383,7 @@ def regions(self, include_synonyms=False): @staticmethod def convert_regions_payload(response, include_synonyms): - df = pd.read_json(response, orient="records") + df = pd.read_json(StringIO(response), orient="records") if df.empty: return df if "synonyms" not in df.columns: @@ -449,10 +450,6 @@ def _match(data, patterns): # pass empty list to API if all regions selected if len(regions) == len(self.regions()): regions = [] - logger.debug( - f"Prepared filter for {len(regions)} region(s), " - f"{len(variables)} variables and {len(runs)} runs" - ) data = { "filters": { "regions": list(regions), @@ -523,7 +520,6 @@ def query(self, default_only=True, meta=True, **kwargs): # retrieve data _args = json.dumps(self._query_post(_meta, default_only=default_only, **kwargs)) url = "/".join([self._base_url, "runs/bulk/ts"]) - logger.debug(f"Query timeseries data from {url} with data {_args}") r = requests.post(url, headers=headers, data=_args) _check_response(r) # refactor returned json object to be castable to an IamDataFrame @@ -537,8 +533,7 @@ def query(self, default_only=True, meta=True, **kwargs): value=float, version=int, ) - data = pd.read_json(r.text, orient="records", dtype=dtype) - logger.debug(f"Response: {len(r.text)} bytes, {len(data)} records") + data = pd.read_json(StringIO(r.text), orient="records", dtype=dtype) cols = IAMC_IDX + ["year", "value", "subannual", "version"] # keep only known columns or init empty df data = pd.DataFrame(data=data, columns=cols) diff --git a/pyam/str.py b/pyam/str.py index c0f77ff4e..cf748a5f6 100644 --- a/pyam/str.py +++ b/pyam/str.py @@ -106,8 +106,8 @@ def reduce_hierarchy(x, depth): ---------- x : str Uses ``|`` to separate the components of the variable. - level : int or list of int - Position of the component.s + depth : int or list of int + Position of the components. """ _x = x.split("|") diff --git a/setup.cfg b/setup.cfg index c6c125d10..1e22763fd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,18 +27,18 @@ python_requires = >=3.10, <3.12 # Please also add a section "Dependency changes" to the release notes install_requires = iam-units >= 2020.4.21 - ixmp4 >= 0.2.0 + ixmp4 >= 0.4.0 numpy >= 1.23.0, < 1.24 - requests +# requests included via ixmp4 pyjwt - httpx[http2] +# httpx[http2] included via ixmp4 openpyxl pandas >= 2.0.0 - scipy pint >= 0.13 PyYAML matplotlib >= 3.6.0, < 3.7.1 - seaborn + scipy >= 1.10.0 + seaborn >= 0.11 six setuptools >= 41 setuptools_scm diff --git a/tests/conftest.py b/tests/conftest.py index 21a2d9864..7e358127f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,14 +3,13 @@ matplotlib.use("agg") -from pathlib import Path -import os -from requests.exceptions import ConnectionError -import pytest +from datetime import datetime +from httpx import ConnectError import numpy as np import pandas as pd +import pytest +from pathlib import Path -from datetime import datetime from pyam import IamDataFrame, iiasa from pyam.utils import META_IDX, IAMC_IDX @@ -19,7 +18,7 @@ try: iiasa.Connection() IIASA_UNAVAILABLE = False -except ConnectionError: # pragma: no cover +except ConnectError: # pragma: no cover IIASA_UNAVAILABLE = True TEST_API = "integration-test" @@ -237,7 +236,7 @@ def reg_df(): @pytest.fixture(scope="session") def plot_df(): - df = IamDataFrame(data=os.path.join(TEST_DATA_DIR, "plot_data.csv")) + df = IamDataFrame(data=TEST_DATA_DIR / "plot_data.csv") yield df diff --git a/tests/test_feature_validation.py b/tests/test_feature_validation.py index 48ed8fd28..6aed06c2c 100644 --- a/tests/test_feature_validation.py +++ b/tests/test_feature_validation.py @@ -2,8 +2,8 @@ import pandas.testing as pdt import pytest -from pyam import IamDataFrame, validate, categorize, require_variable -from pyam.utils import IAMC_IDX, META_IDX +from pyam import IamDataFrame, validate, categorize +from pyam.utils import IAMC_IDX from .conftest import TEST_YEARS @@ -68,58 +68,6 @@ def test_require_data(test_df_year, kwargs, exclude_on_fail): assert list(df.exclude) == [False, False] -def test_require_variable_pass(test_df): - # checking that the return-type is correct - obs = test_df.require_variable(variable="Primary Energy", exclude_on_fail=True) - assert obs is None - assert list(test_df.exclude) == [False, False] - - -def test_require_variable(test_df): - exp = pd.DataFrame([["model_a", "scen_b"]], columns=META_IDX) - - # checking that the return-type is correct - obs = test_df.require_variable(variable="Primary Energy|Coal") - pdt.assert_frame_equal(obs, exp) - assert list(test_df.exclude) == [False, False] - - # checking exclude on fail - obs = test_df.require_variable(variable="Primary Energy|Coal", exclude_on_fail=True) - pdt.assert_frame_equal(obs, exp) - assert list(test_df.exclude) == [False, True] - - -def test_require_variable_top_level(test_df): - exp = pd.DataFrame([["model_a", "scen_b"]], columns=META_IDX) - - # checking that the return-type is correct - obs = require_variable(test_df, variable="Primary Energy|Coal") - pdt.assert_frame_equal(obs, exp) - assert list(test_df.exclude) == [False, False] - - # checking exclude on fail - obs = require_variable( - test_df, variable="Primary Energy|Coal", exclude_on_fail=True - ) - pdt.assert_frame_equal(obs, exp) - assert list(test_df.exclude) == [False, True] - - -def test_require_variable_year_list(test_df): - # drop first data point - df = IamDataFrame(test_df.data[1:]) - # checking for variables that have data for ANY of the years in the list - obs = df.require_variable(variable="Primary Energy", year=[2005, 2010]) - assert obs is None - - # checking for variables that have data for ALL of the years in the list - df = IamDataFrame(test_df.data[1:]) - exp = pd.DataFrame([["model_a", "scen_a"]], columns=META_IDX) - - obs = df.require_variable(variable="Primary Energy", year=[2005]) - pdt.assert_frame_equal(obs, exp) - - def test_validate_pass(test_df): obs = test_df.validate({"Primary Energy": {"up": 10}}, exclude_on_fail=True) assert obs is None diff --git a/tests/test_tutorials.py b/tests/test_tutorials.py index e8492f359..19a97a0e9 100644 --- a/tests/test_tutorials.py +++ b/tests/test_tutorials.py @@ -42,5 +42,5 @@ def test_tutorial_notebook(file): @pytest.mark.skipif(IIASA_UNAVAILABLE, reason="IIASA database API unavailable") -def test_tutorial_iiasa_dbs(): - _run_notebook("iiasa_dbs") +def test_tutorial_iiasa(): + _run_notebook("iiasa")