Sort columns in timeseries() with mixed time domain (#896)
danielhuppmann authored Dec 16, 2024
1 parent a08f7c3 commit 1ef2e7b
Showing 7 changed files with 98 additions and 53 deletions.
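
In short: when an IamDataFrame mixes integer years and datetimes, timeseries() now sorts its columns chronologically instead of leaving them in insertion order. A minimal sketch of the user-visible effect, assuming wide input with mixed time columns is accepted as exercised by test_timeseries_mixed_time_domain below; the data values are illustrative:

    import pandas as pd
    from datetime import datetime
    from pyam import IamDataFrame

    df = IamDataFrame(
        pd.DataFrame(
            [["model_a", "scen_a", "World", "Primary Energy", "EJ/yr", 1.0, 2.0]],
            columns=["model", "scenario", "region", "variable", "unit",
                     2010, datetime(2010, 1, 1)],
        )
    )

    # the year 2010 now sorts before the timestamp 2010-01-01 00:00
    print(df.timeseries().columns.tolist())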
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
@@ -2,6 +2,7 @@

The next release must be bumped to v3.0.0.

- [#896](https://github.com/IAMconsortium/pyam/pull/896) Sort columns of `timeseries()` with mixed time domain
- [#893](https://github.com/IAMconsortium/pyam/pull/893) No sorting of timeseries data on initialization or append
- [#879](https://github.com/IAMconsortium/pyam/pull/879) Add `read_netcdf()` function

17 changes: 12 additions & 5 deletions pyam/core.py
@@ -58,6 +58,7 @@
IAMC_IDX,
ILLEGAL_COLS,
META_IDX,
compare_year_time,
format_data,
get_excel_file_with_kwargs,
is_list_like,
@@ -702,7 +703,7 @@ def interpolate(self, time, inplace=False, **kwargs):
df.columns.name = ret.time_col
df = df.stack(future_stack=True).dropna() # wide data to pd.Series
df.name = "value"
ret._data = df.sort_index()
ret._data = df
ret._set_attributes()

if not inplace:
@@ -794,8 +795,8 @@ def timeseries(self, iamc_index=False):
Parameters
----------
iamc_index : bool, optional
If True, use `['model', 'scenario', 'region', 'variable', 'unit']`;
else, use all 'data' columns.
If True, return only IAMC-index `['model', 'scenario', 'region', 'variable',
'unit']`; else, use all 'data' columns.
Raises
------
@@ -813,10 +814,16 @@
raise ValueError(
"Cannot use `iamc_index=True` with 'datetime' time-domain."
)
s = s.droplevel(self.extra_cols)
s = self._data.droplevel(self.extra_cols)
if s.index.has_duplicates:
raise ValueError("Dropping non-IAMC-index causes duplicated index.")

return (
s.unstack(level=self.time_col).sort_index(axis=1).rename_axis(None, axis=1)
s.unstack(level=self.time_col)
.rename_axis(None, axis=1)
.sort_index(
axis=1, key=compare_year_time if self.time_domain == "mixed" else None
)
)

def set_meta(self, meta, name=None, index=None): # noqa: C901
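
On the iamc_index=True path, the extra index levels are dropped before unstacking, and a duplicated index is now rejected with a clearer error message. A pandas-only sketch of that guard, using hypothetical data with one extra column "foo"; running it raises the error, mirroring the test below:

    import pandas as pd

    idx = pd.MultiIndex.from_tuples(
        [
            ("model_a", "scen_a", "World", "Primary Energy", "EJ/yr", "bar", 2005),
            ("model_a", "scen_a", "World", "Primary Energy", "EJ/yr", "baz", 2005),
        ],
        names=["model", "scenario", "region", "variable", "unit", "foo", "year"],
    )
    s = pd.Series([1.0, 2.0], index=idx, name="value")

    # dropping the extra level leaves two identical index entries
    s = s.droplevel(["foo"])
    if s.index.has_duplicates:
        raise ValueError("Dropping non-IAMC-index causes duplicated index.")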
12 changes: 12 additions & 0 deletions pyam/utils.py
@@ -4,6 +4,7 @@
import re
import string
import warnings
from datetime import datetime, timedelta
from pathlib import Path

import dateutil
@@ -603,6 +604,17 @@ def print_list(x, n):
return lst + count


# utility method to compare years (as integer) and datetime for index-sorting
def compare_year_time(x):
return pd.Index([
# set year lower than first timestep of that year (2010 < 2010-01-01 00:00)
datetime(time, 1, 1, 0, 0, 0) - timedelta(0, 0.01)
if isinstance(time, int)
else time
for time in x
])


def to_time(x):
"""Cast a value to either year (int) or datetime"""

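
The new key function maps an integer year to a timestamp just below the first instant of that year, so a plain year sorts ahead of every datetime within it; datetimes pass through unchanged. A small sketch of the resulting ordering when the function is used as a pandas sort key (standalone copy of the function above, output shown as a comment):

    import pandas as pd
    from datetime import datetime, timedelta

    def compare_year_time(x):
        # 2010 becomes 2009-12-31 23:59:59.99, so it sorts just before 2010-01-01 00:00
        return pd.Index([
            datetime(t, 1, 1, 0, 0, 0) - timedelta(0, 0.01) if isinstance(t, int) else t
            for t in x
        ])

    mixed = pd.Index([datetime(2010, 6, 17), 2010, 2005], dtype=object)
    print(mixed.sort_values(key=compare_year_time))
    # Index([2005, 2010, 2010-06-17 00:00:00], dtype='object')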
4 changes: 2 additions & 2 deletions tests/test_cast_to_iamc.py
@@ -43,7 +43,7 @@ def test_cast_from_value_col_and_args(test_df_year):
],
columns=[
"scenario",
"iso",
"node",
"unit",
"year",
"Primary Energy",
@@ -53,7 +53,7 @@
df = IamDataFrame(
df_with_value_cols,
model="model_a",
region="iso",
region="node",
value=["Primary Energy", "Primary Energy|Coal"],
)
pdt.assert_series_equal(df._data, test_df_year._data, check_like=True)
80 changes: 49 additions & 31 deletions tests/test_core.py
@@ -1,5 +1,5 @@
import datetime
import logging
from datetime import datetime

import numpy as np
import pandas as pd
@@ -48,7 +48,7 @@ def test_init_df_with_non_default_index(test_pd_df, index):
"""Casting to IamDataFrame and returning as `timeseries()` yields original frame"""

# set a value to `nan` to check that timeseries columns are ordered correctly
test_pd_df.loc[0, test_pd_df.columns[5]] = np.nan
test_pd_df.loc[0, 2010] = np.nan

# any number of columns can be set as index
df = test_pd_df.copy() if index is None else test_pd_df.set_index(index)
@@ -130,12 +130,12 @@ def test_init_df_with_na_scenario(test_pd_df):
def test_init_df_with_float_cols(test_pd_df):
_test_df = test_pd_df.rename(columns={2005: 2005.0, 2010: 2010.0})
obs = IamDataFrame(_test_df).timeseries().reset_index()
pd.testing.assert_series_equal(obs[2005], test_pd_df[2005])
pdt.assert_series_equal(obs[2005], test_pd_df[2005])


def test_init_df_from_timeseries(test_df):
df = IamDataFrame(test_df.timeseries())
pd.testing.assert_frame_equal(df.timeseries(), test_df.timeseries())
pdt.assert_frame_equal(df.timeseries(), test_df.timeseries())


def test_init_df_from_timeseries_unused_levels(test_df):
@@ -164,15 +164,15 @@ def test_init_df_with_extra_col(test_pd_df):
# check that timeseries data is as expected
obs = df.timeseries().reset_index()
exp = tdf[obs.columns] # get the columns into the right order
pd.testing.assert_frame_equal(obs, exp)
pdt.assert_frame_equal(obs, exp)


def test_init_df_with_meta_with_index(test_pd_df):
# pass indexed meta dataframe with a scenario that doesn't exist in data
df = IamDataFrame(test_pd_df, meta=META_DF)

# check that scenario not existing in data is removed during initialization
pd.testing.assert_frame_equal(df.meta, META_DF.iloc[[0, 1]])
pdt.assert_frame_equal(df.meta, META_DF.iloc[[0, 1]])
assert df.scenario == ["scen_a", "scen_b"]


@@ -181,7 +181,7 @@ def test_init_df_with_meta_no_index(test_pd_df):
df = IamDataFrame(test_pd_df, meta=META_DF.reset_index())

# check that scenario not existing in data is removed during initialization
pd.testing.assert_frame_equal(df.meta, META_DF.iloc[[0, 1]])
pdt.assert_frame_equal(df.meta, META_DF.iloc[[0, 1]])
assert df.scenario == ["scen_a", "scen_b"]


@@ -200,7 +200,7 @@ def test_init_df_with_meta_key_value(test_pd_df):
df = IamDataFrame(test_pd_df, meta=meta_df)

# check that scenario not existing in data is removed during initialization
pd.testing.assert_frame_equal(df.meta, META_DF.iloc[[0, 1]], check_dtype=False)
pdt.assert_frame_equal(df.meta, META_DF.iloc[[0, 1]], check_dtype=False)
assert df.scenario == ["scen_a", "scen_b"]


@@ -391,7 +391,7 @@ def test_index(test_df_year):
exp = pd.MultiIndex.from_arrays(
[["model_a"] * 2, ["scen_a", "scen_b"]], names=["model", "scenario"]
)
pd.testing.assert_index_equal(test_df_year.index, exp)
pdt.assert_index_equal(test_df_year.index, exp)


def test_index_attributes(test_df):
@@ -526,8 +526,8 @@ def test_variable_depth_with_list_raises(test_df, filter_name):


@pytest.mark.parametrize("unsort", [False, True])
def test_timeseries(test_df, unsort):
"""Assert that the timeseries is shown as expected even from unordered data"""
def test_timeseries_long(test_df, unsort):
"""Assert that timeseries is shown as expected from (unsorted) long data"""
exp = TEST_DF.set_index(IAMC_IDX)

if unsort:
@@ -553,15 +553,32 @@ def test_timeseries(test_df, unsort):
exp.columns.name = None

obs = test_df.timeseries()
pdt.assert_frame_equal(obs, exp, check_column_type=False)
pdt.assert_frame_equal(obs, exp, check_like=True, check_column_type=False)


def test_timeseries_wide_unsorted(test_pd_df):
"""Assert that the timeseries is shown as expected even from unordered data"""
@pytest.mark.parametrize("unsort", [False, True])
def test_timeseries_wide(test_pd_df, unsort):
"""Assert that timeseries is shown as expected from (unsorted) wide data"""

# for some reason, `unstack` behaves differently if columns or rows are not sorted
exp = test_pd_df.set_index(IAMC_IDX)
obs = IamDataFrame(test_pd_df[IAMC_IDX + [2010, 2005]]).timeseries()

if unsort:
obs = IamDataFrame(test_pd_df[IAMC_IDX + [2010, 2005]]).timeseries()
else:
obs = IamDataFrame(test_pd_df).timeseries()
pdt.assert_frame_equal(obs, exp, check_column_type=False)


def test_timeseries_mixed_time_domain(test_pd_df):
"""Assert that timeseries is shown as expected from mixed time-domain data"""
test_pd_df = test_pd_df.rename(columns={2005: "2010-01-01 00:00"})
exp = (
test_pd_df.set_index(IAMC_IDX)[[2010, "2010-01-01 00:00"]]
.rename(columns={"2010-01-01 00:00": datetime(2010, 1, 1, 0, 0)})
)

obs = IamDataFrame(test_pd_df).timeseries()
pdt.assert_frame_equal(obs, exp, check_column_type=False)


@@ -582,11 +599,11 @@ def test_timeseries_time_iamc_raises(test_df_time):
def test_timeseries_to_iamc_index(test_pd_df, test_df_year):
"""Reducing timeseries() of an IamDataFrame with extra-columns to IAMC-index"""
test_pd_df["foo"] = "bar"
exta_col_df = IamDataFrame(test_pd_df)
assert exta_col_df.extra_cols == ["foo"]
extra_col_df = IamDataFrame(test_pd_df)
assert extra_col_df.extra_cols == ["foo"]

# assert that reducing to IAMC-columns (dropping extra-columns) with timeseries()
obs = exta_col_df.timeseries(iamc_index=True)
obs = extra_col_df.timeseries(iamc_index=True)
exp = test_df_year.timeseries()
pdt.assert_frame_equal(obs, exp)

@@ -596,13 +613,14 @@ def test_timeseries_to_iamc_index_duplicated_raises(test_pd_df):
test_pd_df = pd.concat([test_pd_df, test_pd_df])
# adding an extra-col creates a unique index
test_pd_df["foo"] = ["bar", "bar", "bar", "baz", "baz", "baz"]
exta_col_df = IamDataFrame(test_pd_df)
assert exta_col_df.extra_cols == ["foo"]

extra_col_df = IamDataFrame(test_pd_df)
assert extra_col_df.extra_cols == ["foo"]

# dropping the extra-column by setting `iamc_index=True` creates duplicated index
match = "Index contains duplicate entries, cannot reshape"
match = "Dropping non-IAMC-index causes duplicated index"
with pytest.raises(ValueError, match=match):
exta_col_df.timeseries(iamc_index=True)
extra_col_df.timeseries(iamc_index=True)


def test_pivot_table(test_df):
@@ -635,7 +653,7 @@ def test_filter_meta_index(test_df):
exp = pd.MultiIndex(
levels=[["model_a"], ["scen_b"]], codes=[[0], [0]], names=["model", "scenario"]
)
pd.testing.assert_index_equal(obs, exp)
pdt.assert_index_equal(obs, exp)


def test_meta_idx(test_df):
@@ -668,7 +686,7 @@ def test_pd_filter_by_meta(test_df):
exp["boolean"] = True
exp["integer"] = 0

pd.testing.assert_frame_equal(obs, exp)
pdt.assert_frame_equal(obs, exp)


def test_pd_filter_by_meta_no_index(test_df):
@@ -684,7 +702,7 @@ def test_pd_filter_by_meta_no_index(test_df):
exp["boolean"] = True
exp["int"] = 0

pd.testing.assert_frame_equal(obs, exp)
pdt.assert_frame_equal(obs, exp)


def test_pd_filter_by_meta_nonmatching_index(test_df):
@@ -697,7 +715,7 @@ def test_pd_filter_by_meta_nonmatching_index(test_df):
exp = data.iloc[2:3].copy()
exp["string"] = "b"

pd.testing.assert_frame_equal(obs, exp)
pdt.assert_frame_equal(obs, exp)


def test_pd_join_by_meta_nonmatching_index(test_df):
@@ -710,7 +728,7 @@ def test_pd_join_by_meta_nonmatching_index(test_df):
exp = data.copy()
exp["string"] = [np.nan, np.nan, "b"]

pd.testing.assert_frame_equal(obs.sort_index(level=1), exp)
pdt.assert_frame_equal(obs.sort_index(level=1), exp)


def test_normalize(test_df):
@@ -720,10 +738,10 @@ def test_normalize(test_df):
if "year" in test_df.data:
obs = test_df.normalize(year=2005).data.reset_index(drop=True)
else:
obs = test_df.normalize(time=datetime.datetime(2005, 6, 17)).data.reset_index(
obs = test_df.normalize(time=datetime(2005, 6, 17)).data.reset_index(
drop=True
)
pd.testing.assert_frame_equal(obs, exp)
pdt.assert_frame_equal(obs, exp)


def test_normalize_not_time(test_df):
@@ -742,9 +760,9 @@ def test_offset(test_df, padding):
obs = test_df.offset(year=2005, **kwargs).data.reset_index(drop=True)
else:
obs = test_df.offset(
time=datetime.datetime(2005, 6, 17), **kwargs
time=datetime(2005, 6, 17), **kwargs
).data.reset_index(drop=True)
pd.testing.assert_frame_equal(obs, exp)
pdt.assert_frame_equal(obs, exp)


def test_offset_not_time(test_df):
(Diffs for the remaining 2 changed files are not shown here.)
