Skip to content

Commit

Permalink
Prevent conflicts between attributes and data/meta columns (#445)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann authored Oct 23, 2020
1 parent c08da61 commit f071e79
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 4 deletions.
4 changes: 4 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Next Release

## Individual updates

- [#445](https://github.com/IAMconsortium/pyam/pull/445) Prevent conflicts between attributes and data/meta columns

# Release v0.8.0

## Highlights
Expand Down
9 changes: 6 additions & 3 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@
META_IDX,
YEAR_IDX,
IAMC_IDX,
SORT_IDX
SORT_IDX,
ILLEGAL_COLS
)
from pyam.read_ixmp import read_ix
from pyam.timeseries import fill_series
Expand Down Expand Up @@ -618,8 +619,10 @@ def set_meta(self, meta, name=None, index=None):
if (name or (hasattr(meta, 'name') and meta.name)) in [None, False]:
raise ValueError('Must pass a name or use a named pd.Series')
name = name or meta.name
if name in self.data.columns:
raise ValueError(f'A column `{name}` already exists in `data`!')
if name in self._data.index.names:
raise ValueError(f'Column {name} already exists in `data`!')
if name in ILLEGAL_COLS:
raise ValueError(f'Name {name} is illegal for meta indicators!')

# check if meta has a valid index and use it for further workflow
if hasattr(meta, 'index') and hasattr(meta.index, 'names') \
Expand Down
13 changes: 12 additions & 1 deletion pyam/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
SORT_IDX = ['model', 'scenario', 'variable', 'year', 'region']
LONG_IDX = IAMC_IDX + ['year']

# illegal terms for data/meta column names to prevent attribute conflicts
ILLEGAL_COLS = ['data', 'meta']

# dictionary to translate column count to Excel column names
NUMERIC_TO_STR = dict(zip(range(0, 702),
[i for i in string.ascii_uppercase]
Expand Down Expand Up @@ -133,7 +136,7 @@ def format_data(df, **kwargs):
df.name = df.name or 'value'
df = df.to_frame()

# Check for R-style year columns, converting where necessary
# check for R-style year columns, converting where necessary
def convert_r_columns(c):
try:
first = c[0]
Expand Down Expand Up @@ -203,6 +206,14 @@ def convert_r_columns(c):
if not list(df.index.names) == [None]:
df.reset_index(inplace=True)

# check that there is no column in the timeseries data with reserved names
conflict_cols = [i for i in df.columns if i in ILLEGAL_COLS]
if conflict_cols:
msg = f'Column name {conflict_cols} is illegal for timeseries data.\n'
_args = ', '.join([f"{i}_1='{i}'" for i in conflict_cols])
msg += f'Use `IamDataFrame(..., {_args})` to rename at initalization.'
raise ValueError(msg)

# format columns to lower-case and check that all required columns exist
if not set(IAMC_IDX).issubset(set(df.columns)):
missing = list(set(IAMC_IDX) - set(df.columns))
Expand Down
25 changes: 25 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,31 @@ def test_init_empty_message(caplog):
assert caplog.records[message_idx].levelno == logging.WARNING


def test_init_with_column_conflict(test_pd_df):
# add a column to the timeseries data with a conflict to the meta attribute
test_pd_df['meta'] = 'foo'

# check that initialising an instance with an extra-column `meta` raises
msg = re.compile(r"Column name \['meta'\] is illegal for timeseries data.")
with pytest.raises(ValueError, match=msg):
IamDataFrame(test_pd_df)

# check that recommended fix works
df = IamDataFrame(test_pd_df, meta_1='meta')
assert df.meta_1 == ['foo']


def test_set_meta_with_column_conflict(test_df_year):
# check that setting a `meta` column with a name conflict raises
msg = 'Column model already exists in `data`!'
with pytest.raises(ValueError, match=msg):
test_df_year.set_meta(name='model', meta='foo')

msg = 'Name meta is illegal for meta indicators!'
with pytest.raises(ValueError, match=msg):
test_df_year.set_meta(name='meta', meta='foo')


def test_print(test_df_year):
"""Assert that `print(IamDataFrame)` (and `info()`) returns as expected"""
exp = '\n'.join([
Expand Down

0 comments on commit f071e79

Please sign in to comment.