Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an info() function and use in print(IamDataFrame) #427

Merged
merged 9 commits into from
Sep 23, 2020
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ via getter and setter functions.

## Individual updates

- [#427](https://github.com/IAMconsortium/pyam/pull/427) Add an `info()` function and use in `print(IamDataFrame)`
- [#424](https://github.com/IAMconsortium/pyam/pull/424) Add a tutorial reading results from a GAMS model (via a gdx file).
- [#420](https://github.com/IAMconsortium/pyam/pull/420) Add a `_data` object (implemented as a pandas.Series) to handle timeseries data internally.
- [#418](https://github.com/IAMconsortium/pyam/pull/418) Read data from World Bank Open Data Catalogue as IamDataFrame.
Expand Down
20 changes: 19 additions & 1 deletion doc/source/tutorials/pyam_first_steps.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,25 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"As a first step, we show lists of all models, scenarios, regions, and the variables (including units) in the snapshot."
"As a first step, we show an overview of the `IamDataFrame` content by simply calling `df` (alternatively, you can use `print(df)` or [df.info()](https://pyam-iamc.readthedocs.io/en/stable/api/iamdataframe.html#pyam.IamDataFrame.info)).\n",
"\n",
"This function returns a concise (abbreviated) overview of the index dimensions and the qualitative/quantitative meta indicators (see an explanation of indicators below)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In the following cells, we display the lists of all models, scenarios, regions, and the variables (including units) in the snapshot."
]
},
{
Expand Down
2 changes: 2 additions & 0 deletions pyam/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import logging # noqa: F401

from pyam.core import *
from pyam.utils import *
from pyam.statistics import *
Expand Down
43 changes: 43 additions & 0 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
datetime_match,
isstr,
islistable,
print_list,
META_IDX,
YEAR_IDX,
IAMC_IDX,
Expand Down Expand Up @@ -178,6 +179,48 @@ def __setitem__(self, key, value):
def __len__(self):
    """Return the number of entries in `self.data`"""
    return len(self.data)

def __repr__(self):
    """Use the summary string built by `info()` as the representation"""
    summary = self.info()
    return summary

def info(self, n=80, meta_rows=5, memory_usage=False):
    """Return a summary of the object index dimensions and meta indicators

    Parameters
    ----------
    n : int
        The maximum line length
    meta_rows : int
        The maximum number of meta indicators printed
    memory_usage : bool
        If True, append a line with the combined memory usage of the
        timeseries data and the meta table (in bytes)

    Returns
    -------
    str
        The multi-line summary; this is also what `__repr__` returns
    """
    # concatenate list of index dimensions and levels
    info = f'{type(self)}\nIndex dimensions:\n'
    # pad all dimension names to the width of the longest one
    c1 = max(len(i) for i in self._LONG_IDX) + 1
    # remaining width for the list of levels after the name column
    c2 = n - c1 - 5
    info += '\n'.join(
        [f' * {i:{c1}}: {print_list(get_index_levels(self._data, i), c2)}'
         for i in self._LONG_IDX])

    # concatenate list of (head of) meta indicators and levels/values
    def print_meta_row(m, t, lst):
        # use the length of the dtype *name* as it is written to the line;
        # `len()` of a dtype object is not the length of its name
        _lst = print_list(lst, n - len(m) - len(str(t)) - 7)
        return f' {m} ({t}) {_lst}'

    info += '\nMeta indicators:\n'
    info += '\n'.join(
        [print_meta_row(m, t, self.meta[m].unique())
         for m, t in zip(self.meta.columns[0:meta_rows],
                         self.meta.dtypes[0:meta_rows])])
    # print `...` if more than `meta_rows` columns
    if len(self.meta.columns) > meta_rows:
        info += '\n * ...'

    # add info on size (optional)
    if memory_usage:
        size = self._data.memory_usage() + sum(self.meta.memory_usage())
        info += f'\nMemory usage: {size} bytes'

    return info

def _execute_run_control(self):
for module_block in run_control()['exec']:
fname = module_block['file']
Expand Down
40 changes: 40 additions & 0 deletions pyam/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,46 @@ def datetime_match(data, dts):
return data.isin(dts)


def print_list(x, n):
    """Return a printable string of a list shortened to n characters

    The result shows the first item, as many of the following items as fit,
    an ellipsis if items were dropped, the last item (if it fits) and the
    total number of items in parentheses, e.g. `a, b, ... z (26)`.

    Parameters
    ----------
    x : iterable
        The items to be shown; each item is converted with `str()`
    n : int
        The targeted maximum length of the returned string

    Returns
    -------
    str
        The abbreviated, comma-separated list followed by the item count
    """
    x = list(map(str, x))

    # an empty list has no first item to show, so only write the count
    if not x:
        return '(0)'

    # write number of elements,
    # subtract count added at end from line width
    count = f' ({len(x)})'
    n -= len(count)

    # if not enough space to write first item, write shortest sensible line
    if len(x[0]) > n - 5:
        return '...' + count

    # if only one item in list
    if len(x) == 1:
        return f'{x[0]} (1)'

    # add first item
    lst = f'{x[0]}, '
    n -= len(lst)

    # if possible, add last item before number of elements
    if len(x[-1]) + 4 > n:
        return lst + '...' + count
    count = f'{x[-1]}{count}'
    # reserve the width of the last item itself (not of a one-element set),
    # otherwise the items added below can push the line beyond the limit
    n -= len(x[-1]) + 3

    # iterate over remaining entries until line is full
    for i in x[1:-1]:
        # keep room for the item, its separator and a trailing ellipsis
        if len(i) + 6 <= n:
            lst += f'{i}, '
            n -= len(i) + 2
        else:
            lst += '... '
            break

    return lst + count


def to_int(x, index=False):
"""Formatting series or timeseries columns to int and checking validity

Expand Down
21 changes: 21 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,27 @@ def test_init_empty_message(test_pd_df, caplog):
assert caplog.records[message_idx].levelno == logging.WARNING


def test_print(test_df_year):
    """Check the summary string of `info()`, which `print()` relies on"""
    expected_lines = [
        "<class 'pyam.core.IamDataFrame'>",
        'Index dimensions:',
        ' * model : model_a (1)',
        ' * scenario : scen_a, scen_b (2)',
        ' * region : World (1)',
        ' * variable : Primary Energy, Primary Energy|Coal (2)',
        ' * unit : EJ/yr (1)',
        ' * year : 2005, 2010 (2)',
        'Meta indicators:',
        ' exclude (bool) False (1)',
        ' number (int64) 1, 2 (2)',
        ' string (object) foo, nan (2)',
    ]
    exp = '\n'.join(expected_lines)
    obs = test_df_year.info()

    # show the observed summary in the captured output if the assert fails
    print(obs)
    assert obs == exp


def test_as_pandas(test_df):
# test that `as_pandas()` returns the right columns
df = test_df.copy()
Expand Down