Add an info() function and use in print(IamDataFrame) (#427)

IAMconsortium · Sep 23, 2020 · 015e8a6 · 015e8a6
1 parent ddc67f7
commit 015e8a6
Show file tree

Hide file tree

Showing 6 changed files with 126 additions and 1 deletion.
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -11,6 +11,7 @@ via getter and setter functions.
 
 ## Individual updates
 
+- [#427](https://github.com/IAMconsortium/pyam/pull/427) Add an `info()` function and use in `print(IamDataFrame)`
 - [#424](https://github.com/IAMconsortium/pyam/pull/424) Add a tutorial reading results from a GAMS model (via a gdx file).
 - [#420](https://github.com/IAMconsortium/pyam/pull/420) Add a `_data` object (implemented as a pandas.Series) to handle timeseries data internally.
 - [#418](https://github.com/IAMconsortium/pyam/pull/418) Read data from World Bank Open Data Catalogue as IamDataFrame.

diff --git a/doc/source/tutorials/pyam_first_steps.ipynb b/doc/source/tutorials/pyam_first_steps.ipynb
@@ -121,7 +121,25 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "As a first step, we show lists of all models, scenarios, regions, and the variables (including units) in the snapshot."
+    "As a first step, we show an overview of the `IamDataFrame` content by simply calling `df` (alternatively, you can use `print(df)` or [df.info()](https://pyam-iamc.readthedocs.io/en/stable/api/iamdataframe.html#pyam.IamDataFrame.info)).\n",
+    "\n",
+    "This function returns a concise (abbreviated) overview of the index dimensions and the qualitative/quantitative meta indicators (see an explanation of indicators below)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In the following cells, we display the lists of all models, scenarios, regions, and the variables (including units) in the snapshot."
    ]
   },
   {

diff --git a/pyam/__init__.py b/pyam/__init__.py
@@ -1,3 +1,5 @@
+import logging  # noqa: F401
+
 from pyam.core import *
 from pyam.utils import *
 from pyam.statistics import *

diff --git a/pyam/core.py b/pyam/core.py
@@ -44,6 +44,7 @@
     datetime_match,
     isstr,
     islistable,
+    print_list,
     META_IDX,
     YEAR_IDX,
     IAMC_IDX,
@@ -178,6 +179,48 @@ def __setitem__(self, key, value):
     def __len__(self):
         return self.data.__len__()
 
+    def __repr__(self):
+        """Return the overview text built by `info()` (default arguments)"""
+        return self.info()
+
+    def info(self, n=80, meta_rows=5, memory_usage=False):
+        """Return a summary of the object index dimensions and meta indicators
+
+        Parameters
+        ----------
+        n : int
+            The maximum line length
+        meta_rows : int
+            The maximum number of meta indicators printed
+        memory_usage : bool
+            If True, append a line with the combined memory usage (in bytes)
+            of the timeseries data and the meta table
+
+        Returns
+        -------
+        str
+            The multi-line summary text (also returned by `__repr__`)
+        """
+        # concatenate list of index dimensions and levels
+        info = f'{type(self)}\nIndex dimensions:\n'
+        # width of the name column: longest dimension name plus one space
+        c1 = max(len(i) for i in self._LONG_IDX) + 1
+        # width left for the levels after ` * `, the name column and `: `
+        c2 = n - c1 - 5
+        info += '\n'.join(
+            [f' * {i:{c1}}: {print_list(get_index_levels(self._data, i), c2)}'
+             for i in self._LONG_IDX])
+
+        # concatenate list of (head of) meta indicators and levels/values
+        def print_meta_row(m, t, lst):
+            # `t` is a dtype object: measure its printed name, because `len()`
+            # of a numpy dtype is its number of fields, not the name length
+            t = str(t)
+            # 7 = three leading spaces + ` (` + `) ` around the type name
+            _lst = print_list(lst, n - len(m) - len(t) - 7)
+            return f'   {m} ({t}) {_lst}'
+
+        info += '\nMeta indicators:\n'
+        info += '\n'.join(
+            [print_meta_row(m, t, self.meta[m].unique())
+             for m, t in zip(self.meta.columns[0:meta_rows],
+                             self.meta.dtypes[0:meta_rows])])
+        # print `...` if more than `meta_rows` columns
+        if len(self.meta.columns) > meta_rows:
+            info += '\n * ...'
+
+        # add info on size (optional)
+        if memory_usage:
+            size = self._data.memory_usage() + sum(self.meta.memory_usage())
+            info += f'\nMemory usage: {size} bytes'
+
+        return info
+
     def _execute_run_control(self):
         for module_block in run_control()['exec']:
             fname = module_block['file']

diff --git a/pyam/utils.py b/pyam/utils.py
@@ -505,6 +505,46 @@ def datetime_match(data, dts):
     return data.isin(dts)
 
 
+def print_list(x, n):
+    """Return a printable string of a list shortened to n characters
+
+    The string shows the first item, as many following items as fit, the
+    last item and the number of items in parentheses; items that do not fit
+    are replaced by `...`.
+
+    Parameters
+    ----------
+    x : iterable
+        The items to be listed, each item is converted using `str()`
+    n : int
+        The targeted maximum line length
+
+    Returns
+    -------
+    str
+    """
+    x = list(map(str, x))
+
+    # an empty list has no first item to show, so only write the count
+    if not x:
+        return '(0)'
+
+    # write number of elements, subtract count added at end from line width
+    count = f' ({len(x)})'
+    n -= len(count)
+
+    # if not enough space to write first item, write shortest sensible line
+    if len(x[0]) > n - 5:
+        return '...' + count
+
+    # if only one item in list
+    if len(x) == 1:
+        return f'{x[0]} (1)'
+
+    # add first item
+    lst = f'{x[0]}, '
+    n -= len(lst)
+
+    # if possible, add last item before number of elements
+    if len(x[-1]) + 4 > n:
+        return lst + '...' + count
+    else:
+        count = f'{x[-1]}{count}'
+        # reserve the width of the last item itself (the length of the
+        # string, not of a one-element set wrapped around it)
+        n -= len(x[-1]) + 3
+
+    # iterate over remaining entries until line is full
+    for i in x[1:-1]:
+        if len(i) + 6 <= n:
+            lst += f'{i}, '
+            n -= len(i) + 2
+        else:
+            lst += '... '
+            break
+
+    return lst + count
+
+
 def to_int(x, index=False):
     """Formatting series or timeseries columns to int and checking validity
 

diff --git a/tests/test_core.py b/tests/test_core.py
@@ -119,6 +119,27 @@ def test_init_empty_message(test_pd_df, caplog):
     assert caplog.records[message_idx].levelno == logging.WARNING
 
 
+def test_print(test_df_year):
+    """Check that `info()` (shown by `print(IamDataFrame)`) returns as expected"""
+    # one entry per line of the summary, in the order they are written
+    expected_lines = [
+        "<class 'pyam.core.IamDataFrame'>",
+        'Index dimensions:',
+        ' * model    : model_a (1)',
+        ' * scenario : scen_a, scen_b (2)',
+        ' * region   : World (1)',
+        ' * variable : Primary Energy, Primary Energy|Coal (2)',
+        ' * unit     : EJ/yr (1)',
+        ' * year     : 2005, 2010 (2)',
+        'Meta indicators:',
+        '   exclude (bool) False (1)',
+        '   number (int64) 1, 2 (2)',
+        '   string (object) foo, nan (2)',
+    ]
+    expected = '\n'.join(expected_lines)
+
+    observed = test_df_year.info()
+
+    # show the observed summary in the captured output if the test fails
+    print(observed)
+    assert observed == expected
+
+
 def test_as_pandas(test_df):
     # test that `as_pandas()` returns the right columns
     df = test_df.copy()