Skip to content

Commit

Permalink
Merge remote-tracking branch 'altair-viz/master' into vega#588-geopandas
Browse files Browse the repository at this point in the history
  • Loading branch information
iliatimofeev committed Jun 9, 2018
2 parents 8a85f64 + 1d87bf6 commit 9de208b
Show file tree
Hide file tree
Showing 65 changed files with 985 additions and 378 deletions.
30 changes: 26 additions & 4 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# Altair Change Log

## Version 2.1.0 (Unreleased):
## Version 2.2.0 (Unreleased):

- update vega & vega-embed versions in html output (#838)
## Version 2.1.0 (Released June 6, 2018):

- update vega-lite to version 2.4.3 (#836)
### Enhancements

- Only API change is internal: ``alt.MarkProperties`` is now ``alt.MarkConfig``
- add a ``scale_factor`` argument to ``chart.save()`` to allow the
size/resolution of saved figures to be adjusted. (#918)

- add an ``add_selection()`` method to add selections to charts (#832)

Expand All @@ -16,6 +17,27 @@
- allow multiple fields to be passed to encodings such as ``tooltip``
and ``detail`` (#830)

- make ``timeUnit`` specifications more succinct, by parsing them in a manner
similar to aggregates (#866)

- make ``to_json()`` and ``to_csv()`` have deterministic filenames, so in json
  mode a single dataset will lead to a single on-disk serialization (#862)

### Breaking Changes

- make ``data`` the first argument for all compound chart types to match the
semantics of ``alt.Chart`` (this includes ``alt.FacetChart``,
``alt.LayerChart``, ``alt.RepeatChart``, ``alt.VConcatChart``, and
``alt.HConcatChart``) (#895).

- update vega-lite to version 2.4.3 (#836)

- Only API change is internal: ``alt.MarkProperties`` is now ``alt.MarkConfig``

### Maintenance

- update vega to v3.3 & vega-embed to v3.11 in html output & colab renderer (#838)


## Version 2.0.0: May 2, 2018

Expand Down
2 changes: 1 addition & 1 deletion RELEASING.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

7. build and publish docs (Requires write-access to altair-viz/altair-viz.github.io)

cd docs
cd doc
make clean
make html
bash sync_website.sh
Expand Down
2 changes: 1 addition & 1 deletion altair/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# flake8: noqa
__version__ = '2.1.0dev0'
__version__ = '2.2.0dev0'

from .vegalite import *

Expand Down
3 changes: 2 additions & 1 deletion altair/sphinxext/altairgallery.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
Many draw upon sample datasets compiled by the `Vega <https://vega.github.io/vega/>`_ project. To access them yourself, install `vega_datasets <https://github.com/altair-viz/vega_datasets>`_.
.. code-block::
.. code-block:: none
$ pip install vega_datasets
Expand Down Expand Up @@ -244,6 +244,7 @@ def main(app):
'Histograms': [],
'Maps': [],
'Interactive Charts': [],
'Case Studies': [],
'Other Charts': []
})
for d in examples:
Expand Down
217 changes: 123 additions & 94 deletions altair/utils/core.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""
Utility routines
"""
import re
import warnings
import collections
from copy import deepcopy
import itertools
import re
import sys
import traceback
import warnings

import six
import pandas as pd
Expand All @@ -28,6 +29,31 @@
INV_TYPECODE_MAP = {v: k for k, v in TYPECODE_MAP.items()}


# Valid aggregate operation names, taken from vega-lite version 2.4.3.
AGGREGATES = [
    'argmax', 'argmin', 'average', 'count', 'distinct', 'max', 'mean',
    'median', 'min', 'missing', 'q1', 'q3', 'ci0', 'ci1', 'stderr',
    'stdev', 'stdevp', 'sum', 'valid', 'values', 'variance', 'variancep',
]

# Valid timeUnit names, taken from vega-lite version 2.4.3.
# Every local-time unit has a UTC twin obtained by prefixing "utc";
# the UTC variants are listed first to match the upstream ordering.
_LOCAL_TIMEUNITS = [
    "year", "quarter", "month", "day", "date", "hours", "minutes",
    "seconds", "milliseconds",
    "yearquarter", "yearquartermonth", "yearmonth", "yearmonthdate",
    "yearmonthdatehours", "yearmonthdatehoursminutes",
    "yearmonthdatehoursminutesseconds",
    "quartermonth", "monthdate", "hoursminutes", "hoursminutesseconds",
    "minutesseconds", "secondsmilliseconds",
]
TIMEUNITS = ["utc" + unit for unit in _LOCAL_TIMEUNITS] + _LOCAL_TIMEUNITS


def infer_vegalite_type(data):
"""
From an array-like input, infer the correct vega typecode
Expand Down Expand Up @@ -64,7 +90,7 @@ def sanitize_dataframe(df):
* Convert categoricals to strings.
* Convert np.bool_ dtypes to Python bool objects
* Convert np.int dtypes to Python int objects
* Convert floats to objects and replace NaNs by None.
* Convert floats to objects and replace NaNs/infs with None.
* Convert DateTime dtypes into appropriate string representations
"""
df = df.copy()
Expand All @@ -88,17 +114,19 @@ def to_list_if_array(val):
elif str(dtype) == 'bool':
# convert numpy bools to objects; np.bool is not JSON serializable
df[col_name] = df[col_name].astype(object)
elif np.issubdtype(dtype, np.integer):
# convert integers to objects; np.int is not JSON serializable
df[col_name] = df[col_name].astype(object)
elif np.issubdtype(dtype, np.floating):
# For floats, convert nan->None: np.float is not JSON serializable
col = df[col_name].astype(object)
df[col_name] = col.where(col.notnull(), None)
elif str(dtype).startswith('datetime'):
# Convert datetimes to strings
# astype(str) will choose the appropriate resolution
df[col_name] = df[col_name].astype(str).replace('NaT', '')
elif np.issubdtype(dtype, np.integer):
# convert integers to objects; np.int is not JSON serializable
df[col_name] = df[col_name].astype(object)
elif np.issubdtype(dtype, np.floating):
# For floats, convert to Python float: np.float is not JSON serializable
# Also convert NaN/inf values to null, as they are not JSON serializable
col = df[col_name]
bad_values = col.isnull() | np.isinf(col)
df[col_name] = col.astype(object).where(~bad_values, None)
elif dtype == object:
# Convert numpy arrays saved as objects to lists
# Arrays are not JSON serializable
Expand All @@ -107,9 +135,9 @@ def to_list_if_array(val):
return df


def _parse_shorthand(shorthand):
"""
Parse the shorthand expression for aggregation, field, and type.
def parse_shorthand(shorthand, data=None, parse_aggregates=True,
parse_timeunits=True, parse_types=True):
"""General tool to parse shorthand values
These are of the form:
Expand All @@ -118,115 +146,116 @@ def _parse_shorthand(shorthand):
- "average(col_name)"
- "average(col_name):O"
Optionally, a dataframe may be supplied, from which the type
will be inferred if not specified in the shorthand.
Parameters
----------
shorthand: str
Shorthand string
shorthand : dict or string
The shorthand representation to be parsed
data : DataFrame, optional
If specified and of type DataFrame, then use these values to infer the
column type if not provided by the shorthand.
parse_aggregates : boolean
If True (default), then parse aggregate functions within the shorthand.
parse_timeunits : boolean
If True (default), then parse timeUnits from within the shorthand
parse_types : boolean
If True (default), then parse typecodes within the shorthand
Returns
-------
D : dict
Dictionary containing the field, aggregate, and typecode
"""
if not shorthand:
return {}
attrs : dict
a dictionary of attributes extracted from the shorthand
# List taken from vega-lite v2 AggregateOp
valid_aggregates = ["argmax", "argmin", "average", "count", "distinct",
"max", "mean", "median", "min", "missing", "q1", "q3",
"ci0", "ci1", "stderr", "stdev", "stdevp", "sum",
"valid", "values", "variance", "variancep"]
valid_typecodes = list(TYPECODE_MAP) + list(INV_TYPECODE_MAP)
Examples
--------
>>> data = pd.DataFrame({'foo': ['A', 'B', 'A', 'B'],
... 'bar': [1, 2, 3, 4]})
# build regular expressions
units = dict(field='(?P<field>.*)',
type='(?P<type>{0})'.format('|'.join(valid_typecodes)),
count='(?P<aggregate>count)',
aggregate='(?P<aggregate>{0})'.format('|'.join(valid_aggregates)))
patterns = [r'{count}\(\)',
r'{count}\(\):{type}',
r'{aggregate}\({field}\):{type}',
r'{aggregate}\({field}\)',
r'{field}:{type}',
r'{field}']
regexps = (re.compile('\A' + p.format(**units) + '\Z', re.DOTALL)
for p in patterns)
>>> parse_shorthand('name') == {'field': 'name'}
True
# find matches depending on valid fields passed
match = next(exp.match(shorthand).groupdict() for exp in regexps
if exp.match(shorthand))
>>> parse_shorthand('name:Q') == {'field': 'name', 'type': 'quantitative'}
True
# Handle short form of the type expression
type_ = match.get('type', None)
if type_:
match['type'] = INV_TYPECODE_MAP.get(type_, type_)
>>> parse_shorthand('average(col)') == {'aggregate': 'average', 'field': 'col'}
True
# counts are quantitative by default
if match == {'aggregate': 'count'}:
match['type'] = 'quantitative'
>>> parse_shorthand('foo:O') == {'field': 'foo', 'type': 'ordinal'}
True
return match
>>> parse_shorthand('min(foo):Q') == {'aggregate': 'min', 'field': 'foo', 'type': 'quantitative'}
True
>>> parse_shorthand('month(col)') == {'field': 'col', 'timeUnit': 'month', 'type': 'temporal'}
True
def parse_shorthand(shorthand, data=None):
"""Parse the shorthand expression for aggregation, field, and type.
>>> parse_shorthand('year(col):O') == {'field': 'col', 'timeUnit': 'year', 'type': 'ordinal'}
True
These are of the form:
>>> parse_shorthand('foo', data) == {'field': 'foo', 'type': 'nominal'}
True
- "col_name"
- "col_name:O"
- "average(col_name)"
- "average(col_name):O"
>>> parse_shorthand('bar', data) == {'field': 'bar', 'type': 'quantitative'}
True
Optionally, a dataframe may be supplied, from which the type
will be inferred if not specified in the shorthand.
>>> parse_shorthand('bar:O', data) == {'field': 'bar', 'type': 'ordinal'}
True
Parameters
----------
shorthand: str
Shorthand string of the form "agg(col):typ"
data : pd.DataFrame (optional)
Dataframe from which to infer types
>>> parse_shorthand('sum(bar)', data) == {'aggregate': 'sum', 'field': 'bar', 'type': 'quantitative'}
True
Returns
-------
D : dict
Dictionary which always contains a 'field' key, and additionally
contains an 'aggregate' and 'type' key depending on the input.
>>> parse_shorthand('count()', data) == {'aggregate': 'count', 'type': 'quantitative'}
True
"""
if not shorthand:
return {}

Examples
--------
>>> data = pd.DataFrame({'foo': ['A', 'B', 'A', 'B'],
... 'bar': [1, 2, 3, 4]})
valid_typecodes = list(TYPECODE_MAP) + list(INV_TYPECODE_MAP)

units = dict(field='(?P<field>.*)',
type='(?P<type>{0})'.format('|'.join(valid_typecodes)),
count='(?P<aggregate>count)',
aggregate='(?P<aggregate>{0})'.format('|'.join(AGGREGATES)),
timeUnit='(?P<timeUnit>{0})'.format('|'.join(TIMEUNITS)))

>>> parse_shorthand('name')
{'field': 'name'}
patterns = []

>>> parse_shorthand('average(col)') # doctest: +SKIP
{'aggregate': 'average', 'field': 'col'}
if parse_aggregates:
patterns.extend([r'{count}\(\)',
r'{aggregate}\({field}\)'])
if parse_timeunits:
patterns.extend([r'{timeUnit}\({field}\)'])

>>> parse_shorthand('foo:O') # doctest: +SKIP
{'field': 'foo', 'type': 'ordinal'}
patterns.extend([r'{field}'])

>>> parse_shorthand('min(foo):Q') # doctest: +SKIP
{'aggregate': 'min', 'field': 'foo', 'type': 'quantitative'}
if parse_types:
patterns = list(itertools.chain(*((p + ':{type}', p) for p in patterns)))

>>> parse_shorthand('foo', data) # doctest: +SKIP
{'field': 'foo', 'type': 'nominal'}
regexps = (re.compile('\A' + p.format(**units) + '\Z', re.DOTALL)
for p in patterns)

# find matches depending on valid fields passed
if isinstance(shorthand, dict):
attrs = shorthand
else:
attrs = next(exp.match(shorthand).groupdict() for exp in regexps
if exp.match(shorthand))

>>> parse_shorthand('bar', data) # doctest: +SKIP
{'field': 'bar', 'type': 'quantitative'}
# Handle short form of the type expression
if 'type' in attrs:
attrs['type'] = INV_TYPECODE_MAP.get(attrs['type'], attrs['type'])

>>> parse_shorthand('bar:O', data) # doctest: +SKIP
{'field': 'bar', 'type': 'ordinal'}
# counts are quantitative by default
if attrs == {'aggregate': 'count'}:
attrs['type'] = 'quantitative'

>>> parse_shorthand('sum(bar)', data) # doctest: +SKIP
{'aggregate': 'sum', 'field': 'bar', 'type': 'quantitative'}
# times are temporal by default
if 'timeUnit' in attrs and 'type' not in attrs:
attrs['type'] = 'temporal'

>>> parse_shorthand('count()', data) # doctest: +SKIP
{'aggregate': 'count', 'type': 'quantitative'}
"""
attrs = _parse_shorthand(shorthand)
# if data is specified and type is not, infer type from data
if isinstance(data, pd.DataFrame) and 'type' not in attrs:
if 'field' in attrs and attrs['field'] in data.columns:
attrs['type'] = infer_vegalite_type(data[attrs['field']])
Expand Down
Loading

0 comments on commit 9de208b

Please sign in to comment.