Skip to content

Commit

Permalink
Merge remote-tracking branch 'altair-viz/master' into vega#588-geopandas
Browse files Browse the repository at this point in the history
  • Loading branch information
iliatimofeev committed Jun 9, 2018
2 parents 8a85f64 + 1d87bf6 commit 9de208b
Show file tree
Hide file tree
Showing 65 changed files with 985 additions and 378 deletions.
30 changes: 26 additions & 4 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# Altair Change Log

## Version 2.1.0 (Unreleased):
## Version 2.2.0 (Unreleased):

- update vega & vega-embed versions in html output (#838)
## Version 2.1.0 (Released June 6, 2018):

- update vega-lite to version 2.4.3 (#836)
### Enhancements

- Only API change is internal: ``alt.MarkProperties`` is now ``alt.MarkConfig``
- add a ``scale_factor`` argument to ``chart.save()`` to allow the
size/resolution of saved figures to be adjusted. (#918)

- add an ``add_selection()`` method to add selections to charts (#832)

Expand All @@ -16,6 +17,27 @@
- allow multiple fields to be passed to encodings such as ``tooltip``
and ``detail`` (#830)

- make ``timeUnit`` specifications more succinct, by parsing them in a manner
similar to aggregates (#866)

- make ``to_json()`` and ``to_csv()`` have deterministic filenames, so in json
  mode a single dataset will lead to a single on-disk serialization (#862)

### Breaking Changes

- make ``data`` the first argument for all compound chart types to match the
semantics of ``alt.Chart`` (this includes ``alt.FacetChart``,
``alt.LayerChart``, ``alt.RepeatChart``, ``alt.VConcatChart``, and
``alt.HConcatChart``) (#895).

- update vega-lite to version 2.4.3 (#836)

- Only API change is internal: ``alt.MarkProperties`` is now ``alt.MarkConfig``

### Maintenance

- update vega to v3.3 & vega-embed to v3.11 in html output & colab renderer (#838)


## Version 2.0.0: May 2, 2018

Expand Down
2 changes: 1 addition & 1 deletion RELEASING.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

7. build and publish docs (Requires write-access to altair-viz/altair-viz.github.io)

cd docs
cd doc
make clean
make html
bash sync_website.sh
Expand Down
2 changes: 1 addition & 1 deletion altair/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# flake8: noqa
__version__ = '2.1.0dev0'
__version__ = '2.2.0dev0'

from .vegalite import *

Expand Down
3 changes: 2 additions & 1 deletion altair/sphinxext/altairgallery.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
Many draw upon sample datasets compiled by the `Vega <https://vega.github.io/vega/>`_ project. To access them yourself, install `vega_datasets <https://github.com/altair-viz/vega_datasets>`_.
.. code-block::
.. code-block:: none
$ pip install vega_datasets
Expand Down Expand Up @@ -244,6 +244,7 @@ def main(app):
'Histograms': [],
'Maps': [],
'Interactive Charts': [],
'Case Studies': [],
'Other Charts': []
})
for d in examples:
Expand Down
217 changes: 123 additions & 94 deletions altair/utils/core.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""
Utility routines
"""
import re
import warnings
import collections
from copy import deepcopy
import itertools
import re
import sys
import traceback
import warnings

import six
import pandas as pd
Expand All @@ -28,6 +29,31 @@
INV_TYPECODE_MAP = {v: k for k, v in TYPECODE_MAP.items()}


# Valid aggregate operation names, taken from vega-lite version 2.4.3.
AGGREGATES = [
    'argmax', 'argmin', 'average', 'count', 'distinct', 'max', 'mean',
    'median', 'min', 'missing', 'q1', 'q3', 'ci0', 'ci1', 'stderr',
    'stdev', 'stdevp', 'sum', 'valid', 'values', 'variance', 'variancep',
]

# Valid timeUnit names, taken from vega-lite version 2.4.3.
# Every local-time unit has a UTC twin obtained by prefixing "utc";
# the UTC variants are listed first to match the upstream ordering.
_LOCAL_TIMEUNITS = [
    "year", "quarter", "month", "day", "date", "hours", "minutes",
    "seconds", "milliseconds",
    "yearquarter", "yearquartermonth", "yearmonth", "yearmonthdate",
    "yearmonthdatehours", "yearmonthdatehoursminutes",
    "yearmonthdatehoursminutesseconds",
    "quartermonth", "monthdate", "hoursminutes", "hoursminutesseconds",
    "minutesseconds", "secondsmilliseconds",
]
TIMEUNITS = ["utc" + unit for unit in _LOCAL_TIMEUNITS] + _LOCAL_TIMEUNITS


def infer_vegalite_type(data):
"""
From an array-like input, infer the correct vega typecode
Expand Down Expand Up @@ -64,7 +90,7 @@ def sanitize_dataframe(df):
* Convert categoricals to strings.
* Convert np.bool_ dtypes to Python bool objects
* Convert np.int dtypes to Python int objects
* Convert floats to objects and replace NaNs by None.
* Convert floats to objects and replace NaNs/infs with None.
* Convert DateTime dtypes into appropriate string representations
"""
df = df.copy()
Expand All @@ -88,17 +114,19 @@ def to_list_if_array(val):
elif str(dtype) == 'bool':
# convert numpy bools to objects; np.bool is not JSON serializable
df[col_name] = df[col_name].astype(object)
elif np.issubdtype(dtype, np.integer):
# convert integers to objects; np.int is not JSON serializable
df[col_name] = df[col_name].astype(object)
elif np.issubdtype(dtype, np.floating):
# For floats, convert nan->None: np.float is not JSON serializable
col = df[col_name].astype(object)
df[col_name] = col.where(col.notnull(), None)
elif str(dtype).startswith('datetime'):
# Convert datetimes to strings
# astype(str) will choose the appropriate resolution
df[col_name] = df[col_name].astype(str).replace('NaT', '')
elif np.issubdtype(dtype, np.integer):
# convert integers to objects; np.int is not JSON serializable
df[col_name] = df[col_name].astype(object)
elif np.issubdtype(dtype, np.floating):
# For floats, convert to Python float: np.float is not JSON serializable
# Also convert NaN/inf values to null, as they are not JSON serializable
col = df[col_name]
bad_values = col.isnull() | np.isinf(col)
df[col_name] = col.astype(object).where(~bad_values, None)
elif dtype == object:
# Convert numpy arrays saved as objects to lists
# Arrays are not JSON serializable
Expand All @@ -107,9 +135,9 @@ def to_list_if_array(val):
return df


def _parse_shorthand(shorthand):
"""
Parse the shorthand expression for aggregation, field, and type.
def parse_shorthand(shorthand, data=None, parse_aggregates=True,
parse_timeunits=True, parse_types=True):
"""General tool to parse shorthand values
These are of the form:
Expand All @@ -118,115 +146,116 @@ def _parse_shorthand(shorthand):
- "average(col_name)"
- "average(col_name):O"
Optionally, a dataframe may be supplied, from which the type
will be inferred if not specified in the shorthand.
Parameters
----------
shorthand: str
Shorthand string
shorthand : dict or string
The shorthand representation to be parsed
data : DataFrame, optional
If specified and of type DataFrame, then use these values to infer the
column type if not provided by the shorthand.
parse_aggregates : boolean
If True (default), then parse aggregate functions within the shorthand.
parse_timeunits : boolean
If True (default), then parse timeUnits from within the shorthand
parse_types : boolean
If True (default), then parse typecodes within the shorthand
Returns
-------
D : dict
Dictionary containing the field, aggregate, and typecode
"""
if not shorthand:
return {}
attrs : dict
a dictionary of attributes extracted from the shorthand
# List taken from vega-lite v2 AggregateOp
valid_aggregates = ["argmax", "argmin", "average", "count", "distinct",
"max", "mean", "median", "min", "missing", "q1", "q3",
"ci0", "ci1", "stderr", "stdev", "stdevp", "sum",
"valid", "values", "variance", "variancep"]
valid_typecodes = list(TYPECODE_MAP) + list(INV_TYPECODE_MAP)
Examples
--------
>>> data = pd.DataFrame({'foo': ['A', 'B', 'A', 'B'],
... 'bar': [1, 2, 3, 4]})
# build regular expressions
units = dict(field='(?P<field>.*)',
type='(?P<type>{0})'.format('|'.join(valid_typecodes)),
count='(?P<aggregate>count)',
aggregate='(?P<aggregate>{0})'.format('|'.join(valid_aggregates)))
patterns = [r'{count}\(\)',
r'{count}\(\):{type}',
r'{aggregate}\({field}\):{type}',
r'{aggregate}\({field}\)',
r'{field}:{type}',
r'{field}']
regexps = (re.compile('\A' + p.format(**units) + '\Z', re.DOTALL)
for p in patterns)
>>> parse_shorthand('name') == {'field': 'name'}
True
# find matches depending on valid fields passed
match = next(exp.match(shorthand).groupdict() for exp in regexps
if exp.match(shorthand))
>>> parse_shorthand('name:Q') == {'field': 'name', 'type': 'quantitative'}
True
# Handle short form of the type expression
type_ = match.get('type', None)
if type_:
match['type'] = INV_TYPECODE_MAP.get(type_, type_)
>>> parse_shorthand('average(col)') == {'aggregate': 'average', 'field': 'col'}
True
# counts are quantitative by default
if match == {'aggregate': 'count'}:
match['type'] = 'quantitative'
>>> parse_shorthand('foo:O') == {'field': 'foo', 'type': 'ordinal'}
True
return match
>>> parse_shorthand('min(foo):Q') == {'aggregate': 'min', 'field': 'foo', 'type': 'quantitative'}
True
>>> parse_shorthand('month(col)') == {'field': 'col', 'timeUnit': 'month', 'type': 'temporal'}
True
def parse_shorthand(shorthand, data=None):
"""Parse the shorthand expression for aggregation, field, and type.
>>> parse_shorthand('year(col):O') == {'field': 'col', 'timeUnit': 'year', 'type': 'ordinal'}
True
These are of the form:
>>> parse_shorthand('foo', data) == {'field': 'foo', 'type': 'nominal'}
True
- "col_name"
- "col_name:O"
- "average(col_name)"
- "average(col_name):O"
>>> parse_shorthand('bar', data) == {'field': 'bar', 'type': 'quantitative'}
True
Optionally, a dataframe may be supplied, from which the type
will be inferred if not specified in the shorthand.
>>> parse_shorthand('bar:O', data) == {'field': 'bar', 'type': 'ordinal'}
True
Parameters
----------
shorthand: str
Shorthand string of the form "agg(col):typ"
data : pd.DataFrame (optional)
Dataframe from which to infer types
>>> parse_shorthand('sum(bar)', data) == {'aggregate': 'sum', 'field': 'bar', 'type': 'quantitative'}
True
Returns
-------
D : dict
Dictionary which always contains a 'field' key, and additionally
contains an 'aggregate' and 'type' key depending on the input.
>>> parse_shorthand('count()', data) == {'aggregate': 'count', 'type': 'quantitative'}
True
"""
if not shorthand:
return {}

Examples
--------
>>> data = pd.DataFrame({'foo': ['A', 'B', 'A', 'B'],
... 'bar': [1, 2, 3, 4]})
valid_typecodes = list(TYPECODE_MAP) + list(INV_TYPECODE_MAP)

units = dict(field='(?P<field>.*)',
type='(?P<type>{0})'.format('|'.join(valid_typecodes)),
count='(?P<aggregate>count)',
aggregate='(?P<aggregate>{0})'.format('|'.join(AGGREGATES)),
timeUnit='(?P<timeUnit>{0})'.format('|'.join(TIMEUNITS)))

>>> parse_shorthand('name')
{'field': 'name'}
patterns = []

>>> parse_shorthand('average(col)') # doctest: +SKIP
{'aggregate': 'average', 'field': 'col'}
if parse_aggregates:
patterns.extend([r'{count}\(\)',
r'{aggregate}\({field}\)'])
if parse_timeunits:
patterns.extend([r'{timeUnit}\({field}\)'])

>>> parse_shorthand('foo:O') # doctest: +SKIP
{'field': 'foo', 'type': 'ordinal'}
patterns.extend([r'{field}'])

>>> parse_shorthand('min(foo):Q') # doctest: +SKIP
{'aggregate': 'min', 'field': 'foo', 'type': 'quantitative'}
if parse_types:
patterns = list(itertools.chain(*((p + ':{type}', p) for p in patterns)))

>>> parse_shorthand('foo', data) # doctest: +SKIP
{'field': 'foo', 'type': 'nominal'}
regexps = (re.compile('\A' + p.format(**units) + '\Z', re.DOTALL)
for p in patterns)

# find matches depending on valid fields passed
if isinstance(shorthand, dict):
attrs = shorthand
else:
attrs = next(exp.match(shorthand).groupdict() for exp in regexps
if exp.match(shorthand))

>>> parse_shorthand('bar', data) # doctest: +SKIP
{'field': 'bar', 'type': 'quantitative'}
# Handle short form of the type expression
if 'type' in attrs:
attrs['type'] = INV_TYPECODE_MAP.get(attrs['type'], attrs['type'])

>>> parse_shorthand('bar:O', data) # doctest: +SKIP
{'field': 'bar', 'type': 'ordinal'}
# counts are quantitative by default
if attrs == {'aggregate': 'count'}:
attrs['type'] = 'quantitative'

>>> parse_shorthand('sum(bar)', data) # doctest: +SKIP
{'aggregate': 'sum', 'field': 'bar', 'type': 'quantitative'}
# times are temporal by default
if 'timeUnit' in attrs and 'type' not in attrs:
attrs['type'] = 'temporal'

>>> parse_shorthand('count()', data) # doctest: +SKIP
{'aggregate': 'count', 'type': 'quantitative'}
"""
attrs = _parse_shorthand(shorthand)
# if data is specified and type is not, infer type from data
if isinstance(data, pd.DataFrame) and 'type' not in attrs:
if 'field' in attrs and attrs['field'] in data.columns:
attrs['type'] = infer_vegalite_type(data[attrs['field']])
Expand Down
Loading

0 comments on commit 9de208b

Please sign in to comment.