diff --git a/doc/conf.py b/doc/conf.py index 8f5e90ba7..ca79d67ce 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -112,7 +112,7 @@ #show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +# pygments_style = 'colorful' # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] @@ -318,5 +318,5 @@ def setup(app): # For the altairplot extension altairplot_links = {'editor': True, 'source': True, 'export': True} altairplot_vega_js_url = "https://cdn.jsdelivr.net/npm/vega@3.3" -altairplot_vegalite_js_url = "https://cdn.jsdelivr.net/npm/vega-lite@2.4" -altairplot_vegaembed_js_url = "https://cdn.jsdelivr.net/npm/vega-embed@3.14" +altairplot_vegalite_js_url = "https://cdn.jsdelivr.net/npm/vega-lite@2.6" +altairplot_vegaembed_js_url = "https://cdn.jsdelivr.net/npm/vega-embed@3.18" diff --git a/doc/index.rst b/doc/index.rst index d6953d452..96a2834c4 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -44,6 +44,7 @@ beautiful and effective visualizations with a minimal amount of code. user_guide/compound_charts user_guide/saving_charts user_guide/customization + user_guide/times_and_dates user_guide/faq user_guide/troubleshooting user_guide/renderers diff --git a/doc/user_guide/encoding.rst b/doc/user_guide/encoding.rst index e74f24d90..16f6b6bab 100644 --- a/doc/user_guide/encoding.rst +++ b/doc/user_guide/encoding.rst @@ -113,8 +113,8 @@ row :class:`Row` The row of a faceted plot :ref:`gallery_beckers_b .. _data-types: -Data Types -~~~~~~~~~~ +Encoding Data Types +~~~~~~~~~~~~~~~~~~~ The details of any mapping depend on the *type* of the data. Altair recognizes four main data types: diff --git a/doc/user_guide/times_and_dates.rst b/doc/user_guide/times_and_dates.rst new file mode 100644 index 000000000..97db1f112 --- /dev/null +++ b/doc/user_guide/times_and_dates.rst @@ -0,0 +1,167 @@ +.. currentmodule:: altair + +.. 
_user-guide-time: + +Times and Dates in Altair +========================= +Working with dates, times, and timezones is often one of the more challenging +aspects of data analysis. In Altair, the difficulties are compounded by the +fact that users are writing Python code, which outputs JSON-serialized +timestamps, which are interpreted by JavaScript, and then rendered by your +browser. At each of these steps, there are things that can go wrong, but +Altair and Vega-Lite do their best to ensure that dates are interpreted and +visualized in a way that limits surprises. + +.. note:: Warning about the Safari Browser + + The discussion below applies to all major browsers except Safari. The Safari + browser treats date encodings somewhat differently than other major browsers: + where Chrome, Firefox, and others interpret the dates output by Altair as + reflecting the user's local time, Safari treats them as + `Coordinated Universal Time (UTC)`_. The result is that when viewed in Safari, + visualized dates will differ from their inputs due to a time zone correction, + unless you explicitly use UTC (see :ref:`explicit-utc-time`). + +Altair and Pandas Datetimes +--------------------------- + +Altair is designed to work best with `Pandas timeseries`_. A standard date/time +column in a Pandas dataframe will be both interpreted and displayed as local +user time. For example, here is a dataset containing hourly temperatures +measured in Seattle: + +.. altair-plot:: + :output: repr + + import altair as alt + from vega_datasets import data + + temps = data.seattle_temps() + temps.head() + +We can see from the ``dtypes`` attribute that the times are encoded as a standard +64-bit datetime: + +.. altair-plot:: + :output: repr + + temps.dtypes + +We can use Altair to visualize this datetime data; for clarity in this +example, we'll limit ourselves to the first two weeks of data: + +..
altair-plot:: + + temps = temps[temps.date < '2010-01-15'] + + alt.Chart(temps).mark_line().encode( + x='date:T', + y='temp:Q' + ) + +(Notice that for date/time values we use the ``T`` to indicate a temporal +encoding: while this is optional for Pandas datetime input, it is good practice +to specify a type explicitly; see :ref:`data-types` for more discussion.) + +For date-time inputs like these, it can sometimes be useful to extract particular +time units (e.g. hours of the day, dates of the month, etc.). +In Altair, this can be done with a time unit transform, discussed in detail in +:ref:`user-guide-timeunit-transform`. +For example, we might decide we want a heatmap with hour of the day on the +x-axis, and day of the month on the y-axis: + +.. altair-plot:: + + alt.Chart(temps).mark_rect().encode( + alt.X('hoursminutes(date):O', title='hour of day'), + alt.Y('monthdate(date):O', title='date'), + alt.Color('temp:Q', title='temperature (F)') + ) + +Extracting time units within the spec can lead to much more flexible and compelling +charts. + +Specifying Time Zones +--------------------- +If you are viewing the above visualizations in a supported browser (see note +about Safari above), the times are both serialized and rendered in local time, +so that the ``January 1st 00:00:00`` row renders in the chart as ``00:00`` on +``January 1st``. + +In Altair, simple dates without an explicit timezone are treated as local time, +and in Vega-Lite, unless otherwise specified, times are rendered in the local +time of the browser that does the rendering. + +If you would like your dates to be time-zone aware, you can set the timezone +explicitly in the input dataframe. Since Seattle is in the ``US/Pacific`` +timezone, we can localize the timestamps in Pandas as follows: + +.. altair-plot:: + :output: repr + + temps['date_pacific'] = temps['date'].dt.tz_localize('US/Pacific') + temps.dtypes + +Notice that the timezone is now part of the Pandas datatype.
+If we repeat the above chart with this timezone-aware data, the result will +render **according to the timezone of the browser rendering it**: + +.. altair-plot:: + + alt.Chart(temps).mark_rect().encode( + alt.X('hoursminutes(date_pacific):O', title='hour of day'), + alt.Y('monthdate(date_pacific):O', title='date'), + alt.Color('temp:Q', title='temperature (F)') + ) + +If you are viewing this chart on a computer whose time zone is set to the west coast +of the US, it should appear identical to the first version. If you are rendering +the chart in any other timezone, it will render using a timezone correction +computed from the location set in your system. + +.. _explicit-utc-time: + +Using UTC Time +-------------- +This user-local rendering can sometimes be confusing, because it leads to the +same output being visualized differently by different users. +If you want timezone-aware data to appear the same to every user regardless of +location, the best approach is to adopt a standard timezone in which to render +the data. One commonly used standard is `Coordinated Universal Time (UTC)`_. +In Altair, any of the ``timeUnit`` binnings can be prefixed with ``utc`` in +order to extract UTC time units. + +Here is the above chart visualized in UTC time, which will render the same way +regardless of the system location: + +.. altair-plot:: + + alt.Chart(temps).mark_rect().encode( + alt.X('utchoursminutes(date_pacific):O', title='UTC hour of day'), + alt.Y('utcmonthdate(date_pacific):O', title='UTC date'), + alt.Color('temp:Q', title='temperature (F)') + ) + +To make your charts as portable as possible (even in browsers like Safari that +do not parse local times the way other browsers do), you can explicitly work +in UTC time, both on the Pandas side and on the Vega-Lite side: + + +..
altair-plot:: + + temps['date_utc'] = temps['date'].dt.tz_localize('UTC') + + alt.Chart(temps).mark_rect().encode( + alt.X('utchoursminutes(date_utc):O', title='hour of day'), + alt.Y('utcmonthdate(date_utc):O', title='date'), + alt.Color('temp:Q', title='temperature (F)') + ) + +This is somewhat less convenient than the default behavior for non-timezone-aware +data, in which both Pandas and Vega-Lite assume times are local (except in Safari), +but it gets around browser incompatibilities by explicitly working in UTC, which +gives similar results in all browsers. + + +.. _Coordinated Universal Time (UTC): https://en.wikipedia.org/wiki/Coordinated_Universal_Time +.. _Pandas timeseries: https://pandas.pydata.org/pandas-docs/stable/timeseries.html