Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support extracting transformed chart data using VegaFusion #3081

Merged
merged 23 commits into from
Jun 14, 2023
Merged
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
67b44da
Port transformed_data functionality from VegaFusion
jonmmease Jun 7, 2023
2993959
Add initial transformed_data tests
jonmmease Jun 8, 2023
3ae6c7d
skip black formatting for pytest.mark.parametrize
jonmmease Jun 8, 2023
eed32e9
Test exclude flag to transformed_data
jonmmease Jun 9, 2023
6f61bad
chart.transformed_data -> chart._transformed_data
jonmmease Jun 9, 2023
2be1f64
Add VegaFusion as dev dependency
jonmmease Jun 9, 2023
07c5a00
Add better error message when VegaFusion is not installed
jonmmease Jun 9, 2023
4360cf8
Merge remote-tracking branch 'origin/master' into jonmmease/transform…
jonmmease Jun 9, 2023
f0b26ea
Move import
jonmmease Jun 10, 2023
b48f8d3
move import
jonmmease Jun 10, 2023
75cf958
Docstring update
jonmmease Jun 10, 2023
a46ce1b
Make utils.transformed_data internal, use absolute imports
jonmmease Jun 10, 2023
48f802c
Reword docstring
jonmmease Jun 10, 2023
dfa18bc
Merge branch 'jonmmease/transformed_data' of github.com:altair-viz/al…
jonmmease Jun 10, 2023
280eb0f
Remove magic, use "view" instead of chart or mark
jonmmease Jun 10, 2023
aabf5d6
Reword
jonmmease Jun 10, 2023
16250fd
Remove incorrect comment
jonmmease Jun 10, 2023
8ab1dce
black
jonmmease Jun 10, 2023
6f43d6b
Use DataFrameLike protocol for the transformed_data signature
jonmmease Jun 10, 2023
a738408
Add NotImplementedError for RepeatChart
jonmmease Jun 10, 2023
88fceb5
Use Chart._get_name to name subcharts
jonmmease Jun 10, 2023
1416f4d
Protocol is available in Python 3.8
jonmmease Jun 12, 2023
c665e8f
Make DataFrameLike private for now
jonmmease Jun 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add initial transformed_data tests
  • Loading branch information
jonmmease committed Jun 8, 2023
commit 2993959aa1ad4424a33b11d5d06a6e346372a7fd
102 changes: 102 additions & 0 deletions tests/test_transformed_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
from altair.utils.execeval import eval_block
from tests import examples_methods_syntax
import pkgutil
import pytest


# Parametrized cases for example charts whose transformed data is checked as
# a single dataset. Each tuple is:
#   (example filename, expected row count, column names that must be present)
@pytest.mark.parametrize("filename,rows,cols", [
("annual_weather_heatmap.py", 366, ["monthdate_date_end", "max_temp_max"]),
("anscombe_plot.py", 44, ["Series", "X", "Y"]),
("bar_chart_sorted.py", 6, ["site", "sum_yield"]),
("bar_chart_trellis_compact.py", 27, ["p", "p_end"]),
("beckers_barley_trellis_plot.py", 120, ["year", "site"]),
("beckers_barley_wrapped_facet.py", 120, ["site", "median_yield"]),
("bump_chart.py", 100, ["rank", "yearmonth_date"]),
("comet_chart.py", 120, ["variety", "delta"]),
("connected_scatterplot.py", 55, ["miles", "gas"]),
("diverging_stacked_bar_chart.py", 40, ["value", "percentage_start"]),
("donut_chart.py", 6, ["value_start", "value_end"]),
("gapminder_bubble_plot.py", 187, ["income", "population"]),
("grouped_bar_chart2.py", 9, ["Group", "Value_start"]),
("hexbins.py", 84, ["xFeaturePos", "mean_temp_max"]),
("histogram_heatmap.py", 378, ["bin_maxbins_40_Rotten_Tomatoes_Rating", "__count"]),
("histogram_scatterplot.py", 64, ["bin_maxbins_10_Rotten_Tomatoes_Rating", "__count"]),
("interactive_legend.py", 1708, ["sum_count_start", "series"]),
("iowa_electricity.py", 51, ["net_generation_start", "year"]),
("isotype.py", 37, ["animal", "x"]),
("isotype_grid.py", 100, ["row", "col"]),
("lasagna_plot.py", 492, ["yearmonthdate_date", "sum_price"]),
("layered_area_chart.py", 51, ["source", "net_generation"]),
("layered_bar_chart.py", 51, ["source", "net_generation"]),
("layered_histogram.py", 113, ["bin_maxbins_100_Measurement"]),
("line_chart_with_cumsum.py", 52, ["cumulative_wheat"]),
("line_percent.py", 30, ["sex", "perc"]),
("line_with_log_scale.py", 15, ["year", "sum_people"]),
("multifeature_scatter_plot.py", 150, ["petalWidth", "species"]),
("natural_disasters.py", 686, ["Deaths", "Year"]),
("normalized_stacked_area_chart.py", 51, ["source", "net_generation_start"]),
("normalized_stacked_bar_chart.py", 60, ["site", "sum_yield_start"]),
("parallel_coordinates.py", 600, ["key", "value"]),
("percentage_of_total.py", 5, ["PercentOfTotal", "TotalTime"]),
("pie_chart.py", 6, ["category", "value_start"]),
("pyramid.py", 3, ["category", "value_start"]),
("stacked_bar_chart_sorted_segments.py", 60, ["variety", "site"]),
("stem_and_leaf.py", 100, ["stem", "leaf"]),
("streamgraph.py", 1708, ["series", "sum_count"]),
("top_k_items.py", 10, ["rank", "IMDB_Rating_start"]),
("top_k_letters.py", 9, ["rank", "letters"]),
("top_k_with_others.py", 10, ["ranked_director", "mean_aggregate_gross"]),
("trellis_area_sort_array.py", 492, ["date", "price"]),
("trellis_histogram.py", 20, ["Origin", "__count"]),
("us_population_over_time.py", 38, ["sex", "people_start"]),
("us_population_over_time_facet.py", 285, ["year", "sum_people"]),
("wilkinson-dot-plot.py", 21, ["data", "id"]),
("window_rank.py", 12, ["team", "diff"]),
])
def test_primitive_chart_examples(filename, rows, cols):
# Read the example script's contents from the examples_methods_syntax package.
source = pkgutil.get_data(examples_methods_syntax.__name__, filename)
# Evaluate the script to obtain the chart it builds.
# NOTE(review): presumably eval_block returns the value of the script's last
# expression -- confirm against altair.utils.execeval.
chart = eval_block(source)
# Extract the chart's transformed data; it is used below as one table-like
# object supporting len() and .columns.
df = chart.transformed_data()
# The transformed data must have exactly the expected number of rows.
assert len(df) == rows
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think it makes sense to also check that this dataframe has no nulls, e.g. `assert df.notnull().all().all()`?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so. When the input DataFrame has nulls, it's possible for these to be passed through to the transformed data. Vega-Lite usually filters null values for the columns that are used in the chart, but transformed_data returns all of the columns, so the unused columns can still have nulls.

assert set(cols).issubset(set(df.columns))


# The parameter table is intentionally kept one case per line, even where a
# line is long, so formatting is switched off around it.
# fmt: off
@pytest.mark.parametrize("filename,all_rows,all_cols", [
    ("errorbars_with_std.py", [10, 10], [["upper_yield"], ["extent_yield"]]),
    ("candlestick_chart.py", [44, 44], [["low"], ["close"]]),
    ("co2_concentration.py", [713, 7, 7], [["first_date"], ["scaled_date"], ["end"]]),
    ("falkensee.py", [2, 38, 38], [["event"], ["population"], ["population"]]),
    ("heat_lane.py", [10, 10], [["bin_count_start"], ["y2"]]),
    ("histogram_responsive.py", [20, 20], [["__count"], ["__count"]]),
    ("histogram_with_a_global_mean_overlay.py", [9, 1], [["__count"], ["mean_IMDB_Rating"]]),
    ("horizon_graph.py", [20, 20], [["x"], ["ny"]]),
    ("interactive_cross_highlight.py", [64, 64, 13], [["__count"], ["__count"], ["Major_Genre"]]),
    ("interval_selection.py", [123, 123], [["price_start"], ["date"]]),
    ("layered_chart_with_dual_axis.py", [12, 12], [["month_date"], ["average_precipitation"]]),
    ("layered_heatmap_text.py", [9, 9], [["Cylinders"], ["mean_horsepower"]]),
    ("multiline_highlight.py", [560, 560], [["price"], ["date"]]),
    ("multiline_tooltip.py", [300, 300, 300, 0, 300], [["x"], ["y"], ["y"], ["x"], ["x"]]),
    ("pie_chart_with_labels.py", [6, 6], [["category"], ["value"]]),
    ("radial_chart.py", [6, 6], [["values"], ["values_start"]]),
    ("scatter_linked_table.py", [392, 14, 14, 14], [["Year"], ["Year"], ["Year"], ["Year"]]),
    ("scatter_marginal_hist.py", [34, 150, 27], [["__count"], ["species"], ["__count"]]),
    ("scatter_with_layered_histogram.py", [2, 19], [["gender"], ["__count"]]),
    ("scatter_with_minimap.py", [1461, 1461], [["date"], ["date"]]),
    ("scatter_with_rolling_mean.py", [1461, 1461], [["date"], ["rolling_mean"]]),
    ("seattle_weather_interactive.py", [1461, 5], [["date"], ["__count"]]),
    ("select_detail.py", [20, 1000], [["id"], ["x"]]),
    ("simple_scatter_with_errorbars.py", [5, 5], [["x"], ["upper_ymin"]]),
    ("stacked_bar_chart_with_text.py", [60, 60], [["site"], ["site"]]),
    ("us_employment.py", [120, 1, 2], [["month"], ["president"], ["president"]]),
    ("us_population_pyramid_over_time.py", [19, 38, 19], [["gender"], ["year"], ["gender"]]),
])
# fmt: on
def test_compound_chart_examples(filename, all_rows, all_cols):
    """Check the transformed datasets extracted from multi-dataset examples.

    For each example, ``chart.transformed_data()`` is treated as a sequence
    of table-like objects. The test verifies that:

    * the number of returned datasets equals ``len(all_rows)``;
    * the i-th dataset has exactly ``all_rows[i]`` rows;
    * every column named in ``all_cols[i]`` is present in the i-th dataset
      (extra columns are allowed).

    Parameters
    ----------
    filename
        Name of the example script inside the ``examples_methods_syntax``
        package.
    all_rows
        Expected row count for each returned dataset, in order.
    all_cols
        For each returned dataset, the column names that must be present.
    """
    source = pkgutil.get_data(examples_methods_syntax.__name__, filename)
    chart = eval_block(source)

    dfs = chart.transformed_data()

    # zip() below stops at the shortest input, so a missing or extra dataset
    # would go unnoticed without this explicit length check.
    assert len(dfs) == len(all_rows)
    for df, rows, cols in zip(dfs, all_rows, all_cols):
        assert len(df) == rows
        assert set(cols).issubset(set(df.columns))