-
Notifications
You must be signed in to change notification settings - Fork 801
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support extracting transformed chart data using VegaFusion #3081
Merged
Merged
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit
Hold shift + click to select a range
67b44da
Port transformed_data functionality from VegaFusion
jonmmease 2993959
Add initial transformed_data tests
jonmmease 3ae6c7d
skip black formatting for pytest.mark.parametrize
jonmmease eed32e9
Test exclude flag to transformed_data
jonmmease 6f61bad
chart.transformed_data -> chart._transformed_data
jonmmease 2be1f64
Add VegaFusion as dev dependency
jonmmease 07c5a00
Add better error message when VegaFusion is not installed
jonmmease 4360cf8
Merge remote-tracking branch 'origin/master' into jonmmease/transform…
jonmmease f0b26ea
Move import
jonmmease b48f8d3
move import
jonmmease 75cf958
Docstring update
jonmmease a46ce1b
Make utils.transformed_data internal, use absolute imports
jonmmease 48f802c
Reword docstring
jonmmease dfa18bc
Merge branch 'jonmmease/transformed_data' of github.com:altair-viz/al…
jonmmease 280eb0f
Remove magic, use "view" instead of chart or mark
jonmmease aabf5d6
Reword
jonmmease 16250fd
Remove incorrect comment
jonmmease 8ab1dce
black
jonmmease 6f43d6b
Use DataFrameLike protocol for the transformed_data signature
jonmmease a738408
Add NotImplementedError for RepeatChart
jonmmease 88fceb5
Use Chart._get_name to name subcharts
jonmmease 1416f4d
Protocol is available in Python 3.8
jonmmease c665e8f
Make DataFrameLike private for now
jonmmease File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Add initial transformed_data tests
- Loading branch information
commit 2993959aa1ad4424a33b11d5d06a6e346372a7fd
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
from altair.utils.execeval import eval_block | ||
from tests import examples_methods_syntax | ||
import pkgutil | ||
import pytest | ||
|
||
|
||
@pytest.mark.parametrize("filename,rows,cols", [
    ("annual_weather_heatmap.py", 366, ["monthdate_date_end", "max_temp_max"]),
    ("anscombe_plot.py", 44, ["Series", "X", "Y"]),
    ("bar_chart_sorted.py", 6, ["site", "sum_yield"]),
    ("bar_chart_trellis_compact.py", 27, ["p", "p_end"]),
    ("beckers_barley_trellis_plot.py", 120, ["year", "site"]),
    ("beckers_barley_wrapped_facet.py", 120, ["site", "median_yield"]),
    ("bump_chart.py", 100, ["rank", "yearmonth_date"]),
    ("comet_chart.py", 120, ["variety", "delta"]),
    ("connected_scatterplot.py", 55, ["miles", "gas"]),
    ("diverging_stacked_bar_chart.py", 40, ["value", "percentage_start"]),
    ("donut_chart.py", 6, ["value_start", "value_end"]),
    ("gapminder_bubble_plot.py", 187, ["income", "population"]),
    ("grouped_bar_chart2.py", 9, ["Group", "Value_start"]),
    ("hexbins.py", 84, ["xFeaturePos", "mean_temp_max"]),
    ("histogram_heatmap.py", 378, ["bin_maxbins_40_Rotten_Tomatoes_Rating", "__count"]),
    ("histogram_scatterplot.py", 64, ["bin_maxbins_10_Rotten_Tomatoes_Rating", "__count"]),
    ("interactive_legend.py", 1708, ["sum_count_start", "series"]),
    ("iowa_electricity.py", 51, ["net_generation_start", "year"]),
    ("isotype.py", 37, ["animal", "x"]),
    ("isotype_grid.py", 100, ["row", "col"]),
    ("lasagna_plot.py", 492, ["yearmonthdate_date", "sum_price"]),
    ("layered_area_chart.py", 51, ["source", "net_generation"]),
    ("layered_bar_chart.py", 51, ["source", "net_generation"]),
    ("layered_histogram.py", 113, ["bin_maxbins_100_Measurement"]),
    ("line_chart_with_cumsum.py", 52, ["cumulative_wheat"]),
    ("line_percent.py", 30, ["sex", "perc"]),
    ("line_with_log_scale.py", 15, ["year", "sum_people"]),
    ("multifeature_scatter_plot.py", 150, ["petalWidth", "species"]),
    ("natural_disasters.py", 686, ["Deaths", "Year"]),
    ("normalized_stacked_area_chart.py", 51, ["source", "net_generation_start"]),
    ("normalized_stacked_bar_chart.py", 60, ["site", "sum_yield_start"]),
    ("parallel_coordinates.py", 600, ["key", "value"]),
    ("percentage_of_total.py", 5, ["PercentOfTotal", "TotalTime"]),
    ("pie_chart.py", 6, ["category", "value_start"]),
    ("pyramid.py", 3, ["category", "value_start"]),
    ("stacked_bar_chart_sorted_segments.py", 60, ["variety", "site"]),
    ("stem_and_leaf.py", 100, ["stem", "leaf"]),
    ("streamgraph.py", 1708, ["series", "sum_count"]),
    ("top_k_items.py", 10, ["rank", "IMDB_Rating_start"]),
    ("top_k_letters.py", 9, ["rank", "letters"]),
    ("top_k_with_others.py", 10, ["ranked_director", "mean_aggregate_gross"]),
    ("trellis_area_sort_array.py", 492, ["date", "price"]),
    ("trellis_histogram.py", 20, ["Origin", "__count"]),
    ("us_population_over_time.py", 38, ["sex", "people_start"]),
    ("us_population_over_time_facet.py", 285, ["year", "sum_people"]),
    ("wilkinson-dot-plot.py", 21, ["data", "id"]),
    ("window_rank.py", 12, ["team", "diff"]),
])
def test_primitive_chart_examples(filename, rows, cols):
    """Evaluate one example script and check its transformed data.

    Parameters
    ----------
    filename : str
        Name of the example resource inside the ``examples_methods_syntax``
        package.
    rows : int
        Expected ``len()`` of the object returned by ``transformed_data()``.
    cols : list of str
        Column names that must all be present in the result's ``columns``;
        additional columns in the result are allowed.
    """
    example_code = pkgutil.get_data(examples_methods_syntax.__name__, filename)
    transformed = eval_block(example_code).transformed_data()

    assert len(transformed) == rows
    # Subset check only: the result may carry more columns than listed here.
    assert set(cols) <= set(transformed.columns)
|
||
|
||
@pytest.mark.parametrize("filename,all_rows,all_cols", [
    ("errorbars_with_std.py", [10, 10], [["upper_yield"], ["extent_yield"]]),
    ("candlestick_chart.py", [44, 44], [["low"], ["close"]]),
    ("co2_concentration.py", [713, 7, 7], [["first_date"], ["scaled_date"], ["end"]]),
    ("falkensee.py", [2, 38, 38], [["event"], ["population"], ["population"]]),
    ("heat_lane.py", [10, 10], [["bin_count_start"], ["y2"]]),
    ("histogram_responsive.py", [20, 20], [["__count"], ["__count"]]),
    ("histogram_with_a_global_mean_overlay.py", [9, 1], [["__count"], ["mean_IMDB_Rating"]]),
    ("horizon_graph.py", [20, 20], [["x"], ["ny"]]),
    ("interactive_cross_highlight.py", [64, 64, 13], [["__count"], ["__count"], ["Major_Genre"]]),
    ("interval_selection.py", [123, 123], [["price_start"], ["date"]]),
    ("layered_chart_with_dual_axis.py", [12, 12], [["month_date"], ["average_precipitation"]]),
    ("layered_heatmap_text.py", [9, 9], [["Cylinders"], ["mean_horsepower"]]),
    ("multiline_highlight.py", [560, 560], [["price"], ["date"]]),
    ("multiline_tooltip.py", [300, 300, 300, 0, 300], [["x"], ["y"], ["y"], ["x"], ["x"]]),
    ("pie_chart_with_labels.py", [6, 6], [["category"], ["value"]]),
    ("radial_chart.py", [6, 6], [["values"], ["values_start"]]),
    ("scatter_linked_table.py", [392, 14, 14, 14], [["Year"], ["Year"], ["Year"], ["Year"]]),
    ("scatter_marginal_hist.py", [34, 150, 27], [["__count"], ["species"], ["__count"]]),
    ("scatter_with_layered_histogram.py", [2, 19], [["gender"], ["__count"]]),
    ("scatter_with_minimap.py", [1461, 1461], [["date"], ["date"]]),
    ("scatter_with_rolling_mean.py", [1461, 1461], [["date"], ["rolling_mean"]]),
    ("seattle_weather_interactive.py", [1461, 5], [["date"], ["__count"]]),
    ("select_detail.py", [20, 1000], [["id"], ["x"]]),
    ("simple_scatter_with_errorbars.py", [5, 5], [["x"], ["upper_ymin"]]),
    ("stacked_bar_chart_with_text.py", [60, 60], [["site"], ["site"]]),
    ("us_employment.py", [120, 1, 2], [["month"], ["president"], ["president"]]),
    ("us_population_pyramid_over_time.py", [19, 38, 19], [["gender"], ["year"], ["gender"]]),
])
def test_compound_chart_examples(filename, all_rows, all_cols):
    """Evaluate one example script and check each of its transformed datasets.

    Parameters
    ----------
    filename : str
        Name of the example resource inside the ``examples_methods_syntax``
        package.
    all_rows : list of int
        Expected ``len()`` of each item returned by ``transformed_data()``,
        in order.
    all_cols : list of list of str
        For each returned item, column names that must all be present in its
        ``columns``; additional columns are allowed.
    """
    # Guard against a malformed parameter row: zip() below stops at the
    # shortest input, so a short all_cols would silently skip column checks.
    assert len(all_rows) == len(all_cols)

    source = pkgutil.get_data(examples_methods_syntax.__name__, filename)
    chart = eval_block(source)

    dfs = chart.transformed_data()
    assert len(dfs) == len(all_rows)
    for df, rows, cols in zip(dfs, all_rows, all_cols):
        assert len(df) == rows
        # Subset check only: each result may carry more columns than listed.
        assert set(cols).issubset(set(df.columns))
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you think it makes sense to also check that this dataframe has no nulls?
assert df.notnull().all().all()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think so. When the input DataFrame has nulls, it's possible for these to be passed through to the transformed data. Vega-Lite usually filters null values for the columns that are used in the chart, but
transformed_data
returns all of the columns, so the unused columns can still have nulls.