ENH: Use feature metadata fields as rank plot tooltips

Also stores the feature metadata column names in the rank plot JSON (under the "rankratioviz_feature_metadata_ordering" dataset key), so they will be easy to retrieve and use in the visualization interface (#132).
biocore · May 22, 2019 · 3975430 · 3975430
1 parent 94f8989
commit 3975430
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 11 deletions.
diff --git a/rankratioviz/generate.py b/rankratioviz/generate.py
@@ -112,14 +112,16 @@ def process_and_generate(
     extreme_feature_count=None,
 ):
     """Just calls process_input() and gen_visualization()."""
-    U, V, ranking_ids, processed_table = process_input(
+    U, V, ranking_ids, feature_metadata_cols, processed_table = process_input(
         feature_ranks,
         sample_metadata,
         biom_table,
         feature_metadata,
         extreme_feature_count,
     )
-    return gen_visualization(V, ranking_ids, processed_table, U, output_dir)
+    return gen_visualization(
+        V, ranking_ids, feature_metadata_cols, processed_table, U, output_dir
+    )
 
 
 def process_input(
@@ -215,13 +217,15 @@ def process_input(
     filtered_ranks.columns = [fix_id(str(c)) for c in filtered_ranks.columns]
     ranking_ids = filtered_ranks.columns
 
+    feature_metadata_cols = []
     # If the user passed in feature metadata corresponding to taxonomy
     # information, then we use that to update the feature data to include
     # that metadata. Feature metadata will be represented as additional fields
     # for each feature in the rank plot. (This can help out in the searching
     # part of the visualization, but it isn't necessary.)
     if feature_metadata is not None:
         try:
+            feature_metadata_cols = feature_metadata.columns
             # Use of suffixes=(False, False) ensures that columns are unique
             # between feature metadata and feature ranks.
             filtered_ranks = filtered_ranks.merge(
@@ -244,17 +248,24 @@ def process_input(
             raise
 
     logging.debug("Finished input processing.")
-    return U, filtered_ranks, ranking_ids, table
+    return U, filtered_ranks, ranking_ids, feature_metadata_cols, table
 
 
-def gen_rank_plot(V, ranking_ids):
+def gen_rank_plot(V, ranking_ids, feature_metadata_cols):
     """Generates altair.Chart object describing the rank plot.
 
     Arguments:
 
-    V: feature ranks
-    ranking_ids: IDs of the actual "ranking" columns in V (since V can include
-                 feature metadata)
+    V: pd.DataFrame
+        feature ranks
+
+    ranking_ids: pd.Index
+        IDs of the actual "ranking" columns in V (since V can include
+        feature metadata)
+
+    feature_metadata_cols: pd.Index or list
+        IDs of the feature metadata columns in V (if no such IDs are present,
+        an empty list should be passed)
 
     Returns:
 
@@ -327,6 +338,7 @@ def gen_rank_plot(V, ranking_ids):
                 ),
                 "Classification",
                 "Feature ID",
+                *feature_metadata_cols,
             ],
         )
         .configure_axis(
@@ -340,11 +352,13 @@ def gen_rank_plot(V, ranking_ids):
 
     rank_chart_json = rank_chart.to_dict()
     rank_ordering = "rankratioviz_rank_ordering"
+    fm_col_ordering = "rankratioviz_feature_metadata_ordering"
     # Note we don't use rank_data.columns for setting the rank ordering. This
     # is because rank_data's columns now include both the ranking IDs and the
     # "Feature ID" and "Classification" columns (as well as any feature
     # metadata the user saw fit to pass in).
     rank_chart_json["datasets"][rank_ordering] = list(ranking_ids)
+    rank_chart_json["datasets"][fm_col_ordering] = list(feature_metadata_cols)
     return rank_chart_json
 
 
@@ -439,7 +453,12 @@ def gen_sample_plot(table, metadata):
 
 
 def gen_visualization(
-    V, ranking_ids, processed_table, df_sample_metadata, output_dir
+    V,
+    ranking_ids,
+    feature_metadata_cols,
+    processed_table,
+    df_sample_metadata,
+    output_dir,
 ):
     """Creates a rankratioviz visualization. This function should be callable
        from both the QIIME 2 and standalone rankratioviz scripts.
@@ -454,7 +473,7 @@ def gen_visualization(
     alt.data_transformers.enable("default", max_rows=None)
 
     logging.debug("Generating rank plot JSON.")
-    rank_plot_json = gen_rank_plot(V, ranking_ids)
+    rank_plot_json = gen_rank_plot(V, ranking_ids, feature_metadata_cols)
     logging.debug("Generating sample plot JSON.")
     sample_plot_json, count_json = gen_sample_plot(
         processed_table, df_sample_metadata

diff --git a/rankratioviz/tests/web_tests/tests/test_data_export.js b/rankratioviz/tests/web_tests/tests/test_data_export.js
@@ -1,7 +1,7 @@
 define(["display", "mocha", "chai"], function(display, mocha, chai) {
     // Just the output from the python "matching" integration test
     // prettier-ignore
-    var rankPlotJSON = {"config": {"view": {"width": 400, "height": 300}, "mark": {"tooltip": null}, "axis": {"gridColor": "#f2f2f2", "labelBound": true}}, "data": {"name": "data-61695d98d9bfda69ee5ff39048c0571b"}, "mark": "bar", "autosize": {"resize": true}, "background": "#FFFFFF", "encoding": {"color": {"type": "nominal", "field": "Classification", "scale": {"domain": ["None", "Numerator", "Denominator", "Both"], "range": ["#e0e0e0", "#f00", "#00f", "#949"]}}, "tooltip": [{"type": "quantitative", "field": "rankratioviz_x", "title": "Current Ranking"}, {"type": "nominal", "field": "Classification"}, {"type": "nominal", "field": "Feature ID"}], "x": {"type": "ordinal", "axis": {"labelAngle": 0, "ticks": false}, "field": "rankratioviz_x", "scale": {"paddingInner": 0, "paddingOuter": 1, "rangeStep": 1}, "title": "Sorted Features"}, "y": {"type": "quantitative", "field": "Intercept"}}, "selection": {"selector013": {"type": "interval", "bind": "scales", "encodings": ["x", "y"]}}, "title": "Feature Ranks", "transform": [{"window": [{"op": "row_number", "as": "rankratioviz_x"}], "sort": [{"field": "Intercept", "order": "ascending"}]}], "$schema": "https://vega.github.io/schema/vega-lite/v3.2.1.json", "datasets": {"data-61695d98d9bfda69ee5ff39048c0571b": [{"Feature ID": "Taxon1", "Intercept": 5.0, "Rank 1": 6.0, "Rank 2": 7.0, "FeatureMetadata1": null, "FeatureMetadata2": null, "Classification": "None"}, {"Feature ID": "Taxon2", "Intercept": 1.0, "Rank 1": 2.0, "Rank 2": 3.0, "FeatureMetadata1": null, "FeatureMetadata2": null, "Classification": "None"}, {"Feature ID": "Taxon3", "Intercept": 4.0, "Rank 1": 5.0, "Rank 2": 6.0, "FeatureMetadata1": "Yeet", "FeatureMetadata2": 100.0, "Classification": "None"}, {"Feature ID": "Taxon4", "Intercept": 9.0, "Rank 1": 8.0, "Rank 2": 7.0, "FeatureMetadata1": null, "FeatureMetadata2": null, "Classification": "None"}, {"Feature ID": "Taxon5", "Intercept": 6.0, "Rank 1": 5.0, "Rank 2": 4.0, "FeatureMetadata1": null, 
"FeatureMetadata2": null, "Classification": "None"}], "rankratioviz_rank_ordering": ["Intercept", "Rank 1", "Rank 2"]}};
+    var rankPlotJSON = {"config": {"view": {"width": 400, "height": 300}, "mark": {"tooltip": null}, "axis": {"gridColor": "#f2f2f2", "labelBound": true}}, "data": {"name": "data-61695d98d9bfda69ee5ff39048c0571b"}, "mark": "bar", "autosize": {"resize": true}, "background": "#FFFFFF", "encoding": {"color": {"type": "nominal", "field": "Classification", "scale": {"domain": ["None", "Numerator", "Denominator", "Both"], "range": ["#e0e0e0", "#f00", "#00f", "#949"]}}, "tooltip": [{"type": "quantitative", "field": "rankratioviz_x", "title": "Current Ranking"}, {"type": "nominal", "field": "Classification"}, {"type": "nominal", "field": "Feature ID"}, {"type": "nominal", "field": "FeatureMetadata1"}, {"type": "quantitative", "field": "FeatureMetadata2"}], "x": {"type": "ordinal", "axis": {"labelAngle": 0, "ticks": false}, "field": "rankratioviz_x", "scale": {"paddingInner": 0, "paddingOuter": 1, "rangeStep": 1}, "title": "Sorted Features"}, "y": {"type": "quantitative", "field": "Intercept"}}, "selection": {"selector013": {"type": "interval", "bind": "scales", "encodings": ["x", "y"]}}, "title": "Feature Ranks", "transform": [{"window": [{"op": "row_number", "as": "rankratioviz_x"}], "sort": [{"field": "Intercept", "order": "ascending"}]}], "$schema": "https://vega.github.io/schema/vega-lite/v3.2.1.json", "datasets": {"data-61695d98d9bfda69ee5ff39048c0571b": [{"Feature ID": "Taxon1", "Intercept": 5.0, "Rank 1": 6.0, "Rank 2": 7.0, "FeatureMetadata1": null, "FeatureMetadata2": null, "Classification": "None"}, {"Feature ID": "Taxon2", "Intercept": 1.0, "Rank 1": 2.0, "Rank 2": 3.0, "FeatureMetadata1": null, "FeatureMetadata2": null, "Classification": "None"}, {"Feature ID": "Taxon3", "Intercept": 4.0, "Rank 1": 5.0, "Rank 2": 6.0, "FeatureMetadata1": "Yeet", "FeatureMetadata2": 100.0, "Classification": "None"}, {"Feature ID": "Taxon4", "Intercept": 9.0, "Rank 1": 8.0, "Rank 2": 7.0, "FeatureMetadata1": null, "FeatureMetadata2": null, "Classification": "None"}, {"Feature 
ID": "Taxon5", "Intercept": 6.0, "Rank 1": 5.0, "Rank 2": 4.0, "FeatureMetadata1": null, "FeatureMetadata2": null, "Classification": "None"}], "rankratioviz_rank_ordering": ["Intercept", "Rank 1", "Rank 2"], "rankratioviz_feature_metadata_ordering": ["FeatureMetadata1", "FeatureMetadata2"]}};
     // prettier-ignore
     var samplePlotJSON = {"config": {"view": {"width": 400, "height": 300}, "mark": {"tooltip": null}, "axis": {"labelBound": true}}, "data": {"name": "data-587975575c35e2a2f0cf84839938cac8"}, "mark": {"type": "circle"}, "autosize": {"resize": true}, "background": "#FFFFFF", "encoding": {"color": {"type": "nominal", "field": "Metadata1"}, "tooltip": [{"type": "nominal", "field": "Sample ID"}, {"type": "quantitative", "field": "rankratioviz_balance"}], "x": {"type": "quantitative", "field": "rankratioviz_balance"}, "y": {"type": "quantitative", "field": "rankratioviz_balance", "title": "log(Numerator / Denominator)"}}, "selection": {"selector014": {"type": "interval", "bind": "scales", "encodings": ["x", "y"]}}, "title": "Log Ratio of Abundances in Samples", "$schema": "https://vega.github.io/schema/vega-lite/v3.2.1.json", "datasets": {"data-587975575c35e2a2f0cf84839938cac8": [{"Sample ID": "Sample1", "rankratioviz_balance": null, "Metadata1": 1, "Metadata2": 2, "Metadata3": 3}, {"Sample ID": "Sample2", "rankratioviz_balance": null, "Metadata1": 4, "Metadata2": 5, "Metadata3": 6}, {"Sample ID": "Sample3", "rankratioviz_balance": null, "Metadata1": 7, "Metadata2": 8, "Metadata3": 9}, {"Sample ID": "Sample5", "rankratioviz_balance": null, "Metadata1": 13, "Metadata2": 14, "Metadata3": 15}, {"Sample ID": "Sample6", "rankratioviz_balance": null, "Metadata1": 16, "Metadata2": 17, "Metadata3": 18}, {"Sample ID": "Sample7", "rankratioviz_balance": null, "Metadata1": 19, "Metadata2": 20, "Metadata3": 21}]}};
     // prettier-ignore

diff --git a/rankratioviz/tests/web_tests/tests/test_rrvdisplay.js b/rankratioviz/tests/web_tests/tests/test_rrvdisplay.js
@@ -1,7 +1,7 @@
 define(["display", "mocha", "chai"], function(display, mocha, chai) {
     // Just the output from the python "matching" integration test
     // prettier-ignore
-    var rankPlotJSON = {"config": {"view": {"width": 400, "height": 300}, "mark": {"tooltip": null}, "axis": {"gridColor": "#f2f2f2", "labelBound": true}}, "data": {"name": "data-61695d98d9bfda69ee5ff39048c0571b"}, "mark": "bar", "autosize": {"resize": true}, "background": "#FFFFFF", "encoding": {"color": {"type": "nominal", "field": "Classification", "scale": {"domain": ["None", "Numerator", "Denominator", "Both"], "range": ["#e0e0e0", "#f00", "#00f", "#949"]}}, "tooltip": [{"type": "quantitative", "field": "rankratioviz_x", "title": "Current Ranking"}, {"type": "nominal", "field": "Classification"}, {"type": "nominal", "field": "Feature ID"}], "x": {"type": "ordinal", "axis": {"labelAngle": 0, "ticks": false}, "field": "rankratioviz_x", "scale": {"paddingInner": 0, "paddingOuter": 1, "rangeStep": 1}, "title": "Sorted Features"}, "y": {"type": "quantitative", "field": "Intercept"}}, "selection": {"selector013": {"type": "interval", "bind": "scales", "encodings": ["x", "y"]}}, "title": "Feature Ranks", "transform": [{"window": [{"op": "row_number", "as": "rankratioviz_x"}], "sort": [{"field": "Intercept", "order": "ascending"}]}], "$schema": "https://vega.github.io/schema/vega-lite/v3.2.1.json", "datasets": {"data-61695d98d9bfda69ee5ff39048c0571b": [{"Feature ID": "Taxon1", "Intercept": 5.0, "Rank 1": 6.0, "Rank 2": 7.0, "FeatureMetadata1": null, "FeatureMetadata2": null, "Classification": "None"}, {"Feature ID": "Taxon2", "Intercept": 1.0, "Rank 1": 2.0, "Rank 2": 3.0, "FeatureMetadata1": null, "FeatureMetadata2": null, "Classification": "None"}, {"Feature ID": "Taxon3", "Intercept": 4.0, "Rank 1": 5.0, "Rank 2": 6.0, "FeatureMetadata1": "Yeet", "FeatureMetadata2": 100.0, "Classification": "None"}, {"Feature ID": "Taxon4", "Intercept": 9.0, "Rank 1": 8.0, "Rank 2": 7.0, "FeatureMetadata1": null, "FeatureMetadata2": null, "Classification": "None"}, {"Feature ID": "Taxon5", "Intercept": 6.0, "Rank 1": 5.0, "Rank 2": 4.0, "FeatureMetadata1": null, 
"FeatureMetadata2": null, "Classification": "None"}], "rankratioviz_rank_ordering": ["Intercept", "Rank 1", "Rank 2"]}};
+    var rankPlotJSON = {"config": {"view": {"width": 400, "height": 300}, "mark": {"tooltip": null}, "axis": {"gridColor": "#f2f2f2", "labelBound": true}}, "data": {"name": "data-61695d98d9bfda69ee5ff39048c0571b"}, "mark": "bar", "autosize": {"resize": true}, "background": "#FFFFFF", "encoding": {"color": {"type": "nominal", "field": "Classification", "scale": {"domain": ["None", "Numerator", "Denominator", "Both"], "range": ["#e0e0e0", "#f00", "#00f", "#949"]}}, "tooltip": [{"type": "quantitative", "field": "rankratioviz_x", "title": "Current Ranking"}, {"type": "nominal", "field": "Classification"}, {"type": "nominal", "field": "Feature ID"}, {"type": "nominal", "field": "FeatureMetadata1"}, {"type": "quantitative", "field": "FeatureMetadata2"}], "x": {"type": "ordinal", "axis": {"labelAngle": 0, "ticks": false}, "field": "rankratioviz_x", "scale": {"paddingInner": 0, "paddingOuter": 1, "rangeStep": 1}, "title": "Sorted Features"}, "y": {"type": "quantitative", "field": "Intercept"}}, "selection": {"selector013": {"type": "interval", "bind": "scales", "encodings": ["x", "y"]}}, "title": "Feature Ranks", "transform": [{"window": [{"op": "row_number", "as": "rankratioviz_x"}], "sort": [{"field": "Intercept", "order": "ascending"}]}], "$schema": "https://vega.github.io/schema/vega-lite/v3.2.1.json", "datasets": {"data-61695d98d9bfda69ee5ff39048c0571b": [{"Feature ID": "Taxon1", "Intercept": 5.0, "Rank 1": 6.0, "Rank 2": 7.0, "FeatureMetadata1": null, "FeatureMetadata2": null, "Classification": "None"}, {"Feature ID": "Taxon2", "Intercept": 1.0, "Rank 1": 2.0, "Rank 2": 3.0, "FeatureMetadata1": null, "FeatureMetadata2": null, "Classification": "None"}, {"Feature ID": "Taxon3", "Intercept": 4.0, "Rank 1": 5.0, "Rank 2": 6.0, "FeatureMetadata1": "Yeet", "FeatureMetadata2": 100.0, "Classification": "None"}, {"Feature ID": "Taxon4", "Intercept": 9.0, "Rank 1": 8.0, "Rank 2": 7.0, "FeatureMetadata1": null, "FeatureMetadata2": null, "Classification": "None"}, {"Feature 
ID": "Taxon5", "Intercept": 6.0, "Rank 1": 5.0, "Rank 2": 4.0, "FeatureMetadata1": null, "FeatureMetadata2": null, "Classification": "None"}], "rankratioviz_rank_ordering": ["Intercept", "Rank 1", "Rank 2"], "rankratioviz_feature_metadata_ordering": ["FeatureMetadata1", "FeatureMetadata2"]}};
     // prettier-ignore
     var samplePlotJSON = {"config": {"view": {"width": 400, "height": 300}, "mark": {"tooltip": null}, "axis": {"labelBound": true}}, "data": {"name": "data-587975575c35e2a2f0cf84839938cac8"}, "mark": {"type": "circle"}, "autosize": {"resize": true}, "background": "#FFFFFF", "encoding": {"color": {"type": "nominal", "field": "Metadata1"}, "tooltip": [{"type": "nominal", "field": "Sample ID"}, {"type": "quantitative", "field": "rankratioviz_balance"}], "x": {"type": "quantitative", "field": "rankratioviz_balance"}, "y": {"type": "quantitative", "field": "rankratioviz_balance", "title": "log(Numerator / Denominator)"}}, "selection": {"selector014": {"type": "interval", "bind": "scales", "encodings": ["x", "y"]}}, "title": "Log Ratio of Abundances in Samples", "$schema": "https://vega.github.io/schema/vega-lite/v3.2.1.json", "datasets": {"data-587975575c35e2a2f0cf84839938cac8": [{"Sample ID": "Sample1", "rankratioviz_balance": null, "Metadata1": 1, "Metadata2": 2, "Metadata3": 3}, {"Sample ID": "Sample2", "rankratioviz_balance": null, "Metadata1": 4, "Metadata2": 5, "Metadata3": 6}, {"Sample ID": "Sample3", "rankratioviz_balance": null, "Metadata1": 7, "Metadata2": 8, "Metadata3": 9}, {"Sample ID": "Sample5", "rankratioviz_balance": null, "Metadata1": 13, "Metadata2": 14, "Metadata3": 15}, {"Sample ID": "Sample6", "rankratioviz_balance": null, "Metadata1": 16, "Metadata2": 17, "Metadata3": 18}, {"Sample ID": "Sample7", "rankratioviz_balance": null, "Metadata1": 19, "Metadata2": 20, "Metadata3": 21}]}};
     // prettier-ignore