From 0aea873748c7e31dba476722210e466c2886f46a Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Thu, 14 Apr 2022 17:50:35 +0800 Subject: [PATCH 1/9] fix: should drop first level of metrics in flatten operator --- .../src/operators/flattenOperator.ts | 15 +++++++-- .../utils/operators/flattenOperator.test.ts | 31 ++++++++++++++++++ .../src/query/types/PostProcessing.ts | 1 + .../utils/pandas_postprocessing/flatten.py | 6 ++++ .../pandas_postprocessing/test_flatten.py | 32 +++++++++++++++++++ 5 files changed, 83 insertions(+), 2 deletions(-) diff --git a/superset-frontend/packages/superset-ui-chart-controls/src/operators/flattenOperator.ts b/superset-frontend/packages/superset-ui-chart-controls/src/operators/flattenOperator.ts index 1348f4b9879fc..1670a84170249 100644 --- a/superset-frontend/packages/superset-ui-chart-controls/src/operators/flattenOperator.ts +++ b/superset-frontend/packages/superset-ui-chart-controls/src/operators/flattenOperator.ts @@ -17,10 +17,21 @@ * specific language governing permissions and limitationsxw * under the License. */ -import { PostProcessingFlatten } from '@superset-ui/core'; +import { ensureIsArray, PostProcessingFlatten } from '@superset-ui/core'; import { PostProcessingFactory } from './types'; export const flattenOperator: PostProcessingFactory = ( formData, queryObject, -) => ({ operation: 'flatten' }); +) => { + const drop_levels: number[] = []; + if (ensureIsArray(queryObject.metrics).length === 1) { + drop_levels.push(0); + } + return { + operation: 'flatten', + options: { + drop_levels, + }, + }; +}; diff --git a/superset-frontend/packages/superset-ui-chart-controls/test/utils/operators/flattenOperator.test.ts b/superset-frontend/packages/superset-ui-chart-controls/test/utils/operators/flattenOperator.test.ts index 94a9b0068705a..e63525b82e781 100644 --- a/superset-frontend/packages/superset-ui-chart-controls/test/utils/operators/flattenOperator.test.ts +++ b/superset-frontend/packages/superset-ui-chart-controls/test/utils/operators/flattenOperator.test.ts @@ -51,9 +51,40 @@ const queryObject: QueryObject = { }, ], }; +const singleMetricQueryObject: QueryObject = { + metrics: ['count(*)'], + time_range: '2015 : 2016', + granularity: 'month', + post_processing: [ + { + operation: 'pivot', + options: { + index: ['__timestamp'], + columns: ['nation'], + aggregates: { + 'count(*)': { + operator: 'sum', + }, + }, + }, + }, + ], +}; test('should do flattenOperator', () => { expect(flattenOperator(formData, queryObject)).toEqual({ operation: 'flatten', + options: { + drop_levels: [], + }, + }); +}); + +test('should add drop level', () => { + expect(flattenOperator(formData, singleMetricQueryObject)).toEqual({ + operation: 'flatten', + options: { + drop_levels: [0], + }, }); }); diff --git a/superset-frontend/packages/superset-ui-core/src/query/types/PostProcessing.ts b/superset-frontend/packages/superset-ui-core/src/query/types/PostProcessing.ts index 7e5ce853585ab..0ba7e4fc4af59 100644 --- a/superset-frontend/packages/superset-ui-core/src/query/types/PostProcessing.ts +++ b/superset-frontend/packages/superset-ui-core/src/query/types/PostProcessing.ts @@ -205,6 +205,7 @@ interface _PostProcessingFlatten { operation: 'flatten'; options?: { reset_index?: boolean; + drop_levels?: number[] | string[]; }; } export type PostProcessingFlatten = diff --git a/superset/utils/pandas_postprocessing/flatten.py b/superset/utils/pandas_postprocessing/flatten.py index 49f250ec1c9b9..e5f38ff36e4fd 100644 --- a/superset/utils/pandas_postprocessing/flatten.py +++ b/superset/utils/pandas_postprocessing/flatten.py @@ -14,6 +14,8 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from typing import Tuple, Union + import pandas as pd from superset.utils.pandas_postprocessing.utils import ( @@ -25,12 +27,14 @@ def flatten( df: pd.DataFrame, reset_index: bool = True, + drop_levels: Union[Tuple[int, ...], Tuple[str, ...]] = (), ) -> pd.DataFrame: """ Convert N-dimensional DataFrame to a flat DataFrame :param df: N-dimensional DataFrame. :param reset_index: Convert index to column when df.index isn't RangeIndex + :param drop_levels: index or name of level needs to ignore :return: a flat DataFrame Examples @@ -72,6 +76,8 @@ def flatten( 1 2021-01-02 1 1 1 1 2 2021-01-03 1 1 1 1 """ + df.columns = df.columns.droplevel(drop_levels) + if _is_multi_index_on_columns(df): # every cell should be converted to string df.columns = [ diff --git a/tests/unit_tests/pandas_postprocessing/test_flatten.py b/tests/unit_tests/pandas_postprocessing/test_flatten.py index 028d25e9ecdd0..5fb7f255084a5 100644 --- a/tests/unit_tests/pandas_postprocessing/test_flatten.py +++ b/tests/unit_tests/pandas_postprocessing/test_flatten.py @@ -73,3 +73,35 @@ def test_flat_should_flat_multiple_index(): } ) ) + + +def test_flat_should_drop_index_level(): + index = pd.to_datetime(["2021-01-01", "2021-01-02", "2021-01-03"]) + index.name = "__timestamp" + columns = pd.MultiIndex.from_arrays( + [["a"] * 3, ["b"] * 3, ["c", "d", "e"], ["f", "i", "g"]], + names=["level1", "level2", "level3", "level4"], + ) + df = pd.DataFrame(index=index, columns=columns, data=1) + + assert pp.flatten(df.copy(), drop_levels=(0, 1,)).equals( + pd.DataFrame( + { + "__timestamp": index, + FLAT_COLUMN_SEPARATOR.join(["c", "f"]): [1, 1, 1], + FLAT_COLUMN_SEPARATOR.join(["d", "i"]): [1, 1, 1], + FLAT_COLUMN_SEPARATOR.join(["e", "g"]): [1, 1, 1], + } + ) + ) + + assert pp.flatten(df.copy(), drop_levels=("level1", "level2")).equals( + pd.DataFrame( + { + "__timestamp": index, + FLAT_COLUMN_SEPARATOR.join(["c", "f"]): [1, 1, 1], + FLAT_COLUMN_SEPARATOR.join(["d", "i"]): [1, 1, 1], + FLAT_COLUMN_SEPARATOR.join(["e", "g"]): [1, 1, 1], + } + ) + ) From 178e5b38d7a550ec8aaeb2ff3a6793b0a71f7cb5 Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Thu, 14 Apr 2022 18:51:35 +0800 Subject: [PATCH 2/9] fix column name --- .../utils/pandas_postprocessing/flatten.py | 11 +++-- .../pandas_postprocessing/test_flatten.py | 43 ++++++++++++++++--- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/superset/utils/pandas_postprocessing/flatten.py b/superset/utils/pandas_postprocessing/flatten.py index e5f38ff36e4fd..1bfcbf483cfbe 100644 --- a/superset/utils/pandas_postprocessing/flatten.py +++ b/superset/utils/pandas_postprocessing/flatten.py @@ -34,7 +34,7 @@ def flatten( :param df: N-dimensional DataFrame. :param reset_index: Convert index to column when df.index isn't RangeIndex - :param drop_levels: index or name of level needs to ignore + :param drop_levels: index of levels or name of level might be dropped if df is N-dimensional :return: a flat DataFrame Examples @@ -76,12 +76,17 @@ def flatten( 1 2021-01-02 1 1 1 1 2 2021-01-03 1 1 1 1 """ - df.columns = df.columns.droplevel(drop_levels) if _is_multi_index_on_columns(df): + df.columns = df.columns.droplevel(drop_levels) # every cell should be converted to string df.columns = [ - FLAT_COLUMN_SEPARATOR.join([str(cell) for cell in series]) + FLAT_COLUMN_SEPARATOR.join( + [ + str(cell) + for cell in ([series] if isinstance(series, str) else series) + ] + ) for series in df.columns.to_flat_index() ] diff --git a/tests/unit_tests/pandas_postprocessing/test_flatten.py b/tests/unit_tests/pandas_postprocessing/test_flatten.py index 5fb7f255084a5..217fbd31cbe1d 100644 --- a/tests/unit_tests/pandas_postprocessing/test_flatten.py +++ b/tests/unit_tests/pandas_postprocessing/test_flatten.py @@ -18,6 +18,7 @@ from superset.utils import pandas_postprocessing as pp from superset.utils.pandas_postprocessing.utils import FLAT_COLUMN_SEPARATOR +from tests.unit_tests.fixtures.dataframes import timeseries_df def test_flat_should_not_change(): @@ -79,29 +80,57 @@ def test_flat_should_drop_index_level(): index = pd.to_datetime(["2021-01-01", "2021-01-02", "2021-01-03"]) index.name = "__timestamp" columns = pd.MultiIndex.from_arrays( - [["a"] * 3, ["b"] * 3, ["c", "d", "e"], ["f", "i", "g"]], + [["a"] * 3, ["b"] * 3, ["c", "d", "e"], ["ff", "ii", "gg"]], names=["level1", "level2", "level3", "level4"], ) df = pd.DataFrame(index=index, columns=columns, data=1) + # drop level by index assert pp.flatten(df.copy(), drop_levels=(0, 1,)).equals( pd.DataFrame( { "__timestamp": index, - FLAT_COLUMN_SEPARATOR.join(["c", "f"]): [1, 1, 1], - FLAT_COLUMN_SEPARATOR.join(["d", "i"]): [1, 1, 1], - FLAT_COLUMN_SEPARATOR.join(["e", "g"]): [1, 1, 1], + FLAT_COLUMN_SEPARATOR.join(["c", "ff"]): [1, 1, 1], + FLAT_COLUMN_SEPARATOR.join(["d", "ii"]): [1, 1, 1], + FLAT_COLUMN_SEPARATOR.join(["e", "gg"]): [1, 1, 1], } ) ) + # drop level by name assert pp.flatten(df.copy(), drop_levels=("level1", "level2")).equals( pd.DataFrame( { "__timestamp": index, - FLAT_COLUMN_SEPARATOR.join(["c", "f"]): [1, 1, 1], - FLAT_COLUMN_SEPARATOR.join(["d", "i"]): [1, 1, 1], - FLAT_COLUMN_SEPARATOR.join(["e", "g"]): [1, 1, 1], + FLAT_COLUMN_SEPARATOR.join(["c", "ff"]): [1, 1, 1], + FLAT_COLUMN_SEPARATOR.join(["d", "ii"]): [1, 1, 1], + FLAT_COLUMN_SEPARATOR.join(["e", "gg"]): [1, 1, 1], + } + ) + ) + + # only leave 1 level + assert pp.flatten(df.copy(), drop_levels=(0, 1, 2)).equals( + pd.DataFrame( + { + "__timestamp": index, + FLAT_COLUMN_SEPARATOR.join(["ff"]): [1, 1, 1], + FLAT_COLUMN_SEPARATOR.join(["ii"]): [1, 1, 1], + FLAT_COLUMN_SEPARATOR.join(["gg"]): [1, 1, 1], + } + ) + ) + + +def test_flat_should_not_droplevel(): + assert pp.flatten(timeseries_df, drop_levels=(0,)).equals( + pd.DataFrame( + { + "index": pd.to_datetime( + ["2019-01-01", "2019-01-02", "2019-01-05", "2019-01-07"] + ), + "label": ["x", "y", "z", "q"], + "y": [1.0, 2.0, 3.0, 4.0], } ) ) From 568301f1f1fa7f39bae6a91b2eba8977d69aadc6 Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Thu, 14 Apr 2022 19:00:32 +0800 Subject: [PATCH 3/9] typo --- superset/utils/pandas_postprocessing/flatten.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/utils/pandas_postprocessing/flatten.py b/superset/utils/pandas_postprocessing/flatten.py index 1bfcbf483cfbe..2559bfd437c50 100644 --- a/superset/utils/pandas_postprocessing/flatten.py +++ b/superset/utils/pandas_postprocessing/flatten.py @@ -34,7 +34,7 @@ def flatten( :param df: N-dimensional DataFrame. :param reset_index: Convert index to column when df.index isn't RangeIndex - :param drop_levels: index of levels or name of level might be dropped if df is N-dimensional + :param drop_levels: index of level or names of level might be dropped if df is N-dimensional :return: a flat DataFrame Examples From 8325a19c5aadbb7871bd67faa731e1cb9a372a32 Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Thu, 14 Apr 2022 19:32:24 +0800 Subject: [PATCH 4/9] typing --- superset/utils/pandas_postprocessing/flatten.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/superset/utils/pandas_postprocessing/flatten.py b/superset/utils/pandas_postprocessing/flatten.py index 2559bfd437c50..7d6f021d4b9f5 100644 --- a/superset/utils/pandas_postprocessing/flatten.py +++ b/superset/utils/pandas_postprocessing/flatten.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from typing import Tuple, Union +from typing import Sequence, Union import pandas as pd @@ -27,7 +27,7 @@ def flatten( df: pd.DataFrame, reset_index: bool = True, - drop_levels: Union[Tuple[int, ...], Tuple[str, ...]] = (), + drop_levels: Union[Sequence[int], Sequence[str]] = (), ) -> pd.DataFrame: """ Convert N-dimensional DataFrame to a flat DataFrame From d0503087a7abab36dd554cadc4fc9212593e98ff Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Thu, 14 Apr 2022 20:36:29 +0800 Subject: [PATCH 5/9] fix pylint --- superset/utils/pandas_postprocessing/flatten.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/superset/utils/pandas_postprocessing/flatten.py b/superset/utils/pandas_postprocessing/flatten.py index 7d6f021d4b9f5..2613dbf082f72 100644 --- a/superset/utils/pandas_postprocessing/flatten.py +++ b/superset/utils/pandas_postprocessing/flatten.py @@ -34,7 +34,8 @@ def flatten( :param df: N-dimensional DataFrame. :param reset_index: Convert index to column when df.index isn't RangeIndex - :param drop_levels: index of level or names of level might be dropped if df is N-dimensional + :param drop_levels: index of level or names of level might be dropped + if df is N-dimensional :return: a flat DataFrame Examples From ffb198632d662a551bf18fedb1978d825196670b Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Thu, 14 Apr 2022 21:57:26 +0800 Subject: [PATCH 6/9] fix numeric column --- superset/utils/pandas_postprocessing/flatten.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/superset/utils/pandas_postprocessing/flatten.py b/superset/utils/pandas_postprocessing/flatten.py index 2613dbf082f72..0d2b68e4ddcc3 100644 --- a/superset/utils/pandas_postprocessing/flatten.py +++ b/superset/utils/pandas_postprocessing/flatten.py @@ -17,6 +17,7 @@ from typing import Sequence, Union import pandas as pd +from numpy.distutils.misc_util import is_sequence from superset.utils.pandas_postprocessing.utils import ( _is_multi_index_on_columns, @@ -83,10 +84,7 @@ def flatten( # every cell should be converted to string df.columns = [ FLAT_COLUMN_SEPARATOR.join( - [ - str(cell) - for cell in ([series] if isinstance(series, str) else series) - ] + [str(cell) for cell in (series if is_sequence(series) else [series])] ) for series in df.columns.to_flat_index() ] From 49c23951612e116ed55ee879797f170c92ca3eb5 Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Thu, 14 Apr 2022 22:07:21 +0800 Subject: [PATCH 7/9] fix numeric column --- .../pandas_postprocessing/test_flatten.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/unit_tests/pandas_postprocessing/test_flatten.py b/tests/unit_tests/pandas_postprocessing/test_flatten.py index 217fbd31cbe1d..78a2e3eea4421 100644 --- a/tests/unit_tests/pandas_postprocessing/test_flatten.py +++ b/tests/unit_tests/pandas_postprocessing/test_flatten.py @@ -134,3 +134,25 @@ def test_flat_should_not_droplevel(): } ) ) + + +def test_flat_integer_column_name(): + index = pd.to_datetime(["2021-01-01", "2021-01-02", "2021-01-03"]) + index.name = "__timestamp" + columns = pd.MultiIndex.from_arrays( + [["a"] * 3, [100, 200, 300]], + names=["level1", "level2"], + ) + df = pd.DataFrame(index=index, columns=columns, data=1) + assert pp.flatten(df, drop_levels=(0,)).equals( + pd.DataFrame( + { + "__timestamp": pd.to_datetime( + ["2021-01-01", "2021-01-02", "2021-01-03"] + ), + "100": [1, 1, 1], + "200": [1, 1, 1], + "300": [1, 1, 1], + } + ) + ) From 51aa319e17c27d53f8acbd2cecc7b941b7ae253f Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Thu, 14 Apr 2022 22:29:03 +0800 Subject: [PATCH 8/9] fix pylint --- superset/utils/pandas_postprocessing/flatten.py | 1 - 1 file changed, 1 deletion(-) diff --git a/superset/utils/pandas_postprocessing/flatten.py b/superset/utils/pandas_postprocessing/flatten.py index 0d2b68e4ddcc3..04fc7dbfa6fb4 100644 --- a/superset/utils/pandas_postprocessing/flatten.py +++ b/superset/utils/pandas_postprocessing/flatten.py @@ -78,7 +78,6 @@ def flatten( 1 2021-01-02 1 1 1 1 2 2021-01-03 1 1 1 1 """ - if _is_multi_index_on_columns(df): df.columns = df.columns.droplevel(drop_levels) # every cell should be converted to string From 72ed22a34341c552b74f3536f9bee71a1b4bfb15 Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Thu, 14 Apr 2022 22:35:17 +0800 Subject: [PATCH 9/9] fix pylint --- superset/utils/pandas_postprocessing/flatten.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/superset/utils/pandas_postprocessing/flatten.py b/superset/utils/pandas_postprocessing/flatten.py index 04fc7dbfa6fb4..3d5a003bf1e5d 100644 --- a/superset/utils/pandas_postprocessing/flatten.py +++ b/superset/utils/pandas_postprocessing/flatten.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. + from typing import Sequence, Union import pandas as pd @@ -83,6 +84,7 @@ def flatten( # every cell should be converted to string df.columns = [ FLAT_COLUMN_SEPARATOR.join( + # pylint: disable=superfluous-parens [str(cell) for cell in (series if is_sequence(series) else [series])] ) for series in df.columns.to_flat_index()