From 52b28ecd46081e2cfbd26fb1e56a12ccda46ffee Mon Sep 17 00:00:00 2001 From: Dave Shoup Date: Fri, 2 Sep 2022 16:30:15 -0400 Subject: [PATCH 1/2] add tests for concat dataframes --- dx/tests/test_formatting.py | 38 +++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/dx/tests/test_formatting.py b/dx/tests/test_formatting.py index 4967c4a4..4a6e5d68 100644 --- a/dx/tests/test_formatting.py +++ b/dx/tests/test_formatting.py @@ -12,6 +12,10 @@ def test_dataresource_media_type( sample_dataframe: pd.DataFrame, get_ipython: TerminalInteractiveShell, ): + """ + Test dataresource formatting returns the right media types + and doesn't fail at any point with a basic dataframe. + """ payload, metadata = handle_dataresource_format(sample_dataframe, ipython_shell=get_ipython) assert dataresource_settings.DATARESOURCE_MEDIA_TYPE in payload assert dataresource_settings.DATARESOURCE_MEDIA_TYPE in metadata @@ -21,6 +25,40 @@ def test_dx_media_type( sample_dataframe: pd.DataFrame, get_ipython: TerminalInteractiveShell, ): + """ + Test dx formatting returns the right media types + and doesn't fail at any point with a basic dataframe. + """ payload, metadata = handle_dx_format(sample_dataframe, ipython_shell=get_ipython) assert dx_settings.DX_MEDIA_TYPE in payload assert dx_settings.DX_MEDIA_TYPE in metadata + + +def test_dataresource_nonunique_index_succeeds( + sample_dataframe: pd.DataFrame, + get_ipython: TerminalInteractiveShell, +): + """ + Test dataresource formatting doesn't fail while formatting + a dataframe with duplicate series and index values. 
+ """ + double_df = pd.concat([sample_dataframe, sample_dataframe]) + try: + handle_dataresource_format(double_df, ipython_shell=get_ipython) + except Exception as e: + assert False, f"{e}" + + +def test_dx_nonunique_index_succeeds( + sample_dataframe: pd.DataFrame, + get_ipython: TerminalInteractiveShell, +): + """ + Test dx formatting doesn't fail while formatting + a dataframe with duplicate series and index values. + """ + double_df = pd.concat([sample_dataframe, sample_dataframe]) + try: + handle_dx_format(double_df, ipython_shell=get_ipython) + except Exception as e: + assert False, f"{e}" From 98f274a45157db1b33946651f02e774e37ca68ad Mon Sep 17 00:00:00 2001 From: Dave Shoup Date: Fri, 2 Sep 2022 16:30:40 -0400 Subject: [PATCH 2/2] handle ValueError for duplicate index --- dx/utils/datatypes.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dx/utils/datatypes.py b/dx/utils/datatypes.py index ac7a6d82..c30c60ed 100644 --- a/dx/utils/datatypes.py +++ b/dx/utils/datatypes.py @@ -205,6 +205,12 @@ def is_json_serializable(s: pd.Series) -> bool: s.to_json() return True except (TypeError, OverflowError, UnicodeDecodeError): + # these are the main serialization errors we expect + return False + except ValueError as ve: + # ...but we may get here if we have a series with duplicate index values + # "ValueError: Series index must be unique for orient='index'" + logger.debug(ve) return False