diff --git a/dx/tests/test_formatting.py b/dx/tests/test_formatting.py index 4967c4a4..4a6e5d68 100644 --- a/dx/tests/test_formatting.py +++ b/dx/tests/test_formatting.py @@ -12,6 +12,10 @@ def test_dataresource_media_type( sample_dataframe: pd.DataFrame, get_ipython: TerminalInteractiveShell, ): + """ + Test dataresource formatting returns the right media types + and doesn't fail at any point with a basic dataframe. + """ payload, metadata = handle_dataresource_format(sample_dataframe, ipython_shell=get_ipython) assert dataresource_settings.DATARESOURCE_MEDIA_TYPE in payload assert dataresource_settings.DATARESOURCE_MEDIA_TYPE in metadata @@ -21,6 +25,40 @@ def test_dx_media_type( sample_dataframe: pd.DataFrame, get_ipython: TerminalInteractiveShell, ): + """ + Test dx formatting returns the right media types + and doesn't fail at any point with a basic dataframe. + """ payload, metadata = handle_dx_format(sample_dataframe, ipython_shell=get_ipython) assert dx_settings.DX_MEDIA_TYPE in payload assert dx_settings.DX_MEDIA_TYPE in metadata + + +def test_dataresource_nonunique_index_succeeds( + sample_dataframe: pd.DataFrame, + get_ipython: TerminalInteractiveShell, +): + """ + Test dataresource formatting doesn't fail while formatting + a dataframe with duplicate series and index values. + """ + double_df = pd.concat([sample_dataframe, sample_dataframe]) + try: + handle_dataresource_format(double_df, ipython_shell=get_ipython) + except Exception as e: + assert False, f"{e}" + + +def test_dx_nonunique_index_succeeds( + sample_dataframe: pd.DataFrame, + get_ipython: TerminalInteractiveShell, +): + """ + Test dx formatting doesn't fail while formatting + a dataframe with duplicate series and index values. 
+ """ + double_df = pd.concat([sample_dataframe, sample_dataframe]) + try: + handle_dx_format(double_df, ipython_shell=get_ipython) + except Exception as e: + assert False, f"{e}" diff --git a/dx/utils/datatypes.py b/dx/utils/datatypes.py index ac7a6d82..c30c60ed 100644 --- a/dx/utils/datatypes.py +++ b/dx/utils/datatypes.py @@ -205,6 +205,12 @@ def is_json_serializable(s: pd.Series) -> bool: s.to_json() return True except (TypeError, OverflowError, UnicodeDecodeError): + # these are the main serialization errors we expect + return False + except ValueError as ve: + # ...but we may get here if we have a series with duplicate index values + # "ValueError: Series index must be unique for orient='index'" + logger.debug(ve) return False