From 52b28ecd46081e2cfbd26fb1e56a12ccda46ffee Mon Sep 17 00:00:00 2001
From: Dave Shoup <dave.shoup@gmail.com>
Date: Fri, 2 Sep 2022 16:30:15 -0400
Subject: [PATCH 1/2] add tests for concat dataframes

---
 dx/tests/test_formatting.py | 38 +++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/dx/tests/test_formatting.py b/dx/tests/test_formatting.py
index 4967c4a4..4a6e5d68 100644
--- a/dx/tests/test_formatting.py
+++ b/dx/tests/test_formatting.py
@@ -12,6 +12,10 @@ def test_dataresource_media_type(
     sample_dataframe: pd.DataFrame,
     get_ipython: TerminalInteractiveShell,
 ):
+    """
+    Test dataresource formatting returns the right media types
+    and doesn't fail at any point with a basic dataframe.
+    """
     payload, metadata = handle_dataresource_format(sample_dataframe, ipython_shell=get_ipython)
     assert dataresource_settings.DATARESOURCE_MEDIA_TYPE in payload
     assert dataresource_settings.DATARESOURCE_MEDIA_TYPE in metadata
@@ -21,6 +25,40 @@ def test_dx_media_type(
     sample_dataframe: pd.DataFrame,
     get_ipython: TerminalInteractiveShell,
 ):
+    """
+    Test dx formatting returns the right media types
+    and doesn't fail at any point with a basic dataframe.
+    """
     payload, metadata = handle_dx_format(sample_dataframe, ipython_shell=get_ipython)
     assert dx_settings.DX_MEDIA_TYPE in payload
     assert dx_settings.DX_MEDIA_TYPE in metadata
+
+
+def test_dataresource_nonunique_index_succeeds(
+    sample_dataframe: pd.DataFrame,
+    get_ipython: TerminalInteractiveShell,
+):
+    """
+    Test dataresource formatting doesn't fail while formatting
+    a dataframe with duplicate series and index values.
+    """
+    double_df = pd.concat([sample_dataframe, sample_dataframe])
+    # No try/except wrapper: any exception raised by the formatter already
+    # fails the test, and pytest reports it with its original type and
+    # traceback instead of re-labelling it as an AssertionError.
+    handle_dataresource_format(double_df, ipython_shell=get_ipython)
+
+
+def test_dx_nonunique_index_succeeds(
+    sample_dataframe: pd.DataFrame,
+    get_ipython: TerminalInteractiveShell,
+):
+    """
+    Test dx formatting doesn't fail while formatting
+    a dataframe with duplicate series and index values.
+    """
+    double_df = pd.concat([sample_dataframe, sample_dataframe])
+    # No try/except wrapper: any exception raised by the formatter already
+    # fails the test, and pytest reports it with its original type and
+    # traceback instead of re-labelling it as an AssertionError.
+    handle_dx_format(double_df, ipython_shell=get_ipython)

From 98f274a45157db1b33946651f02e774e37ca68ad Mon Sep 17 00:00:00 2001
From: Dave Shoup <dave.shoup@gmail.com>
Date: Fri, 2 Sep 2022 16:30:40 -0400
Subject: [PATCH 2/2] handle ValueError for duplicate index

---
 dx/utils/datatypes.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/dx/utils/datatypes.py b/dx/utils/datatypes.py
index ac7a6d82..c30c60ed 100644
--- a/dx/utils/datatypes.py
+++ b/dx/utils/datatypes.py
@@ -205,6 +205,12 @@ def is_json_serializable(s: pd.Series) -> bool:
         s.to_json()
         return True
     except (TypeError, OverflowError, UnicodeDecodeError):
+        # these are the main serialization errors we expect
+        return False
+    except ValueError as ve:
+        # ...but we may get here if we have a series with duplicate index values
+        # "ValueError: Series index must be unique for orient='index'"
+        logger.debug(ve)
         return False