MSeal · shouples · Sep 10, 2022 · Sep 10, 2022 · Sep 10, 2022 · Sep 10, 2022
diff --git a/dx/formatters/dataresource.py b/dx/formatters/dataresource.py
@@ -143,8 +143,14 @@ def generate_dataresource_body(
     schema = build_table_schema(df)
     logger.debug(f"{schema=}")
 
-    # fillna(np.nan) to handle pd.NA values
-    data = df.fillna(np.nan).reset_index().to_dict("records")
+    # Cast to `object` first, then replace every null (`pd.NA`, `np.nan`)
+    # with `None`, leaving all other values untouched.
+    # Without the cast to `object`, `NaN`s would persist (only `pd.NA`s
+    # would be converted to `None`).
+    # We build the schema before this step because `astype(object)`
+    # converts every column to `object` dtype, not only those holding nulls.
+    clean_df = df.astype(object).where(df.notnull(), None)
+    data = clean_df.reset_index().to_dict("records")
 
     payload = {
         "schema": schema,

diff --git a/dx/formatters/dx.py b/dx/formatters/dx.py
@@ -142,8 +142,14 @@ def generate_dx_body(
     schema = build_table_schema(df)
     logger.debug(f"{schema=}")
 
-    # fillna(np.nan) to handle pd.NA values
-    data = df.fillna(np.nan).reset_index().transpose().values.tolist()
+    # Cast to `object` first, then replace every null (`pd.NA`, `np.nan`)
+    # with `None`, leaving all other values untouched.
+    # Without the cast to `object`, `NaN`s would persist (only `pd.NA`s
+    # would be converted to `None`).
+    # We build the schema before this step because `astype(object)`
+    # converts every column to `object` dtype, not only those holding nulls.
+    clean_df = df.astype(object).where(df.notnull(), None)
+    data = clean_df.reset_index().transpose().values.tolist()
 
     # this will include the `df.index` by default (e.g. slicing/sampling)
     payload = {

diff --git a/dx/tests/test_dataresource.py b/dx/tests/test_dataresource.py
@@ -1,5 +1,7 @@
 import uuid
 
+import numpy as np
+import pandas as pd
 import pytest
 
 from dx.formatters.dataresource import (
@@ -60,3 +62,19 @@ def test_datalink_toggle(enabled: bool):
             format_dataresource(df)
         except Exception as e:
             assert False, f"failed with {e}"
+
+
+@pytest.mark.parametrize("null_value", [np.nan, pd.NA])
+def test_dx_converts_na_to_none(null_value):
+    """
+    Test dataresource formatting properly converts `pd.NA` and `NaN`
+    values to `None` before passing along the payload.
+    """
+    df = pd.DataFrame({
+        "foo": [1, 2, null_value],
+        "bar": ["a", null_value, "b"],
+    })
+    payload = generate_dataresource_body(df)
+    assert payload["data"][0] == {"index": 0, "foo": 1, "bar": "a"}
+    assert payload["data"][1] == {"index": 1, "foo": 2, "bar": None}
+    assert payload["data"][2] == {"index": 2, "foo": None, "bar": "b"}
diff --git a/dx/tests/test_dx.py b/dx/tests/test_dx.py
@@ -1,5 +1,7 @@
 import uuid
 
+import numpy as np
+import pandas as pd
 import pytest
 
 from dx.formatters.dx import format_dx, generate_dx_body, get_dx_settings
@@ -57,3 +59,18 @@ def test_datalink_toggle(enabled: bool):
             format_dx(df)
         except Exception as e:
             assert False, f"failed with {e}"
+
+
+@pytest.mark.parametrize("null_value", [np.nan, pd.NA])
+def test_dx_converts_na_to_none(null_value):
+    """
+    Test dx formatting properly converts `pd.NA` and `NaN`
+    values to `None` before passing along the payload.
+    """
+    df = pd.DataFrame({
+        "foo": [1, 2, null_value],
+        "bar": ["a", null_value, "b"],
+    })
+    payload = generate_dx_body(df)
+    assert payload["data"][1] == [1, 2, None]
+    assert payload["data"][2] == ["a", None, "b"]