Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix for nonunique index checking JSON serialization #35

Merged
merged 2 commits into from
Sep 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions dx/tests/test_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ def test_dataresource_media_type(
sample_dataframe: pd.DataFrame,
get_ipython: TerminalInteractiveShell,
):
"""
Test dataresource formatting returns the right media types
and doesn't fail at any point with a basic dataframe.
"""
payload, metadata = handle_dataresource_format(sample_dataframe, ipython_shell=get_ipython)
assert dataresource_settings.DATARESOURCE_MEDIA_TYPE in payload
assert dataresource_settings.DATARESOURCE_MEDIA_TYPE in metadata
Expand All @@ -21,6 +25,40 @@ def test_dx_media_type(
sample_dataframe: pd.DataFrame,
get_ipython: TerminalInteractiveShell,
):
"""
Test dx formatting returns the right media types
and doesn't fail at any point with a basic dataframe.
"""
payload, metadata = handle_dx_format(sample_dataframe, ipython_shell=get_ipython)
assert dx_settings.DX_MEDIA_TYPE in payload
assert dx_settings.DX_MEDIA_TYPE in metadata


def test_dataresource_nonunique_index_succeeds(
    sample_dataframe: pd.DataFrame,
    get_ipython: TerminalInteractiveShell,
):
    """
    Test dataresource formatting doesn't fail while formatting
    a dataframe with duplicate series and index values.
    """
    # Stacking the frame on top of itself keeps the original index labels,
    # so each label appears twice and the index is no longer unique.
    double_df = pd.concat([sample_dataframe, sample_dataframe])
    # Call the formatter directly: any exception it raises fails the test
    # and is reported with its full traceback. Wrapping the call in
    # `try/except Exception: assert False` would hide the traceback and,
    # with asserts stripped (`python -O`), silently swallow the error.
    handle_dataresource_format(double_df, ipython_shell=get_ipython)


def test_dx_nonunique_index_succeeds(
    sample_dataframe: pd.DataFrame,
    get_ipython: TerminalInteractiveShell,
):
    """
    Test dx formatting doesn't fail while formatting
    a dataframe with duplicate series and index values.
    """
    # Stacking the frame on top of itself keeps the original index labels,
    # so each label appears twice and the index is no longer unique.
    double_df = pd.concat([sample_dataframe, sample_dataframe])
    # Call the formatter directly: any exception it raises fails the test
    # and is reported with its full traceback. Wrapping the call in
    # `try/except Exception: assert False` would hide the traceback and,
    # with asserts stripped (`python -O`), silently swallow the error.
    handle_dx_format(double_df, ipython_shell=get_ipython)
6 changes: 6 additions & 0 deletions dx/utils/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,12 @@ def is_json_serializable(s: pd.Series) -> bool:
s.to_json()
return True
except (TypeError, OverflowError, UnicodeDecodeError):
# these are the main serialization errors we expect
return False
except ValueError as ve:
# ...but we may get here if we have a series with duplicate index values
# "ValueError: Series index must be unique for orient='index'"
logger.debug(ve)
return False


Expand Down