-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Closes #47 - updated datatype handling - Decimal, datetime.date, datetime.time (#70)

* add Decimal handler and generator functions; clean up random_dataframe() arguments and add decimal_column/date_column/time_column
* add datetime.date and datetime.time generators and handlers
* check for and handle decimals and datetime.dates by default
* return gpd.GeoSeries instead of GeometryArray
* add boolean series generator option
* add datatype imports with new directory structure
* ignore flake8 C901 - "too complex"
* add datatype compatibility helpers
* add optional with_ipython_display argument to prevent calling IPython.display() on an object that goes through handle_format()
- Loading branch information
Showing
20 changed files
with
1,005 additions
and
523 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from .compatibility import * | ||
from .date_time import * | ||
from .geometry import * | ||
from .main import * | ||
from .misc import * | ||
from .numeric import * | ||
from .text import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
import traceback | ||
from typing import Any | ||
|
||
import pandas as pd | ||
from pandas.io.json import build_table_schema | ||
|
||
from dx.settings import get_settings | ||
|
||
settings = get_settings() | ||
|
||
|
||
def test_compatibility(value: Any, as_dataframe: bool = True) -> dict:
    """
    Run a value through every dx compatibility check and merge the results.

    Checks applied, in order:
    - pandas.io.json.build_table_schema (initial column schema)
    - jupyter_client.jsonutil.json_clean (IPython display cleaning)
    - duckdb conn.register (Datalink push-down filtering)
    - the full dx formatting/handling pipeline
    """
    checks = (
        test_build_table_schema,
        test_json_clean,
        test_db_write,
        test_dx_handling,
    )
    combined: dict = {}
    for check in checks:
        combined.update(check(value))
    if not as_dataframe:
        return combined
    return pd.DataFrame(combined).transpose()
|
||
|
||
def test_build_table_schema(value: Any, as_dataframe: bool = False) -> dict: | ||
""" | ||
Convenience function to test the compatibility of a given object | ||
with the pandas.io.json.build_table_schema function, which | ||
is called to set up the initial column schema during dx formatting. | ||
""" | ||
df = pd.DataFrame({"test": [value]}) | ||
result = {} | ||
|
||
try: | ||
schema = build_table_schema(df, index=False) | ||
fields = schema["fields"] | ||
field_type = [ | ||
field_schema["type"] for field_schema in fields if field_schema["name"] == "test" | ||
][0] | ||
result["pandas.io.json.build_table_schema"] = { | ||
"success": True, | ||
"type": field_type, | ||
} | ||
except Exception as e: | ||
result["pandas.io.json.build_table_schema"] = { | ||
"error": str(e), | ||
"success": False, | ||
"traceback": traceback.format_exc(), | ||
} | ||
|
||
if as_dataframe: | ||
return pd.DataFrame(result).transpose() | ||
return result | ||
|
||
|
||
def test_json_clean(value: Any, as_dataframe: bool = False) -> dict: | ||
""" | ||
Convenience function to test the compatibility of a given object | ||
with the jupyter_client.jsonutil.json_clean function, which | ||
is called during IPython.display after dx formatting. | ||
""" | ||
df = pd.DataFrame({"test": [value]}) | ||
result = {} | ||
|
||
try: | ||
from jupyter_client.jsonutil import json_clean | ||
|
||
clean_json = json_clean(df.to_dict("records")) | ||
clean_json_value = clean_json[0]["test"] | ||
result["jupyter_client.jsonutil.json_clean"] = { | ||
"success": True, | ||
"type": type(clean_json_value), | ||
"value": clean_json_value, | ||
} | ||
except Exception as e: | ||
result["jupyter_client.jsonutil.json_clean"] = { | ||
"error": str(e), | ||
"success": False, | ||
"traceback": traceback.format_exc(), | ||
} | ||
|
||
if as_dataframe: | ||
return pd.DataFrame(result).transpose() | ||
return result | ||
|
||
|
||
def test_db_write(value: Any, as_dataframe: bool = False) -> dict:
    """
    Check whether a given object, inside a pandas DataFrame, survives
    registration with a duckdb connection — used during Datalink-enabled
    dataframe tracking for push-down filtering.
    """
    from dx.utils.tracking import get_db_connection  # circular import

    frame = pd.DataFrame({"test": [value]})
    outcome: dict = {}

    conn = get_db_connection()
    try:
        conn.register("test", frame)
        # round-trip the value back out of duckdb to confirm it survives
        roundtrip = conn.execute("SELECT * FROM test").df()
        roundtrip_value = roundtrip.iloc[0]["test"]
        outcome["duckdb.conn.register"] = {
            "type": type(roundtrip_value),
            "success": True,
            "value": roundtrip_value,
        }
    except Exception as e:
        outcome["duckdb.conn.register"] = {
            "error": str(e),
            "success": False,
            "traceback": traceback.format_exc(),
        }

    if as_dataframe:
        return pd.DataFrame(outcome).transpose()
    return outcome
|
||
|
||
def test_dx_handling(value: Any, as_dataframe: bool = False) -> dict:
    """
    Convenience function to test the compatibility of a given object
    inside a pandas DataFrame through the entire dx formatting
    and data type handling process.
    """
    from dx.formatters.main import handle_format  # circular import

    df = pd.DataFrame({"test": [value]})
    result = {}

    try:
        payload, _ = handle_format(df, with_ipython_display=False)

        if settings.DISPLAY_MODE == "simple":
            dx_value = payload[settings.MEDIA_TYPE]["data"][0]["test"]
        elif settings.DISPLAY_MODE == "enhanced":
            dx_value = payload[settings.MEDIA_TYPE]["data"][0][0]
        else:
            # previously an unrecognized display mode left `dx_value` unbound,
            # surfacing as a confusing NameError; fail explicitly instead
            raise ValueError(f"unsupported DISPLAY_MODE: {settings.DISPLAY_MODE!r}")

        dx_schema_fields = payload[settings.MEDIA_TYPE]["schema"]["fields"]
        # should only be two fields here by default: `index` and `test`
        # but we wanted to run the entire formatting process, which doesn't need
        # an option to disable `index` from being included
        dx_schema_type = [field["type"] for field in dx_schema_fields if field["name"] == "test"][0]

        result["dx.handle_format"] = {
            "type": type(dx_value),
            "success": True,
            "value": dx_value,
            "schema_type": dx_schema_type,
        }
    except Exception as e:
        result["dx.handle_format"] = {
            "error": str(e),
            "success": False,
            "traceback": traceback.format_exc(),
        }

    if as_dataframe:
        return pd.DataFrame(result).transpose()
    return result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.