Skip to content

Commit

Permalink
ENH: Format decimal.Decimal as full precision strings in `.to_json(…
Browse files Browse the repository at this point in the history
…...)` (#60698)

* Format decimal.Decimal as full precision strings in .to_json(...)

* Fix failing tests

* Clean up Decimal to utf8 conversion and switch to using PyObject_Format() to suppress scientific notation

* Add whatsnew entry
  • Loading branch information
Tolker-KU authored Jan 14, 2025
1 parent 8bc8c0a commit 817b706
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 25 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Other enhancements
- :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`)
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
- :meth:`DataFrame.to_json` now encodes ``Decimal`` as strings instead of floats (:issue:`60698`)
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
Expand Down
35 changes: 33 additions & 2 deletions pandas/_libs/src/vendored/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,27 @@ static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) {
return outValue;
}

// Render a decimal.Decimal as a UTF-8 string for the JT_UTF8 encoding path.
//
// The value is formatted through PyObject_Format() with the "f" format spec
// so that the output is fixed-point text rather than scientific notation.
//
// _obj: the Python object to format (borrowed reference).
// tc:   the type context; on success the formatted str object is stored in
//       GET_TC(tc)->newObj so the returned buffer stays valid after return.
//       NOTE(review): presumably newObj is released when the type context is
//       torn down -- confirm against Object_endTypeContext.
// len:  out-parameter receiving the byte length of the returned buffer; it is
//       only written on success.
//
// Returns a pointer to the UTF-8 bytes of the formatted string, or NULL on
// failure. On failure the encoder's errorMsg is set to "", matching the
// existing error path of this function; the Python exception raised by the
// failing C-API call is left pending.
static char *PyDecimalToUTF8Callback(JSOBJ _obj, JSONTypeContext *tc,
                                     size_t *len) {
  PyObject *obj = (PyObject *)_obj;

  PyObject *format_spec = PyUnicode_FromStringAndSize("f", 1);
  if (format_spec == NULL) {
    // Allocation failed; bail out before Py_DECREF would dereference NULL.
    ((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
    return NULL;
  }

  PyObject *str = PyObject_Format(obj, format_spec);
  Py_DECREF(format_spec);
  if (str == NULL) {
    ((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
    return NULL;
  }

  // Hand ownership of the formatted string to the type context so that the
  // UTF-8 buffer returned below outlives this call.
  GET_TC(tc)->newObj = str;

  Py_ssize_t s_len;
  char *outValue = (char *)PyUnicode_AsUTF8AndSize(str, &s_len);
  if (outValue == NULL) {
    // s_len is not meaningful on failure; do not publish it through *len.
    ((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
    return NULL;
  }

  *len = (size_t)s_len;
  return outValue;
}

//=============================================================================
// Numpy array iteration functions
//=============================================================================
Expand Down Expand Up @@ -1467,8 +1488,18 @@ static void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
tc->type = JT_UTF8;
return;
} else if (object_is_decimal_type(obj)) {
pc->doubleValue = PyFloat_AsDouble(obj);
tc->type = JT_DOUBLE;
PyObject *is_nan_py = PyObject_RichCompare(obj, obj, Py_NE);
if (is_nan_py == NULL) {
goto INVALID;
}
int is_nan = (is_nan_py == Py_True);
Py_DECREF(is_nan_py);
if (is_nan) {
tc->type = JT_NULL;
return;
}
pc->PyTypeToUTF8 = PyDecimalToUTF8Callback;
tc->type = JT_UTF8;
return;
} else if (PyDateTime_Check(obj) || PyDate_Check(obj)) {
if (object_is_nat_type(obj)) {
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/json/test_json_table_schema_ext_dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def test_build_decimal_series(self, dc):
expected = OrderedDict(
[
("schema", schema),
("data", [OrderedDict([("id", 0), ("a", 10.0)])]),
("data", [OrderedDict([("id", 0), ("a", "10")])]),
]
)

Expand Down Expand Up @@ -245,7 +245,7 @@ def test_to_json(self, da, dc, sa, ia):
[
("idx", 0),
("A", "2021-10-10T00:00:00.000"),
("B", 10.0),
("B", "10"),
("C", "pandas"),
("D", 10),
]
Expand Down
7 changes: 1 addition & 6 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import datetime
from datetime import timedelta
from decimal import Decimal
from io import StringIO
import json
import os
Expand Down Expand Up @@ -2025,12 +2024,8 @@ def test_to_s3(self, s3_public_bucket, s3so):
timeout -= 0.1
assert timeout > 0, "Timed out waiting for file to appear on moto"

def test_json_pandas_nulls(self, nulls_fixture, request):
def test_json_pandas_nulls(self, nulls_fixture):
# GH 31615
if isinstance(nulls_fixture, Decimal):
mark = pytest.mark.xfail(reason="not implemented")
request.applymarker(mark)

expected_warning = None
msg = (
"The default 'epoch' date format is deprecated and will be removed "
Expand Down
30 changes: 15 additions & 15 deletions pandas/tests/io/json/test_ujson.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,56 +57,56 @@ def test_encode_decimal(self):
sut = decimal.Decimal("1337.1337")
encoded = ujson.ujson_dumps(sut, double_precision=15)
decoded = ujson.ujson_loads(encoded)
assert decoded == 1337.1337
assert decoded == "1337.1337"

sut = decimal.Decimal("0.95")
encoded = ujson.ujson_dumps(sut, double_precision=1)
assert encoded == "1.0"
assert encoded == '"0.95"'

decoded = ujson.ujson_loads(encoded)
assert decoded == 1.0
assert decoded == "0.95"

sut = decimal.Decimal("0.94")
encoded = ujson.ujson_dumps(sut, double_precision=1)
assert encoded == "0.9"
assert encoded == '"0.94"'

decoded = ujson.ujson_loads(encoded)
assert decoded == 0.9
assert decoded == "0.94"

sut = decimal.Decimal("1.95")
encoded = ujson.ujson_dumps(sut, double_precision=1)
assert encoded == "2.0"
assert encoded == '"1.95"'

decoded = ujson.ujson_loads(encoded)
assert decoded == 2.0
assert decoded == "1.95"

sut = decimal.Decimal("-1.95")
encoded = ujson.ujson_dumps(sut, double_precision=1)
assert encoded == "-2.0"
assert encoded == '"-1.95"'

decoded = ujson.ujson_loads(encoded)
assert decoded == -2.0
assert decoded == "-1.95"

sut = decimal.Decimal("0.995")
encoded = ujson.ujson_dumps(sut, double_precision=2)
assert encoded == "1.0"
assert encoded == '"0.995"'

decoded = ujson.ujson_loads(encoded)
assert decoded == 1.0
assert decoded == "0.995"

sut = decimal.Decimal("0.9995")
encoded = ujson.ujson_dumps(sut, double_precision=3)
assert encoded == "1.0"
assert encoded == '"0.9995"'

decoded = ujson.ujson_loads(encoded)
assert decoded == 1.0
assert decoded == "0.9995"

sut = decimal.Decimal("0.99999999999999944")
encoded = ujson.ujson_dumps(sut, double_precision=15)
assert encoded == "1.0"
assert encoded == '"0.99999999999999944"'

decoded = ujson.ujson_loads(encoded)
assert decoded == 1.0
assert decoded == "0.99999999999999944"

@pytest.mark.parametrize("ensure_ascii", [True, False])
def test_encode_string_conversion(self, ensure_ascii):
Expand Down

0 comments on commit 817b706

Please sign in to comment.