Skip to content

Commit

Permalink
BUG: to_json - prevent various segfault conditions (GH14256) (#17857)
Browse files Browse the repository at this point in the history
  • Loading branch information
matthiashuschle authored and jreback committed Oct 14, 2017
1 parent 3c964a4 commit 446d5b4
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 6 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -956,6 +956,7 @@ I/O
- Bug in :meth:`DataFrame.to_html` with ``notebook=True`` where DataFrames with named indices or non-MultiIndex indices had undesired horizontal or vertical alignment for column or row labels, respectively (:issue:`16792`)
- Bug in :meth:`DataFrame.to_html` in which there was no validation of the ``justify`` parameter (:issue:`17527`)
- Bug in :func:`HDFStore.select` when reading a contiguous mixed-data table featuring VLArray (:issue:`17021`)
- Bug in :func:`to_json` where several conditions (including objects with unprintable symbols, objects with deep recursion, overlong labels) caused segfaults instead of raising the appropriate exception (:issue:`14256`)

Plotting
^^^^^^^^
Expand Down Expand Up @@ -1033,3 +1034,4 @@ Other
^^^^^
- Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`)
- Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`)

7 changes: 7 additions & 0 deletions pandas/_libs/src/ujson/lib/ultrajson.h
Original file line number Diff line number Diff line change
Expand Up @@ -307,4 +307,11 @@ EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec,
const char *buffer, size_t cbBuffer);
EXPORTFUNCTION void encode(JSOBJ, JSONObjectEncoder *, const char *, size_t);

/*
 * Ensure at least __len bytes are available between (__enc)->offset and
 * (__enc)->end; when fewer remain, call Buffer_Realloc((__enc), (__len)).
 *
 * The body is wrapped in do { ... } while (0) so the expansion is exactly one
 * statement: it takes the usual trailing ';' at the call site and can be used
 * as the unbraced body of an if/else without capturing the following `else`.
 * Both arguments can be evaluated more than once, so pass expressions without
 * side effects.
 */
#define Buffer_Reserve(__enc, __len)                                      \
    do {                                                                  \
        if ((size_t)((__enc)->end - (__enc)->offset) < (size_t)(__len)) { \
            Buffer_Realloc((__enc), (__len));                             \
        }                                                                 \
    } while (0)

/*
 * Invoked by the Buffer_Reserve macro when fewer than cbNeeded bytes remain
 * between enc->offset and enc->end.
 * NOTE(review): the definition is not visible from this header; presumably it
 * enlarges the encoder's output buffer -- confirm in ultrajsonenc.c.
 */
void Buffer_Realloc(JSONObjectEncoder *enc, size_t cbNeeded);

#endif // PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_
7 changes: 2 additions & 5 deletions pandas/_libs/src/ujson/lib/ultrajsonenc.c
Original file line number Diff line number Diff line change
Expand Up @@ -714,11 +714,6 @@ int Buffer_EscapeStringValidated(JSOBJ obj, JSONObjectEncoder *enc,
}
}

/*
 * Ensure at least __len bytes are available between (__enc)->offset and
 * (__enc)->end; when fewer remain, call Buffer_Realloc((__enc), (__len)).
 *
 * The body is wrapped in do { ... } while (0) so the expansion is exactly one
 * statement: it takes the usual trailing ';' at the call site and can be used
 * as the unbraced body of an if/else without capturing the following `else`.
 * Both arguments can be evaluated more than once, so pass expressions without
 * side effects.
 */
#define Buffer_Reserve(__enc, __len)                                      \
    do {                                                                  \
        if ((size_t)((__enc)->end - (__enc)->offset) < (size_t)(__len)) { \
            Buffer_Realloc((__enc), (__len));                             \
        }                                                                 \
    } while (0)

/*
 * Store __chr at (__enc)->offset and advance offset by one. No capacity check
 * is performed by this macro.
 * NOTE(review): the expansion already ends in ';' and __chr is not
 * parenthesised, so a call written with its own ';' leaves an extra empty
 * statement and cannot be placed directly before an `else` in an unbraced
 * `if`.
 */
#define Buffer_AppendCharUnchecked(__enc, __chr) *((__enc)->offset++) = __chr;

FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin,
Expand Down Expand Up @@ -976,6 +971,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
}

enc->iterEnd(obj, &tc);
Buffer_Reserve(enc, 2);
Buffer_AppendCharUnchecked(enc, ']');
break;
}
Expand Down Expand Up @@ -1003,6 +999,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
}

enc->iterEnd(obj, &tc);
Buffer_Reserve(enc, 2);
Buffer_AppendCharUnchecked(enc, '}');
break;
}
Expand Down
7 changes: 6 additions & 1 deletion pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,7 @@ static void NpyArr_getLabel(JSOBJ obj, JSONTypeContext *tc, size_t *outLen,
JSONObjectEncoder *enc = (JSONObjectEncoder *)tc->encoder;
PRINTMARK();
*outLen = strlen(labels[idx]);
Buffer_Reserve(enc, *outLen);
memcpy(enc->offset, labels[idx], sizeof(char) * (*outLen));
enc->offset += *outLen;
*outLen = 0;
Expand Down Expand Up @@ -879,7 +880,7 @@ int PdBlock_iterNext(JSOBJ obj, JSONTypeContext *tc) {
NpyArrContext *npyarr;
PRINTMARK();

if (PyErr_Occurred()) {
if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) {
return 0;
}

Expand Down Expand Up @@ -1224,6 +1225,10 @@ int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) {
PyObject *attrName;
char *attrStr;

if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) {
return 0;
}

if (itemValue) {
Py_DECREF(GET_TC(tc)->itemValue);
GET_TC(tc)->itemValue = itemValue = NULL;
Expand Down
45 changes: 45 additions & 0 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,51 @@ def test_blocks_compat_GH9037(self):
by_blocks=True,
check_exact=True)

def test_frame_nonprintable_bytes(self):
    # GH14256: a column that fails to serialise caused a segfault when it
    # was not the last column of the frame.

    class BinaryThing(object):
        # Carries a hex string (also its str() form) together with the raw
        # bytes that the hex string decodes to.

        def __init__(self, hexed):
            self.hexed = hexed
            # PY2 decodes through the 'hex' codec, otherwise bytes.fromhex
            self.binary = (hexed.decode('hex') if compat.PY2
                           else bytes.fromhex(hexed))

        def __str__(self):
            return self.hexed

    hexed = '574b4454ba8c5eb4f98a8f45'
    binthing = BinaryThing(hexed)
    expected_single = '{"A":{"0":"%s"}}' % hexed
    expected_mixed = '{"A":{"0":"%s"},"B":{"0":1}}' % hexed

    # printable content is converted as-is
    frame_printable = DataFrame({'A': [binthing.hexed]})
    assert frame_printable.to_json() == expected_single

    # non-printable content has to raise OverflowError, both on its own and
    # when followed by another column (the latter used to segfault)
    frame_nonprintable = DataFrame({'A': [binthing]})
    frame_mixed = DataFrame({'A': [binthing], 'B': [1]}, columns=['A', 'B'])
    for failing_frame in (frame_nonprintable, frame_mixed):
        with pytest.raises(OverflowError):
            failing_frame.to_json()

    # a default_handler resolves the failure for non-string types
    assert frame_nonprintable.to_json(default_handler=str) == expected_single
    assert frame_mixed.to_json(default_handler=str) == expected_mixed

def test_label_overflow(self):
# GH14256: buffer length not checked when writing label
df = pd.DataFrame({'foo': [1337], 'bar' * 100000: [1]})
assert df.to_json() == \
'{"%s":{"0":1},"foo":{"0":1337}}' % ('bar' * 100000)

def test_series_non_unique_index(self):
s = Series(['a', 'b'], index=[1, 1])

Expand Down

0 comments on commit 446d5b4

Please sign in to comment.