Skip to content

Commit

Permalink
pythongh-123378: Ensure results of PyUnicode*Error_Get{Start,End} a…
Browse files Browse the repository at this point in the history
…re clamped (pythonGH-123380)


Co-authored-by: Sergey B Kirpichev <skirpichev@gmail.com>
  • Loading branch information
picnixz and skirpichev authored Dec 4, 2024
1 parent ad9d059 commit bc0f2e9
Show file tree
Hide file tree
Showing 7 changed files with 492 additions and 111 deletions.
20 changes: 18 additions & 2 deletions Doc/c-api/exceptions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -853,12 +853,23 @@ The following functions are used to create and modify Unicode exceptions from C.
*\*start*. *start* must not be ``NULL``. Return ``0`` on success, ``-1`` on
failure.
If the :attr:`UnicodeError.object` is an empty sequence, the resulting
*start* is ``0``. Otherwise, it is clipped to ``[0, len(object) - 1]``.
.. seealso:: :attr:`UnicodeError.start`
.. c:function:: int PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start)
int PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start)
int PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start)
Set the *start* attribute of the given exception object to *start*. Return
``0`` on success, ``-1`` on failure.
Set the *start* attribute of the given exception object to *start*.
Return ``0`` on success, ``-1`` on failure.
.. note::
While passing a negative *start* does not raise an exception,
the corresponding getters will not consider it as a relative
offset.
.. c:function:: int PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
int PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
Expand All @@ -868,13 +879,18 @@ The following functions are used to create and modify Unicode exceptions from C.
*\*end*. *end* must not be ``NULL``. Return ``0`` on success, ``-1`` on
failure.
If the :attr:`UnicodeError.object` is an empty sequence, the resulting
*end* is ``0``. Otherwise, it is clipped to ``[1, len(object)]``.
.. c:function:: int PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end)
int PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end)
int PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end)
Set the *end* attribute of the given exception object to *end*. Return ``0``
on success, ``-1`` on failure.
.. seealso:: :attr:`UnicodeError.end`
.. c:function:: PyObject* PyUnicodeDecodeError_GetReason(PyObject *exc)
PyObject* PyUnicodeEncodeError_GetReason(PyObject *exc)
PyObject* PyUnicodeTranslateError_GetReason(PyObject *exc)
Expand Down
6 changes: 6 additions & 0 deletions Doc/library/exceptions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -644,10 +644,16 @@ The following exceptions are the exceptions that are usually raised.

The first index of invalid data in :attr:`object`.

This value should not be negative, as it is interpreted as an
absolute offset, but this constraint is not enforced at runtime.

.. attribute:: end

The index after the last invalid data in :attr:`object`.

This value should not be negative, as it is interpreted as an
absolute offset, but this constraint is not enforced at runtime.


.. exception:: UnicodeEncodeError

Expand Down
150 changes: 150 additions & 0 deletions Lib/test/test_capi/test_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,156 @@ def test_err_formatunraisable(self):
# CRASHES formatunraisable(NULL, NULL)


class TestUnicodeTranslateError(UnicodeTranslateError):
    """UnicodeTranslateError accepting the 5-argument UnicodeError signature.

    UnicodeTranslateError takes 4 arguments instead of 5, so this subclass
    accepts (and discards) a leading *encoding* argument. That lets the
    shared test helpers build every exception type with the same call.
    """

    def __init__(self, encoding, *args, **kwargs):
        # *encoding* is intentionally dropped: the base class has no such
        # parameter; everything else is forwarded unchanged.
        super().__init__(*args, **kwargs)


class TestUnicodeError(unittest.TestCase):
    """Exercise the C-API getters/setters for ``UnicodeError.start``/``end``.

    The getter tables below encode the expected clamped value for each
    (object length, stored value) pair; the setter tests check that any
    value is stored as-is, both through the C API and through the Python
    attribute.
    """

    def _check_no_crash(self, exc):
        # ensure that the __str__() method does not crash
        str(exc)

    def test_unicode_encode_error_get_start(self):
        get_start = _testcapi.unicode_encode_get_start
        self._test_unicode_error_get_start('x', UnicodeEncodeError, get_start)

    def test_unicode_decode_error_get_start(self):
        get_start = _testcapi.unicode_decode_get_start
        self._test_unicode_error_get_start(b'x', UnicodeDecodeError, get_start)

    def test_unicode_translate_error_get_start(self):
        get_start = _testcapi.unicode_translate_get_start
        self._test_unicode_error_get_start('x', TestUnicodeTranslateError, get_start)

    def _test_unicode_error_get_start(self, literal, exc_type, get_start):
        """Check *get_start* against a table of expected clamped values.

        *literal* is a one-item ``str`` or ``bytes`` repeated to build the
        exception's object, *exc_type* is called with the 5-argument
        UnicodeError signature, and *get_start* is the C-API getter wrapper.
        """
        for obj_len, start, c_start in [
            # normal cases
            (5, 0, 0),
            (5, 1, 1),
            (5, 2, 2),
            # out of range start is clamped to max(0, obj_len - 1)
            (0, 0, 0),
            (0, 1, 0),
            (0, 10, 0),
            (5, 5, 4),
            (5, 10, 4),
            # negative values are allowed but clipped in the getter
            (0, -1, 0),
            (1, -1, 0),
            (2, -1, 0),
            (2, -2, 0),
        ]:
            obj = literal * obj_len
            with self.subTest(obj=obj, exc_type=exc_type, start=start):
                exc = exc_type('utf-8', obj, start, obj_len, 'reason')
                self.assertEqual(get_start(exc), c_start)
                self._check_no_crash(exc)

    def test_unicode_encode_error_set_start(self):
        set_start = _testcapi.unicode_encode_set_start
        self._test_unicode_error_set_start('x', UnicodeEncodeError, set_start)

    def test_unicode_decode_error_set_start(self):
        set_start = _testcapi.unicode_decode_set_start
        self._test_unicode_error_set_start(b'x', UnicodeDecodeError, set_start)

    def test_unicode_translate_error_set_start(self):
        set_start = _testcapi.unicode_translate_set_start
        self._test_unicode_error_set_start('x', TestUnicodeTranslateError, set_start)

    def _test_unicode_error_set_start(self, literal, exc_type, set_start):
        """Check that any *start* can be stored via the C API and in Python.

        A fresh exception is built for every value so that one subtest
        cannot influence the next.
        """
        obj_len = 5
        obj = literal * obj_len
        for new_start in range(-2 * obj_len, 2 * obj_len):
            with self.subTest('C-API', obj=obj, exc_type=exc_type, new_start=new_start):
                exc = exc_type('utf-8', obj, 0, obj_len, 'reason')
                # arbitrary value is allowed in the C API setter
                set_start(exc, new_start)
                self.assertEqual(exc.start, new_start)
                self._check_no_crash(exc)

            with self.subTest('Py-API', obj=obj, exc_type=exc_type, new_start=new_start):
                exc = exc_type('utf-8', obj, 0, obj_len, 'reason')
                # arbitrary value is allowed in the attribute setter
                exc.start = new_start
                self.assertEqual(exc.start, new_start)
                self._check_no_crash(exc)

    def test_unicode_encode_error_get_end(self):
        get_end = _testcapi.unicode_encode_get_end
        self._test_unicode_error_get_end('x', UnicodeEncodeError, get_end)

    def test_unicode_decode_error_get_end(self):
        get_end = _testcapi.unicode_decode_get_end
        self._test_unicode_error_get_end(b'x', UnicodeDecodeError, get_end)

    def test_unicode_translate_error_get_end(self):
        get_end = _testcapi.unicode_translate_get_end
        self._test_unicode_error_get_end('x', TestUnicodeTranslateError, get_end)

    def _test_unicode_error_get_end(self, literal, exc_type, get_end):
        """Check *get_end* against a table of expected clamped values.

        *literal* is a one-item ``str`` or ``bytes`` repeated to build the
        exception's object, *exc_type* is called with the 5-argument
        UnicodeError signature, and *get_end* is the C-API getter wrapper.
        """
        for obj_len, end, c_end in [
            # normal cases
            (5, 1, 1),
            (5, 2, 2),
            # out-of-range clipped in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)]
            (5, 0, 1),
            (0, 0, 0),
            (0, 1, 0),
            (0, 10, 0),
            (1, 1, 1),
            (1, 2, 1),
            (5, 5, 5),
            (5, 10, 5),
            # negative values are allowed but clipped in the getter
            (0, -1, 0),
            (1, -1, 1),
            (2, -1, 1),
            (2, -2, 1),
        ]:
            obj = literal * obj_len
            with self.subTest(obj=obj, exc_type=exc_type, end=end):
                exc = exc_type('utf-8', obj, 0, end, 'reason')
                self.assertEqual(get_end(exc), c_end)
                self._check_no_crash(exc)

    def test_unicode_encode_error_set_end(self):
        set_end = _testcapi.unicode_encode_set_end
        self._test_unicode_error_set_end('x', UnicodeEncodeError, set_end)

    def test_unicode_decode_error_set_end(self):
        set_end = _testcapi.unicode_decode_set_end
        self._test_unicode_error_set_end(b'x', UnicodeDecodeError, set_end)

    def test_unicode_translate_error_set_end(self):
        set_end = _testcapi.unicode_translate_set_end
        self._test_unicode_error_set_end('x', TestUnicodeTranslateError, set_end)

    def _test_unicode_error_set_end(self, literal, exc_type, set_end):
        """Check that any *end* can be stored via the C API and in Python.

        A fresh exception is built for every value so that one subtest
        cannot influence the next.
        """
        obj_len = 5
        obj = literal * obj_len
        for new_end in range(-2 * obj_len, 2 * obj_len):
            with self.subTest('C-API', obj=obj, exc_type=exc_type, new_end=new_end):
                exc = exc_type('utf-8', obj, 0, obj_len, 'reason')
                # arbitrary value is allowed in the C API setter
                set_end(exc, new_end)
                self.assertEqual(exc.end, new_end)
                self._check_no_crash(exc)

            with self.subTest('Py-API', obj=obj, exc_type=exc_type, new_end=new_end):
                exc = exc_type('utf-8', obj, 0, obj_len, 'reason')
                # arbitrary value is allowed in the attribute setter
                exc.end = new_end
                self.assertEqual(exc.end, new_end)
                self._check_no_crash(exc)


class Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, unittest.TestCase):

def setUp(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Ensure that the value of :attr:`UnicodeEncodeError.start <UnicodeError.start>`
retrieved by :c:func:`PyUnicodeEncodeError_GetStart` lies in
``[0, max(0, objlen - 1)]`` where *objlen* is the length of
:attr:`UnicodeEncodeError.object <UnicodeError.object>`. Similar
arguments apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError`
and their corresponding C interface. Patch by Bénédikt Tran.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Ensure that the value of :attr:`UnicodeEncodeError.end <UnicodeError.end>`
retrieved by :c:func:`PyUnicodeEncodeError_GetEnd` lies in ``[min(1, objlen),
max(min(1, objlen), objlen)]`` where *objlen* is the length of
:attr:`UnicodeEncodeError.object <UnicodeError.object>`. Similar arguments
apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` and their
corresponding C interface. Patch by Bénédikt Tran.
Loading

0 comments on commit bc0f2e9

Please sign in to comment.