Add py3k warnings for str/bytes encode, decode, format and formatter helpers.

What this patch does (as visible in the hunks below):
  * Adds a Py_ssize_t `ob_bstate` member to PyStringObject and to
    PyUnicodeObject, and defines BSTATE_NOT_SURE / BSTATE_BYTE /
    BSTATE_UNICODE (0 / 1 / 2) in both stringobject.h and unicodeobject.h.
  * Calls PyErr_WarnPy3k() at the top of formatter_parser(),
    formatter_field_name_split(), string_encode(), string_decode() and
    string__format__(); each returns NULL when that call returns < 0.
  * string_decode() first sets ob_bstate to BSTATE_BYTE when
    PyString_CheckExact(self) holds, and only warns when ob_bstate is
    BSTATE_BYTE.
  * Adds test_bytes_parsing, test_str_parsing and test_string_parsing to
    Lib/test/test_py3kwarn.py.

Review notes:
  * Fixed the formatter_parser() warning text: it named '_format_parser()',
    but the method exercised by the new tests is '_formatter_parser()'.
    The post-image blob id on the string_format.h index line predates this
    correction.
  * The line structure of this patch was restored from a whitespace-collapsed
    copy; leading whitespace on context lines should be checked against the
    target tree before applying.
  * NOTE(review): the BSTATE_* macros are defined identically in two headers;
    consider defining them in one place.
  * NOTE(review): the warning in string__format__() mentions 'format()' but is
    attached to __format__; confirm this is the intended hook.

diff --git a/Include/stringobject.h b/Include/stringobject.h
index 12cc093c629e973..8232258b6c58cdc 100644
--- a/Include/stringobject.h
+++ b/Include/stringobject.h
@@ -36,6 +36,7 @@ typedef struct {
     PyObject_VAR_HEAD
     long ob_shash;
     int ob_sstate;
+    Py_ssize_t ob_bstate;
     char ob_sval[1];
 
     /* Invariants:
@@ -52,6 +53,10 @@ typedef struct {
 #define SSTATE_INTERNED_MORTAL 1
 #define SSTATE_INTERNED_IMMORTAL 2
 
+#define BSTATE_NOT_SURE 0
+#define BSTATE_BYTE 1
+#define BSTATE_UNICODE 2
+
 PyAPI_DATA(PyTypeObject) PyBaseString_Type;
 PyAPI_DATA(PyTypeObject) PyString_Type;
 
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 7781f9663015fbb..f6e46ac29073275 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -415,6 +415,7 @@ extern "C" {
 typedef struct {
     PyObject_HEAD
     Py_ssize_t length;          /* Length of raw Unicode data in buffer */
+    Py_ssize_t ob_bstate;
     Py_UNICODE *str;            /* Raw Unicode buffer */
     long hash;                  /* Hash value; -1 if not set */
     PyObject *defenc;           /* (Default) Encoded version as Python
@@ -422,6 +423,10 @@ typedef struct {
                                    implementing the buffer protocol */
 } PyUnicodeObject;
 
+#define BSTATE_NOT_SURE 0
+#define BSTATE_BYTE 1
+#define BSTATE_UNICODE 2
+
 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
 
 #define PyUnicode_Check(op) \
diff --git a/Lib/test/test_py3kwarn.py b/Lib/test/test_py3kwarn.py
index efcd1d2f9d8447d..9ec40a0b64b6975 100644
--- a/Lib/test/test_py3kwarn.py
+++ b/Lib/test/test_py3kwarn.py
@@ -208,6 +208,22 @@ def set():
             with check_py3k_warnings() as w:
                 self.assertWarning(set(), w, expected)
 
+    def test_bytes_parsing(self):
+        with check_py3k_warnings():
+            b"{0}-{1}: {2}".format(1,"foo",True)
+            b"{0}-{1}: {2}".encode()
+
+    def test_str_parsing(self):
+        with check_py3k_warnings():
+            "{0}-{1}: {2}".decode()
+
+    def test_string_parsing(self):
+        with check_py3k_warnings():
+            b"{0}-{1}: {2}"._formatter_parser()
+            b"{0}-{1}: {2}"._formatter_field_name_split()
+            "{0}-{1}: {2}"._formatter_parser()
+            "{0}-{1}: {2}"._formatter_field_name_split()
+
     def test_slice_methods(self):
         class Spam(object):
             def __getslice__(self, i, j): pass
diff --git a/Objects/stringlib/string_format.h b/Objects/stringlib/string_format.h
index 2bd1839d72e73fb..c9f9f655ef2900a 100644
--- a/Objects/stringlib/string_format.h
+++ b/Objects/stringlib/string_format.h
@@ -1186,6 +1186,10 @@ formatter_parser(STRINGLIB_OBJECT *self)
 {
     formatteriterobject *it;
 
+    if (PyErr_WarnPy3k("'_formatter_parser()' is not supported for both unicode and bytes in 3.x: use alternate format parsing syntax.", 1) < 0) {
+        return NULL;
+    }
+
     it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
     if (it == NULL)
         return NULL;
@@ -1326,6 +1330,10 @@ formatter_field_name_split(STRINGLIB_OBJECT *self)
     PyObject *first_obj = NULL;
     PyObject *result = NULL;
 
+    if (PyErr_WarnPy3k("'_formatter_field_name_split()' is not supported for both unicode and bytes in 3.x: use alternate formatter split syntax.", 1) < 0) {
+        return NULL;
+    }
+
     it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
     if (it == NULL)
         return NULL;
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index fd477627f6e62e9..d4b536565f760c5 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -3016,6 +3016,9 @@ string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
     char *errors = NULL;
     PyObject *v;
 
+    if (PyErr_WarnPy3k("'encode()' is not supported on bytes in 3.x: convert the string to unicode.", 1) < 0) {
+        return NULL;
+    }
     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
                                      kwlist, &encoding, &errors))
         return NULL;
@@ -3055,6 +3058,15 @@ string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
     char *errors = NULL;
     PyObject *v;
 
+    if (PyString_CheckExact(self)) {
+        self->ob_bstate = BSTATE_BYTE;
+    }
+
+    if ((self->ob_bstate == BSTATE_BYTE) &&
+        PyErr_WarnPy3k("'decode()' is not supported on 'str' in 3.x: convert the string to bytes.", 1) < 0) {
+        return NULL;
+    }
+
     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
                                      kwlist, &encoding, &errors))
         return NULL;
@@ -3610,6 +3622,10 @@ string__format__(PyObject* self, PyObject* args)
     PyObject *result = NULL;
     PyObject *tmp = NULL;
 
+    if (PyErr_WarnPy3k("'format()' is not supported for bytes in 3.x: use alternate format syntax.", 1) < 0) {
+        return NULL;
+    }
+
     /* If 2.x, convert format_spec to the same type as value */
     /* This is to allow things like u''.format('') */
     if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))