From 4d5ad03e1fd09b8be26843f7cb165051ab9a9c05 Mon Sep 17 00:00:00 2001
From: Jeremy Maitin-Shepard
Date: Thu, 9 Sep 2021 12:56:10 -0700
Subject: [PATCH] Avoid use of temporary `bytes` object in string_caster for UTF-8 (#3257)

Fixes #3252
---
 include/pybind11/cast.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h
index db79f57cda..3e621eba7d 100644
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@@ -377,6 +377,22 @@ template <typename StringType, bool IsView = false> struct string_caster {
 #endif
         }
 
+#if PY_VERSION_HEX >= 0x03030000
+        // On Python >= 3.3, for UTF-8 we avoid the need for a temporary `bytes`
+        // object by using `PyUnicode_AsUTF8AndSize`.
+        if (PYBIND11_SILENCE_MSVC_C4127(UTF_N == 8)) {
+            Py_ssize_t size = -1;
+            const auto *buffer
+                = reinterpret_cast<const CharT *>(PyUnicode_AsUTF8AndSize(load_src.ptr(), &size));
+            if (!buffer) {
+                PyErr_Clear();
+                return false;
+            }
+            value = StringType(buffer, static_cast<size_t>(size));
+            return true;
+        }
+#endif
+
         auto utfNbytes = reinterpret_steal<object>(PyUnicode_AsEncodedString(
             load_src.ptr(), UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr));
         if (!utfNbytes) { PyErr_Clear(); return false; }