diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 07b77d3e04bbe03..15d90bdfc448b2c 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -9,10 +9,848 @@ _testcapi = None +NULL = None + class CAPITest(unittest.TestCase): + # TODO: Test the following functions: + # + # PyUnicode_ClearFreeList + # PyUnicode_FSConverter + # PyUnicode_FSDecoder + # PyUnicode_DecodeMBCS + # PyUnicode_DecodeMBCSStateful + # PyUnicode_DecodeCodePageStateful + # PyUnicode_AsMBCSString + # PyUnicode_EncodeCodePage + # PyUnicode_DecodeLocaleAndSize + # PyUnicode_DecodeLocale + # PyUnicode_EncodeLocale + # PyUnicode_DecodeFSDefault + # PyUnicode_DecodeFSDefaultAndSize + # PyUnicode_EncodeFSDefault + # + # Not tested deprecated functions: + # + # PyUnicode_AsDecodedObject + # PyUnicode_AsDecodedUnicode + # PyUnicode_AsEncodedObject + # PyUnicode_AsEncodedUnicode + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_new(self): + """Test PyUnicode_New()""" + from _testcapi import unicode_new as new + + for maxchar in 0, 0x61, 0xa1, 0x4f60, 0x1f600: + # XXX assertIs? + self.assertEqual(new(0, maxchar), '') + self.assertEqual(new(5, maxchar), chr(maxchar)*5) + self.assertEqual(new(0, 0x110000), '') + self.assertRaises(SystemError, new, 5, 0x110000) + self.assertRaises(SystemError, new, -1, 0) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_fill(self): + """Test PyUnicode_Fill()""" + from _testcapi import unicode_fill as fill + + strings = [ + 'abcde', '\xa1\xa2\xa3\xa4\xa5', + '\u4f60\u597d\u4e16\u754c\uff01', + '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604' + ] + chars = [0x78, 0xa9, 0x20ac, 0x1f638] + + for idx, fill_char in enumerate(chars): + # wide -> narrow: exceed maxchar limitation + for to in strings[:idx]: + self.assertRaises(ValueError, fill, to, 0, 0, fill_char) + for to in strings[idx:]: + for start in range(7): + for length in range(-1, 7 - start): + filled = max(min(length, 5 - start), 0) + if filled == 5 and to != strings[idx]: + # narrow -> wide + # Tests omitted since this creates invalid strings. + continue + expected = to[:start] + chr(fill_char) * filled + to[start + filled:] + self.assertEqual(fill(to, start, length, fill_char), + (expected, filled)) + + s = strings[0] + self.assertRaises(IndexError, fill, s, -1, 0, 0x78) + self.assertRaises(ValueError, fill, s, 0, 0, 0x110000) + self.assertRaises(SystemError, fill, b'abc', 0, 0, 0x78) + self.assertRaises(SystemError, fill, [], 0, 0, 0x78) + # CRASHES fill(s, 0, NULL, 0, 0) + # CRASHES fill(NULL, 0, 0, 0x78) + # TODO: Test PyUnicode_CopyCharacters() with non-modifiable unicode. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_writechar(self): + """Test PyUnicode_ReadChar()""" + from _testcapi import unicode_writechar as writechar + + strings = [ + 'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16', + '\U0001f600\U0001f601\U0001f602' + ] + chars = [0x78, 0xa9, 0x20ac, 0x1f638, 0x110000] + for i, s in enumerate(strings): + for j, c in enumerate(chars): + if j <= i: + self.assertEqual(writechar(s, 1, c), + (s[:1] + chr(c) + s[2:], 0)) + else: + self.assertRaises(ValueError, writechar, s, 1, c) + + self.assertRaises(IndexError, writechar, 'abc', 3, 0x78) + self.assertRaises(IndexError, writechar, 'abc', -1, 0x78) + self.assertRaises(TypeError, writechar, b'abc', 0, 0x78) + self.assertRaises(TypeError, writechar, [], 0, 0x78) + # CRASHES writechar(NULL, 0, 0x78) + # TODO: Test PyUnicode_CopyCharacters() with non-modifiable and legacy + # unicode. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_resize(self): + """Test PyUnicode_Resize()""" + from _testcapi import unicode_resize as resize + + strings = [ + 'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16', + '\U0001f600\U0001f601\U0001f602' + ] + for s in strings: + self.assertEqual(resize(s, 3), (s, 0)) + self.assertEqual(resize(s, 2), (s[:2], 0)) + self.assertEqual(resize(s, 4), (s + '\0', 0)) + self.assertEqual(resize(s, 0), ('', 0)) + self.assertRaises(SystemError, resize, b'abc', 0) + self.assertRaises(SystemError, resize, [], 0) + self.assertRaises(SystemError, resize, NULL, 0) + # TODO: Test PyUnicode_Resize() with non-modifiable and legacy unicode + # and with NULL as the address. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_append(self): + """Test PyUnicode_Append()""" + from _testcapi import unicode_append as append + + strings = [ + 'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16', + '\U0001f600\U0001f601\U0001f602' + ] + for left in strings: + left = left[::-1] + for right in strings: + self.assertEqual(append(left, right), left + right) + + self.assertRaises(SystemError, append, 'abc', b'abc') + self.assertRaises(SystemError, append, b'abc', 'abc') + self.assertRaises(SystemError, append, b'abc', b'abc') + self.assertRaises(SystemError, append, 'abc', []) + self.assertRaises(SystemError, append, [], 'abc') + self.assertRaises(SystemError, append, [], []) + self.assertRaises(SystemError, append, NULL, 'abc') + self.assertRaises(SystemError, append, 'abc', NULL) + # TODO: Test PyUnicode_Append() with modifiable unicode + # and with NULL as the address. + # TODO: Check reference counts. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_appendanddel(self): + """Test PyUnicode_AppendAndDel()""" + from _testcapi import unicode_appendanddel as appendanddel + + strings = [ + 'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16', + '\U0001f600\U0001f601\U0001f602' + ] + for left in strings: + left = left[::-1] + for right in strings: + self.assertEqual(appendanddel(left, right), left + right) + + self.assertRaises(SystemError, appendanddel, 'abc', b'abc') + self.assertRaises(SystemError, appendanddel, b'abc', 'abc') + self.assertRaises(SystemError, appendanddel, b'abc', b'abc') + self.assertRaises(SystemError, appendanddel, 'abc', []) + self.assertRaises(SystemError, appendanddel, [], 'abc') + self.assertRaises(SystemError, appendanddel, [], []) + self.assertRaises(SystemError, appendanddel, NULL, 'abc') + self.assertRaises(SystemError, appendanddel, 'abc', NULL) + # TODO: Test PyUnicode_AppendAndDel() with modifiable unicode + # and with NULL as the address. + # TODO: Check reference counts. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_fromstringandsize(self): + """Test PyUnicode_FromStringAndSize()""" + from _testcapi import unicode_fromstringandsize as fromstringandsize + + self.assertEqual(fromstringandsize(b'abc'), 'abc') + self.assertEqual(fromstringandsize(b'abc', 2), 'ab') + self.assertEqual(fromstringandsize(b'abc\0def'), 'abc\0def') + self.assertEqual(fromstringandsize(b'\xc2\xa1\xc2\xa2'), '\xa1\xa2') + self.assertEqual(fromstringandsize(b'\xe4\xbd\xa0'), '\u4f60') + self.assertEqual(fromstringandsize(b'\xf0\x9f\x98\x80'), '\U0001f600') + self.assertRaises(UnicodeDecodeError, fromstringandsize, b'\xc2\xa1', 1) + self.assertRaises(UnicodeDecodeError, fromstringandsize, b'\xa1', 1) + self.assertEqual(fromstringandsize(b'', 0), '') + self.assertEqual(fromstringandsize(NULL, 0), '') + + self.assertRaises(SystemError, fromstringandsize, b'abc', -1) + # TODO: Test PyUnicode_FromStringAndSize(NULL, size) for size != 0 + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_fromstring(self): + """Test PyUnicode_FromString()""" + from _testcapi import unicode_fromstring as fromstring + + self.assertEqual(fromstring(b'abc'), 'abc') + self.assertEqual(fromstring(b'\xc2\xa1\xc2\xa2'), '\xa1\xa2') + self.assertEqual(fromstring(b'\xe4\xbd\xa0'), '\u4f60') + self.assertEqual(fromstring(b'\xf0\x9f\x98\x80'), '\U0001f600') + self.assertRaises(UnicodeDecodeError, fromstring, b'\xc2') + self.assertRaises(UnicodeDecodeError, fromstring, b'\xa1') + self.assertEqual(fromstring(b''), '') + + # CRASHES fromstring(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_fromkindanddata(self): + """Test PyUnicode_FromKindAndData()""" + from _testcapi import unicode_fromkindanddata as fromkindanddata + + strings = [ + 'abcde', '\xa1\xa2\xa3\xa4\xa5', + '\u4f60\u597d\u4e16\u754c\uff01', + '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604' + ] + enc1 = 'latin1' + for s in strings[:2]: + self.assertEqual(fromkindanddata(1, s.encode(enc1)), s) + enc2 = 'utf-16le' if sys.byteorder == 'little' else 'utf-16be' + for s in strings[:3]: + self.assertEqual(fromkindanddata(2, s.encode(enc2)), s) + enc4 = 'utf-32le' if sys.byteorder == 'little' else 'utf-32be' + for s in strings: + self.assertEqual(fromkindanddata(4, s.encode(enc4)), s) + self.assertEqual(fromkindanddata(2, '\U0001f600'.encode(enc2)), + '\ud83d\ude00') + for kind in 1, 2, 4: + self.assertEqual(fromkindanddata(kind, b''), '') + self.assertEqual(fromkindanddata(kind, b'\0'*kind), '\0') + self.assertEqual(fromkindanddata(kind, NULL, 0), '') + + for kind in -1, 0, 3, 5, 8: + self.assertRaises(SystemError, fromkindanddata, kind, b'') + self.assertRaises(ValueError, fromkindanddata, 1, b'abc', -1) + self.assertRaises(ValueError, fromkindanddata, 1, NULL, -1) + # CRASHES fromkindanddata(1, NULL, 1) + # CRASHES fromkindanddata(4, b'\xff\xff\xff\xff') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_substring(self): + """Test PyUnicode_Substring()""" + from _testcapi import unicode_substring as substring + + strings = [ + 'ab', 'ab\xa1\xa2', + 'ab\xa1\xa2\u4f60\u597d', + 'ab\xa1\xa2\u4f60\u597d\U0001f600\U0001f601' + ] + for s in strings: + for start in range(0, len(s) + 2): + for end in range(max(start-1, 0), len(s) + 2): + self.assertEqual(substring(s, start, end), s[start:end]) + + self.assertRaises(IndexError, substring, 'abc', -1, 0) + self.assertRaises(IndexError, substring, 'abc', 0, -1) + # CRASHES substring(b'abc', 0, 0) + # CRASHES substring([], 0, 0) + # CRASHES substring(NULL, 0, 0) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_getlength(self): + """Test PyUnicode_GetLength()""" + from _testcapi import unicode_getlength as getlength + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', + 'a\ud800b\udfffc', '\ud834\udd1e']: + self.assertEqual(getlength(s), len(s)) + + self.assertRaises(TypeError, getlength, b'abc') + self.assertRaises(TypeError, getlength, []) + # CRASHES getlength(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_readchar(self): + """Test PyUnicode_ReadChar()""" + from _testcapi import unicode_readchar as readchar + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', + 'a\ud800b\udfffc', '\ud834\udd1e']: + for i, c in enumerate(s): + self.assertEqual(readchar(s, i), ord(c)) + self.assertRaises(IndexError, readchar, s, len(s)) + self.assertRaises(IndexError, readchar, s, -1) + + self.assertRaises(TypeError, readchar, b'abc', 0) + self.assertRaises(TypeError, readchar, [], 0) + # CRASHES readchar(NULL, 0) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_fromencodedobject(self): + """Test PyUnicode_FromEncodedObject()""" + from _testcapi import unicode_fromencodedobject as fromencodedobject + + self.assertEqual(fromencodedobject(b'abc', NULL), 'abc') + self.assertEqual(fromencodedobject(b'abc', 'ascii'), 'abc') + b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80' + s = 'a\xa1\u4f60\U0001f600' + self.assertEqual(fromencodedobject(b, NULL), s) + self.assertEqual(fromencodedobject(b, 'utf-8'), s) + self.assertEqual(fromencodedobject(b, 'latin1'), b.decode('latin1')) + self.assertRaises(UnicodeDecodeError, fromencodedobject, b, 'ascii') + self.assertEqual(fromencodedobject(b, 'ascii', 'replace'), + 'a' + '\ufffd'*9) + self.assertEqual(fromencodedobject(bytearray(b), NULL), s) + self.assertEqual(fromencodedobject(bytearray(b), 'utf-8'), s) + self.assertRaises(LookupError, fromencodedobject, b'abc', 'foo') + self.assertRaises(LookupError, fromencodedobject, b, 'ascii', 'foo') + self.assertRaises(TypeError, fromencodedobject, 'abc', NULL) + self.assertRaises(TypeError, fromencodedobject, 'abc', 'ascii') + self.assertRaises(TypeError, fromencodedobject, [], NULL) + self.assertRaises(TypeError, fromencodedobject, [], 'ascii') + self.assertRaises(SystemError, fromencodedobject, NULL, NULL) + self.assertRaises(SystemError, fromencodedobject, NULL, 'ascii') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asencodedstring(self): + """Test PyUnicode_AsEncodedString()""" + from _testcapi import unicode_asencodedstring as asencodedstring + + self.assertEqual(asencodedstring('abc', NULL), b'abc') + self.assertEqual(asencodedstring('abc', 'ascii'), b'abc') + s = 'a\xa1\u4f60\U0001f600' + b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80' + self.assertEqual(asencodedstring(s, NULL), b) + self.assertEqual(asencodedstring(s, 'utf-8'), b) + self.assertEqual(asencodedstring('\xa1\xa2', 'latin1'), b'\xa1\xa2') + self.assertRaises(UnicodeEncodeError, asencodedstring, '\xa1\xa2', 'ascii') + self.assertEqual(asencodedstring(s, 'ascii', 'replace'), b'a???') + + self.assertRaises(LookupError, asencodedstring, 'abc', 'foo') + self.assertRaises(LookupError, asencodedstring, s, 'ascii', 'foo') + self.assertRaises(TypeError, asencodedstring, b'abc', NULL) + self.assertRaises(TypeError, asencodedstring, b'abc', 'ascii') + self.assertRaises(TypeError, asencodedstring, [], NULL) + self.assertRaises(TypeError, asencodedstring, [], 'ascii') + # CRASHES asencodedstring(NULL, NULL) + # CRASHES asencodedstring(NULL, 'ascii') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_buildencodingmap(self): + """Test PyUnicode_BuildEncodingMap()""" + from _testcapi import unicode_buildencodingmap as buildencodingmap + + # XXX + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeutf8(self): + """Test PyUnicode_DecodeUTF8()""" + from _testcapi import unicode_decodeutf8 as decodeutf8 + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + b = s.encode('utf-8') + self.assertEqual(decodeutf8(b), s) + self.assertEqual(decodeutf8(b, 'strict'), s) + + self.assertRaises(UnicodeDecodeError, decodeutf8, b'\x80') + self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xc0') + self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xff') + self.assertRaises(UnicodeDecodeError, decodeutf8, b'a\xf0\x9f') + self.assertEqual(decodeutf8(b'a\xf0\x9f', 'replace'), 'a\ufffd') + self.assertEqual(decodeutf8(b'a\xf0\x9fb', 'replace'), 'a\ufffdb') + + self.assertRaises(LookupError, decodeutf8, b'a\x80', 'foo') + # TODO: Test PyUnicode_DecodeUTF8() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeutf8stateful(self): + """Test PyUnicode_DecodeUTF8Stateful()""" + from _testcapi import unicode_decodeutf8stateful as decodeutf8stateful + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + b = s.encode('utf-8') + self.assertEqual(decodeutf8stateful(b), (s, len(b))) + self.assertEqual(decodeutf8stateful(b, 'strict'), (s, len(b))) + + self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\x80') + self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xc0') + self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xff') + self.assertEqual(decodeutf8stateful(b'a\xf0\x9f'), ('a', 1)) + self.assertEqual(decodeutf8stateful(b'a\xf0\x9f', 'replace'), ('a', 1)) + self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'a\xf0\x9fb') + self.assertEqual(decodeutf8stateful(b'a\xf0\x9fb', 'replace'), ('a\ufffdb', 4)) + + self.assertRaises(LookupError, decodeutf8stateful, b'a\x80', 'foo') + # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as data and + # negative size. + # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as the address of + # "consumed". + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asutf8string(self): + """Test PyUnicode_AsUTF8String()""" + from _testcapi import unicode_asutf8string as asutf8string + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + self.assertEqual(asutf8string(s), s.encode('utf-8')) + + self.assertRaises(UnicodeEncodeError, asutf8string, '\ud8ff') + self.assertRaises(TypeError, asutf8string, b'abc') + self.assertRaises(TypeError, asutf8string, []) + # CRASHES asutf8string(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeutf16(self): + """Test PyUnicode_DecodeUTF16()""" + from _testcapi import unicode_decodeutf16 as decodeutf16 + + naturalbyteorder = -1 if sys.byteorder == 'little' else 1 + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + b = s.encode('utf-16') + self.assertEqual(decodeutf16(0, b), (naturalbyteorder, s)) + b = s.encode('utf-16le') + self.assertEqual(decodeutf16(-1, b), (-1, s)) + self.assertEqual(decodeutf16(0, b'\xff\xfe'+b), (-1, s)) + b = s.encode('utf-16be') + self.assertEqual(decodeutf16(1, b), (1, s)) + self.assertEqual(decodeutf16(0, b'\xfe\xff'+b), (1, s)) + + self.assertRaises(UnicodeDecodeError, decodeutf16, -1, b'a') + self.assertRaises(UnicodeDecodeError, decodeutf16, 1, b'a') + self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xff\xfea') + self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xfe\xffa') + + self.assertRaises(UnicodeDecodeError, decodeutf16, -1, b'\x00\xde') + self.assertRaises(UnicodeDecodeError, decodeutf16, 1, b'\xde\x00') + self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xde\xde') + self.assertEqual(decodeutf16(-1, b'\x00\xde', 'replace'), (-1, '\ufffd')) + self.assertEqual(decodeutf16(1, b'\xde\x00', 'replace'), (1, '\ufffd')) + self.assertEqual(decodeutf16(0, b'\xde\xde', 'replace'), (0, '\ufffd')) + self.assertEqual(decodeutf16(0, b'\xff\xfe\x00\xde', 'replace'), (-1, '\ufffd')) + self.assertEqual(decodeutf16(0, b'\xfe\xff\xde\x00', 'replace'), (1, '\ufffd')) + + self.assertRaises(UnicodeDecodeError, decodeutf16, -1, b'\x3d\xd8') + self.assertRaises(UnicodeDecodeError, decodeutf16, 1, b'\xd8\x3d') + self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xd8\xd8') + self.assertEqual(decodeutf16(-1, b'\x3d\xd8', 'replace'), (-1, '\ufffd')) + self.assertEqual(decodeutf16(1, b'\xd8\x3d', 'replace'), (1, '\ufffd')) + self.assertEqual(decodeutf16(0, b'\xd8\xd8', 'replace'), (0, '\ufffd')) + self.assertEqual(decodeutf16(0, b'\xff\xfe\x3d\xd8', 'replace'), (-1, '\ufffd')) + self.assertEqual(decodeutf16(0, b'\xfe\xff\xd8\x3d', 'replace'), (1, '\ufffd')) + + self.assertRaises(LookupError, decodeutf16, -1, b'\x00\xde', 'foo') + self.assertRaises(LookupError, decodeutf16, 1, b'\xde\x00', 'foo') + self.assertRaises(LookupError, decodeutf16, 0, b'\xde\xde', 'foo') + # TODO: Test PyUnicode_DecodeUTF16() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeutf16stateful(self): + """Test PyUnicode_DecodeUTF16Stateful()""" + from _testcapi import unicode_decodeutf16stateful as decodeutf16stateful + + naturalbyteorder = -1 if sys.byteorder == 'little' else 1 + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + b = s.encode('utf-16') + self.assertEqual(decodeutf16stateful(0, b), (naturalbyteorder, s, len(b))) + b = s.encode('utf-16le') + self.assertEqual(decodeutf16stateful(-1, b), (-1, s, len(b))) + self.assertEqual(decodeutf16stateful(0, b'\xff\xfe'+b), (-1, s, len(b)+2)) + b = s.encode('utf-16be') + self.assertEqual(decodeutf16stateful(1, b), (1, s, len(b))) + self.assertEqual(decodeutf16stateful(0, b'\xfe\xff'+b), (1, s, len(b)+2)) + + self.assertEqual(decodeutf16stateful(-1, b'\x61\x00\x3d'), (-1, 'a', 2)) + self.assertEqual(decodeutf16stateful(-1, b'\x61\x00\x3d\xd8'), (-1, 'a', 2)) + self.assertEqual(decodeutf16stateful(-1, b'\x61\x00\x3d\xd8\x00'), (-1, 'a', 2)) + self.assertEqual(decodeutf16stateful(1, b'\x00\x61\xd8'), (1, 'a', 2)) + self.assertEqual(decodeutf16stateful(1, b'\x00\x61\xd8\x3d'), (1, 'a', 2)) + self.assertEqual(decodeutf16stateful(1, b'\x00\x61\xd8\x3d\xde'), (1, 'a', 2)) + self.assertEqual(decodeutf16stateful(0, b'\xff\xfe\x61\x00\x3d\xd8\x00'), (-1, 'a', 4)) + self.assertEqual(decodeutf16stateful(0, b'\xfe\xff\x00\x61\xd8\x3d\xde'), (1, 'a', 4)) + + self.assertRaises(UnicodeDecodeError, decodeutf16stateful, -1, b'\x00\xde') + self.assertRaises(UnicodeDecodeError, decodeutf16stateful, 1, b'\xde\x00') + self.assertRaises(UnicodeDecodeError, decodeutf16stateful, 0, b'\xde\xde') + self.assertEqual(decodeutf16stateful(-1, b'\x00\xde', 'replace'), (-1, '\ufffd', 2)) + self.assertEqual(decodeutf16stateful(1, b'\xde\x00', 'replace'), (1, '\ufffd', 2)) + self.assertEqual(decodeutf16stateful(0, b'\xde\xde', 'replace'), (0, '\ufffd', 2)) + self.assertEqual(decodeutf16stateful(0, b'\xff\xfe\x00\xde', 'replace'), (-1, '\ufffd', 4)) + self.assertEqual(decodeutf16stateful(0, b'\xfe\xff\xde\x00', 'replace'), (1, '\ufffd', 4)) + + self.assertRaises(UnicodeDecodeError, decodeutf16stateful, -1, b'\x3d\xd8\x61\x00') + self.assertEqual(decodeutf16stateful(-1, b'\x3d\xd8\x61\x00', 'replace'), (-1, '\ufffda', 4)) + self.assertRaises(UnicodeDecodeError, decodeutf16stateful, 1, b'\xd8\x3d\x00\x61') + self.assertEqual(decodeutf16stateful(1, b'\xd8\x3d\x00\x61', 'replace'), (1, '\ufffda', 4)) + + self.assertRaises(LookupError, decodeutf16stateful, -1, b'\x00\xde', 'foo') + self.assertRaises(LookupError, decodeutf16stateful, 1, b'\xde\x00', 'foo') + self.assertRaises(LookupError, decodeutf16stateful, 0, b'\xde\xde', 'foo') + # TODO: Test PyUnicode_DecodeUTF16Stateful() with NULL as data and + # negative size. + # TODO: Test PyUnicode_DecodeUTF16Stateful() with NULL as the address of + # "consumed". + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asutf16string(self): + """Test PyUnicode_AsUTF16String()""" + from _testcapi import unicode_asutf16string as asutf16string + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + self.assertEqual(asutf16string(s), s.encode('utf-16')) + + self.assertRaises(UnicodeEncodeError, asutf16string, '\ud8ff') + self.assertRaises(TypeError, asutf16string, b'abc') + self.assertRaises(TypeError, asutf16string, []) + # CRASHES asutf16string(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeutf32(self): + """Test PyUnicode_DecodeUTF8()""" + from _testcapi import unicode_decodeutf32 as decodeutf32 + + naturalbyteorder = -1 if sys.byteorder == 'little' else 1 + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + b = s.encode('utf-32') + self.assertEqual(decodeutf32(0, b), (naturalbyteorder, s)) + b = s.encode('utf-32le') + self.assertEqual(decodeutf32(-1, b), (-1, s)) + self.assertEqual(decodeutf32(0, b'\xff\xfe\x00\x00'+b), (-1, s)) + b = s.encode('utf-32be') + self.assertEqual(decodeutf32(1, b), (1, s)) + self.assertEqual(decodeutf32(0, b'\x00\x00\xfe\xff'+b), (1, s)) + + self.assertRaises(UnicodeDecodeError, decodeutf32, -1, b'\x61\x00\x00\x00\x00') + self.assertRaises(UnicodeDecodeError, decodeutf32, 1, b'\x00\x00\x00\x61\x00') + self.assertRaises(UnicodeDecodeError, decodeutf32, 0, b'\xff\xfe\x00\x00\x61\x00\x00\x00\x00') + self.assertRaises(UnicodeDecodeError, decodeutf32, 0, b'\x00\x00\xfe\xff\x00\x00\x00\x61\x00') + + self.assertRaises(UnicodeDecodeError, decodeutf32, -1, b'\xff\xff\xff\xff') + self.assertRaises(UnicodeDecodeError, decodeutf32, 1, b'\xff\xff\xff\xff') + self.assertRaises(UnicodeDecodeError, decodeutf32, 0, b'\xff\xff\xff\xff') + self.assertEqual(decodeutf32(-1, b'\xff\xff\xff\xff', 'replace'), (-1, '\ufffd')) + self.assertEqual(decodeutf32(1, b'\xff\xff\xff\xff', 'replace'), (1, '\ufffd')) + self.assertEqual(decodeutf32(0, b'\xff\xff\xff\xff', 'replace'), (0, '\ufffd')) + self.assertEqual(decodeutf32(0, b'\xff\xfe\x00\x00\xff\xff\xff\xff', 'replace'), (-1, '\ufffd')) + self.assertEqual(decodeutf32(0, b'\x00\x00\xfe\xff\xff\xff\xff\xff', 'replace'), (1, '\ufffd')) + + self.assertRaises(UnicodeDecodeError, decodeutf32, -1, b'\x3d\xd8\x00\x00') + self.assertEqual(decodeutf32(-1, b'\x3d\xd8\x00\x00', 'replace'), (-1, '\ufffd')) + self.assertRaises(UnicodeDecodeError, decodeutf32, 1, b'\x00\x00\xd8\x3d') + self.assertEqual(decodeutf32(1, b'\x00\x00\xd8\x3d', 'replace'), (1, '\ufffd')) + + self.assertRaises(LookupError, decodeutf32, -1, b'\xff\xff\xff\xff', 'foo') + self.assertRaises(LookupError, decodeutf32, 1, b'\xff\xff\xff\xff', 'foo') + self.assertRaises(LookupError, decodeutf32, 0, b'\xff\xff\xff\xff', 'foo') + # TODO: Test PyUnicode_DecodeUTF32() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeutf32stateful(self): + """Test PyUnicode_DecodeUTF32Stateful()""" + from _testcapi import unicode_decodeutf32stateful as decodeutf32stateful + + naturalbyteorder = -1 if sys.byteorder == 'little' else 1 + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + b = s.encode('utf-32') + self.assertEqual(decodeutf32stateful(0, b), (naturalbyteorder, s, len(b))) + b = s.encode('utf-32le') + self.assertEqual(decodeutf32stateful(-1, b), (-1, s, len(b))) + self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00'+b), (-1, s, len(b)+4)) + b = s.encode('utf-32be') + self.assertEqual(decodeutf32stateful(1, b), (1, s, len(b))) + self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff'+b), (1, s, len(b)+4)) + + self.assertEqual(decodeutf32stateful(-1, b'\x61\x00\x00\x00\x00'), (-1, 'a', 4)) + self.assertEqual(decodeutf32stateful(-1, b'\x61\x00\x00\x00\x00\xf6'), (-1, 'a', 4)) + self.assertEqual(decodeutf32stateful(-1, b'\x61\x00\x00\x00\x00\xf6\x01'), (-1, 'a', 4)) + self.assertEqual(decodeutf32stateful(1, b'\x00\x00\x00\x61\x00'), (1, 'a', 4)) + self.assertEqual(decodeutf32stateful(1, b'\x00\x00\x00\x61\x00\x01'), (1, 'a', 4)) + self.assertEqual(decodeutf32stateful(1, b'\x00\x00\x00\x61\x00\x01\xf6'), (1, 'a', 4)) + self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00\x61\x00\x00\x00\x00\xf6\x01'), (-1, 'a', 8)) + self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff\x00\x00\x00\x61\x00\x01\xf6'), (1, 'a', 8)) + + for b in b'\xff', b'\xff\xff', b'\xff\xff\xff': + self.assertEqual(decodeutf32stateful(-1, b), (-1, '', 0)) + self.assertEqual(decodeutf32stateful(1, b), (1, '', 0)) + self.assertEqual(decodeutf32stateful(0, b), (0, '', 0)) + self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00'+b), (-1, '', 4)) + self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff'+b), (1, '', 4)) + self.assertRaises(UnicodeDecodeError, decodeutf32stateful, -1, b'\xff\xff\xff\xff') + self.assertRaises(UnicodeDecodeError, decodeutf32stateful, 1, b'\xff\xff\xff\xff') + self.assertRaises(UnicodeDecodeError, decodeutf32stateful, 0, b'\xff\xff\xff\xff') + self.assertEqual(decodeutf32stateful(-1, b'\xff\xff\xff\xff', 'replace'), (-1, '\ufffd', 4)) + self.assertEqual(decodeutf32stateful(1, b'\xff\xff\xff\xff', 'replace'), (1, '\ufffd', 4)) + self.assertEqual(decodeutf32stateful(0, b'\xff\xff\xff\xff', 'replace'), (0, '\ufffd', 4)) + self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00\xff\xff\xff\xff', 'replace'), (-1, '\ufffd', 8)) + self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff\xff\xff\xff\xff', 'replace'), (1, '\ufffd', 8)) + + self.assertRaises(UnicodeDecodeError, decodeutf32stateful, -1, b'\x3d\xd8\x00\x00') + self.assertEqual(decodeutf32stateful(-1, b'\x3d\xd8\x00\x00', 'replace'), (-1, '\ufffd', 4)) + self.assertRaises(UnicodeDecodeError, decodeutf32stateful, 1, b'\x00\x00\xd8\x3d') + self.assertEqual(decodeutf32stateful(1, b'\x00\x00\xd8\x3d', 'replace'), (1, '\ufffd', 4)) + + self.assertRaises(LookupError, decodeutf32stateful, -1, b'\xff\xff\xff\xff', 'foo') + self.assertRaises(LookupError, decodeutf32stateful, 1, b'\xff\xff\xff\xff', 'foo') + self.assertRaises(LookupError, decodeutf32stateful, 0, b'\xff\xff\xff\xff', 'foo') + # TODO: Test PyUnicode_DecodeUTF32Stateful() with NULL as data and + # negative size. + # TODO: Test PyUnicode_DecodeUTF32Stateful() with NULL as the address of + # "consumed". + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asutf32string(self): + """Test PyUnicode_AsUTF32String()""" + from _testcapi import unicode_asutf32string as asutf32string + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + self.assertEqual(asutf32string(s), s.encode('utf-32')) + + self.assertRaises(UnicodeEncodeError, asutf32string, '\ud8ff') + self.assertRaises(TypeError, asutf32string, b'abc') + self.assertRaises(TypeError, asutf32string, []) + # CRASHES asutf32string(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeunicodeescape(self): + """Test PyUnicode_DecodeUnicodeEscape()""" + from _testcapi import unicode_decodeunicodeescape as decodeunicodeescape + + self.assertEqual(decodeunicodeescape(b'abc'), 'abc') + self.assertEqual(decodeunicodeescape(br'\t\n\r\x0b\x0c\x00\\'), '\t\n\r\v\f\0\\') + self.assertEqual(decodeunicodeescape(b'\t\n\r\x0b\x0c\x00'), '\t\n\r\v\f\0') + self.assertEqual(decodeunicodeescape(br'\xa1\xa2'), '\xa1\xa2') + self.assertEqual(decodeunicodeescape(b'\xa1\xa2'), '\xa1\xa2') + self.assertEqual(decodeunicodeescape(br'\u4f60\u597d'), '\u4f60\u597d') + self.assertEqual(decodeunicodeescape(br'\U0001f600'), '\U0001f600') + with self.assertWarns(DeprecationWarning): + self.assertEqual(decodeunicodeescape(br'\z'), r'\z') + + for b in b'\\', br'\xa', br'\u4f6', br'\U0001f60': + self.assertRaises(UnicodeDecodeError, decodeunicodeescape, b) + self.assertRaises(UnicodeDecodeError, decodeunicodeescape, b, 'strict') + self.assertEqual(decodeunicodeescape(br'x\U0001f60', 'replace'), 'x\ufffd') + self.assertEqual(decodeunicodeescape(br'x\U0001f60y', 'replace'), 'x\ufffdy') + + self.assertRaises(LookupError, decodeunicodeescape, b'\\', 'foo') + # TODO: Test PyUnicode_DecodeUnicodeEscape() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asunicodeescapestring(self): + """Test PyUnicode_AsUnicodeEscapeString()""" + from _testcapi import unicode_asunicodeescapestring as asunicodeescapestring + + self.assertEqual(asunicodeescapestring('abc'), b'abc') + self.assertEqual(asunicodeescapestring('\t\n\r\v\f\0\\'), br'\t\n\r\x0b\x0c\x00\\') + self.assertEqual(asunicodeescapestring('\xa1\xa2'), br'\xa1\xa2') + self.assertEqual(asunicodeescapestring('\u4f60\u597d'), br'\u4f60\u597d') + self.assertEqual(asunicodeescapestring('\U0001f600'), br'\U0001f600') + + self.assertRaises(TypeError, asunicodeescapestring, b'abc') + self.assertRaises(TypeError, asunicodeescapestring, []) + # CRASHES asunicodeescapestring(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decoderawunicodeescape(self): + """Test PyUnicode_DecodeRawUnicodeEscape()""" + from _testcapi import unicode_decoderawunicodeescape as decoderawunicodeescape + + self.assertEqual(decoderawunicodeescape(b'abc'), 'abc') + self.assertEqual(decoderawunicodeescape(b'\t\n\r\v\f\0\\'), '\t\n\r\v\f\0\\') + self.assertEqual(decoderawunicodeescape(b'\xa1\xa2'), '\xa1\xa2') + self.assertEqual(decoderawunicodeescape(br'\u4f60\u597d'), '\u4f60\u597d') + self.assertEqual(decoderawunicodeescape(br'\U0001f600'), '\U0001f600') + self.assertEqual(decoderawunicodeescape(br'\xa1\xa2'), r'\xa1\xa2') + self.assertEqual(decoderawunicodeescape(br'\z'), r'\z') + + for b in br'\u4f6', br'\U0001f60': + self.assertRaises(UnicodeDecodeError, decoderawunicodeescape, b) + self.assertRaises(UnicodeDecodeError, decoderawunicodeescape, b, 'strict') + self.assertEqual(decoderawunicodeescape(br'x\U0001f60', 'replace'), 'x\ufffd') + self.assertEqual(decoderawunicodeescape(br'x\U0001f60y', 'replace'), 'x\ufffdy') + + self.assertRaises(LookupError, decoderawunicodeescape, br'\U0001f60', 'foo') + # TODO: Test PyUnicode_DecodeRawUnicodeEscape() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asrawunicodeescapestring(self): + """Test PyUnicode_AsRawUnicodeEscapeString()""" + from _testcapi import unicode_asrawunicodeescapestring as asrawunicodeescapestring + + self.assertEqual(asrawunicodeescapestring('abc'), b'abc') + self.assertEqual(asrawunicodeescapestring('\t\n\r\v\f\0\\'), b'\t\n\r\v\f\0\\') + self.assertEqual(asrawunicodeescapestring('\xa1\xa2'), b'\xa1\xa2') + self.assertEqual(asrawunicodeescapestring('\u4f60\u597d'), br'\u4f60\u597d') + self.assertEqual(asrawunicodeescapestring('\U0001f600'), br'\U0001f600') + + self.assertRaises(TypeError, asrawunicodeescapestring, b'abc') + self.assertRaises(TypeError, asrawunicodeescapestring, []) + # CRASHES asrawunicodeescapestring(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodelatin1(self): + """Test PyUnicode_DecodeLatin1()""" + from _testcapi import unicode_decodelatin1 as decodelatin1 + + self.assertEqual(decodelatin1(b'abc'), 'abc') + self.assertEqual(decodelatin1(b'abc', 'strict'), 'abc') + self.assertEqual(decodelatin1(b'\xa1\xa2'), '\xa1\xa2') + self.assertEqual(decodelatin1(b'\xa1\xa2', 'strict'), '\xa1\xa2') + # TODO: Test PyUnicode_DecodeLatin1() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_aslatin1string(self): + """Test PyUnicode_AsLatin1String()""" + from _testcapi import unicode_aslatin1string as aslatin1string + + self.assertEqual(aslatin1string('abc'), b'abc') + self.assertEqual(aslatin1string('\xa1\xa2'), b'\xa1\xa2') + + self.assertRaises(UnicodeEncodeError, aslatin1string, '\u4f60') + self.assertRaises(TypeError, aslatin1string, b'abc') + self.assertRaises(TypeError, aslatin1string, []) + # CRASHES aslatin1string(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeascii(self): + """Test PyUnicode_DecodeASCII()""" + from _testcapi import unicode_decodeascii as decodeascii + + self.assertEqual(decodeascii(b'abc'), 'abc') + self.assertEqual(decodeascii(b'abc', 'strict'), 'abc') + + self.assertRaises(UnicodeDecodeError, decodeascii, b'\xff') + self.assertEqual(decodeascii(b'a\xff', 'replace'), 'a\ufffd') + self.assertEqual(decodeascii(b'a\xffb', 'replace'), 'a\ufffdb') + + self.assertRaises(LookupError, decodeascii, b'a\xff', 'foo') + # TODO: Test PyUnicode_DecodeASCII() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asasciistring(self): + """Test PyUnicode_AsASCIIString()""" + from _testcapi import unicode_asasciistring as asasciistring + + self.assertEqual(asasciistring('abc'), b'abc') + + self.assertRaises(UnicodeEncodeError, asasciistring, '\x80') + self.assertRaises(TypeError, asasciistring, b'abc') + self.assertRaises(TypeError, asasciistring, []) + # CRASHES asasciistring(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodecharmap(self): + """Test PyUnicode_DecodeCharmap()""" + from _testcapi import unicode_decodecharmap as decodecharmap + + self.assertEqual(decodecharmap(b'\3\0\7', {0: 'a', 3: 'b', 7: 'c'}), 'bac') + self.assertEqual(decodecharmap(b'\1\0\2', ['a', 'b', 'c']), 'bac') + self.assertEqual(decodecharmap(b'\1\0\2', 'abc'), 'bac') + self.assertEqual(decodecharmap(b'\1\0\2', ['\xa1', '\xa2', '\xa3']), '\xa2\xa1\xa3') + self.assertEqual(decodecharmap(b'\1\0\2', ['\u4f60', '\u597d', '\u4e16']), '\u597d\u4f60\u4e16') + self.assertEqual(decodecharmap(b'\1\0\2', ['\U0001f600', '\U0001f601', '\U0001f602']), '\U0001f601\U0001f600\U0001f602') + + self.assertEqual(decodecharmap(b'\1\0\2', [97, 98, 99]), 'bac') + self.assertEqual(decodecharmap(b'\1\0\2', ['', 'b', 'cd']), 'bcd') + + self.assertRaises(UnicodeDecodeError, decodecharmap, b'\0', {}) + self.assertRaises(UnicodeDecodeError, decodecharmap, b'\0', {0: None}) + self.assertEqual(decodecharmap(b'\1\0\2', [None, 'b', 'c'], 'replace'), 'b\ufffdc') + self.assertEqual(decodecharmap(b'\1\0\2\xff', NULL), '\1\0\2\xff') + self.assertRaises(TypeError, decodecharmap, b'\0', 42) + + # TODO: Test PyUnicode_DecodeCharmap() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_ascharmapstring(self): + """Test PyUnicode_AsCharmapString()""" + from _testcapi import unicode_ascharmapstring as ascharmapstring + + self.assertEqual(ascharmapstring('abc', {97: 3, 98: 0, 99: 7}), b'\3\0\7') + self.assertEqual(ascharmapstring('\xa1\xa2\xa3', {0xa1: 3, 0xa2: 0, 0xa3: 7}), b'\3\0\7') + self.assertEqual(ascharmapstring('\u4f60\u597d\u4e16', {0x4f60: 3, 0x597d: 0, 0x4e16: 7}), b'\3\0\7') + self.assertEqual(ascharmapstring('\U0001f600\U0001f601\U0001f602', {0x1f600: 3, 0x1f601: 0, 0x1f602: 7}), b'\3\0\7') + self.assertEqual(ascharmapstring('abc', {97: 3, 98: b'', 99: b'spam'}), b'\3spam') + + self.assertRaises(UnicodeEncodeError, ascharmapstring, 'a', {}) + self.assertRaises(UnicodeEncodeError, ascharmapstring, 'a', {97: None}) + self.assertRaises(TypeError, ascharmapstring, b'a', {}) + self.assertRaises(TypeError, ascharmapstring, [], {}) + self.assertRaises(TypeError, ascharmapstring, 'a', NULL) + # CRASHES ascharmapstring(NULL, {}) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_fromobject(self): + """Test PyUnicode_FromObject()""" + from _testcapi import unicode_fromobject as fromobject + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', + 'a\ud800b\udfffc', '\ud834\udd1e']: + self.assertEqual(fromobject(s), s) + + self.assertRaises(TypeError, fromobject, b'abc') + self.assertRaises(TypeError, fromobject, []) + # CRASHES fromobject(NULL) - # Test PyUnicode_FromFormat() def test_from_format(self): + """Test PyUnicode_FromFormat()""" import_helper.import_module('ctypes') from ctypes import ( c_char_p, @@ -268,13 +1106,76 @@ def check_format(expected, format, *args): self.assertRaisesRegex(SystemError, 'invalid format string', PyUnicode_FromFormat, b'%+i', c_int(10)) - # Test PyUnicode_AsWideChar() + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_interninplace(self): + """Test PyUnicode_InternInPlace()""" + from _testcapi import unicode_interninplace as interninplace + + s = b'abc'.decode() + r = interninplace(s) + self.assertEqual(r, 'abc') + + # CRASHES interninplace(b'abc') + # CRASHES interninplace(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_internfromstring(self): + """Test PyUnicode_InternFromString()""" + from _testcapi import unicode_internfromstring as internfromstring + + self.assertEqual(internfromstring(b'abc'), 'abc') + self.assertEqual(internfromstring(b'\xf0\x9f\x98\x80'), '\U0001f600') + self.assertRaises(UnicodeDecodeError, internfromstring, b'\xc2') + self.assertRaises(UnicodeDecodeError, internfromstring, b'\xa1') + self.assertEqual(internfromstring(b''), '') + + # CRASHES internfromstring(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_fromwidechar(self): + """Test PyUnicode_FromWideChar()""" + from _testcapi import unicode_fromwidechar as fromwidechar + from _testcapi import SIZEOF_WCHAR_T + + if SIZEOF_WCHAR_T == 2: + if sys.byteorder == 'little': + encoding = 'utf-16le' + elif sys.byteorder == 'little': + encoding = 'utf-16be' + elif SIZEOF_WCHAR_T == 4: + if sys.byteorder == 'little': + encoding = 'utf-32le' + elif sys.byteorder == 'little': + encoding = 'utf-32be' + + for s in '', 'abc', '\xa1\xa2', '\u4f60', '\U0001f600': + b = s.encode(encoding) + self.assertEqual(fromwidechar(b), s) + self.assertEqual(fromwidechar(b + b'\0'*SIZEOF_WCHAR_T, -1), s) + for s in '\ud83d', '\ude00': + b = s.encode(encoding, 'surrogatepass') + self.assertEqual(fromwidechar(b), s) + self.assertEqual(fromwidechar(b + b'\0'*SIZEOF_WCHAR_T, -1), s) + + self.assertEqual(fromwidechar('abc'.encode(encoding), 2), 'ab') + if SIZEOF_WCHAR_T == 2: + self.assertEqual(fromwidechar('a\U0001f600'.encode(encoding), 2), 'a\ud83d') + + self.assertRaises(SystemError, fromwidechar, b'\0'*SIZEOF_WCHAR_T, -2) + self.assertEqual(fromwidechar(NULL, 0), '') + self.assertRaises(SystemError, fromwidechar, NULL, 1) + self.assertRaises(SystemError, fromwidechar, NULL, -1) + @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_aswidechar(self): + """Test PyUnicode_AsWideChar()""" from _testcapi import unicode_aswidechar - import_helper.import_module('ctypes') - from ctypes import c_wchar, sizeof + from _testcapi import unicode_aswidechar_null + from _testcapi import SIZEOF_WCHAR_T wchar, size = unicode_aswidechar('abcdef', 2) self.assertEqual(size, 2) @@ -283,6 +1184,8 @@ def test_aswidechar(self): wchar, size = unicode_aswidechar('abc', 3) self.assertEqual(size, 3) self.assertEqual(wchar, 'abc') + self.assertEqual(unicode_aswidechar_null('abc', 10), 4) + self.assertEqual(unicode_aswidechar_null('abc', 0), 4) wchar, size = unicode_aswidechar('abc', 4) self.assertEqual(size, 3) @@ -295,139 +1198,468 @@ def test_aswidechar(self): wchar, size = unicode_aswidechar('abc\0def', 20) self.assertEqual(size, 7) self.assertEqual(wchar, 'abc\0def\0') + self.assertEqual(unicode_aswidechar_null('abc\0def', 20), 8) nonbmp = chr(0x10ffff) - if sizeof(c_wchar) == 2: - buflen = 3 + if SIZEOF_WCHAR_T == 2: nchar = 2 - else: # sizeof(c_wchar) == 4 - buflen = 2 + else: # SIZEOF_WCHAR_T == 4 nchar = 1 - wchar, size = unicode_aswidechar(nonbmp, buflen) + wchar, size = unicode_aswidechar(nonbmp, 10) self.assertEqual(size, nchar) self.assertEqual(wchar, nonbmp + '\0') + self.assertEqual(unicode_aswidechar_null(nonbmp, 10), nchar + 1) + + self.assertRaises(TypeError, unicode_aswidechar, b'abc', 10) + self.assertRaises(TypeError, unicode_aswidechar, [], 10) + self.assertRaises(SystemError, unicode_aswidechar, NULL, 10) + self.assertRaises(TypeError, unicode_aswidechar_null, b'abc', 10) + self.assertRaises(TypeError, unicode_aswidechar_null, [], 10) + self.assertRaises(SystemError, unicode_aswidechar_null, NULL, 10) - # Test PyUnicode_AsWideCharString() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_aswidecharstring(self): + """Test PyUnicode_AsWideCharString()""" from _testcapi import unicode_aswidecharstring - import_helper.import_module('ctypes') - from ctypes import c_wchar, sizeof + from _testcapi import unicode_aswidecharstring_null + from _testcapi import SIZEOF_WCHAR_T wchar, size = unicode_aswidecharstring('abc') self.assertEqual(size, 3) self.assertEqual(wchar, 'abc\0') + self.assertEqual(unicode_aswidecharstring_null('abc'), 'abc') wchar, size = unicode_aswidecharstring('abc\0def') self.assertEqual(size, 7) self.assertEqual(wchar, 'abc\0def\0') + self.assertRaises(ValueError, unicode_aswidecharstring_null, 'abc\0def') nonbmp = chr(0x10ffff) - if sizeof(c_wchar) == 2: + if SIZEOF_WCHAR_T == 2: nchar = 2 - else: # sizeof(c_wchar) == 4 + else: # SIZEOF_WCHAR_T == 4 nchar = 1 wchar, size = unicode_aswidecharstring(nonbmp) self.assertEqual(size, nchar) self.assertEqual(wchar, nonbmp + '\0') + self.assertEqual(unicode_aswidecharstring_null(nonbmp), nonbmp) + + self.assertRaises(TypeError, unicode_aswidecharstring, b'abc') + self.assertRaises(TypeError, unicode_aswidecharstring, []) + self.assertRaises(SystemError, unicode_aswidecharstring, NULL) + self.assertRaises(TypeError, unicode_aswidecharstring_null, b'abc') + self.assertRaises(TypeError, unicode_aswidecharstring_null, []) + self.assertRaises(SystemError, unicode_aswidecharstring_null, NULL) - # Test PyUnicode_AsUCS4() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_asucs4(self): + """Test PyUnicode_AsUCS4()""" from _testcapi import unicode_asucs4 + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', 'a\ud800b\udfffc', '\ud834\udd1e']: l = len(s) - self.assertEqual(unicode_asucs4(s, l, True), s+'\0') - self.assertEqual(unicode_asucs4(s, l, False), s+'\uffff') - self.assertEqual(unicode_asucs4(s, l+1, True), s+'\0\uffff') - self.assertEqual(unicode_asucs4(s, l+1, False), s+'\0\uffff') - self.assertRaises(SystemError, unicode_asucs4, s, l-1, True) - self.assertRaises(SystemError, unicode_asucs4, s, l-2, False) + self.assertEqual(unicode_asucs4(s, l, 1), s+'\0') + self.assertEqual(unicode_asucs4(s, l, 0), s+'\uffff') + self.assertEqual(unicode_asucs4(s, l+1, 1), s+'\0\uffff') + self.assertEqual(unicode_asucs4(s, l+1, 0), s+'\0\uffff') + self.assertRaises(SystemError, unicode_asucs4, s, l-1, 1) + self.assertRaises(SystemError, unicode_asucs4, s, l-2, 0) s = '\0'.join([s, s]) - self.assertEqual(unicode_asucs4(s, len(s), True), s+'\0') - self.assertEqual(unicode_asucs4(s, len(s), False), s+'\uffff') + self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0') + self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff') + + # CRASHES unicode_asucs4(b'abc', 1, 0) + # CRASHES unicode_asucs4(b'abc', 1, 1) + # CRASHES unicode_asucs4([], 1, 1) + # CRASHES unicode_asucs4(NULL, 1, 0) + # CRASHES unicode_asucs4(NULL, 1, 1) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asucs4copy(self): + """Test PyUnicode_AsUCS4Copy()""" + from _testcapi import unicode_asucs4copy as asucs4copy + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', + 'a\ud800b\udfffc', '\ud834\udd1e']: + self.assertEqual(asucs4copy(s), s+'\0') + s = '\0'.join([s, s]) + self.assertEqual(asucs4copy(s), s+'\0') + + # CRASHES asucs4copy(b'abc') + # CRASHES asucs4copy([]) + # CRASHES asucs4copy(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_fromordinal(self): + """Test PyUnicode_FromOrdinal()""" + from _testcapi import unicode_fromordinal as fromordinal + + self.assertEqual(fromordinal(0x61), 'a') + self.assertEqual(fromordinal(0x20ac), '\u20ac') + self.assertEqual(fromordinal(0x1f600), '\U0001f600') + + self.assertRaises(ValueError, fromordinal, 0x110000) + self.assertRaises(ValueError, fromordinal, -1) - # Test PyUnicode_AsUTF8() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_asutf8(self): + """Test PyUnicode_AsUTF8()""" from _testcapi import unicode_asutf8 - bmp = '\u0100' - bmp2 = '\uffff' - nonbmp = chr(0x10ffff) + self.assertEqual(unicode_asutf8('abc', 4), b'abc\0') + self.assertEqual(unicode_asutf8('абв', 7), b'\xd0\xb0\xd0\xb1\xd0\xb2\0') + self.assertEqual(unicode_asutf8('\U0001f600', 5), b'\xf0\x9f\x98\x80\0') + self.assertEqual(unicode_asutf8('abc\0def', 8), b'abc\0def\0') - self.assertEqual(unicode_asutf8(bmp), b'\xc4\x80') - self.assertEqual(unicode_asutf8(bmp2), b'\xef\xbf\xbf') - self.assertEqual(unicode_asutf8(nonbmp), b'\xf4\x8f\xbf\xbf') - self.assertRaises(UnicodeEncodeError, unicode_asutf8, 'a\ud800b\udfffc') + self.assertRaises(UnicodeEncodeError, unicode_asutf8, '\ud8ff', 0) + self.assertRaises(TypeError, unicode_asutf8, b'abc', 0) + self.assertRaises(TypeError, unicode_asutf8, [], 0) + # CRASHES unicode_asutf8(NULL, 0) - # Test PyUnicode_AsUTF8AndSize() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_asutf8andsize(self): + """Test PyUnicode_AsUTF8AndSize()""" from _testcapi import unicode_asutf8andsize + from _testcapi import unicode_asutf8andsize_null - bmp = '\u0100' - bmp2 = '\uffff' - nonbmp = chr(0x10ffff) + self.assertEqual(unicode_asutf8andsize('abc', 4), (b'abc\0', 3)) + self.assertEqual(unicode_asutf8andsize('абв', 7), (b'\xd0\xb0\xd0\xb1\xd0\xb2\0', 6)) + self.assertEqual(unicode_asutf8andsize('\U0001f600', 5), (b'\xf0\x9f\x98\x80\0', 4)) + self.assertEqual(unicode_asutf8andsize('abc\0def', 8), (b'abc\0def\0', 7)) + self.assertEqual(unicode_asutf8andsize_null('abc', 4), b'abc\0') + self.assertEqual(unicode_asutf8andsize_null('abc\0def', 8), b'abc\0def\0') + + self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, '\ud8ff', 0) + self.assertRaises(TypeError, unicode_asutf8andsize, b'abc', 0) + self.assertRaises(TypeError, unicode_asutf8andsize, [], 0) + # CRASHES unicode_asutf8andsize(NULL, 0) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_getdefaultencoding(self): + """Test PyUnicode_GetDefaultEncoding()""" + from _testcapi import unicode_getdefaultencoding as getdefaultencoding - self.assertEqual(unicode_asutf8andsize(bmp), (b'\xc4\x80', 2)) - self.assertEqual(unicode_asutf8andsize(bmp2), (b'\xef\xbf\xbf', 3)) - self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4)) - self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc') + self.assertEqual(getdefaultencoding(), b'utf-8') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decode(self): + """Test PyUnicode_Decode()""" + from _testcapi import unicode_decode as decode + + self.assertEqual(decode(b'[\xe2\x82\xac]', 'utf-8'), '[\u20ac]') + self.assertEqual(decode(b'[\xa4]', 'iso8859-15'), '[\u20ac]') + self.assertEqual(decode(b'[\xa4]', 'iso8859-15', 'strict'), '[\u20ac]') + self.assertRaises(UnicodeDecodeError, decode, b'[\xa4]', 'utf-8') + self.assertEqual(decode(b'[\xa4]', 'utf-8', 'replace'), '[\ufffd]') + + self.assertEqual(decode(b'[\xe2\x82\xac]', NULL), '[\u20ac]') + self.assertEqual(decode(b'[\xa4]', NULL, 'replace'), '[\ufffd]') + + self.assertRaises(LookupError, decode, b'\xa4', 'foo') + self.assertRaises(LookupError, decode, b'\xa4', 'utf-8', 'foo') + # TODO: Test PyUnicode_Decode() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_transform_decimal_and_space(self): + """Test _PyUnicode_TransformDecimalAndSpaceToASCII()""" + from _testcapi import unicode_transformdecimalandspacetoascii as transform_decimal + + self.assertEqual(transform_decimal('123'), + '123') + self.assertEqual(transform_decimal('\u0663.\u0661\u0664'), + '3.14') + self.assertEqual(transform_decimal("\N{EM SPACE}3.14\N{EN SPACE}"), + " 3.14 ") + self.assertEqual(transform_decimal('12\u20ac3'), + '12?') + self.assertEqual(transform_decimal(''), '') + + self.assertRaises(SystemError, transform_decimal, b'123') + self.assertRaises(SystemError, transform_decimal, []) + # CRASHES transform_decimal(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_concat(self): + """Test PyUnicode_Concat()""" + from _testcapi import unicode_concat as concat + + self.assertEqual(concat('abc', 'def'), 'abcdef') + self.assertEqual(concat('abc', 'где'), 'abcгде') + self.assertEqual(concat('абв', 'def'), 'абвdef') + self.assertEqual(concat('абв', 'где'), 'абвгде') + self.assertEqual(concat('a\0b', 'c\0d'), 'a\0bc\0d') + + self.assertRaises(TypeError, concat, b'abc', 'def') + self.assertRaises(TypeError, concat, 'abc', b'def') + self.assertRaises(TypeError, concat, b'abc', b'def') + self.assertRaises(TypeError, concat, [], 'def') + self.assertRaises(TypeError, concat, 'abc', []) + self.assertRaises(TypeError, concat, [], []) + # CRASHES concat(NULL, 'def') + # CRASHES concat('abc', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_split(self): + """Test PyUnicode_Split()""" + from _testcapi import unicode_split as split + + self.assertEqual(split('a|b|c|d', '|'), ['a', 'b', 'c', 'd']) + self.assertEqual(split('a|b|c|d', '|', 2), ['a', 'b', 'c|d']) + self.assertEqual(split('a|b|c|d', '\u20ac'), ['a|b|c|d']) + self.assertEqual(split('a||b|c||d', '||'), ['a', 'b|c', 'd']) + self.assertEqual(split('а|б|в|г', '|'), ['а', 'б', 'в', 'г']) + self.assertEqual(split('абабагаламага', 'а'), + ['', 'б', 'б', 'г', 'л', 'м', 'г', '']) + self.assertEqual(split(' a\tb\nc\rd\ve\f', NULL), + ['a', 'b', 'c', 'd', 'e']) + self.assertEqual(split('a\x85b\xa0c\u1680d\u2000e', NULL), + ['a', 'b', 'c', 'd', 'e']) + + self.assertRaises(ValueError, split, 'a|b|c|d', '') + self.assertRaises(TypeError, split, 'a|b|c|d', ord('|')) + self.assertRaises(TypeError, split, [], '|') + # split(NULL, '|') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_rsplit(self): + """Test PyUnicode_RSplit()""" + from _testcapi import unicode_rsplit as rsplit + + self.assertEqual(rsplit('a|b|c|d', '|'), ['a', 'b', 'c', 'd']) + self.assertEqual(rsplit('a|b|c|d', '|', 2), ['a|b', 'c', 'd']) + self.assertEqual(rsplit('a|b|c|d', '\u20ac'), ['a|b|c|d']) + self.assertEqual(rsplit('a||b|c||d', '||'), ['a', 'b|c', 'd']) + self.assertEqual(rsplit('а|б|в|г', '|'), ['а', 'б', 'в', 'г']) + self.assertEqual(rsplit('абабагаламага', 'а'), + ['', 'б', 'б', 'г', 'л', 'м', 'г', '']) + self.assertEqual(rsplit('aжbжcжd', 'ж'), ['a', 'b', 'c', 'd']) + self.assertEqual(rsplit(' a\tb\nc\rd\ve\f', NULL), + ['a', 'b', 'c', 'd', 'e']) + self.assertEqual(rsplit('a\x85b\xa0c\u1680d\u2000e', NULL), + ['a', 'b', 'c', 'd', 'e']) + + self.assertRaises(ValueError, rsplit, 'a|b|c|d', '') + self.assertRaises(TypeError, rsplit, 'a|b|c|d', ord('|')) + self.assertRaises(TypeError, rsplit, [], '|') + # rsplit(NULL, '|') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_partition(self): + """Test PyUnicode_Partition()""" + from _testcapi import unicode_partition as partition + + self.assertEqual(partition('a|b|c', '|'), ('a', '|', 'b|c')) + self.assertEqual(partition('a||b||c', '||'), ('a', '||', 'b||c')) + self.assertEqual(partition('а|б|в', '|'), ('а', '|', 'б|в')) + self.assertEqual(partition('кабан', 'а'), ('к', 'а', 'бан')) + self.assertEqual(partition('aжbжc', 'ж'), ('a', 'ж', 'bжc')) + + self.assertRaises(ValueError, partition, 'a|b|c', '') + self.assertRaises(TypeError, partition, b'a|b|c', '|') + self.assertRaises(TypeError, partition, 'a|b|c', b'|') + self.assertRaises(TypeError, partition, 'a|b|c', ord('|')) + self.assertRaises(TypeError, partition, [], '|') + # CRASHES partition(NULL, '|') + # CRASHES partition('a|b|c', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_rpartition(self): + """Test PyUnicode_RPartition()""" + from _testcapi import unicode_rpartition as rpartition + + self.assertEqual(rpartition('a|b|c', '|'), ('a|b', '|', 'c')) + self.assertEqual(rpartition('a||b||c', '||'), ('a||b', '||', 'c')) + self.assertEqual(rpartition('а|б|в', '|'), ('а|б', '|', 'в')) + self.assertEqual(rpartition('кабан', 'а'), ('каб', 'а', 'н')) + self.assertEqual(rpartition('aжbжc', 'ж'), ('aжb', 'ж', 'c')) + + self.assertRaises(ValueError, rpartition, 'a|b|c', '') + self.assertRaises(TypeError, rpartition, b'a|b|c', '|') + self.assertRaises(TypeError, rpartition, 'a|b|c', b'|') + self.assertRaises(TypeError, rpartition, 'a|b|c', ord('|')) + self.assertRaises(TypeError, rpartition, [], '|') + # CRASHES rpartition(NULL, '|') + # CRASHES rpartition('a|b|c', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_splitlines(self): + """Test PyUnicode_SplitLines()""" + from _testcapi import unicode_splitlines as splitlines + + self.assertEqual(splitlines('a\nb\rc\r\nd'), ['a', 'b', 'c', 'd']) + self.assertEqual(splitlines('a\nb\rc\r\nd', True), + ['a\n', 'b\r', 'c\r\n', 'd']) + self.assertEqual(splitlines('a\x85b\u2028c\u2029d'), + ['a', 'b', 'c', 'd']) + self.assertEqual(splitlines('a\x85b\u2028c\u2029d', True), + ['a\x85', 'b\u2028', 'c\u2029', 'd']) + self.assertEqual(splitlines('а\nб\rв\r\nг'), ['а', 'б', 'в', 'г']) + + self.assertRaises(TypeError, splitlines, b'a\nb\rc\r\nd') + # CRASHES splitlines(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_translate(self): + """Test PyUnicode_Translate()""" + from _testcapi import unicode_translate as translate + + self.assertEqual(translate('abcd', {ord('a'): 'A', ord('b'): ord('B'), ord('c'): '<>'}), 'AB<>d') + self.assertEqual(translate('абвг', {ord('а'): 'А', ord('б'): ord('Б'), ord('в'): '<>'}), 'АБ<>г') + self.assertEqual(translate('abc', []), 'abc') + self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}) + self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}, 'strict') + self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo') + self.assertEqual(translate('abc', {ord('b'): None}, 'ignore'), 'ac') + self.assertEqual(translate('abc', {ord('b'): None}, 'replace'), 'a\ufffdc') + self.assertEqual(translate('abc', {ord('b'): None}, 'backslashreplace'), r'a\x62c') + # XXX Other error handlers do not support UnicodeTranslateError + self.assertRaises(TypeError, translate, b'abc', []) + self.assertRaises(TypeError, translate, 123, []) + self.assertRaises(TypeError, translate, 'abc', {ord('a'): b'A'}) + self.assertRaises(TypeError, translate, 'abc', 123) + self.assertRaises(TypeError, translate, 'abc', NULL) + # CRASHES translate(NULL, []) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_join(self): + """Test PyUnicode_Join()""" + from _testcapi import unicode_join as join + self.assertEqual(join('|', ['a', 'b', 'c']), 'a|b|c') + self.assertEqual(join('', ['a', 'b', 'c']), 'abc') + self.assertEqual(join(NULL, ['a', 'b', 'c']), 'a b c') + self.assertEqual(join('|', ['а', 'б', 'в']), 'а|б|в') + self.assertEqual(join('ж', ['а', 'б', 'в']), 'ажбжв') + self.assertRaises(TypeError, join, b'|', ['a', 'b', 'c']) + self.assertRaises(TypeError, join, '|', [b'a', b'b', b'c']) + self.assertRaises(TypeError, join, NULL, [b'a', b'b', b'c']) + self.assertRaises(TypeError, join, '|', b'123') + self.assertRaises(TypeError, join, '|', 123) + self.assertRaises(SystemError, join, '|', NULL) - # Test PyUnicode_Count() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_count(self): + """Test PyUnicode_Count()""" from _testcapi import unicode_count - st = 'abcabd' - self.assertEqual(unicode_count(st, 'a', 0, len(st)), 2) - self.assertEqual(unicode_count(st, 'ab', 0, len(st)), 2) - self.assertEqual(unicode_count(st, 'abc', 0, len(st)), 1) - self.assertEqual(unicode_count(st, 'а', 0, len(st)), 0) # cyrillic "a" + for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1": + for i, ch in enumerate(str): + self.assertEqual(unicode_count(str, ch, 0, len(str)), 1) + + str = "!>_= end - self.assertEqual(unicode_count(st, 'abc', 0, 0), 0) - self.assertEqual(unicode_count(st, 'a', 3, 2), 0) - self.assertEqual(unicode_count(st, 'a', sys.maxsize, 5), 0) + self.assertEqual(unicode_count(str, '!', 0, 0), 0) + self.assertEqual(unicode_count(str, '!', len(str), 0), 0) # negative - self.assertEqual(unicode_count(st, 'ab', -len(st), -1), 2) - self.assertEqual(unicode_count(st, 'a', -len(st), -3), 1) - # wrong args - self.assertRaises(TypeError, unicode_count, 'a', 'a') - self.assertRaises(TypeError, unicode_count, 'a', 'a', 1) - self.assertRaises(TypeError, unicode_count, 1, 'a', 0, 1) - self.assertRaises(TypeError, unicode_count, 'a', 1, 0, 1) - # empty string - self.assertEqual(unicode_count('abc', '', 0, 3), 4) - self.assertEqual(unicode_count('abc', '', 1, 3), 3) - self.assertEqual(unicode_count('', '', 0, 1), 1) - self.assertEqual(unicode_count('', 'a', 0, 1), 0) - # different unicode kinds - for uni in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1": - for ch in uni: - self.assertEqual(unicode_count(uni, ch, 0, len(uni)), 1) - self.assertEqual(unicode_count(st, ch, 0, len(st)), 0) - - # subclasses should still work - class MyStr(str): - pass - - self.assertEqual(unicode_count(MyStr('aab'), 'a', 0, 3), 2) - - # Test PyUnicode_FindChar() + self.assertEqual(unicode_count(str, '!', -len(str), -1), 1) + # bad arguments + self.assertRaises(TypeError, unicode_count, str, b'!', 0, len(str)) + self.assertRaises(TypeError, unicode_count, b"!>__= end + self.assertEqual(find(str, '!', 0, 0, 1), -1) + self.assertEqual(find(str, '!', len(str), 0, 1), -1) + # negative + self.assertEqual(find(str, '!', -len(str), -1, 1), 0) + self.assertEqual(find(str, '!', -len(str), -1, -1), 0) + # bad arguments + self.assertRaises(TypeError, find, str, b'!', 0, len(str), 1) + self.assertRaises(TypeError, find, b"!>__'), '<>br<>c<>d<>br<>') + self.assertEqual(replace(str, 'abra', '='), '=cad=') + self.assertEqual(replace(str, 'a', '=', 2), '=br=cadabra') + self.assertEqual(replace(str, 'a', '=', 0), str) + self.assertEqual(replace(str, 'a', '=', sys.maxsize), '=br=c=d=br=') + self.assertEqual(replace(str, 'z', '='), str) + self.assertEqual(replace(str, '', '='), '=a=b=r=a=c=a=d=a=b=r=a=') + self.assertEqual(replace(str, 'a', 'ж'), 'жbrжcжdжbrж') + self.assertEqual(replace('абабагаламага', 'а', '='), '=б=б=г=л=м=г=') + self.assertEqual(replace('Баден-Баден', 'Баден', 'Baden'), 'Baden-Baden') + # bad arguments + self.assertRaises(TypeError, replace, 'a', 'a', b'=') + self.assertRaises(TypeError, replace, 'a', b'a', '=') + self.assertRaises(TypeError, replace, b'a', 'a', '=') + self.assertRaises(TypeError, replace, 'a', 'a', ord('=')) + self.assertRaises(TypeError, replace, 'a', ord('a'), '=') + self.assertRaises(TypeError, replace, [], 'a', '=') + # CRASHES replace('a', 'a', NULL) + # CRASHES replace('a', NULL, '=') + # CRASHES replace(NULL, 'a', '=') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_compare(self): + """Test PyUnicode_Compare()""" + from _testcapi import unicode_compare as compare + + self.assertEqual(compare('abc', 'abc'), 0) + self.assertEqual(compare('abc', 'def'), -1) + self.assertEqual(compare('def', 'abc'), 1) + self.assertEqual(compare('abc', 'abc\0def'), -1) + self.assertEqual(compare('abc\0def', 'abc\0def'), 0) + self.assertEqual(compare('абв', 'abc'), 1) + + self.assertRaises(TypeError, compare, b'abc', 'abc') + self.assertRaises(TypeError, compare, 'abc', b'abc') + self.assertRaises(TypeError, compare, b'abc', b'abc') + self.assertRaises(TypeError, compare, [], 'abc') + self.assertRaises(TypeError, compare, 'abc', []) + self.assertRaises(TypeError, compare, [], []) + # CRASHES compare(NULL, 'abc') + # CRASHES compare('abc', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_comparewithasciistring(self): + """Test PyUnicode_CompareWithASCIIString()""" + from _testcapi import unicode_comparewithasciistring as comparewithasciistring + + self.assertEqual(comparewithasciistring('abc', b'abc'), 0) + self.assertEqual(comparewithasciistring('abc', b'def'), -1) + self.assertEqual(comparewithasciistring('def', b'abc'), 1) + self.assertEqual(comparewithasciistring('abc', b'abc\0def'), 0) + self.assertEqual(comparewithasciistring('abc\0def', b'abc\0def'), 1) + self.assertEqual(comparewithasciistring('абв', b'abc'), 1) + + # CRASHES comparewithasciistring(b'abc', b'abc') + # CRASHES comparewithasciistring([], b'abc') + # CRASHES comparewithasciistring(NULL, b'abc') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_richcompare(self): + """Test PyUnicode_RichCompare()""" + from _testcapi import unicode_richcompare as richcompare + + LT, LE, EQ, NE, GT, GE = range(6) + strings = ('abc', 'абв', '\U0001f600', 'abc\0') + for s1 in strings: + for s2 in strings: + self.assertIs(richcompare(s1, s2, LT), s1 < s2) + self.assertIs(richcompare(s1, s2, LE), s1 <= s2) + self.assertIs(richcompare(s1, s2, EQ), s1 == s2) + self.assertIs(richcompare(s1, s2, NE), s1 != s2) + self.assertIs(richcompare(s1, s2, GT), s1 > s2) + self.assertIs(richcompare(s1, s2, GE), s1 >= s2) + + for op in LT, LE, EQ, NE, GT, GE: + self.assertIs(richcompare(b'abc', 'abc', op), NotImplemented) + self.assertIs(richcompare('abc', b'abc', op), NotImplemented) + self.assertIs(richcompare(b'abc', b'abc', op), NotImplemented) + self.assertIs(richcompare([], 'abc', op), NotImplemented) + self.assertIs(richcompare('abc', [], op), NotImplemented) + self.assertIs(richcompare([], [], op), NotImplemented) + + # CRASHES richcompare(NULL, 'abc', op) + # CRASHES richcompare('abc', NULL, op) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_format(self): + """Test PyUnicode_Contains()""" + from _testcapi import unicode_format as format + + self.assertEqual(format('x=%d!', 42), 'x=42!') + self.assertEqual(format('x=%d!', (42,)), 'x=42!') + self.assertEqual(format('x=%d y=%s!', (42, [])), 'x=42 y=[]!') + + self.assertRaises(SystemError, format, 'x=%d!', NULL) + self.assertRaises(SystemError, format, NULL, 42) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_contains(self): + """Test PyUnicode_Contains()""" + from _testcapi import unicode_contains as contains + + self.assertEqual(contains('abcd', ''), 1) + self.assertEqual(contains('abcd', 'b'), 1) + self.assertEqual(contains('abcd', 'x'), 0) + self.assertEqual(contains('abcd', 'ж'), 0) + self.assertEqual(contains('abcd', '\0'), 0) + self.assertEqual(contains('abc\0def', '\0'), 1) + self.assertEqual(contains('abcd', 'bc'), 1) + + self.assertRaises(TypeError, contains, b'abcd', 'b') + self.assertRaises(TypeError, contains, 'abcd', b'b') + self.assertRaises(TypeError, contains, b'abcd', b'b') + self.assertRaises(TypeError, contains, [], 'b') + self.assertRaises(TypeError, contains, 'abcd', ord('b')) + # CRASHES contains(NULL, 'b') + # CRASHES contains('abcd', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_isidentifier(self): + """Test PyUnicode_IsIdentifier()""" + from _testcapi import unicode_isidentifier as isidentifier + + self.assertEqual(isidentifier("a"), 1) + self.assertEqual(isidentifier("b0"), 1) + self.assertEqual(isidentifier("µ"), 1) + self.assertEqual(isidentifier("𝔘𝔫𝔦𝔠𝔬𝔡𝔢"), 1) + + self.assertEqual(isidentifier(""), 0) + self.assertEqual(isidentifier(" "), 0) + self.assertEqual(isidentifier("["), 0) + self.assertEqual(isidentifier("©"), 0) + self.assertEqual(isidentifier("0"), 0) + + # CRASHES isidentifier(b"a") + # CRASHES isidentifier([]) + # CRASHES isidentifier(NULL) - # Test PyUnicode_CopyCharacters() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_copycharacters(self): + """Test PyUnicode_CopyCharacters()""" from _testcapi import unicode_copycharacters strings = [ @@ -491,6 +1879,10 @@ def test_copycharacters(self): self.assertRaises(SystemError, unicode_copycharacters, s, 1, s, 0, 5) self.assertRaises(SystemError, unicode_copycharacters, s, 0, s, 0, -1) self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0) + self.assertRaises(SystemError, unicode_copycharacters, s, 0, [], 0, 0) + # CRASHES unicode_copycharacters(s, 0, NULL, 0, 0) + # TODO: Test PyUnicode_CopyCharacters() with non-unicode and + # non-modifiable unicode as "to". @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') diff --git a/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst b/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst new file mode 100644 index 000000000000000..ec4cda2080323f9 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst @@ -0,0 +1 @@ +Cover the Unicode C API with tests. diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c index d5c4a9e5b95ec68..88ce33c673ef8a6 100644 --- a/Modules/_testcapi/unicode.c +++ b/Modules/_testcapi/unicode.c @@ -1,3 +1,4 @@ +#define PY_SSIZE_T_CLEAN #include "parts.h" static struct PyModuleDef *_testcapimodule = NULL; // set at initialization @@ -99,6 +100,341 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) Py_RETURN_NONE; } +#define NULLABLE(x) do { if (x == Py_None) x = NULL; } while (0); + +static PyObject * +unicode_copy(PyObject *unicode) +{ + PyObject *copy; + + if (!unicode) { + return NULL; + } + if (!PyUnicode_Check(unicode)) { + Py_INCREF(unicode); + return unicode; + } + + copy = PyUnicode_New(PyUnicode_GET_LENGTH(unicode), + PyUnicode_MAX_CHAR_VALUE(unicode)); + if (!copy) { + return NULL; + } + if (PyUnicode_CopyCharacters(copy, 0, unicode, + 0, PyUnicode_GET_LENGTH(unicode)) < 0) + { + Py_DECREF(copy); + return NULL; + } + return copy; +} + +/* Test PyUnicode_New() */ +static PyObject * +unicode_new(PyObject *self, PyObject *args) +{ + Py_ssize_t size; + unsigned int maxchar; + PyObject *result; + + if (!PyArg_ParseTuple(args, "nI", &size, &maxchar)) { + return NULL; + } + + result = PyUnicode_New(size, (Py_UCS4)maxchar); + if (!result) { + return NULL; + } + if (size > 0 && maxchar <= 0x10ffff && + PyUnicode_Fill(result, 0, size, (Py_UCS4)maxchar) < 0) + { + Py_DECREF(result); + return NULL; + } + return result; +} + +/* Test PyUnicode_Fill() */ +static PyObject * +unicode_fill(PyObject *self, PyObject *args) +{ + PyObject *to, *to_copy; + Py_ssize_t start, length, filled; + unsigned int fill_char; + + if (!PyArg_ParseTuple(args, "OnnI", &to, &start, &length, &fill_char)) { + return NULL; + } + + NULLABLE(to); + if (!(to_copy = unicode_copy(to)) && to) { + return NULL; + } + + filled = PyUnicode_Fill(to_copy, start, length, (Py_UCS4)fill_char); + if (filled == -1 && PyErr_Occurred()) { + Py_DECREF(to_copy); + return NULL; + } + return Py_BuildValue("(Nn)", to_copy, filled); +} + +/* Test PyUnicode_WriteChar() */ +static PyObject * +unicode_writechar(PyObject *self, PyObject *args) +{ + PyObject *to, *to_copy; + Py_ssize_t index; + unsigned int character; + int result; + + if (!PyArg_ParseTuple(args, "OnI", &to, &index, &character)) { + return NULL; + } + + NULLABLE(to); + if (!(to_copy = unicode_copy(to)) && to) { + return NULL; + } + + result = PyUnicode_WriteChar(to_copy, index, (Py_UCS4)character); + if (result == -1 && PyErr_Occurred()) { + Py_DECREF(to_copy); + return NULL; + } + return Py_BuildValue("(Ni)", to_copy, result); +} + +/* Test PyUnicode_Resize() */ +static PyObject * +unicode_resize(PyObject *self, PyObject *args) +{ + PyObject *obj, *copy; + Py_ssize_t length; + int result; + + if (!PyArg_ParseTuple(args, "On", &obj, &length)) { + return NULL; + } + + NULLABLE(obj); + if (!(copy = unicode_copy(obj)) && obj) { + return NULL; + } + result = PyUnicode_Resize(©, length); + if (result == -1 && PyErr_Occurred()) { + Py_XDECREF(copy); + return NULL; + } + if (obj && PyUnicode_Check(obj) && length > PyUnicode_GET_LENGTH(obj)) { + if (PyUnicode_Fill(copy, PyUnicode_GET_LENGTH(obj), length, 0U) < 0) { + Py_DECREF(copy); + return NULL; + } + } + return Py_BuildValue("(Ni)", copy, result); +} + +/* Test PyUnicode_Append() */ +static PyObject * +unicode_append(PyObject *self, PyObject *args) +{ + PyObject *left, *right, *left_copy; + + if (!PyArg_ParseTuple(args, "OO", &left, &right)) + return NULL; + + NULLABLE(left); + NULLABLE(right); + if (!(left_copy = unicode_copy(left)) && left) { + return NULL; + } + PyUnicode_Append(&left_copy, right); + if (PyErr_Occurred()) { + Py_XDECREF(left_copy); + } + return left_copy; +} + +/* Test PyUnicode_AppendAndDel() */ +static PyObject * +unicode_appendanddel(PyObject *self, PyObject *args) +{ + PyObject *left, *right, *left_copy; + + if (!PyArg_ParseTuple(args, "OO", &left, &right)) + return NULL; + + NULLABLE(left); + NULLABLE(right); + if (!(left_copy = unicode_copy(left)) && left) { + return NULL; + } + Py_XINCREF(right); + PyUnicode_AppendAndDel(&left_copy, right); + if (PyErr_Occurred()) { + Py_XDECREF(left_copy); + } + return left_copy; +} + +/* Test PyUnicode_FromStringAndSize() */ +static PyObject * +unicode_fromstringandsize(PyObject *self, PyObject *args) +{ + const char *s = NULL; + Py_ssize_t size = -100; + + if (!PyArg_ParseTuple(args, "z#|n", &s, &size, &size)) { + return NULL; + } + return PyUnicode_FromStringAndSize(s, size); +} + +/* Test PyUnicode_FromString() */ +static PyObject * +unicode_fromstring(PyObject *self, PyObject *arg) +{ + const char *s = NULL; + Py_ssize_t size = -100; + + if (!PyArg_Parse(arg, "z#", &s, &size)) { + return NULL; + } + return PyUnicode_FromString(s); +} + +/* Test PyUnicode_FromKindAndData() */ +static PyObject * +unicode_fromkindanddata(PyObject *self, PyObject *args) +{ + int kind; + void *buffer; + Py_ssize_t size = -100; + + if (!PyArg_ParseTuple(args, "iz#|n", &kind, &buffer, &size, &size)) { + return NULL; + } + + if (kind && size % kind) { + PyErr_SetString(PyExc_SystemError, "invalid size in unicode_fromkindanddata()"); + return NULL; + } + return PyUnicode_FromKindAndData(kind, buffer, kind ? size / kind : 0); +} + +/* Test PyUnicode_Substring() */ +static PyObject * +unicode_substring(PyObject *self, PyObject *args) +{ + PyObject *str; + Py_ssize_t start, end; + + if (!PyArg_ParseTuple(args, "Onn", &str, &start, &end)) { + return NULL; + } + + NULLABLE(str); + return PyUnicode_Substring(str, start, end); +} + +/* Test PyUnicode_GetLength() */ +static PyObject * +unicode_getlength(PyObject *self, PyObject *arg) +{ + Py_ssize_t result; + + NULLABLE(arg); + result = PyUnicode_GetLength(arg); + if (result == -1) + return NULL; + return PyLong_FromSsize_t(result); +} + +/* Test PyUnicode_ReadChar() */ +static PyObject * +unicode_readchar(PyObject *self, PyObject *args) +{ + PyObject *unicode; + Py_ssize_t index; + Py_UCS4 result; + + if (!PyArg_ParseTuple(args, "On", &unicode, &index)) { + return NULL; + } + + NULLABLE(unicode); + result = PyUnicode_ReadChar(unicode, index); + if (result == (Py_UCS4)-1) + return NULL; + return PyLong_FromUnsignedLong(result); +} + +/* Test PyUnicode_FromEncodedObject() */ +static PyObject * +unicode_fromencodedobject(PyObject *self, PyObject *args) +{ + PyObject *obj; + const char *encoding; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "Oz|z", &obj, &encoding, &errors)) { + return NULL; + } + + NULLABLE(obj); + return PyUnicode_FromEncodedObject(obj, encoding, errors); +} + +/* Test PyUnicode_FromObject() */ +static PyObject * +unicode_fromobject(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_FromObject(arg); +} + +/* Test PyUnicode_InternInPlace() */ +static PyObject * +unicode_interninplace(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + Py_XINCREF(arg); + PyUnicode_InternInPlace(&arg); + return arg; +} + +/* Test PyUnicode_InternFromString() */ +static PyObject * +unicode_internfromstring(PyObject *self, PyObject *arg) +{ + const char *s = NULL; + Py_ssize_t size = -100; + + if (!PyArg_Parse(arg, "z#", &s, &size)) { + return NULL; + } + return PyUnicode_InternFromString(s); +} + +/* Test PyUnicode_FromWideChar() */ +static PyObject * +unicode_fromwidechar(PyObject *self, PyObject *args) +{ + const char *s = NULL; + Py_ssize_t bsize = -100; + Py_ssize_t size = -100; + + if (!PyArg_ParseTuple(args, "z#|n", &s, &bsize, &size)) { + return NULL; + } + if (size == -100) { + size = bsize / SIZEOF_WCHAR_T; + } + return PyUnicode_FromWideChar((const wchar_t *)s, size); +} + +/* Test PyUnicode_AsWideChar() */ static PyObject * unicode_aswidechar(PyObject *self, PyObject *args) { @@ -106,8 +442,9 @@ unicode_aswidechar(PyObject *self, PyObject *args) Py_ssize_t buflen, size; wchar_t *buffer; - if (!PyArg_ParseTuple(args, "Un", &unicode, &buflen)) + if (!PyArg_ParseTuple(args, "On", &unicode, &buflen)) return NULL; + NULLABLE(unicode); buffer = PyMem_New(wchar_t, buflen); if (buffer == NULL) return PyErr_NoMemory(); @@ -130,16 +467,35 @@ unicode_aswidechar(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", result, size); } +/* Test PyUnicode_AsWideCharString() with NULL as buffer */ +static PyObject * +unicode_aswidechar_null(PyObject *self, PyObject *args) +{ + PyObject *unicode; + Py_ssize_t buflen, size; + + if (!PyArg_ParseTuple(args, "On", &unicode, &buflen)) + return NULL; + NULLABLE(unicode); + size = PyUnicode_AsWideChar(unicode, NULL, buflen); + if (size == -1) { + return NULL; + } + return PyLong_FromSsize_t(size); +} + +/* Test PyUnicode_AsWideCharString() */ static PyObject * unicode_aswidecharstring(PyObject *self, PyObject *args) { PyObject *unicode, *result; - Py_ssize_t size; + Py_ssize_t size = 100; wchar_t *buffer; - if (!PyArg_ParseTuple(args, "U", &unicode)) + if (!PyArg_ParseTuple(args, "O", &unicode)) return NULL; + NULLABLE(unicode); buffer = PyUnicode_AsWideCharString(unicode, &size); if (buffer == NULL) return NULL; @@ -151,6 +507,29 @@ unicode_aswidecharstring(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", result, size); } +/* Test PyUnicode_AsWideCharString() with NULL as the size address */ +static PyObject * +unicode_aswidecharstring_null(PyObject *self, PyObject *args) +{ + PyObject *unicode, *result; + wchar_t *buffer; + + if (!PyArg_ParseTuple(args, "O", &unicode)) + return NULL; + + NULLABLE(unicode); + buffer = PyUnicode_AsWideCharString(unicode, NULL); + if (buffer == NULL) + return NULL; + + result = PyUnicode_FromWideChar(buffer, -1); + PyMem_Free(buffer); + if (result == NULL) + return NULL; + return result; +} + +/* Test PyUnicode_AsUCS4() */ static PyObject * unicode_asucs4(PyObject *self, PyObject *args) { @@ -159,10 +538,11 @@ unicode_asucs4(PyObject *self, PyObject *args) int copy_null; Py_ssize_t str_len, buf_len; - if (!PyArg_ParseTuple(args, "Unp:unicode_asucs4", &unicode, &str_len, ©_null)) { + if (!PyArg_ParseTuple(args, "Onp:unicode_asucs4", &unicode, &str_len, ©_null)) { return NULL; } + NULLABLE(unicode); buf_len = str_len + 1; buffer = PyMem_NEW(Py_UCS4, buf_len); if (buffer == NULL) { @@ -172,109 +552,960 @@ unicode_asucs4(PyObject *self, PyObject *args) buffer[str_len] = 0xffffU; if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) { - PyMem_Free(buffer); + PyMem_FREE(buffer); return NULL; } result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len); - PyMem_Free(buffer); + PyMem_FREE(buffer); return result; } +/* Test PyUnicode_AsUCS4Copy() */ static PyObject * -unicode_asutf8(PyObject *self, PyObject *args) +unicode_asucs4copy(PyObject *self, PyObject *args) { PyObject *unicode; - const char *buffer; + Py_UCS4 *buffer; + PyObject *result; - if (!PyArg_ParseTuple(args, "U", &unicode)) { + if (!PyArg_ParseTuple(args, "O", &unicode)) { return NULL; } - buffer = PyUnicode_AsUTF8(unicode); + NULLABLE(unicode); + buffer = PyUnicode_AsUCS4Copy(unicode); if (buffer == NULL) { return NULL; } - - return PyBytes_FromString(buffer); + result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, + buffer, + PyUnicode_GET_LENGTH(unicode) + 1); + PyMem_FREE(buffer); + return result; } +/* Test PyUnicode_FromOrdinal() */ static PyObject * -unicode_asutf8andsize(PyObject *self, PyObject *args) +unicode_fromordinal(PyObject *self, PyObject *args) { - PyObject *unicode, *result; - const char *buffer; - Py_ssize_t utf8_len; + int ordinal; - if(!PyArg_ParseTuple(args, "U", &unicode)) { + if (!PyArg_ParseTuple(args, "i", &ordinal)) return NULL; - } - buffer = PyUnicode_AsUTF8AndSize(unicode, &utf8_len); - if (buffer == NULL) { + return PyUnicode_FromOrdinal(ordinal); +} + +/* Test PyUnicode_AsUTF8() */ +static PyObject * +unicode_asutf8(PyObject *self, PyObject *args) +{ + PyObject *unicode; + Py_ssize_t buflen; + const char *s; + + if (!PyArg_ParseTuple(args, "On", &unicode, &buflen)) return NULL; - } - result = PyBytes_FromString(buffer); - if (result == NULL) { + NULLABLE(unicode); + s = PyUnicode_AsUTF8(unicode); + if (s == NULL) return NULL; - } - return Py_BuildValue("(Nn)", result, utf8_len); + return PyBytes_FromStringAndSize(s, buflen); } +/* Test PyUnicode_AsUTF8AndSize() */ static PyObject * -unicode_count(PyObject *self, PyObject *args) +unicode_asutf8andsize(PyObject *self, PyObject *args) { - PyObject *str; - PyObject *substr; - Py_ssize_t result; - Py_ssize_t start, end; + PyObject *unicode; + Py_ssize_t buflen; + const char *s; + Py_ssize_t size = -100; - if (!PyArg_ParseTuple(args, "UUnn:unicode_count", &str, &substr, - &start, &end)) { + if (!PyArg_ParseTuple(args, "On", &unicode, &buflen)) return NULL; - } - result = PyUnicode_Count(str, substr, start, end); - if (result == -1) + NULLABLE(unicode); + s = PyUnicode_AsUTF8AndSize(unicode, &size); + if (s == NULL) return NULL; - else - return PyLong_FromSsize_t(result); + + return Py_BuildValue("(y#n)", s, buflen, size); } +/* Test PyUnicode_AsUTF8AndSize() with NULL as the size address */ static PyObject * -unicode_findchar(PyObject *self, PyObject *args) +unicode_asutf8andsize_null(PyObject *self, PyObject *args) { - PyObject *str; - int direction; - unsigned int ch; - Py_ssize_t result; - Py_ssize_t start, end; + PyObject *unicode; + Py_ssize_t buflen; + const char *s; - if (!PyArg_ParseTuple(args, "UInni:unicode_findchar", &str, &ch, - &start, &end, &direction)) { + if (!PyArg_ParseTuple(args, "On", &unicode, &buflen)) return NULL; - } - result = PyUnicode_FindChar(str, (Py_UCS4)ch, start, end, direction); - if (result == -2) + NULLABLE(unicode); + s = PyUnicode_AsUTF8AndSize(unicode, NULL); + if (s == NULL) return NULL; - else - return PyLong_FromSsize_t(result); + + return PyBytes_FromStringAndSize(s, buflen); +} + +/* Test PyUnicode_GetDefaultEncoding() */ +static PyObject * +unicode_getdefaultencoding(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + const char *s = PyUnicode_GetDefaultEncoding(); + if (s == NULL) + return NULL; + + return PyBytes_FromString(s); +} + +/* Test PyUnicode_Decode() */ +static PyObject * +unicode_decode(PyObject *self, PyObject *args) +{ + const char *s; + Py_ssize_t size; + const char *encoding; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#z|z", &s, &size, &encoding, &errors)) + return NULL; + + return PyUnicode_Decode(s, size, encoding, errors); +} + +/* Test PyUnicode_AsEncodedString() */ +static PyObject * +unicode_asencodedstring(PyObject *self, PyObject *args) +{ + PyObject *unicode; + const char *encoding; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "Oz|z", &unicode, &encoding, &errors)) + return NULL; + + NULLABLE(unicode); + return PyUnicode_AsEncodedString(unicode, encoding, errors); +} + +/* Test PyUnicode_BuildEncodingMap() */ +static PyObject * +unicode_buildencodingmap(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_BuildEncodingMap(arg); +} + +/* Test PyUnicode_DecodeUTF7() */ +static PyObject * +unicode_decodeutf7(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeUTF7(data, size, errors); +} + +/* Test PyUnicode_DecodeUTF7Stateful() */ +static PyObject * +unicode_decodeutf7stateful(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + Py_ssize_t consumed; + PyObject *result; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeUTF7Stateful(data, size, errors, &consumed); + if (!result) { + return NULL; + } + return Py_BuildValue("(Nn)", result, consumed); +} + +/* Test PyUnicode_DecodeUTF8() */ +static PyObject * +unicode_decodeutf8(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeUTF8(data, size, errors); +} + +/* Test PyUnicode_DecodeUTF8Stateful() */ +static PyObject * +unicode_decodeutf8stateful(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + Py_ssize_t consumed; + PyObject *result; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeUTF8Stateful(data, size, errors, &consumed); + if (!result) { + return NULL; + } + return Py_BuildValue("(Nn)", result, consumed); +} + +/* Test PyUnicode_AsUTF8String() */ +static PyObject * +unicode_asutf8string(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsUTF8String(arg); +} + +/* Test PyUnicode_DecodeUTF32() */ +static PyObject * +unicode_decodeutf32(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + int byteorder; + PyObject *result; + + if (!PyArg_ParseTuple(args, "iy#|z", &byteorder, &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeUTF32(data, size, errors, &byteorder); + if (!result) { + return NULL; + } + return Py_BuildValue("(iN)", byteorder, result); +} + +/* Test PyUnicode_DecodeUTF32Stateful() */ +static PyObject * +unicode_decodeutf32stateful(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + int byteorder; + Py_ssize_t consumed; + PyObject *result; + + if (!PyArg_ParseTuple(args, "iy#|z", &byteorder, &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder, &consumed); + if (!result) { + return NULL; + } + return Py_BuildValue("(iNn)", byteorder, result, consumed); +} + +/* Test PyUnicode_AsUTF32String() */ +static PyObject * +unicode_asutf32string(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsUTF32String(arg); +} + +/* Test PyUnicode_DecodeUTF16() */ +static PyObject * +unicode_decodeutf16(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + int byteorder = 0; + PyObject *result; + + if (!PyArg_ParseTuple(args, "iy#|z", &byteorder, &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeUTF16(data, size, errors, &byteorder); + if (!result) { + return NULL; + } + return Py_BuildValue("(iN)", byteorder, result); +} + +/* Test PyUnicode_DecodeUTF16Stateful() */ +static PyObject * +unicode_decodeutf16stateful(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + int byteorder; + Py_ssize_t consumed; + PyObject *result; + + if (!PyArg_ParseTuple(args, "iy#|z", &byteorder, &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder, &consumed); + if (!result) { + return NULL; + } + return Py_BuildValue("(iNn)", byteorder, result, consumed); +} + +/* Test PyUnicode_AsUTF16String() */ +static PyObject * +unicode_asutf16string(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsUTF16String(arg); +} + +/* Test PyUnicode_DecodeUnicodeEscape() */ +static PyObject * +unicode_decodeunicodeescape(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeUnicodeEscape(data, size, errors); +} + +/* Test PyUnicode_AsUnicodeEscapeString() */ +static PyObject * +unicode_asunicodeescapestring(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsUnicodeEscapeString(arg); +} + +static PyObject * +unicode_decoderawunicodeescape(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeRawUnicodeEscape(data, size, errors); +} + +/* Test PyUnicode_AsRawUnicodeEscapeString() */ +static PyObject * +unicode_asrawunicodeescapestring(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsRawUnicodeEscapeString(arg); +} + +static PyObject * +unicode_decodelatin1(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeLatin1(data, size, errors); +} + +/* Test PyUnicode_AsLatin1String() */ +static PyObject * +unicode_aslatin1string(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsLatin1String(arg); +} + +/* Test PyUnicode_DecodeASCII() */ +static PyObject * +unicode_decodeascii(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeASCII(data, size, errors); +} + +/* Test PyUnicode_AsASCIIString() */ +static PyObject * +unicode_asasciistring(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsASCIIString(arg); +} + +/* Test PyUnicode_DecodeCharmap() */ +static PyObject * +unicode_decodecharmap(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + PyObject *mapping; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#O|z", &data, &size, &mapping, &errors)) + return NULL; + + NULLABLE(mapping); + return PyUnicode_DecodeCharmap(data, size, mapping, errors); +} + +/* Test PyUnicode_AsCharmapString() */ +static PyObject * +unicode_ascharmapstring(PyObject *self, PyObject *args) +{ + PyObject *unicode; + PyObject *mapping; + + if (!PyArg_ParseTuple(args, "OO", &unicode, &mapping)) + return NULL; + + NULLABLE(unicode); + NULLABLE(mapping); + return PyUnicode_AsCharmapString(unicode, mapping); +} + +#ifdef MS_WINDOWS + +/* Test PyUnicode_DecodeMBCS() */ +static PyObject * +unicode_decodembcs(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeMBCS(data, size, errors); +} + +/* Test PyUnicode_DecodeMBCSStateful() */ +static PyObject * +unicode_decodembcsstateful(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + Py_ssize_t consumed; + PyObject *result; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeMBCSStateful(data, size, errors, &consumed); + if (!result) { + return NULL; + } + return Py_BuildValue("(Nn)", result, consumed); +} + +/* Test PyUnicode_DecodeCodePageStateful() */ +static PyObject * +unicode_decodecodepagestateful(PyObject *self, PyObject *args) +{ + int code_page; + const char *data; + Py_ssize_t size; + const char *errors = NULL; + Py_ssize_t consumed; + PyObject *result; + + if (!PyArg_ParseTuple(args, "iy#|z", &code_page, &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeCodePageStateful(data, size, errors, &consumed); + if (!result) { + return NULL; + } + return Py_BuildValue("(Nn)", result, consumed); +} + +/* Test PyUnicode_AsMBCSString() */ +static PyObject * +unicode_asmbcsstring(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsMBCSString(arg); +} + +/* Test PyUnicode_EncodeCodePage() */ +static PyObject * +unicode_encodecodepage(PyObject *self, PyObject *args) +{ + int code_page; + PyObject *unicode; + const char *errors; + + if (!PyArg_ParseTuple(args, "iO|z", &code_page, &unicode, &errors)) + return NULL; + + NULLABLE(unicode); + return PyUnicode_EncodeCodePage(code_page, unicode, errors); +} + +#endif /* MS_WINDOWS */ + +/* Test _PyUnicode_TransformDecimalAndSpaceToASCII() */ +static PyObject * +unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return _PyUnicode_TransformDecimalAndSpaceToASCII(arg); +} + +/* Test PyUnicode_DecodeLocaleAndSize() */ +static PyObject * +unicode_decodelocaleandsize(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeLocaleAndSize(data, size, errors); +} + +/* Test PyUnicode_DecodeLocale() */ +static PyObject * +unicode_decodelocale(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeLocale(data, errors); +} + +/* Test PyUnicode_EncodeLocale() */ +static PyObject * +unicode_encodelocale(PyObject *self, PyObject *args) +{ + PyObject *unicode; + const char *errors; + + if (!PyArg_ParseTuple(args, "O|z", &unicode, &errors)) + return NULL; + + NULLABLE(unicode); + return PyUnicode_EncodeLocale(unicode, errors); +} + +/* Test PyUnicode_DecodeFSDefault() */ +static PyObject * +unicode_decodefsdefault(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + + if (!PyArg_ParseTuple(args, "y#", &data, &size)) + return NULL; + + return PyUnicode_DecodeFSDefault(data); +} + +/* Test PyUnicode_DecodeFSDefaultAndSize() */ +static PyObject * +unicode_decodefsdefaultandsize(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + + if (!PyArg_ParseTuple(args, "y#|n", &data, &size, &size)) + return NULL; + + return PyUnicode_DecodeFSDefaultAndSize(data, size); +} + +/* Test PyUnicode_EncodeFSDefault() */ +static PyObject * +unicode_encodefsdefault(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_EncodeFSDefault(arg); +} + +/* Test PyUnicode_Concat() */ +static PyObject * +unicode_concat(PyObject *self, PyObject *args) +{ + PyObject *left; + PyObject *right; + + if (!PyArg_ParseTuple(args, "OO", &left, &right)) + return NULL; + + NULLABLE(left); + NULLABLE(right); + return PyUnicode_Concat(left, right); +} + +/* Test PyUnicode_Split() */ +static PyObject * +unicode_split(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + Py_ssize_t maxsplit = -1; + + if (!PyArg_ParseTuple(args, "OO|n", &s, &sep, &maxsplit)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_Split(s, sep, maxsplit); +} + +/* Test PyUnicode_RSplit() */ +static PyObject * +unicode_rsplit(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + Py_ssize_t maxsplit = -1; + + if (!PyArg_ParseTuple(args, "OO|n", &s, &sep, &maxsplit)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_RSplit(s, sep, maxsplit); +} + +/* Test PyUnicode_Splitlines() */ +static PyObject * +unicode_splitlines(PyObject *self, PyObject *args) +{ + PyObject *s; + int keepends = 0; + + if (!PyArg_ParseTuple(args, "O|i", &s, &keepends)) + return NULL; + + NULLABLE(s); + return PyUnicode_Splitlines(s, keepends); +} + +/* Test PyUnicode_Partition() */ +static PyObject * +unicode_partition(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + + if (!PyArg_ParseTuple(args, "OO", &s, &sep)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_Partition(s, sep); +} + +/* Test PyUnicode_RPartition() */ +static PyObject * +unicode_rpartition(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + + if (!PyArg_ParseTuple(args, "OO", &s, &sep)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_RPartition(s, sep); +} + +/* Test PyUnicode_Translate() */ +static PyObject * +unicode_translate(PyObject *self, PyObject *args) +{ + PyObject *obj; + PyObject *table; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "OO|z", &obj, &table, &errors)) + return NULL; + + NULLABLE(obj); + NULLABLE(table); + return PyUnicode_Translate(obj, table, errors); +} + +/* Test PyUnicode_Join() */ +static PyObject * +unicode_join(PyObject *self, PyObject *args) +{ + PyObject *sep; + PyObject *seq; + + if (!PyArg_ParseTuple(args, "OO", &sep, &seq)) + return NULL; + + NULLABLE(sep); + NULLABLE(seq); + return PyUnicode_Join(sep, seq); +} + +/* Test PyUnicode_Count() */ +unicode_count(PyObject *self, PyObject *args) +{ + PyObject *str; + PyObject *substr; + Py_ssize_t start; + Py_ssize_t end; + Py_ssize_t result; + + if (!PyArg_ParseTuple(args, "OOnn", &str, &substr, &start, &end)) + return NULL; + + NULLABLE(str); + NULLABLE(substr); + result = PyUnicode_Count(str, substr, start, end); + if (result == -1) + return NULL; + return PyLong_FromSsize_t(result); +} + +/* Test PyUnicode_Find() */ +static PyObject * +unicode_find(PyObject *self, PyObject *args) +{ + PyObject *str; + PyObject *substr; + Py_ssize_t start; + Py_ssize_t end; + int direction; + Py_ssize_t result; + + if (!PyArg_ParseTuple(args, "OOnni", &str, &substr, &start, &end, &direction)) + return NULL; + + NULLABLE(str); + NULLABLE(substr); + result = PyUnicode_Find(str, substr, start, end, direction); + if (result == -2) + return NULL; + return PyLong_FromSsize_t(result); +} + +/* Test PyUnicode_Tailmatch() */ +static PyObject * +unicode_tailmatch(PyObject *self, PyObject *args) +{ + PyObject *str; + PyObject *substr; + Py_ssize_t start; + Py_ssize_t end; + int direction; + Py_ssize_t result; + + if (!PyArg_ParseTuple(args, "OOnni", &str, &substr, &start, &end, &direction)) + return NULL; + + NULLABLE(str); + NULLABLE(substr); + result = PyUnicode_Tailmatch(str, substr, start, end, direction); + if (result == -1) + return NULL; + return PyLong_FromSsize_t(result); +} + +static PyObject * +/* Test PyUnicode_FindChar() */ +static PyObject * +unicode_findchar(PyObject *self, PyObject *args) +{ + PyObject *str; + int direction; + unsigned int ch; + Py_ssize_t result; + Py_ssize_t start, end; + + if (!PyArg_ParseTuple(args, "OInni:unicode_findchar", &str, &ch, + &start, &end, &direction)) { + return NULL; + } + NULLABLE(str); + result = PyUnicode_FindChar(str, (Py_UCS4)ch, start, end, direction); + if (result == -2) + return NULL; + else + return PyLong_FromSsize_t(result); +} + +/* Test PyUnicode_Replace() */ +static PyObject * +unicode_replace(PyObject *self, PyObject *args) +{ + PyObject *str; + PyObject *substr; + PyObject *replstr; + Py_ssize_t maxcount = -1; + + if (!PyArg_ParseTuple(args, "OOO|n", &str, &substr, &replstr, &maxcount)) + return NULL; + + NULLABLE(str); + NULLABLE(substr); + NULLABLE(replstr); + return PyUnicode_Replace(str, substr, replstr, maxcount); +} + +/* Test PyUnicode_Compare() */ +static PyObject * +unicode_compare(PyObject *self, PyObject *args) +{ + PyObject *left; + PyObject *right; + int result; + + if (!PyArg_ParseTuple(args, "OO", &left, &right)) + return NULL; + + NULLABLE(left); + NULLABLE(right); + result = PyUnicode_Compare(left, right); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_CompareWithASCIIString() */ +static PyObject * +unicode_comparewithasciistring(PyObject *self, PyObject *args) +{ + PyObject *left; + const char *right = NULL; + Py_ssize_t right_len; + int result; + + if (!PyArg_ParseTuple(args, "O|y#", &left, &right, &right_len)) + return NULL; + + NULLABLE(left); + result = PyUnicode_CompareWithASCIIString(left, right); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_RichCompare() */ +static PyObject * +unicode_richcompare(PyObject *self, PyObject *args) +{ + PyObject *left; + PyObject *right; + int op; + + if (!PyArg_ParseTuple(args, "OOi", &left, &right, &op)) + return NULL; + + NULLABLE(left); + NULLABLE(right); + return PyUnicode_RichCompare(left, right, op); +} + +/* Test PyUnicode_Format() */ +static PyObject * +unicode_format(PyObject *self, PyObject *args) +{ + PyObject *format; + PyObject *fargs; + + if (!PyArg_ParseTuple(args, "OO", &format, &fargs)) + return NULL; + + NULLABLE(format); + NULLABLE(fargs); + return PyUnicode_Format(format, fargs); +} + +/* Test PyUnicode_Contains() */ +static PyObject * +unicode_contains(PyObject *self, PyObject *args) +{ + PyObject *container; + PyObject *element; + int result; + + if (!PyArg_ParseTuple(args, "OO", &container, &element)) + return NULL; + + NULLABLE(container); + NULLABLE(element); + result = PyUnicode_Contains(container, element); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_IsIdentifier() */ +static PyObject * +unicode_isidentifier(PyObject *self, PyObject *arg) +{ + int result; + + NULLABLE(arg); + result = PyUnicode_IsIdentifier(arg); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); } +/* Test PyUnicode_CopyCharacters() */ static PyObject * unicode_copycharacters(PyObject *self, PyObject *args) { PyObject *from, *to, *to_copy; Py_ssize_t from_start, to_start, how_many, copied; - if (!PyArg_ParseTuple(args, "UnOnn:unicode_copycharacters", &to, &to_start, + if (!PyArg_ParseTuple(args, "UnOnn", &to, &to_start, &from, &from_start, &how_many)) { return NULL; } + NULLABLE(from); if (!(to_copy = PyUnicode_New(PyUnicode_GET_LENGTH(to), PyUnicode_MAX_CHAR_VALUE(to)))) { return NULL; @@ -284,8 +1515,9 @@ unicode_copycharacters(PyObject *self, PyObject *args) return NULL; } - if ((copied = PyUnicode_CopyCharacters(to_copy, to_start, from, - from_start, how_many)) < 0) { + copied = PyUnicode_CopyCharacters(to_copy, to_start, from, + from_start, how_many); + if (copied == -1 && PyErr_Occurred()) { Py_DECREF(to_copy); return NULL; } @@ -711,13 +1943,93 @@ static PyMethodDef TestMethods[] = { test_unicode_compare_with_ascii, METH_NOARGS}, {"test_string_from_format", test_string_from_format, METH_NOARGS}, {"test_widechar", test_widechar, METH_NOARGS}, + {"unicode_new", unicode_new, METH_VARARGS}, + {"unicode_fill", unicode_fill, METH_VARARGS}, + {"unicode_writechar", unicode_writechar, METH_VARARGS}, + {"unicode_resize", unicode_resize, METH_VARARGS}, + {"unicode_append", unicode_append, METH_VARARGS}, + {"unicode_appendanddel", unicode_appendanddel, METH_VARARGS}, + {"unicode_fromstringandsize",unicode_fromstringandsize, METH_VARARGS}, + {"unicode_fromstring", unicode_fromstring, METH_O}, + {"unicode_fromkindanddata", unicode_fromkindanddata, METH_VARARGS}, + {"unicode_substring", unicode_substring, METH_VARARGS}, + {"unicode_getlength", unicode_getlength, METH_O}, + {"unicode_readchar", unicode_readchar, METH_VARARGS}, + {"unicode_fromencodedobject",unicode_fromencodedobject, METH_VARARGS}, + {"unicode_fromobject", unicode_fromobject, METH_O}, + {"unicode_interninplace", unicode_interninplace, METH_O}, + {"unicode_internfromstring", unicode_internfromstring, METH_O}, + {"unicode_fromwidechar", unicode_fromwidechar, METH_VARARGS}, {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, + {"unicode_aswidechar_null", unicode_aswidechar_null, METH_VARARGS}, {"unicode_aswidecharstring", unicode_aswidecharstring, METH_VARARGS}, + {"unicode_aswidecharstring_null",unicode_aswidecharstring_null,METH_VARARGS}, {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, + {"unicode_asucs4copy", unicode_asucs4copy, METH_VARARGS}, + {"unicode_fromordinal", unicode_fromordinal, METH_VARARGS}, {"unicode_asutf8", unicode_asutf8, METH_VARARGS}, {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS}, + {"unicode_asutf8andsize_null",unicode_asutf8andsize_null, METH_VARARGS}, + {"unicode_getdefaultencoding",unicode_getdefaultencoding, METH_NOARGS}, + {"unicode_decode", unicode_decode, METH_VARARGS}, + {"unicode_asencodedstring", unicode_asencodedstring, METH_VARARGS}, + {"unicode_buildencodingmap", unicode_buildencodingmap, METH_O}, + {"unicode_decodeutf7", unicode_decodeutf7, METH_VARARGS}, + {"unicode_decodeutf7stateful",unicode_decodeutf7stateful, METH_VARARGS}, + {"unicode_decodeutf8", unicode_decodeutf8, METH_VARARGS}, + {"unicode_decodeutf8stateful",unicode_decodeutf8stateful, METH_VARARGS}, + {"unicode_asutf8string", unicode_asutf8string, METH_O}, + {"unicode_decodeutf16", unicode_decodeutf16, METH_VARARGS}, + {"unicode_decodeutf16stateful",unicode_decodeutf16stateful, METH_VARARGS}, + {"unicode_asutf16string", unicode_asutf16string, METH_O}, + {"unicode_decodeutf32", unicode_decodeutf32, METH_VARARGS}, + {"unicode_decodeutf32stateful",unicode_decodeutf32stateful, METH_VARARGS}, + {"unicode_asutf32string", unicode_asutf32string, METH_O}, + {"unicode_decodeunicodeescape",unicode_decodeunicodeescape, METH_VARARGS}, + {"unicode_asunicodeescapestring",unicode_asunicodeescapestring,METH_O}, + {"unicode_decoderawunicodeescape",unicode_decoderawunicodeescape,METH_VARARGS}, + {"unicode_asrawunicodeescapestring",unicode_asrawunicodeescapestring,METH_O}, + {"unicode_decodelatin1", unicode_decodelatin1, METH_VARARGS}, + {"unicode_aslatin1string", unicode_aslatin1string, METH_O}, + {"unicode_decodeascii", unicode_decodeascii, METH_VARARGS}, + {"unicode_asasciistring", unicode_asasciistring, METH_O}, + {"unicode_decodecharmap", unicode_decodecharmap, METH_VARARGS}, + {"unicode_ascharmapstring", unicode_ascharmapstring, METH_VARARGS}, +#ifdef MS_WINDOWS + {"unicode_decodembcs", unicode_decodembcs, METH_VARARGS}, + {"unicode_decodembcsstateful",unicode_decodembcsstateful, METH_VARARGS}, + {"unicode_decodecodepagestateful",unicode_decodecodepagestateful,METH_VARARGS}, + {"unicode_asmbcsstring", unicode_asmbcsstring, METH_O}, + {"unicode_encodecodepage", unicode_encodecodepage, METH_VARARGS}, +#endif /* MS_WINDOWS */ + + {"unicode_decodelocaleandsize",unicode_decodelocaleandsize, METH_VARARGS}, + {"unicode_decodelocale", unicode_decodelocale, METH_VARARGS}, + {"unicode_encodelocale", unicode_encodelocale, METH_VARARGS}, + {"unicode_decodefsdefault", unicode_decodefsdefault, METH_VARARGS}, + {"unicode_decodefsdefaultandsize",unicode_decodefsdefaultandsize,METH_VARARGS}, + {"unicode_encodefsdefault", unicode_encodefsdefault, METH_O}, + + {"unicode_transformdecimalandspacetoascii", unicode_transformdecimalandspacetoascii, METH_O}, + {"unicode_concat", unicode_concat, METH_VARARGS}, + {"unicode_splitlines", unicode_splitlines, METH_VARARGS}, + {"unicode_split", unicode_split, METH_VARARGS}, + {"unicode_rsplit", unicode_rsplit, METH_VARARGS}, + {"unicode_partition", unicode_partition, METH_VARARGS}, + {"unicode_rpartition", unicode_rpartition, METH_VARARGS}, + {"unicode_translate", unicode_translate, METH_VARARGS}, + {"unicode_join", unicode_join, METH_VARARGS}, {"unicode_count", unicode_count, METH_VARARGS}, + {"unicode_tailmatch", unicode_tailmatch, METH_VARARGS}, + {"unicode_find", unicode_find, METH_VARARGS}, {"unicode_findchar", unicode_findchar, METH_VARARGS}, + {"unicode_replace", unicode_replace, METH_VARARGS}, + {"unicode_compare", unicode_compare, METH_VARARGS}, + {"unicode_comparewithasciistring",unicode_comparewithasciistring,METH_VARARGS}, + {"unicode_richcompare", unicode_richcompare, METH_VARARGS}, + {"unicode_format", unicode_format, METH_VARARGS}, + {"unicode_contains", unicode_contains, METH_VARARGS}, + {"unicode_isidentifier", unicode_isidentifier, METH_O}, {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, {NULL}, }; diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 9dd09f68003d937..aebfcec028514ef 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -4144,6 +4144,7 @@ PyInit__testcapi(void) PyModule_AddObject(m, "ULLONG_MAX", PyLong_FromUnsignedLongLong(ULLONG_MAX)); PyModule_AddObject(m, "PY_SSIZE_T_MAX", PyLong_FromSsize_t(PY_SSIZE_T_MAX)); PyModule_AddObject(m, "PY_SSIZE_T_MIN", PyLong_FromSsize_t(PY_SSIZE_T_MIN)); + PyModule_AddObject(m, "SIZEOF_WCHAR_T", PyLong_FromSsize_t(sizeof(wchar_t))); PyModule_AddObject(m, "SIZEOF_TIME_T", PyLong_FromSsize_t(sizeof(time_t))); PyModule_AddObject(m, "Py_Version", PyLong_FromUnsignedLong(Py_Version)); Py_INCREF(&PyInstanceMethod_Type); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b1acfc71379cd55..34e08f2ac1abc0b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4530,6 +4530,9 @@ unicode_decode_utf8(const char *s, Py_ssize_t size, } s += ascii_decode(s, end, PyUnicode_1BYTE_DATA(u)); if (s == end) { + if (consumed) { + *consumed = size; + } return u; }