diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 9c0b5b1..2fe1c9c 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -7,6 +7,11 @@ on: pull_request: branches: [ master ] +env: + CIBW_TEST_COMMAND: python -m unittest discover --start-directory {project} + CIBW_SKIP: pp* + CIBW_ENVIRONMENT_PASS_LINUX: TOMLI_USE_MYPYC + jobs: linters: @@ -74,12 +79,102 @@ jobs: with: token: ${{ secrets.CODECOV_TOKEN }} + binary-wheels-standard: + name: Binary wheels for ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Switch build backend to setuptools + run: | + pip install -r scripts/requirements.txt + python scripts/use_setuptools.py + + - name: Build wheels + uses: pypa/cibuildwheel@v2.22.0 + env: + CIBW_ARCHS_MACOS: x86_64 arm64 + TOMLI_USE_MYPYC: '1' + + - uses: actions/upload-artifact@v4 + with: + name: artifact-standard-${{ matrix.os }} + path: wheelhouse/*.whl + if-no-files-found: error + + pure-python-wheel-and-sdist: + name: Build a pure Python wheel and source distribution + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install build dependencies + run: pip install build + + - name: Build + run: python -m build + + - uses: actions/upload-artifact@v4 + with: + name: artifact-pure-python + path: dist/* + if-no-files-found: error + + binary-wheels-arm: + name: Build Linux wheels for ARM + runs-on: ubuntu-latest + # Very slow (~ 1 hour), no need to run on PRs + if: > + github.event_name == 'push' + && + (github.ref == 'refs/heads/master' || startsWith(github.event.ref, 'refs/tags')) + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Switch build backend to setuptools + run: | + pip install -r scripts/requirements.txt + python scripts/use_setuptools.py + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + with: + platforms: arm64 + + - name: Build wheels + uses: pypa/cibuildwheel@v2.22.0 + env: + CIBW_ARCHS_LINUX: aarch64 + TOMLI_USE_MYPYC: '1' + + - uses: actions/upload-artifact@v4 + with: + name: artifact-arm-linux + path: wheelhouse/*.whl + if-no-files-found: error + allgood: runs-on: ubuntu-latest needs: - tests - coverage - linters + - binary-wheels-standard + - pure-python-wheel-and-sdist + - binary-wheels-arm steps: - run: echo "Great success!" @@ -89,19 +184,20 @@ jobs: if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4 + with: + path: dist + pattern: artifact-* + merge-multiple: true - uses: actions/setup-python@v5 with: python-version: '3.x' - - name: Install build and publish tools + - name: Install twine run: | - pip install build twine - - name: Build and check + pip install twine + - name: Check and publish run: | - rm -rf dist/ && python -m build twine check --strict dist/* - - name: Publish - run: | twine upload dist/* env: TWINE_USERNAME: __token__ diff --git a/CHANGELOG.md b/CHANGELOG.md index b06ae51..73757be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 2.2.0 + +- Added + - mypyc generated binary wheels for common platforms + ## 2.1.0 - Deprecated diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..cbd2698 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +prune tests/ diff --git a/pyproject.toml b/pyproject.toml index 40e2a38..ac3db83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ env_list = ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] [tool.tox.env_run_base] description = "run tests against a built package under {base_python}" +pass_env = ["TOMLI_USE_MYPYC"] commands = [ ["python", "-m", "unittest", { replace = "posargs", extend = true }], ] diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 0000000..3b069aa --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1 @@ +tomli-w diff --git a/scripts/use_setuptools.py b/scripts/use_setuptools.py new file mode 100644 index 0000000..3bffb89 --- /dev/null +++ b/scripts/use_setuptools.py @@ -0,0 +1,12 @@ +from pathlib import Path +import tomllib + +import tomli_w # type: ignore[import-not-found] + +pyproject_path = Path(__file__).parent.parent / "pyproject.toml" +data = tomllib.loads(pyproject_path.read_bytes().decode()) +data["build-system"] = { + "requires": ["setuptools>=69", "mypy[mypyc]>=1.13"], + "build-backend": "setuptools.build_meta", +} +pyproject_path.write_bytes(tomli_w.dumps(data).encode()) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..abdb9cd --- /dev/null +++ b/setup.py @@ -0,0 +1,15 @@ +import os + +from setuptools import setup # type: ignore[import-untyped] + +if os.environ.get("TOMLI_USE_MYPYC") == "1": + import glob + + from mypyc.build import mypycify # type: ignore[import-untyped] + + files = glob.glob("src/**/*.py", recursive=True) + ext_modules = mypycify(files) +else: + ext_modules = [] + +setup(ext_modules=ext_modules) diff --git a/src/tomli/__init__.py b/src/tomli/__init__.py index c0b4cbf..fa1a7f2 100644 --- a/src/tomli/__init__.py +++ b/src/tomli/__init__.py @@ -6,6 +6,3 @@ __version__ = "2.1.0" # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT from ._parser import TOMLDecodeError, load, loads - -# Pretend this exception was created here. -TOMLDecodeError.__module__ = __name__ diff --git a/src/tomli/_parser.py b/src/tomli/_parser.py index 16c76cd..db56a16 100644 --- a/src/tomli/_parser.py +++ b/src/tomli/_parser.py @@ -6,8 +6,9 @@ from collections.abc import Iterable import string +import sys from types import MappingProxyType -from typing import IO, Any, NamedTuple +from typing import IO, Any, Final, NamedTuple import warnings from ._re import ( @@ -20,6 +21,17 @@ ) from ._types import Key, ParseFloat, Pos +# Inline tables/arrays are implemented using recursion. Pathologically +# nested documents cause pure Python to raise RecursionError (which is OK), +# but mypyc binary wheels will crash unrecoverably (not OK). According to +# mypyc docs this will be fixed in the future: +# https://mypyc.readthedocs.io/en/latest/differences_from_python.html#stack-overflows +# Before mypyc's fix is in, recursion needs to be limited by this library. +# Choosing `sys.getrecursionlimit()` as maximum inline table/array nesting +# level, as it allows more nesting than pure Python, but still seems a far +# lower number than where mypyc binaries crash. +MAX_INLINE_NESTING: Final = sys.getrecursionlimit() + ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127)) # Neither of these sets include quotation mark or backslash. They are @@ -69,9 +81,9 @@ class TOMLDecodeError(ValueError): def __init__( self, - msg: str = DEPRECATED_DEFAULT, # type: ignore[assignment] - doc: str = DEPRECATED_DEFAULT, # type: ignore[assignment] - pos: Pos = DEPRECATED_DEFAULT, # type: ignore[assignment] + msg: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT, + doc: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT, + pos: Pos | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT, *args: Any, ): if ( @@ -86,11 +98,11 @@ def __init__( DeprecationWarning, stacklevel=2, ) - if pos is not DEPRECATED_DEFAULT: # type: ignore[comparison-overlap] + if pos is not DEPRECATED_DEFAULT: args = pos, *args - if doc is not DEPRECATED_DEFAULT: # type: ignore[comparison-overlap] + if doc is not DEPRECATED_DEFAULT: args = doc, *args - if msg is not DEPRECATED_DEFAULT: # type: ignore[comparison-overlap] + if msg is not DEPRECATED_DEFAULT: args = msg, *args ValueError.__init__(self, *args) return @@ -202,10 +214,10 @@ class Flags: """Flags that map to parsed keys/namespaces.""" # Marks an immutable namespace (inline array or inline table). - FROZEN = 0 + FROZEN: Final = 0 # Marks a nest that has been explicitly created and can no longer # be opened using the "[table]" syntax. - EXPLICIT_NEST = 1 + EXPLICIT_NEST: Final = 1 def __init__(self) -> None: self._flags: dict[str, dict] = {} @@ -251,8 +263,8 @@ def is_(self, key: Key, flag: int) -> bool: cont = inner_cont["nested"] key_stem = key[-1] if key_stem in cont: - cont = cont[key_stem] - return flag in cont["flags"] or flag in cont["recursive_flags"] + inner_cont = cont[key_stem] + return flag in inner_cont["flags"] or flag in inner_cont["recursive_flags"] return False @@ -393,7 +405,7 @@ def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]: def key_value_rule( src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat ) -> Pos: - pos, key, value = parse_key_value_pair(src, pos, parse_float) + pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl=0) key_parent, key_stem = key[:-1], key[-1] abs_key_parent = header + key_parent @@ -425,7 +437,7 @@ def key_value_rule( def parse_key_value_pair( - src: str, pos: Pos, parse_float: ParseFloat + src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int ) -> tuple[Pos, Key, Any]: pos, key = parse_key(src, pos) try: @@ -436,7 +448,7 @@ def parse_key_value_pair( raise TOMLDecodeError("Expected '=' after a key in a key/value pair", src, pos) pos += 1 pos = skip_chars(src, pos, TOML_WS) - pos, value = parse_value(src, pos, parse_float) + pos, value = parse_value(src, pos, parse_float, nest_lvl) return pos, key, value @@ -479,7 +491,9 @@ def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]: return parse_basic_str(src, pos, multiline=False) -def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, list]: +def parse_array( + src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int +) -> tuple[Pos, list]: pos += 1 array: list = [] @@ -487,7 +501,7 @@ def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, list] if src.startswith("]", pos): return pos + 1, array while True: - pos, val = parse_value(src, pos, parse_float) + pos, val = parse_value(src, pos, parse_float, nest_lvl) array.append(val) pos = skip_comments_and_array_ws(src, pos) @@ -503,7 +517,9 @@ def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, list] return pos + 1, array -def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, dict]: +def parse_inline_table( + src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int +) -> tuple[Pos, dict]: pos += 1 nested_dict = NestedDict() flags = Flags() @@ -512,7 +528,7 @@ def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos if src.startswith("}", pos): return pos + 1, nested_dict.dict while True: - pos, key, value = parse_key_value_pair(src, pos, parse_float) + pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl) key_parent, key_stem = key[:-1], key[-1] if flags.is_(key, Flags.FROZEN): raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos) @@ -654,8 +670,16 @@ def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]: def parse_value( # noqa: C901 - src: str, pos: Pos, parse_float: ParseFloat + src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int ) -> tuple[Pos, Any]: + if nest_lvl > MAX_INLINE_NESTING: + # Pure Python should have raised RecursionError already. + # This ensures mypyc binaries eventually do the same. + raise RecursionError( # pragma: no cover + "TOML inline arrays/tables are nested more than the allowed" + f" {MAX_INLINE_NESTING} levels" + ) + try: char: str | None = src[pos] except IndexError: @@ -685,11 +709,11 @@ def parse_value( # noqa: C901 # Arrays if char == "[": - return parse_array(src, pos, parse_float) + return parse_array(src, pos, parse_float, nest_lvl + 1) # Inline tables if char == "{": - return parse_inline_table(src, pos, parse_float) + return parse_inline_table(src, pos, parse_float, nest_lvl + 1) # Dates and times datetime_match = RE_DATETIME.match(src, pos) diff --git a/tests/test_error.py b/tests/test_error.py index 3a85874..f5e489f 100644 --- a/tests/test_error.py +++ b/tests/test_error.py @@ -42,15 +42,18 @@ def test_invalid_char_quotes(self): def test_type_error(self): with self.assertRaises(TypeError) as exc_info: tomllib.loads(b"v = 1") # type: ignore[arg-type] - self.assertEqual(str(exc_info.exception), "Expected str object, not 'bytes'") + # Mypyc extension leads to different message than pure Python + self.assertIn( + str(exc_info.exception), + ("Expected str object, not 'bytes'", "str object expected; got bytes"), + ) with self.assertRaises(TypeError) as exc_info: tomllib.loads(False) # type: ignore[arg-type] - self.assertEqual(str(exc_info.exception), "Expected str object, not 'bool'") - - def test_module_name(self): - self.assertEqual( - tomllib.TOMLDecodeError("", "", 0).__module__, tomllib.__name__ + # Mypyc extension leads to different message than pure Python + self.assertIn( + str(exc_info.exception), + ("Expected str object, not 'bool'", "str object expected; got bool"), ) def test_invalid_parse_float(self): diff --git a/tests/test_misc.py b/tests/test_misc.py index 0dad492..6d0c463 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -6,6 +6,7 @@ import datetime from decimal import Decimal as D from pathlib import Path +import sys import tempfile import unittest @@ -33,9 +34,13 @@ def test_incorrect_load(self): with open(file_path, "r") as txt_f: with self.assertRaises(TypeError) as exc_info: tomllib.load(txt_f) # type: ignore[arg-type] - self.assertEqual( + # Mypyc extension leads to different message than pure Python + self.assertIn( str(exc_info.exception), - "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`", + ( + "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`", # noqa: E501 + "bytes object expected; got str", + ), ) def test_parse_float(self): @@ -99,7 +104,27 @@ def test_inline_array_recursion_limit(self): recursive_array_toml = "arr = " + nest_count * "[" + nest_count * "]" tomllib.loads(recursive_array_toml) + nest_count = sys.getrecursionlimit() + 2 + recursive_array_toml = "arr = " + nest_count * "[" + nest_count * "]" + with self.assertRaisesRegex( + RecursionError, + r"maximum recursion depth exceeded" + r"|" + r"TOML inline arrays/tables are nested more than the allowed [0-9]+ levels", + ): + tomllib.loads(recursive_array_toml) + def test_inline_table_recursion_limit(self): nest_count = 310 recursive_table_toml = nest_count * "key = {" + nest_count * "}" tomllib.loads(recursive_table_toml) + + nest_count = sys.getrecursionlimit() + 2 + recursive_table_toml = nest_count * "key = {" + nest_count * "}" + with self.assertRaisesRegex( + RecursionError, + r"maximum recursion depth exceeded" + r"|" + r"TOML inline arrays/tables are nested more than the allowed [0-9]+ levels", + ): + tomllib.loads(recursive_table_toml)