Merge pull request #4263 from tybug/next

Optimizations
HypothesisWorks · Feb 6, 2025 · ffdf391
2 parents 7cb0989 + d598f6a
commit ffdf391
Show file tree

Hide file tree

Showing 9 changed files with 98 additions and 110 deletions.
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,3 @@
+RELEASE_TYPE: patch
+
+Optimize performance (improves speed by ~5%) and clarify the wording in an error message.
diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py
@@ -23,7 +23,7 @@
 import warnings
 import zlib
 from collections import defaultdict
-from collections.abc import Coroutine, Generator, Hashable, Sequence
+from collections.abc import Coroutine, Generator, Hashable, Iterable, Sequence
 from functools import partial
 from random import Random
 from typing import (
@@ -321,7 +321,7 @@ def accept(test):
     return accept
 
 
-def encode_failure(choices):
+def encode_failure(choices: Iterable[ChoiceT]) -> bytes:
     blob = choices_to_bytes(choices)
     compressed = zlib.compress(blob)
     if len(compressed) < len(blob):
@@ -687,7 +687,7 @@ def skip_exceptions_to_reraise():
     return tuple(sorted(exceptions, key=str))
 
 
-def failure_exceptions_to_catch():
+def failure_exceptions_to_catch() -> tuple[type[BaseException], ...]:
     """Return a tuple of exceptions meaning 'this test has failed', to catch.
 
     This is intended to cover most common test runners; if you would

diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py
@@ -492,8 +492,7 @@ def __len__(self) -> int:
         return self.__length
 
     def __getitem__(self, i: int) -> Example:
-        assert isinstance(i, int)
-        n = len(self)
+        n = self.__length
         if i < -n or i >= n:
             raise IndexError(f"Index {i} out of range [-{n}, {n})")
         if i < 0:

diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/junkdrawer.py b/hypothesis-python/src/hypothesis/internal/conjecture/junkdrawer.py
@@ -17,18 +17,9 @@
 import sys
 import time
 import warnings
+from array import ArrayType
 from collections.abc import Iterable, Iterator, Sequence
-from typing import (
-    Any,
-    Callable,
-    Generic,
-    List,
-    Literal,
-    Optional,
-    TypeVar,
-    Union,
-    overload,
-)
+from typing import Any, Callable, Generic, Literal, Optional, TypeVar, Union, overload
 
 from sortedcontainers import SortedList
 
@@ -41,7 +32,7 @@
 
 def array_or_list(
     code: str, contents: Iterable[int]
-) -> "Union[List[int], array.ArrayType[int]]":
+) -> Union[list[int], "ArrayType[int]"]:
     if code == "O":
         return list(contents)
     return array.array(code, contents)
@@ -82,7 +73,7 @@ class IntList(Sequence[int]):
 
     __slots__ = ("__underlying",)
 
-    __underlying: "Union[List[int], array.ArrayType[int]]"
+    __underlying: Union[list[int], "ArrayType[int]"]
 
     def __init__(self, values: Sequence[int] = ()):
         for code in ARRAY_CODES:
@@ -116,11 +107,13 @@ def __len__(self) -> int:
     def __getitem__(self, i: int) -> int: ...  # pragma: no cover
 
     @overload
-    def __getitem__(self, i: slice) -> "IntList": ...  # pragma: no cover
+    def __getitem__(
+        self, i: slice
+    ) -> Union[list[int], "ArrayType[int]"]: ...  # pragma: no cover
 
-    def __getitem__(self, i: Union[int, slice]) -> "Union[int, IntList]":
-        if isinstance(i, slice):
-            return IntList(self.__underlying[i])
+    def __getitem__(
+        self, i: Union[int, slice]
+    ) -> Union[int, list[int], "ArrayType[int]"]:
         return self.__underlying[i]
 
     def __delitem__(self, i: Union[int, slice]) -> None:

diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/utils.py b/hypothesis-python/src/hypothesis/internal/conjecture/utils.py
@@ -16,7 +16,7 @@
 from collections import OrderedDict, abc
 from collections.abc import Sequence
 from functools import lru_cache
-from typing import TYPE_CHECKING, List, Optional, TypeVar, Union
+from typing import TYPE_CHECKING, Optional, TypeVar, Union
 
 from hypothesis.errors import InvalidArgument
 from hypothesis.internal.compat import int_from_bytes
@@ -72,7 +72,7 @@ def check_sample(
             )
     elif not isinstance(values, (OrderedDict, abc.Sequence, enum.EnumMeta)):
         raise InvalidArgument(
-            f"Cannot sample from {values!r}, not an ordered collection. "
+            f"Cannot sample from {values!r} because it is not an ordered collection. "
             f"Hypothesis goes to some length to ensure that the {strategy_name} "
             "strategy has stable results between runs. To replay a saved "
             "example, the sampled values must have the same iteration order "
@@ -87,6 +87,73 @@ def check_sample(
     return tuple(values)
 
 
+@lru_cache(64)
+def compute_sampler_table(weights: tuple[float, ...]) -> list[tuple[int, int, float]]:
+    n = len(weights)
+    table: list[list[int | float | None]] = [[i, None, None] for i in range(n)]
+    total = sum(weights)
+    num_type = type(total)
+
+    zero = num_type(0)  # type: ignore
+    one = num_type(1)  # type: ignore
+
+    small: list[int] = []
+    large: list[int] = []
+
+    probabilities = [w / total for w in weights]
+    scaled_probabilities: list[float] = []
+
+    for i, alternate_chance in enumerate(probabilities):
+        scaled = alternate_chance * n
+        scaled_probabilities.append(scaled)
+        if scaled == 1:
+            table[i][2] = zero
+        elif scaled < 1:
+            small.append(i)
+        else:
+            large.append(i)
+    heapq.heapify(small)
+    heapq.heapify(large)
+
+    while small and large:
+        lo = heapq.heappop(small)
+        hi = heapq.heappop(large)
+
+        assert lo != hi
+        assert scaled_probabilities[hi] > one
+        assert table[lo][1] is None
+        table[lo][1] = hi
+        table[lo][2] = one - scaled_probabilities[lo]
+        scaled_probabilities[hi] = (
+            scaled_probabilities[hi] + scaled_probabilities[lo]
+        ) - one
+
+        if scaled_probabilities[hi] < 1:
+            heapq.heappush(small, hi)
+        elif scaled_probabilities[hi] == 1:
+            table[hi][2] = zero
+        else:
+            heapq.heappush(large, hi)
+    while large:
+        table[large.pop()][2] = zero
+    while small:
+        table[small.pop()][2] = zero
+
+    new_table: list[tuple[int, int, float]] = []
+    for base, alternate, alternate_chance in table:
+        assert isinstance(base, int)
+        assert isinstance(alternate, int) or alternate is None
+        assert alternate_chance is not None
+        if alternate is None:
+            new_table.append((base, base, alternate_chance))
+        elif alternate < base:
+            new_table.append((alternate, base, one - alternate_chance))
+        else:
+            new_table.append((base, alternate, alternate_chance))
+    new_table.sort()
+    return new_table
+
+
 class Sampler:
     """Sampler based on Vose's algorithm for the alias method. See
     http://www.keithschwarz.com/darts-dice-coins/ for a good explanation.
@@ -109,69 +176,7 @@ class Sampler:
 
     def __init__(self, weights: Sequence[float], *, observe: bool = True):
         self.observe = observe
-
-        n = len(weights)
-        table: "list[list[int | float | None]]" = [[i, None, None] for i in range(n)]
-        total = sum(weights)
-        num_type = type(total)
-
-        zero = num_type(0)  # type: ignore
-        one = num_type(1)  # type: ignore
-
-        small: "List[int]" = []
-        large: "List[int]" = []
-
-        probabilities = [w / total for w in weights]
-        scaled_probabilities: "List[float]" = []
-
-        for i, alternate_chance in enumerate(probabilities):
-            scaled = alternate_chance * n
-            scaled_probabilities.append(scaled)
-            if scaled == 1:
-                table[i][2] = zero
-            elif scaled < 1:
-                small.append(i)
-            else:
-                large.append(i)
-        heapq.heapify(small)
-        heapq.heapify(large)
-
-        while small and large:
-            lo = heapq.heappop(small)
-            hi = heapq.heappop(large)
-
-            assert lo != hi
-            assert scaled_probabilities[hi] > one
-            assert table[lo][1] is None
-            table[lo][1] = hi
-            table[lo][2] = one - scaled_probabilities[lo]
-            scaled_probabilities[hi] = (
-                scaled_probabilities[hi] + scaled_probabilities[lo]
-            ) - one
-
-            if scaled_probabilities[hi] < 1:
-                heapq.heappush(small, hi)
-            elif scaled_probabilities[hi] == 1:
-                table[hi][2] = zero
-            else:
-                heapq.heappush(large, hi)
-        while large:
-            table[large.pop()][2] = zero
-        while small:
-            table[small.pop()][2] = zero
-
-        self.table: "list[tuple[int, int, float]]" = []
-        for base, alternate, alternate_chance in table:
-            assert isinstance(base, int)
-            assert isinstance(alternate, int) or alternate is None
-            assert alternate_chance is not None
-            if alternate is None:
-                self.table.append((base, base, alternate_chance))
-            elif alternate < base:
-                self.table.append((alternate, base, one - alternate_chance))
-            else:
-                self.table.append((base, alternate, alternate_chance))
-        self.table.sort()
+        self.table = compute_sampler_table(tuple(weights))
 
     def sample(
         self,

diff --git a/hypothesis-python/src/hypothesis/internal/escalation.py b/hypothesis-python/src/hypothesis/internal/escalation.py
@@ -16,7 +16,7 @@
 from functools import partial
 from inspect import getframeinfo
 from pathlib import Path
-from types import ModuleType
+from types import ModuleType, TracebackType
 from typing import Callable, NamedTuple, Optional
 
 import hypothesis
@@ -57,7 +57,9 @@ def accept(filepath: str) -> bool:
 is_hypothesis_file = belongs_to(hypothesis)
 
 
-def get_trimmed_traceback(exception=None):
+def get_trimmed_traceback(
+    exception: Optional[BaseException] = None,
+) -> Optional[TracebackType]:
     """Return the current traceback, minus any frames added by Hypothesis."""
     if exception is None:
         _, exception, tb = sys.exc_info()
@@ -67,9 +69,10 @@ def get_trimmed_traceback(exception=None):
     # was raised inside Hypothesis. Additionally, the environment variable
     # HYPOTHESIS_NO_TRACEBACK_TRIM is respected if nonempty, because verbose
     # mode is prohibitively slow when debugging strategy recursion errors.
+    assert hypothesis.settings.default is not None
     if (
         tb is None
-        or os.environ.get("HYPOTHESIS_NO_TRACEBACK_TRIM", None)
+        or os.environ.get("HYPOTHESIS_NO_TRACEBACK_TRIM")
         or hypothesis.settings.default.verbosity >= hypothesis.Verbosity.debug
         or (
             is_hypothesis_file(traceback.extract_tb(tb)[-1][0])

diff --git a/hypothesis-python/src/hypothesis/internal/reflection.py b/hypothesis-python/src/hypothesis/internal/reflection.py
@@ -82,7 +82,7 @@ def _clean_source(src: str) -> bytes:
     return "\n".join(x.rstrip() for x in src.splitlines() if x.rstrip()).encode()
 
 
-def function_digest(function):
+def function_digest(function: Any) -> bytes:
     """Returns a string that is stable across multiple invocations across
     multiple processes and is prone to changing significantly in response to
     minor changes to the function.

diff --git a/hypothesis-python/tests/conjecture/test_junkdrawer.py b/hypothesis-python/tests/conjecture/test_junkdrawer.py
@@ -169,9 +169,9 @@ def test_int_list_extend():
 
 def test_int_list_slice():
     x = IntList([1, 2])
-    assert x[:1] == IntList([1])
-    assert x[0:2] == IntList([1, 2])
-    assert x[1:] == IntList([2])
+    assert list(x[:1]) == [1]
+    assert list(x[0:2]) == [1, 2]
+    assert list(x[1:]) == [2]
 
 
 def test_int_list_del():

diff --git a/hypothesis-python/tests/nocover/test_conjecture_int_list.py b/hypothesis-python/tests/nocover/test_conjecture_int_list.py
@@ -23,16 +23,6 @@ def valid_index(draw):
     return draw(st.integers(0, len(machine.model) - 1))
 
 
-@st.composite
-def valid_slice(draw):
-    machine = draw(st.runner())
-    result = [
-        draw(st.integers(0, max(3, len(machine.model) * 2 - 1))) for _ in range(2)
-    ]
-    result.sort()
-    return slice(*result)
-
-
 class IntListRules(RuleBasedStateMachine):
     @initialize(ls=st.lists(INTEGERS))
     def starting_lists(self, ls):
@@ -52,16 +42,11 @@ def append(self, n):
         self.model.append(n)
         self.target.append(n)
 
-    @rule(i=valid_index() | valid_slice())
+    @rule(i=valid_index())
     def delete(self, i):
         del self.model[i]
         del self.target[i]
 
-    @rule(sl=valid_slice())
-    def slice(self, sl):
-        self.model = self.model[sl]
-        self.target = self.target[sl]
-
     @rule(i=valid_index())
     def agree_on_values(self, i):
         assert self.model[i] == self.target[i]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		RELEASE_TYPE: patch

		Optimize performance (improves speed by ~5%) and clarify the wording in an error message.