Skip to content

Commit

Permalink
Merge pull request #3605 from andreareina/data-generation-runtime
Browse files Browse the repository at this point in the history
  • Loading branch information
Zac-HD authored Apr 2, 2023
2 parents a147989 + 3c0ec72 commit 8f1f79e
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 20 deletions.
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ their individual contributions.
* `Alex Stapleton <https://www.github.com/public>`_
* `Alex Willmer <https://github.com/moreati>`_ (alex@moreati.org.uk)
* `Andrea Pierré <https://www.github.com/kir0ul>`_
* `Andrea Reina <https://www.github.com/andreareina>`_
* `Ben Anhalt <https://github.com/benanhalt>`_
* `Ben Peterson <https://github.com/killthrush>`_ (killthrush@hotmail.com)
* `Benjamin Lee <https://github.com/Benjamin-Lee>`_ (benjamindlee@me.com)
Expand Down
5 changes: 5 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
RELEASE_TYPE: patch

This patch clarifies the reporting of time spent generating data. A
simple arithmetic mean of the percentage of time spent can be
misleading; reporting the actual time spent avoids misunderstandings.
36 changes: 21 additions & 15 deletions hypothesis-python/src/hypothesis/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
# obtain one at https://mozilla.org/MPL/2.0/.

import math
import statistics
from collections import Counter

from hypothesis.utils.dynamicvariables import DynamicVariable
Expand Down Expand Up @@ -41,6 +40,24 @@ def describe_targets(best_targets):
return lines


def format_ms(times):
    """Format `times` into a string representing approximate milliseconds.

    `times` is an iterable of durations in seconds.  The result summarises
    the spread between (roughly) the 5th and 95th percentile, with each
    bound truncated to a whole number of milliseconds:

    * ``"< 1ms"`` when the upper bound truncates to zero,
    * ``"~ Nms"`` when both bounds truncate to the same value ``N``,
    * ``"~ A-B ms"`` otherwise.

    Returns ``"NaN ms"`` when `times` is empty or contains a NaN, because
    no meaningful range can be computed in either case.
    """
    # Materialise first: callers may pass a one-shot generator.
    ordered = sorted(times)
    # Indexing an empty list raises IndexError and int(nan) raises
    # ValueError, so report a placeholder instead of crashing the report.
    if not ordered or any(math.isnan(t) for t in ordered):
        return "NaN ms"
    last = len(ordered) - 1
    # math.floor / math.ceil already return ints, so they index directly.
    # Rounding the two indices outwards keeps both inside [0, last].
    lower = int(ordered[math.floor(last * 0.05)] * 1000)
    upper = int(ordered[math.ceil(last * 0.95)] * 1000)
    if upper == 0:
        return "< 1ms"
    if lower == upper:
        return f"~ {lower}ms"
    return f"~ {lower}-{upper} ms"


def describe_statistics(stats_dict):
"""Return a multi-line string describing the passed run statistics.
Expand All @@ -63,22 +80,11 @@ def describe_statistics(stats_dict):
if not cases:
continue
statuses = Counter(t["status"] for t in cases)
runtimes = sorted(t["runtime"] for t in cases)
n = max(0, len(runtimes) - 1)
lower = int(runtimes[int(math.floor(n * 0.05))] * 1000)
upper = int(runtimes[int(math.ceil(n * 0.95))] * 1000)
if upper == 0:
ms = "< 1ms"
elif lower == upper:
ms = f"~ {lower}ms"
else:
ms = f"{lower}-{upper} ms"
drawtime_percent = 100 * statistics.mean(
t["drawtime"] / t["runtime"] if t["runtime"] > 0 else 0 for t in cases
)
runtime_ms = format_ms(t["runtime"] for t in cases)
drawtime_ms = format_ms(t["drawtime"] for t in cases)
lines.append(
f" - during {phase} phase ({d['duration-seconds']:.2f} seconds):\n"
f" - Typical runtimes: {ms}, ~ {drawtime_percent:.0f}% in data generation\n"
f" - Typical runtimes: {runtime_ms}, of which {drawtime_ms} in data generation\n"
f" - {statuses['valid']} passing examples, {statuses['interesting']} "
f"failing examples, {statuses['invalid'] + statuses['overrun']} invalid examples"
)
Expand Down
10 changes: 5 additions & 5 deletions hypothesis-python/tests/cover/test_statistical_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

import re
import time
import traceback

Expand Down Expand Up @@ -145,7 +146,7 @@ def test(i):

@pytest.mark.parametrize("draw_delay", [False, True])
@pytest.mark.parametrize("test_delay", [False, True])
def test_draw_time_percentage(draw_delay, test_delay):
def test_draw_timing(draw_delay, test_delay):
time.freeze()

@st.composite
Expand All @@ -161,11 +162,10 @@ def test(_):

stats = describe_statistics(call_for_statistics(test))
if not draw_delay:
assert "~ 0%" in stats
elif test_delay:
assert "~ 50%" in stats
assert "< 1ms" in stats
else:
assert "~ 100%" in stats
match = re.search(r"of which ~ (?P<gentime>\d+)", stats)
assert 49 <= int(match.group("gentime")) <= 51


def test_has_lambdas_in_output():
Expand Down

0 comments on commit 8f1f79e

Please sign in to comment.