Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clarify time spent in data generation #3605

Merged
merged 3 commits into from
Apr 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ their individual contributions.
* `Alex Stapleton <https://www.github.com/public>`_
* `Alex Willmer <https://github.com/moreati>`_ (alex@moreati.org.uk)
* `Andrea Pierré <https://www.github.com/kir0ul>`_
* `Andrea Reina <https://www.github.com/andreareina>`_
* `Ben Anhalt <https://github.com/benanhalt>`_
* `Ben Peterson <https://github.com/killthrush>`_ (killthrush@hotmail.com)
* `Benjamin Lee <https://github.com/Benjamin-Lee>`_ (benjamindlee@me.com)
Expand Down
5 changes: 5 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
RELEASE_TYPE: patch

This patch clarifies the reporting of time spent generating data. A
simple arithmetic mean of the percentage of time spent can be
misleading; reporting the actual time spent avoids misunderstandings.
36 changes: 21 additions & 15 deletions hypothesis-python/src/hypothesis/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
# obtain one at https://mozilla.org/MPL/2.0/.

import math
import statistics
from collections import Counter

from hypothesis.utils.dynamicvariables import DynamicVariable
Expand Down Expand Up @@ -41,6 +40,24 @@ def describe_targets(best_targets):
return lines


def format_ms(times):
    """Format `times` into a string representing approximate milliseconds.

    `times` is an iterable of durations in seconds.  It is sorted once, so a
    generator is acceptable.  The summary spans roughly the 5th to the 95th
    percentile, each truncated to whole milliseconds:

    * ``"< 1ms"`` when the upper bound truncates to zero;
    * ``"~ Nms"`` when both bounds truncate to the same value;
    * ``"~ A-B ms"`` otherwise.

    If `times` is empty or contains a NaN, ``"NaN ms"`` is returned instead of
    raising, because no meaningful range can be reported in either case.
    """
    ordered = sorted(times)
    # Empty input has no element to index, and NaN both breaks the ordering
    # produced by sorted() and cannot be converted to int.
    if not ordered or any(math.isnan(t) for t in ordered):
        return "NaN ms"
    last = len(ordered) - 1
    # floor/ceil widen the window outwards so that small samples still report
    # their true minimum and maximum.
    lower = int(ordered[math.floor(last * 0.05)] * 1000)
    upper = int(ordered[math.ceil(last * 0.95)] * 1000)
    if upper == 0:
        return "< 1ms"
    if lower == upper:
        return f"~ {lower}ms"
    return f"~ {lower}-{upper} ms"


def describe_statistics(stats_dict):
"""Return a multi-line string describing the passed run statistics.

Expand All @@ -63,22 +80,11 @@ def describe_statistics(stats_dict):
if not cases:
continue
statuses = Counter(t["status"] for t in cases)
runtimes = sorted(t["runtime"] for t in cases)
n = max(0, len(runtimes) - 1)
lower = int(runtimes[int(math.floor(n * 0.05))] * 1000)
upper = int(runtimes[int(math.ceil(n * 0.95))] * 1000)
if upper == 0:
ms = "< 1ms"
elif lower == upper:
ms = f"~ {lower}ms"
else:
ms = f"{lower}-{upper} ms"
drawtime_percent = 100 * statistics.mean(
t["drawtime"] / t["runtime"] if t["runtime"] > 0 else 0 for t in cases
)
runtime_ms = format_ms(t["runtime"] for t in cases)
drawtime_ms = format_ms(t["drawtime"] for t in cases)
lines.append(
f" - during {phase} phase ({d['duration-seconds']:.2f} seconds):\n"
f" - Typical runtimes: {ms}, ~ {drawtime_percent:.0f}% in data generation\n"
f" - Typical runtimes: {runtime_ms}, of which {drawtime_ms} in data generation\n"
f" - {statuses['valid']} passing examples, {statuses['interesting']} "
f"failing examples, {statuses['invalid'] + statuses['overrun']} invalid examples"
)
Expand Down
10 changes: 5 additions & 5 deletions hypothesis-python/tests/cover/test_statistical_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

import re
import time
import traceback

Expand Down Expand Up @@ -145,7 +146,7 @@ def test(i):

@pytest.mark.parametrize("draw_delay", [False, True])
@pytest.mark.parametrize("test_delay", [False, True])
def test_draw_time_percentage(draw_delay, test_delay):
def test_draw_timing(draw_delay, test_delay):
time.freeze()

@st.composite
Expand All @@ -161,11 +162,10 @@ def test(_):

stats = describe_statistics(call_for_statistics(test))
if not draw_delay:
assert "~ 0%" in stats
elif test_delay:
assert "~ 50%" in stats
assert "< 1ms" in stats
else:
assert "~ 100%" in stats
match = re.search(r"of which ~ (?P<gentime>\d+)", stats)
assert 49 <= int(match.group("gentime")) <= 51


def test_has_lambdas_in_output():
Expand Down