Merge pull request #28 from HeyHugo/master
Merged HeyHugo's improvements to the ramping code: better code separation and a better UI.
heyman committed Jul 1, 2012
2 parents f29fb95 + 525f413 commit eb4a1f8
Showing 6 changed files with 156 additions and 145 deletions.
10 changes: 0 additions & 10 deletions locust/main.py
@@ -382,16 +382,6 @@ def main():
runners.locust_runner = SlaveLocustRunner(locust_classes, options.hatch_rate, options.num_clients, num_requests=options.num_requests, host=options.host, master_host=options.master_host)
main_greenlet = runners.locust_runner.greenlet

if options.ramp:
import rampstats
from rampstats import on_request_success, on_report_to_master, on_slave_report
if options.slave:
events.report_to_master += on_report_to_master
if options.master:
events.slave_report += on_slave_report
else:
events.request_success += on_request_success

if options.print_stats or (options.no_web and not options.slave):
# spawn stats printing greenlet
gevent.spawn(stats_printer)
143 changes: 143 additions & 0 deletions locust/ramping.py
@@ -0,0 +1,143 @@
"""
This module adds a tool to Locust that helps find the highest number of simulated users a system can handle.
The thresholds used by this tool are configured in the web user interface.
When this module is used, additional response time data is recorded so that a percentile of the
current response times can be calculated, taking only the response times recorded within a moving
time window into account.
"""

from stats import percentile, RequestStats
from runners import locust_runner, DistributedLocustRunner, SLAVE_REPORT_INTERVAL, STATE_HATCHING
from collections import deque
import events
import math
import gevent
import logging

logger = logging.getLogger(__name__)

response_times = deque([])

# Are we running in distributed mode or not?
is_distributed = isinstance(locust_runner, DistributedLocustRunner)

# The time window in seconds that current_percentile uses data from
PERCENTILE_TIME_WINDOW = 15.0

def current_percentile(percent):
if is_distributed:
# Flatten out the deque of lists and calculate the percentile to be returned
return percentile(sorted([item for sublist in response_times for item in sublist]), percent)
else:
return percentile(sorted(response_times), percent)

def on_request_success_ramping(_, _1, response_time, _2):
if is_distributed:
response_times.append(response_time)
else:
response_times.append(response_time)

# remove from the queue
rps = RequestStats.sum_stats().current_rps
if len(response_times) > rps*PERCENTILE_TIME_WINDOW:
for i in xrange(len(response_times) - int(math.ceil(rps*PERCENTILE_TIME_WINDOW))):
response_times.popleft()

def on_report_to_master_ramping(_, data):
global response_times
data["current_responses"] = response_times
response_times = []

def on_slave_report_ramping(_, data):
if "current_responses" in data:
response_times.append(data["current_responses"])

# remove from the queue
slaves = locust_runner.slave_count
response_times_per_slave_count = PERCENTILE_TIME_WINDOW/SLAVE_REPORT_INTERVAL
if len(response_times) > slaves * response_times_per_slave_count:
response_times.popleft()

def register_listeners():
events.report_to_master += on_report_to_master_ramping
events.slave_report += on_slave_report_ramping
events.request_success += on_request_success_ramping

def remove_listeners():
events.report_to_master.__idec__(on_report_to_master_ramping)
events.slave_report.__idec__(on_slave_report_ramping)
events.request_success.__idec__(on_request_success_ramping)

def start_ramping(hatch_rate=None, max_locusts=1000, hatch_stride=100,
percent=0.95, response_time_limit=2000, acceptable_fail=0.05,
precision=200, start_count=0, calibration_time=15):

register_listeners()

def ramp_up(clients, hatch_stride, boundery_found=False):
while True:
if locust_runner.state != STATE_HATCHING:
if locust_runner.num_clients >= max_locusts:
logger.info("Ramp up halted; Max locusts limit reached: %d" % max_locusts)
return ramp_down(clients, hatch_stride)

gevent.sleep(calibration_time)
fail_ratio = RequestStats.sum_stats().fail_ratio
if fail_ratio > acceptable_fail:
logger.info("Ramp up halted; Acceptable fail ratio %d%% exceeded with fail ratio %d%%" % (acceptable_fail*100, fail_ratio*100))
return ramp_down(clients, hatch_stride)

p = current_percentile(percent)
if p >= response_time_limit:
logger.info("Ramp up halted; Percentile response times getting high: %d" % p)
return ramp_down(clients, hatch_stride)

if boundery_found and hatch_stride <= precision:
logger.info("Sweet spot found! Ramping stopped at %i locusts" % (locust_runner.num_clients))
return remove_listeners()

logger.info("Ramping up...")
if boundery_found:
hatch_stride = max((hatch_stride/2),precision)
clients += hatch_stride
locust_runner.start_hatching(clients, locust_runner.hatch_rate)
gevent.sleep(1)

def ramp_down(clients, hatch_stride):
while True:
if locust_runner.state != STATE_HATCHING:
if locust_runner.num_clients < max_locusts:
gevent.sleep(calibration_time)
fail_ratio = RequestStats.sum_stats().fail_ratio
if fail_ratio <= acceptable_fail:
p = current_percentile(percent)
if p <= response_time_limit:
if hatch_stride <= precision:
logger.info("Sweet spot found! Ramping stopped at %i locusts" % (locust_runner.num_clients))
return remove_listeners()

logger.info("Ramping up...")
hatch_stride = max((hatch_stride/2),precision)
clients += hatch_stride
locust_runner.start_hatching(clients, locust_runner.hatch_rate)
return ramp_up(clients, hatch_stride, True)

logger.info("Ramping down...")
hatch_stride = max((hatch_stride/2),precision)
clients -= hatch_stride
if clients > 0:
locust_runner.start_hatching(clients, locust_runner.hatch_rate)
else:
logger.warning("No responses met the ramping thresholds, check your ramp configuration, locustfile and \"--host\" address")
logger.info("RAMING STOPPED")
return remove_listeners()
gevent.sleep(1)

if hatch_rate:
locust_runner.hatch_rate = hatch_rate
if start_count > 0:
locust_runner.start_hatching(start_count, hatch_rate)
logger.info("RAMPING STARTED")
ramp_up(start_count, hatch_stride)
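
To make the moving-window percentile described in the docstring above concrete, here is a rough standalone sketch of the same idea. It is not part of the commit: the percentile helper below is a simplified stand-in for the one imported from locust's stats module, and the current request rate is passed in directly instead of being read from RequestStats.sum_stats().current_rps.

import math
from collections import deque

PERCENTILE_TIME_WINDOW = 15.0   # seconds of data to keep, as in ramping.py
response_times = deque()

def percentile(sorted_values, percent):
    # Simplified nearest-rank percentile; a stand-in for locust's stats.percentile
    if not sorted_values:
        return 0
    index = int(math.ceil(percent * len(sorted_values))) - 1
    return sorted_values[max(index, 0)]

def record(response_time, current_rps):
    # Mirrors on_request_success_ramping in the non-distributed case:
    # append the sample, then trim the deque so it only holds roughly
    # current_rps * PERCENTILE_TIME_WINDOW of the most recent samples.
    response_times.append(response_time)
    max_samples = int(math.ceil(current_rps * PERCENTILE_TIME_WINDOW))
    while len(response_times) > max_samples:
        response_times.popleft()

def current_percentile(percent):
    return percentile(sorted(response_times), percent)

# At ~10 requests/s only the last ~150 samples influence the value
for i in range(1000):
    record(100 + (i % 50), current_rps=10)
print(current_percentile(0.95))

The distributed case in ramping.py works the same way, except that each slave ships its recent samples to the master in on_report_to_master_ramping and the master keeps one list per slave report in the deque.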
48 changes: 0 additions & 48 deletions locust/rampstats.py

This file was deleted.

78 changes: 1 addition & 77 deletions locust/runners.py
@@ -131,7 +131,7 @@ def kill_locusts(self, kill_count):
bucket = self.weight_locusts(kill_count)
kill_count = len(bucket)
self.num_clients -= kill_count
logger.debug("killing locusts: %i", kill_count)
logger.info("Killing %i locusts" % kill_count)
dying = []
for g in self.locusts:
for l in bucket:
@@ -175,82 +175,6 @@ def stop(self):
self.locusts.kill(block=True)
self.state = STATE_STOPPED


def start_ramping(self, hatch_rate=None, max_locusts=1000, hatch_stride=100,
percent=0.95, response_time_limit=2000, acceptable_fail=0.05,
precision=200, start_count=0, calibration_time=15):

from rampstats import current_percentile
if hatch_rate:
self.hatch_rate = hatch_rate

def ramp_down_help(clients, hatch_stride):
print "ramping down..."
hatch_stride = max(hatch_stride/2, precision)
clients -= hatch_stride
self.start_hatching(clients, self.hatch_rate)
return clients, hatch_stride

def ramp_up(clients, hatch_stride, boundery_found=False):
while True:
if self.state != STATE_HATCHING:
if self.num_clients >= max_locusts:
print "ramp up stopped due to max locusts limit reached:", max_locusts
client, hatch_stride = ramp_down_help(clients, hatch_stride)
return ramp_down(clients, hatch_stride)
gevent.sleep(calibration_time)
fail_ratio = RequestStats.sum_stats().fail_ratio
if fail_ratio > acceptable_fail:
print "ramp up stopped due to acceptable fail ratio %d%% exceeded with fail ratio %d%%" % (acceptable_fail*100, fail_ratio*100)
client, hatch_stride = ramp_down_help(clients, hatch_stride)
return ramp_down(clients, hatch_stride)
p = current_percentile(percent)
if p >= response_time_limit:
print "ramp up stopped due to percentile response times getting high:", p
client, hatch_stride = ramp_down_help(clients, hatch_stride)
return ramp_down(clients, hatch_stride)
if boundery_found and hatch_stride <= precision:
print "sweet spot found, ramping stopped!"
return
print "ramping up..."
if boundery_found:
hatch_stride = max((hatch_stride/2),precision)
clients += hatch_stride
self.start_hatching(clients, self.hatch_rate)
gevent.sleep(1)

def ramp_down(clients, hatch_stride):
while True:
if self.state != STATE_HATCHING:
if self.num_clients < max_locusts:
gevent.sleep(calibration_time)
fail_ratio = RequestStats.sum_stats().fail_ratio
if fail_ratio <= acceptable_fail:
p = current_percentile(percent)
if p <= response_time_limit:
if hatch_stride <= precision:
print "sweet spot found, ramping stopped!"
return
print "ramping up..."
hatch_stride = max((hatch_stride/2),precision)
clients += hatch_stride
self.start_hatching(clients, self.hatch_rate)
return ramp_up(clients, hatch_stride, True)
print "ramping down..."
hatch_stride = max((hatch_stride/2),precision)
clients -= hatch_stride
if clients > 0:
self.start_hatching(clients, self.hatch_rate)
else:
print "WARNING: no responses met the ramping thresholds, check your ramp configuration, locustfile and \"--host\" address"
print "ramping stopped!"
return
gevent.sleep(1)

if start_count > self.num_clients:
self.start_hatching(start_count, hatch_rate)
ramp_up(start_count, hatch_stride)

class LocalLocustRunner(LocustRunner):
def start_hatching(self, locust_count=None, hatch_rate=None, wait=False):
self.hatching_greenlet = gevent.spawn(lambda: super(LocalLocustRunner, self).start_hatching(locust_count, hatch_rate, wait=wait))
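
The removed LocustRunner.start_ramping above and the new locust/ramping.py implement the same search strategy: step the client count by hatch_stride, and once a threshold boundary has been crossed, halve the stride (never below precision) until it shrinks to precision. The following toy simulation is not locust code; simulate_ramp and system_limit are made up purely to show how quickly that arithmetic converges, with calibration sleeps and statistics gathering left out.

def simulate_ramp(system_limit, clients=0, hatch_stride=100, precision=20, max_locusts=1000):
    # 'system_limit' stands in for the point where a threshold
    # (fail ratio or percentile response time) would be exceeded.
    boundary_found = False
    while True:
        if clients > system_limit or clients >= max_locusts:
            # Threshold crossed: halve the stride and step down, like ramp_down()
            boundary_found = True
            hatch_stride = max(hatch_stride // 2, precision)
            clients -= hatch_stride
            print("ramping down to %d (stride %d)" % (clients, hatch_stride))
        else:
            if boundary_found and hatch_stride <= precision:
                print("sweet spot found at %d locusts" % clients)
                return clients
            if boundary_found:
                hatch_stride = max(hatch_stride // 2, precision)
            clients += hatch_stride
            print("ramping up to %d (stride %d)" % (clients, hatch_stride))

simulate_ramp(system_limit=570)   # converges to 555 locusts in roughly ten steps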
18 changes: 9 additions & 9 deletions locust/templates/index.html
@@ -99,25 +99,25 @@ <h2>Change the locust count</h2>
<h2>Ramping</h2>
<form action="/ramp" method="POST" id="ramp_form">
<div style="float:left;">
<label for="init_count">Initial number of clients</label>
<label for="init_count" title="This is the number of initial number of locusts that will be spawned. Pick a number you know can be handled.">Initial number of clients</label>
<input type="text" name="init_count" id="init_count" class="val" /><br>
<label for="hatch_rate">Hatch rate <span style="color:#8a8a8a;">(users spawned/second)</span></label>
<label for="hatch_rate" title="This is the rate locusts/second at which all slaves together will spawn additional locusts">Hatch rate <span style="color:#8a8a8a;">(users spawned/second)</span></label>
<input type="text" name="hatch_rate" id="hatch_rate" class="val" /><br>
<label for="hatch_stride">Hatch stride</label>
<label for="hatch_stride" title="This is the amount of locusts that will be spawned in between each sleep (calibration)">Hatch stride</label>
<input type="text" name="hatch_stride" id="hatch_stride" class="val" /><br>
<label for="precision">Precision (min value of hatch stride)</label>
<label for="precision" title="Hatchstride will lower when a threshold has been reached and when hachstride <= precision; the sweet spot has been found">Precision (min value of hatch stride)</label>
<input type="text" name="precision" id="precision" class="val" /><br>
<label for="wait_time">Calibration time (seconds)</label>
<label for="wait_time" title="The amount of time to sleep to gather enough statistics before checking for thresholds">Calibration time (seconds)</label>
<input type="text" name="wait_time" id="wait_time" class="val" value="20" /><br>
</div>
<div style="float:right;">
<label for="max_count">Max number of clients</label>
<label for="max_count" title="This is a threshold, if exceeded we start ramping down">Max number of clients</label>
<input type="text" name="max_count" id="max_count" class="val" /><br>
<label for="percentile">Percentile (%)</label>
<label for="percentile" title="Threshold - The percentage of all responses that has to be have response times lower than a set limit.">Percentile (%)</label>
<input type="text" name="percentile" id="percentile" class="val" value="95" /><br>
<label for="response_time">Max percentile response time (ms)</label>
<label for="response_time" title="Threshold - The highest response time that a percentage of all responses must not exceed.">Max percentile response time (ms)</label>
<input type="text" name="response_time" id="response_time" class="val" value="2000" /><br>
<label for="fail_rate">Accepted fail rate (%)</label>
<label for="fail_rate" title="This threshold is the highest ratio in percent of all requests that are allowed to fail.">Accepted fail ratio (%)</label>
<input type="text" name="fail_rate" id="fail_rate" class="val" value="5" /><br>
<br><br>
<input type="image" src="/static/img/start_button.png" value="Start swarming" class="start_button">
4 changes: 3 additions & 1 deletion locust/web.py
@@ -69,6 +69,8 @@ def stop():

@app.route("/ramp", methods=["POST"])
def ramp():
from ramping import start_ramping

init_clients = int(request.form["init_count"])
hatch_rate = int(request.form["hatch_rate"])
hatch_stride = int(request.form["hatch_stride"])
@@ -78,7 +80,7 @@ def ramp():
percentile = float(int(request.form["percentile"]) / 100.0)
fail_rate = float(int(request.form["fail_rate"]) / 100.0)
calibration_time = int(request.form["wait_time"])
gevent.spawn(runners.locust_runner.start_ramping, hatch_rate, max_clients, hatch_stride, percentile, response_time, fail_rate, precision, init_clients, calibration_time)
gevent.spawn(start_ramping, hatch_rate, max_clients, hatch_stride, percentile, response_time, fail_rate, precision, init_clients, calibration_time)
response = make_response(json.dumps({'success':True, 'message': 'Ramping started'}))
response.headers["Content-type"] = "application/json"
return response
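
For reference, the new /ramp endpoint can be exercised directly. The field names below come from the ramping form in locust/templates/index.html; the port (8089, locust's default web UI port) and the use of the third-party requests library are assumptions for this example, not part of the commit.

import requests

# Field names mirror the ramping form in locust/templates/index.html
form = {
    "init_count": 50,       # initial number of clients
    "hatch_rate": 10,       # locusts spawned per second
    "hatch_stride": 100,    # clients added/removed per step
    "precision": 20,        # minimum hatch stride
    "wait_time": 20,        # calibration time in seconds
    "max_count": 1000,      # max number of clients
    "percentile": 95,       # percentile to watch (%)
    "response_time": 2000,  # max percentile response time (ms)
    "fail_rate": 5,         # accepted fail ratio (%)
}

resp = requests.post("http://localhost:8089/ramp", data=form)
print(resp.json())  # expected: {"success": true, "message": "Ramping started"}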
