Allow controlling benchmark placement per track (elastic#260)

With this commit we add a new parameter `placement` to the race config file that specifies, per track, which benchmark machine should be targeted when the number of targeted machines is less than the number of available ones. The motivation for this change is that single-node benchmarks have historically always targeted the first machine in the pool, leading to frequent disk failures. If we spread the load across machines, the expected lifetime of the disks is more evenly distributed. By only allowing the placement to be specified at the top level (i.e. per track), we ensure that charts for the same track remain consistent and don't suffer from machine-to-machine variation. In this commit we also modify the race configurations for the existing nightly benchmarks. We have assigned the placement roughly based on the expected write load. It is not possible to spread the load completely evenly because some tracks dominate the I/O load (for example `nyc_taxis` for group-1), but spreading the load a bit is still preferable to having the disk on the first target machine always fail first. Relates elastic#259
cbuescher · May 15, 2020 · 4e1765f · 4e1765f
1 parent e4ac91d
commit 4e1765f
Show file tree

Hide file tree

Showing 5 changed files with 36 additions and 17 deletions.
diff --git a/night_rally/night_rally.py b/night_rally/night_rally.py
@@ -12,8 +12,6 @@
 import json
 import jsonschema
 
-from collections import OrderedDict
-
 ROOT = os.path.dirname(os.path.realpath(__file__))
 RALLY_BINARY = "rally --skip-update"
 VERSION_PATTERN = re.compile(r"^(\d+)\.(\d+)\.(\d+)(?:-(.+))?$")
@@ -470,9 +468,10 @@ def user_tags(self, x_pack, track_license):
 
 
 class RaceConfig:
-    def __init__(self, track_name, track_repository, configuration, available_hosts):
+    def __init__(self, track_name, track_repository, placement, configuration, available_hosts):
         self.track = track_name
         self.track_repository = track_repository
+        self.placement = placement
         self.configuration = configuration
         self.available_hosts = available_hosts
 
@@ -543,8 +542,12 @@ def x_pack(self):
     def target_hosts(self):
         if self.node_count > len(self.available_hosts):
             return None
+        elif self.node_count == len(self.available_hosts):
+            return self.available_hosts
         else:
-            return self.available_hosts[:self.node_count]
+            d = collections.deque(self.available_hosts)
+            d.rotate(-self.placement)
+            return list(d)[:self.node_count]
 
     def _as_array(self, v):
         if isinstance(v, str):
@@ -563,7 +566,7 @@ def validate_race_configs(race_configs):
     jsonschema.validate(race_configs, race_configs_schema)
 
 
-def run_rally(tracks, release_params, available_hosts, command, dry_run=False, skip_ansible=False, system=os.system):
+def run_rally(race_configs, release_params, available_hosts, command, dry_run=False, skip_ansible=False, system=os.system):
     # Build list of host:port pairs for target hosts
     available_hosts_with_http_ports = list(map(lambda h: f"{h}:{TARGET_HTTP_PORT}", available_hosts))
     available_hosts_with_transport_ports = list(map(lambda h: f"{h}:{TARGET_TRANSPORT_PORT}", available_hosts))
@@ -573,11 +576,12 @@ def run_rally(tracks, release_params, available_hosts, command, dry_run=False, s
     else:
         runner = system
 
-    for track in tracks:
-        track_name = track["track"]
-        track_repository = track.get("track-repository", "default")
+    for r in race_configs:
+        track_name = r["track"]
+        placement = r.get("placement", 0)
+        track_repository = r.get("track-repository", "default")
 
-        for flavor_config in track["flavors"]:
+        for flavor_config in r["flavors"]:
             for license_config in flavor_config["licenses"]:
                 # TODO refactor encapsulation in Release/Docker Command
                 # release benchmarks override license, only go through oss
@@ -591,7 +595,7 @@ def run_rally(tracks, release_params, available_hosts, command, dry_run=False, s
                     # TODO refactor encapsulation in Release/Docker Command
                     configuration["license"] = release_params["license"] if release_params else license_config["name"]
 
-                    race_cfg = RaceConfig(track_name, track_repository, configuration, available_hosts_with_http_ports)
+                    race_cfg = RaceConfig(track_name, track_repository, placement, configuration, available_hosts_with_http_ports)
 
                     if race_cfg.target_hosts:
                         if command.runnable(race_cfg):
@@ -959,7 +963,7 @@ def __init__(self,
         self.version = version
         self._mode = mode
         self.configuration_name = configuration_name
-        self.release_params = OrderedDict()
+        self.release_params = collections.OrderedDict()
         self._release_x_pack_components = release_x_pack_components
         self._release_license = release_license
         # For now only used for release benchmarks
@@ -1031,7 +1035,7 @@ def main():
     )
 
     target_hosts = args.target_host.split(",")
-    tracks = load_race_configs(args.race_configs)
+    race_configs = load_race_configs(args.race_configs)
     params = []
 
     if args.telemetry:
@@ -1058,7 +1062,7 @@ def main():
                                      common_cli_params.setup, common_cli_params.race_configs_id, args.test_mode))
         command = NightlyCommand(params, start_date)
 
-    rally_failure = run_rally(tracks, common_cli_params.release_params, target_hosts, command, args.dry_run, args.skip_ansible)
+    rally_failure = run_rally(race_configs, common_cli_params.release_params, target_hosts, command, args.dry_run, args.skip_ansible)
 
     if common_cli_params.is_nightly:
         copy_results_for_release_comparison(

diff --git a/night_rally/resources/race-configs-group-1.json b/night_rally/resources/race-configs-group-1.json
@@ -1,6 +1,7 @@
 [
   {
     "track": "geonames",
+    "placement": 1,
     "flavors": [
       {
         "name": "oss",
@@ -159,6 +160,7 @@
   },
   {
     "track": "geopoint",
+    "placement": 0,
     "flavors": [
       {
         "name": "oss",
@@ -296,6 +298,7 @@
   },
   {
     "track": "percolator",
+    "placement": 0,
     "flavors": [
       {
         "name": "oss",
@@ -345,6 +348,7 @@
   },
   {
     "track": "nyc_taxis",
+    "placement": 2,
     "flavors": [
       {
         "name": "oss",

diff --git a/night_rally/resources/race-configs-group-2.json b/night_rally/resources/race-configs-group-2.json
@@ -1,6 +1,7 @@
 [
   {
     "track": "nested",
+    "placement": 0,
     "flavors": [
       {
         "name": "oss",
@@ -52,6 +53,7 @@
   },
   {
     "track": "noaa",
+    "placement": 0,
     "flavors": [
       {
         "name": "oss",
@@ -103,6 +105,7 @@
   },
   {
     "track": "pmc",
+    "placement": 1,
     "flavors": [
       {
         "name": "oss",
@@ -352,6 +355,7 @@
   },
   {
     "track": "http_logs",
+    "placement": 2,
     "flavors": [
       {
         "name": "oss",

diff --git a/night_rally/resources/race-configs-schema.json b/night_rally/resources/race-configs-schema.json
@@ -15,6 +15,10 @@
         "type": "string",
         "description": "The track-repository that contains the track specification"
       },
+      "placement": {
+        "type": "integer",
+        "description": "Indicates on which target machine(s) to place a benchmark. Should be in the range [0, number_of_target_machines - 1]. 0 means that target hosts are assigned starting from the first target host, 1, starting from the second target host and so on."
+      },
       "flavors": {
         "type": "array",
         "minItems": 1,
@@ -95,6 +99,7 @@
           }
         }
       }
-    }
+    },
+    "required": ["track", "flavors"]
   }
 }
diff --git a/tests/night_rally_test.py b/tests/night_rally_test.py
@@ -563,6 +563,7 @@ def test_run_two_oss_tracks_successfully(self, mocked_wait_until_port_is_free):
         tracks = [
             {
                 "track": "geonames",
+                "placement": 0,
                 "flavors": [
                     {
                         "name": "oss",
@@ -583,6 +584,7 @@ def test_run_two_oss_tracks_successfully(self, mocked_wait_until_port_is_free):
             },
             {
                 "track": "percolator",
+                "placement": 1,
                 "flavors": [
                     {
                         "name": "oss",
@@ -607,17 +609,17 @@ def test_run_two_oss_tracks_successfully(self, mocked_wait_until_port_is_free):
         race_configs_id = os.path.basename(get_random_race_configs_id())
         params = [night_rally.StandardParams("nightly", start_date, 8, "bare", race_configs_id=race_configs_id)]
         cmd = night_rally.NightlyCommand(params, start_date)
-        night_rally.run_rally(tracks, None, ["localhost"], cmd, skip_ansible=True, system=system_call)
+        night_rally.run_rally(tracks, None, ["127.0.0.1", "127.0.0.2", "127.0.0.3"], cmd, skip_ansible=True, system=system_call)
         self.assertEqual(2, len(system_call.calls))
         self.assertEqual(
             [
-                "rally --skip-update --configuration-name=\"nightly\" --quiet --target-host=\"localhost:9200\" "
+                "rally --skip-update --configuration-name=\"nightly\" --quiet --target-host=\"127.0.0.1:9200\" "
                 "--effective-start-date=\"2016-10-01 00:00:00\" --track-repository=\"default\" --track=\"geonames\" "
                 "--challenge=\"append-no-conflicts\" --car=\"defaults\" --client-options=\"timeout:240\" "
                 "--user-tag=\"name:geonames-defaults,setup:bare,race-configs-id:{},license:oss\" --runtime-jdk=\"8\" "
                 "--pipeline=\"from-sources-complete\" --revision=\"@2016-10-01T00:00:00Z\"".format(race_configs_id),
 
-                "rally --skip-update --configuration-name=\"nightly\" --quiet --target-host=\"localhost:9200\" "
+                "rally --skip-update --configuration-name=\"nightly\" --quiet --target-host=\"127.0.0.2:9200\" "
                 "--effective-start-date=\"2016-10-01 00:00:00\" --track-repository=\"default\" --track=\"percolator\" "
                 "--challenge=\"append-no-conflicts\" --car=\"4gheap\" --client-options=\"timeout:240\" "
                 "--user-tag=\"name:percolator-4g,setup:bare,race-configs-id:{},license:oss\" "