Support jobserver client mode automatically.

Detect that the environment variable MAKEFLAGS specifies a jobserver pool to use, and automatically use it to control build parallelism when this is the case. This is disabled if `--dry-run` or an explicit `-j<COUNT>` is passed on the command-line. Note that the `-l` option used to limit dispatch based on the overall load factor will still be in effect if used. + Use default member initialization for BuildConfig struct. + Add a new regression test suite that uses the misc/jobserver_pool.py script that was introduced in a previous commit, to verify that everything works properly.
ninja-build · Nov 11, 2024 · 981bff4 · 981bff4
1 parent 65f9c60
commit 981bff4
Show file tree

Hide file tree

Showing 9 changed files with 463 additions and 17 deletions.
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
@@ -27,6 +27,7 @@ jobs:
       run: |
         ./ninja_test --gtest_color=yes
         ../../misc/output_test.py
+        ../../misc/jobserver_test.py
     - name: Build release ninja
       run: CLICOLOR_FORCE=1 ninja -f build-Release.ninja
       working-directory: build
@@ -35,6 +36,7 @@ jobs:
       run: |
         ./ninja_test --gtest_color=yes
         ../../misc/output_test.py
+        ../../misc/jobserver_test.py
 
   build:
     runs-on: [ubuntu-latest]
@@ -166,6 +168,7 @@ jobs:
         ./ninja all
         python3 misc/ninja_syntax_test.py
         ./misc/output_test.py
+        ./misc/jobserver_test.py
 
   build-aarch64:
     name: Build Linux ARM64

diff --git a/doc/manual.asciidoc b/doc/manual.asciidoc
@@ -187,10 +187,42 @@ Ninja defaults to running commands in parallel anyway, so typically
 you don't need to pass `-j`.)
 
 
+GNU Jobserver support
+~~~~~~~~~~~~~~~~~~~~~
+
+Since version 1.13, Ninja builds can follow the
+https://www.gnu.org/software/make/manual/html_node/Job-Slots.html[GNU Make jobserver]
+client protocol. This is useful when Ninja is invoked as part of a larger
+build system controlled by a top-level GNU Make instance, or any other
+jobserver pool implementation, as it allows better coordination between
+concurrent build tasks.
+
+This feature is automatically enabled under the following conditions:
+
+- Dry-run (i.e. `-n` or `--dry-run`) is not enabled.
+
+- No explicit job count (e.g. `-j<COUNT>`) is passed on the command
+  line.
+
+- The `MAKEFLAGS` environment variable is defined and describes a valid
+  jobserver mode using `--jobserver-auth` or even `--jobserver-fds`.
+
+In this case, Ninja will use the jobserver pool of job slots to control
+parallelism, instead of its default implementation of `-j<count>`.
+
+Note that load-average limitations (i.e. when using `-l<count>`)
+are still being enforced in this mode.
+
+On Posix, Ninja supports both the `pipe` and `fifo` client modes, based on
+the content of `MAKEFLAGS`.
+
+IMPORTANT: A warning will be printed when `pipe` mode is detected, as this
+mode can be less reliable than `fifo`.
+
 Environment variables
 ~~~~~~~~~~~~~~~~~~~~~
 
-Ninja supports one environment variable to control its behavior:
+Ninja supports a few environment variables to control its behavior:
 `NINJA_STATUS`, the progress status printed before the rule being run.
 
 Several placeholders are available:
@@ -215,6 +247,10 @@ The default progress status is `"[%f/%t] "` (note the trailing space
 to separate from the build rule). Another example of possible progress status
 could be `"[%u/%r/%f] "`.
 
+If `MAKEFLAGS` is defined in the environment, it may alter how
+Ninja dispatches parallel build commands. See the GNU Jobserver support
+section for details.
+
 Extra tools
 ~~~~~~~~~~~
 

diff --git a/misc/jobserver_test.py b/misc/jobserver_test.py
@@ -0,0 +1,255 @@
+#!/usr/bin/env python3
+# Copyright 2024 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from textwrap import dedent
+import os
+import platform
+import subprocess
+import tempfile
+import typing as T
+import sys
+import unittest
+
+# Directory of this script; the pool and helper scripts are looked up next to it.
+_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
+_JOBSERVER_POOL_SCRIPT = os.path.join(_SCRIPT_DIR, "jobserver_pool.py")
+_JOBSERVER_TEST_HELPER_SCRIPT = os.path.join(_SCRIPT_DIR, "jobserver_test_helper.py")
+
+_PLATFORM_IS_WINDOWS = platform.system() == "Windows"
+
+# Environment used for child processes: a copy of the current one without
+# NINJA_STATUS and MAKEFLAGS, so that the caller's settings cannot leak into
+# the tests, and with a "dumb" terminal.
+default_env = {
+    name: value
+    for name, value in os.environ.items()
+    if name not in ("NINJA_STATUS", "MAKEFLAGS")
+}
+default_env["TERM"] = "dumb"
+
+# The Ninja binary under test is taken from the current working directory.
+NINJA_PATH = os.path.abspath("./ninja")
+
+
+class BuildDir:
+    """Temporary build directory holding a generated `build.ninja` file.
+
+    Use as a context manager: the directory and its `build.ninja` file are
+    created on entry, and the directory is removed on exit.
+    """
+
+    def __init__(self, build_ninja: str):
+        # The plan is dedented so that callers can pass indented literals.
+        self.build_ninja = dedent(build_ninja)
+        self.d: T.Optional[tempfile.TemporaryDirectory] = None
+
+    def __enter__(self):
+        self.d = tempfile.TemporaryDirectory()
+        with open(os.path.join(self.d.name, "build.ninja"), "w") as f:
+            f.write(self.build_ninja)
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Tolerate an exit without a matching enter, and forget the removed
+        # directory so that `path` cannot return a stale location afterwards.
+        if self.d is not None:
+            self.d.cleanup()
+            self.d = None
+
+    @property
+    def path(self) -> str:
+        """Path of the temporary directory; only valid inside the `with` block."""
+        assert self.d
+        return self.d.name
+
+    def run(
+        self,
+        cmd_flags: T.Optional[T.Sequence[str]] = None,
+        env: T.Optional[T.Dict[str, str]] = None,
+    ) -> None:
+        """Run a command, raise exception on error. Do not capture outputs.
+
+        Args:
+          cmd_flags: Full command line to run, program first.
+          env: Environment of the child process; `default_env` when omitted.
+        Raises:
+          subprocess.CalledProcessError: if the command exits with a non-zero
+            status.
+        """
+        ret = subprocess.run(
+            [] if cmd_flags is None else cmd_flags,
+            env=default_env if env is None else env,
+        )
+        ret.check_returncode()
+
+    def ninja_run(
+        self,
+        ninja_args: T.List[str],
+        prefix_args: T.Optional[T.List[str]] = None,
+        extra_env: T.Optional[T.Dict[str, str]] = None,
+    ) -> "subprocess.CompletedProcess[str]":
+        """Run Ninja in this directory without capturing its outputs.
+
+        Takes the same arguments as ninja_spawn().
+
+        Raises:
+          subprocess.CalledProcessError: if Ninja exits with a non-zero status.
+        """
+        ret = self.ninja_spawn(
+            ninja_args,
+            prefix_args=prefix_args,
+            extra_env=extra_env,
+            capture_output=False,
+        )
+        ret.check_returncode()
+        return ret
+
+    def ninja_clean(self) -> None:
+        """Remove the build outputs with `ninja -t clean`."""
+        self.ninja_run(["-t", "clean"])
+
+    def ninja_spawn(
+        self,
+        ninja_args: T.List[str],
+        prefix_args: T.Optional[T.List[str]] = None,
+        extra_env: T.Optional[T.Dict[str, str]] = None,
+        capture_output: bool = True,
+    ) -> "subprocess.CompletedProcess[str]":
+        """Run Ninja in this directory and return the completed process.
+
+        The exit status is not checked here; callers inspect the result.
+
+        Args:
+          ninja_args: Arguments passed to Ninja after `-C <path>`.
+          prefix_args: Command line placed before the Ninja binary, e.g. a
+            wrapper that provides a jobserver pool.
+          extra_env: Variables added on top of `default_env`.
+          capture_output: Whether to capture stdout and stderr as text.
+        """
+        command = list(prefix_args or []) + [NINJA_PATH, "-C", self.path] + list(ninja_args)
+        return subprocess.run(
+            command,
+            text=True,
+            capture_output=capture_output,
+            env={**default_env, **(extra_env or {})},
+        )
+
+
+def span_output_file(span_n: int) -> str:
+    """Return the output file name of span task number |span_n|."""
+    name_template = "out%02d"
+    return name_template % span_n
+
+
+def generate_build_plan(command_count: int) -> str:
+    """Return a Ninja build plan made of |command_count| independent tasks.
+
+    Every task runs the test helper script, which sleeps for 100ms and
+    then records its own start and end time in its output file. A phony
+    `all` target depends on all the task outputs.
+    """
+    header = f"""
+rule span
+    command = {sys.executable} -S {_JOBSERVER_TEST_HELPER_SCRIPT} --duration-ms=100 $out
+
+"""
+    outputs = [span_output_file(n) for n in range(command_count)]
+    build_statements = "".join("build %s: span\n" % output for output in outputs)
+    phony_statement = "build all: phony %s\n" % " ".join(outputs)
+    return header + build_statements + phony_statement
+
+
+def compute_max_overlapped_spans(build_dir: str, command_count: int) -> int:
+    """Compute the maximum number of span tasks that ran at the same time.
+
+    This reads the output files from |build_dir| and looks at their start and
+    end times to compute the maximum number of tasks that were run in parallel.
+
+    Args:
+      build_dir: Directory containing the span output files.
+      command_count: Number of span tasks, i.e. of output files to read.
+    Returns:
+      0 if |command_count| is less than 2 (no file is read in that case),
+      otherwise the peak number of simultaneously running tasks (at least 1).
+    Raises:
+      AssertionError: if an output file does not hold exactly two lines.
+    """
+    if command_count < 2:
+        return 0
+
+    # Read the output files: first line is the start time, second the end time.
+    spans: T.List[T.Tuple[int, int]] = []
+    for n in range(command_count):
+        with open(os.path.join(build_dir, span_output_file(n)), "rb") as f:
+            content = f.read().decode("utf-8")
+        lines = content.splitlines()
+        assert len(lines) == 2, f"Unexpected output file content: [{content}]"
+        spans.append((int(lines[0]), int(lines[1])))
+
+    # Sweep over the start/end events in time order while tracking how many
+    # tasks are running. Counting, for each span, the other spans that overlap
+    # it would over-report: two tasks can both overlap a third one without
+    # ever running at the same time as each other.
+    events: T.List[T.Tuple[int, int]] = []
+    for start, end in spans:
+        events.append((start, 1))
+        events.append((end, -1))
+
+    # At equal timestamps the end event (-1) sorts before the start event (+1),
+    # so a task starting exactly when another one ends does not overlap it.
+    events.sort()
+
+    running = 0
+    max_overlaps = 1
+    for _, delta in events:
+        running += delta
+        max_overlaps = max(max_overlaps, running)
+
+    return max_overlaps
+
+
+class JobserverTest(unittest.TestCase):
+    """Regression tests for Ninja's jobserver client mode."""
+
+    def test_no_jobserver_client(self):
+        task_count = 10
+        build_plan = generate_build_plan(task_count)
+        with BuildDir(build_plan) as b:
+            # With -j0, all tasks are expected to run at the same time.
+            b.run([NINJA_PATH, "-C", b.path, "-j0", "all"])
+            max_overlaps = compute_max_overlapped_spans(b.path, task_count)
+            self.assertEqual(max_overlaps, task_count)
+
+            # With -j1, tasks are expected to run one after the other.
+            b.ninja_clean()
+            b.run([NINJA_PATH, "-C", b.path, "-j1", "all"])
+            max_overlaps = compute_max_overlapped_spans(b.path, task_count)
+            self.assertEqual(max_overlaps, 1)
+
+    def _run_client_test(self, jobserver_args: T.List[str]) -> None:
+        """Run the span build under jobserver pools of various sizes.
+
+        Args:
+          jobserver_args: Command prefix that starts the jobserver pool; a
+            `--jobs=<count>` argument is appended to it for each run.
+        """
+        task_count = 10
+        # Each scenario is (extra ninja arguments, pool size, expected overlap).
+        scenarios: T.List[T.Tuple[T.List[str], int, int]] = [
+            # Run the full 10 tasks with 10 tokens, this should allow all
+            # tasks to run in parallel.
+            ([], task_count, task_count),
+            # Use 4 tokens only, and verify that this was enforced by Ninja.
+            ([], 4, 4),
+            # Verify that --jobs=1 serializes all tasks.
+            ([], 1, 1),
+            # Verify that -j1 overrides the pool.
+            (["-j1"], task_count, 1),
+        ]
+        build_plan = generate_build_plan(task_count)
+        with BuildDir(build_plan) as b:
+            for index, (extra_args, pool_size, expected) in enumerate(scenarios):
+                if index > 0:
+                    # Remove the previous outputs so that every task runs again.
+                    b.ninja_clean()
+                b.ninja_run(
+                    extra_args + ["all"],
+                    prefix_args=jobserver_args + [f"--jobs={pool_size}"],
+                )
+                max_overlaps = compute_max_overlapped_spans(b.path, task_count)
+                self.assertEqual(
+                    max_overlaps,
+                    expected,
+                    f"pool size {pool_size}, extra ninja arguments {extra_args}",
+                )
+
+    @unittest.skipIf(_PLATFORM_IS_WINDOWS, "These test methods do not work on Windows")
+    def test_jobserver_client_with_posix_pipe(self):
+        self._run_client_test([sys.executable, "-S", _JOBSERVER_POOL_SCRIPT, "--pipe"])
+
+    @unittest.skipIf(_PLATFORM_IS_WINDOWS, "These test methods do not work on Windows")
+    def test_jobserver_client_with_posix_fifo(self):
+        self._run_client_test([sys.executable, "-S", _JOBSERVER_POOL_SCRIPT])
+
+    def _test_MAKEFLAGS_value(
+        self,
+        ninja_args: T.Optional[T.List[str]] = None,
+        prefix_args: T.Optional[T.List[str]] = None,
+    ):
+        """Check that build commands see `--jobserver-auth=` in MAKEFLAGS.
+
+        Args:
+          ninja_args: Extra arguments passed to Ninja before `--quiet all`.
+          prefix_args: Command line placed before the Ninja binary.
+        """
+        build_plan = r"""
+rule print
+    command = echo MAKEFLAGS="[$$MAKEFLAGS]"
+
+build all: print
+"""
+        with BuildDir(build_plan) as b:
+            # Lists are always passed down, never None.
+            ret = b.ninja_spawn(
+                list(ninja_args or []) + ["--quiet", "all"],
+                prefix_args=list(prefix_args or []),
+            )
+            self.assertEqual(ret.returncode, 0)
+            output = ret.stdout.strip()
+            pos = output.find("MAKEFLAGS=[")
+            self.assertNotEqual(pos, -1, "Could not find MAKEFLAGS in output!")
+            makeflags, sep, _ = output[pos + len("MAKEFLAGS=[") :].partition("]")
+            self.assertEqual(sep, "]", "Missing ] in output!: " + output)
+            self.assertIn(
+                "--jobserver-auth=",
+                makeflags,
+                f"Missing --jobserver-auth from MAKEFLAGS [{makeflags}]\nSTDOUT [{ret.stdout}]\nSTDERR [{ret.stderr}]",
+            )
+
+    def test_client_passes_MAKEFLAGS(self):
+        # NOTE(review): unlike the other pool tests this one is not skipped on
+        # Windows although its command uses POSIX shell syntax - confirm.
+        self._test_MAKEFLAGS_value(
+            prefix_args=[sys.executable, "-S", _JOBSERVER_POOL_SCRIPT]
+        )
+
+
+# Run the whole test suite when this file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
diff --git a/misc/jobserver_test_helper.py b/misc/jobserver_test_helper.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+# Copyright 2024 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Simple utility used by the jobserver test. Wait for specific time, then write start/stop times to output file."""
+
+import argparse
+import time
+import sys
+from pathlib import Path
+
+
+def main(argv=None):
+    """Sleep for the requested duration, then record the start and end times.
+
+    The output file receives two lines: the time (in nanoseconds, from
+    time.time_ns()) taken before sleeping, then the time taken after it.
+
+    Args:
+      argv: Optional list of command-line arguments; when omitted, argparse
+        reads them from sys.argv.
+    Returns:
+      0, which the caller uses as the process exit code.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--duration-ms",
+        # Let argparse validate the value, so that a bad duration is reported
+        # as a usage error before any timestamp is taken.
+        type=int,
+        default=50,
+        help="sleep duration in milliseconds (default 50)",
+    )
+    parser.add_argument("output_file", type=Path, help="output file name.")
+    args = parser.parse_args(argv)
+
+    now_time_ns = time.time_ns()
+    time.sleep(args.duration_ms / 1000.0)
+    args.output_file.write_text(f"{now_time_ns}\n{time.time_ns()}\n")
+
+    return 0
+
+
+# Use main()'s return value as the process exit code.
+if __name__ == "__main__":
+    sys.exit(main())