From 16fb8546b9ce84062347fac3c16133250c615c59 Mon Sep 17 00:00:00 2001 From: Tye McQueen Date: Mon, 29 Jul 2024 15:18:50 -0700 Subject: [PATCH 1/2] Support periodic pruning of the most expensive worker process. --- examples/example_config.py | 33 +++++++++++++++++++++++ gunicorn/arbiter.py | 30 ++++++++++++++++++--- gunicorn/config.py | 55 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 3 deletions(-) diff --git a/examples/example_config.py b/examples/example_config.py index 5a399a497..064851606 100644 --- a/examples/example_config.py +++ b/examples/example_config.py @@ -71,6 +71,39 @@ timeout = 30 keepalive = 2 +# +# prune_function +# A function that is passed a process ID of a worker and returns a +# score (such as total memory used). Once every prune seconds, the +# worker with the highest score is killed (unless the score is at or below +# the prune floor). +# +# prune_seconds +# How many seconds to wait between killing the worker with the highest +# score from the prune function. If set to 0 (the default), then no +# pruning is done. The actual time waited is a random value between +# 95% and 105% of this value. +# +# prune_floor +# When the score from the prune function is at or below this value, the +# worker will not be killed even if it has the highest score. +# + +import psutil + +def proc_vmsize(pid): + # Return how many MB of virtual memory is being used by a worker process + try: + p = psutil.Process(pid) + mb = p.memory_info().vms/1024/1024 + return mb + except psutil.NoSuchProcess: + return 0 + +prune_seconds = 5*60 # Prune largest worker every 4.75-5.25m +prune_function = proc_vmsize # Measure worker size in MB of VM +prune_floor = 300 # Don't kill workers using <= 300 MB of VM + # # spew - Install a trace function that spews every line of Python # that is executed when running the server. 
This is the diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index 1cf436748..b36bd17c4 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -63,6 +63,7 @@ def __init__(self, app): self.reexec_pid = 0 self.master_pid = 0 self.master_name = "Master" + self.next_prune = None cwd = util.getcwd() @@ -203,6 +204,13 @@ def run(self): while True: self.maybe_promote_master() + if 0 < self.cfg.prune_seconds: + if self.next_prune is None: + self.next_prune = time.monotonic() + self.cfg.prune_seconds + elif self.next_prune <= time.monotonic(): + self.prune_worker() + self.next_prune += self.cfg.prune_seconds * ( + 0.95 + 0.10 * random.random()) sig = self.SIG_QUEUE.pop(0) if self.SIG_QUEUE else None if sig is None: @@ -486,6 +494,22 @@ def reload(self): # manage workers self.manage_workers() + def prune_worker(self): + """\ + Kill the worker with highest prune score + """ + workers = list(self.WORKERS.items()) + maxi = self.cfg.prune_floor + victim = 0 + for pid, _ in workers: + score = self.cfg.prune_function(pid) + if maxi < score: + maxi = score + victim = pid + if victim != 0: + self.log.info(f"Pruning worker (pid: {victim}) with score {maxi}") + self.kill_worker(victim, signal.SIGTERM) + def murder_workers(self): """\ Kill unused/idle workers @@ -586,9 +610,9 @@ def manage_workers(self): def spawn_worker(self): self.worker_age += 1 - worker = self.worker_class(self.worker_age, self.pid, self.LISTENERS, - self.app, self.timeout / 2.0, - self.cfg, self.log) + worker = self.worker_class( + self.worker_age, self.pid, self.LISTENERS, self.app, + self.timeout / 2.0, self.cfg, self.log) self.cfg.pre_fork(self, worker) pid = os.fork() if pid != 0: diff --git a/gunicorn/config.py b/gunicorn/config.py index 144acaecc..33488ad6f 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -724,6 +724,61 @@ class WorkerConnections(Setting): """ +class PruneFunction(Setting): + name = "prune_function" + section = "Worker Processes" + cli = ["--prune-function"] + 
validator = validate_callable(1) + type = callable + + def prune_score(pid): + return 0 + default = staticmethod(prune_score) + desc = """\ + A function that is passed a process ID of a worker and returns a + score (such as total memory used). Once every prune seconds, the + worker with the highest score is killed (unless the score is below + the prune floor). + """ + + +class PruneSeconds(Setting): + name = "prune_seconds" + section = "Worker Processes" + cli = ["--prune-seconds"] + meta = "INT" + validator = validate_pos_int + type = int + default = 0 + desc = """\ + How many seconds to wait between killing the worker with the highest + score from the prune function. If set to 0 (the default), then no + pruning is done. The actual time waited is a random value between + 95% and 105% of this value. + + A worker handling an unusually large request can significantly grow + how much memory it is consuming for the rest of its existence. So + rare large requests will tend to eventually make every worker + unnecessarily large. If the large requests are indeed rare, then + you can significantly reduce the total memory used by your service + by periodically pruning the largest worker process. + """ + + +class PruneFloor(Setting): + name = "prune_floor" + section = "Worker Processes" + cli = ["--prune-floor"] + meta = "INT" + validator = validate_pos_int + type = int + default = 0 + desc = """\ + When the score from the prune function is at or below this value, the + worker will not be killed even if it has the highest score. + """ + + class MaxRequests(Setting): name = "max_requests" section = "Worker Processes" From 6e1ca033f19a7aee33e2e684f283321b13b68f90 Mon Sep 17 00:00:00 2001 From: Tye McQueen Date: Mon, 29 Jul 2024 16:11:59 -0700 Subject: [PATCH 2/2] Update docs based on new settings. Closes #3251. 
--- docs/source/settings.rst | 52 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/docs/source/settings.rst b/docs/source/settings.rst index 4e0c11877..af3c6c8a6 100644 --- a/docs/source/settings.rst +++ b/docs/source/settings.rst @@ -1555,6 +1555,58 @@ The maximum number of simultaneous clients. This setting only affects the ``gthread``, ``eventlet`` and ``gevent`` worker types. +.. _prune-function: + +``prune_function`` +~~~~~~~~~~~~~~~~~~ + +**Command line:** ``--prune-function`` + +**Default:** + +.. code-block:: python + + def prune_score(pid): + return 0 + +A function that is passed a process ID of a worker and returns a +score (such as total memory used). Once every prune seconds, the +worker with the highest score is killed (unless the score is below +the prune floor). + +.. _prune-seconds: + +``prune_seconds`` +~~~~~~~~~~~~~~~~~ + +**Command line:** ``--prune-seconds INT`` + +**Default:** ``0`` + +How many seconds to wait between killing the worker with the highest +score from the prune function. If set to 0 (the default), then no +pruning is done. The actual time waited is a random value between +95% and 105% of this value. + +A worker handling an unusually large request can significantly grow +how much memory it is consuming for the rest of its existence. So +rare large requests will tend to eventually make every worker +unnecessarily large. If the large requests are indeed rare, then +you can significantly reduce the total memory used by your service +by periodically pruning the largest worker process. + +.. _prune-floor: + +``prune_floor`` +~~~~~~~~~~~~~~~ + +**Command line:** ``--prune-floor INT`` + +**Default:** ``0`` + +When the score from the prune function is at or below this value, the +worker will not be killed even if it has the highest score. + .. _max-requests: ``max_requests``