Skip to content

Commit

Permalink
Support periodic pruning of the most expensive worker process.
Browse files Browse the repository at this point in the history
  • Loading branch information
TyeMcQueen committed Jul 30, 2024
1 parent 79b9a52 commit c122d79
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 7 deletions.
33 changes: 33 additions & 0 deletions examples/example_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,39 @@
timeout = 30
keepalive = 2

#
# prune_function
# A function that is passed a process ID of a worker and returns a
# score (such as total memory used). Once every prune seconds, the
# worker with the highest score is killed (unless the score is at or
# below the prune floor).
#
# prune_seconds
# How many seconds to wait between killing the worker with the highest
# score from the prune function. If set to 0 (the default), then no
# pruning is done. The actual time waited is a random value between
# 95% and 105% of this value.
#
# prune_floor
# When the score from the prune function is at or below this value, the
# worker will not be killed even if it has the highest score.
#

import psutil

def proc_vmsize(pid):
    """Return the virtual memory size of a worker process, in MB.

    Intended for use as ``prune_function``: it receives a worker's process
    ID and the returned number is used as that worker's prune score.

    Returns 0 if the process no longer exists (for example, the worker
    exited just before it was measured). A score of 0 never exceeds a
    non-negative ``prune_floor``.
    """
    try:
        return psutil.Process(pid).memory_info().vms / 1024 / 1024
    except psutil.NoSuchProcess:
        # psutil names this exception NoSuchProcess. Referring to a
        # non-existent psutil.NoSuchProcessError here would raise
        # AttributeError exactly when the handler is needed.
        return 0

# Prune the highest-scoring worker roughly every five minutes
# (the actual interval is jittered to 4.75-5.25 minutes).
prune_seconds = 5 * 60
# Score each worker by its virtual memory size in MB.
prune_function = proc_vmsize
# Never kill a worker that is using 300 MB of virtual memory or less.
prune_floor = 300

#
# spew - Install a trace function that spews every line of Python
# that is executed when running the server. This is the
Expand Down
40 changes: 33 additions & 7 deletions gunicorn/arbiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def __init__(self, app):
self.reexec_pid = 0
self.master_pid = 0
self.master_name = "Master"
self.next_prune = None

cwd = util.getcwd()

Expand Down Expand Up @@ -203,6 +204,13 @@ def run(self):

while True:
self.maybe_promote_master()
if 0 < self.cfg.prune_seconds:
if self.next_prune is None:
self.next_prune = time.monotonic() + self.cfg.prune_seconds
elif self.next_prune <= time.monotonic():
self.prune_worker()
self.next_prune += self.cfg.prune_seconds * (
0.95 + 0.10 * random.random())

sig = self.SIG_QUEUE.pop(0) if self.SIG_QUEUE else None
if sig is None:
Expand Down Expand Up @@ -486,6 +494,22 @@ def reload(self):
# manage workers
self.manage_workers()

def prune_worker(self):
    """\
    Kill the worker with the highest prune score.

    Every current worker PID is passed to ``cfg.prune_function``. The
    worker with the largest score is sent SIGTERM, but only if that score
    is strictly greater than ``cfg.prune_floor``. If no worker exceeds
    the floor (or there are no workers), nothing is killed.
    """
    victim = None
    top_score = self.cfg.prune_floor
    # Iterate over a snapshot of the PIDs, as the original code did.
    for pid in list(self.WORKERS):
        score = self.cfg.prune_function(pid)
        if top_score < score:
            top_score = score
            victim = pid
    if victim is None:
        return
    # Report the victim's own score. The loop variable ``score`` holds
    # the score of whichever worker happened to be examined last.
    self.log.info(
        "Pruning worker (pid: %s) with score %s", victim, top_score)
    self.kill_worker(victim, signal.SIGTERM)

def murder_workers(self):
"""\
Kill unused/idle workers
Expand Down Expand Up @@ -579,16 +603,18 @@ def manage_workers(self):
active_worker_count = len(workers)
if self._last_logged_active_worker_count != active_worker_count:
self._last_logged_active_worker_count = active_worker_count
self.log.debug("{0} workers".format(active_worker_count),
extra={"metric": "gunicorn.workers",
"value": active_worker_count,
"mtype": "gauge"})
self.log.debug(
"{0} workers".format(active_worker_count),
extra={
"metric": "gunicorn.workers",
"value": active_worker_count,
"mtype": "gauge"})

def spawn_worker(self):
self.worker_age += 1
worker = self.worker_class(self.worker_age, self.pid, self.LISTENERS,
self.app, self.timeout / 2.0,
self.cfg, self.log)
worker = self.worker_class(
self.worker_age, self.pid, self.LISTENERS, self.app,
self.timeout / 2.0, self.cfg, self.log)
self.cfg.pre_fork(self, worker)
pid = os.fork()
if pid != 0:
Expand Down
55 changes: 55 additions & 0 deletions gunicorn/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,61 @@ class WorkerConnections(Setting):
"""


class PruneFunction(Setting):
    # Setting that holds the scoring callback used when pruning workers.
    name = "prune_function"
    section = "Worker Processes"
    # NOTE(review): a command-line value presumably arrives as a string;
    # confirm that ``type = callable`` and the validator below can turn
    # it into a usable function before relying on this flag.
    cli = ["--prune-function"]
    validator = validate_callable(1)
    type = callable

    # Default scorer: every worker scores 0, which never exceeds a
    # non-negative prune floor.
    def prune_score(pid):
        return 0
    default = staticmethod(prune_score)
    desc = """\
        A function that is passed a process ID of a worker and returns a
        score (such as total memory used). Once every prune seconds, the
        worker with the highest score is killed (unless the score is at or
        below the prune floor).
        """


class PruneSeconds(Setting):
    # Interval, in seconds, between prune passes; 0 disables pruning.
    section = "Worker Processes"
    name = "prune_seconds"
    cli = ["--prune-seconds"]
    meta = "INT"
    type = int
    validator = validate_pos_int
    default = 0
    desc = """\
        How many seconds to wait between killing the worker with the highest
        score from the prune function. If set to 0 (the default), then no
        pruning is done. The actual time waited is a random value between
        95% and 105% of this value.
        A worker handling an unusually large request can significantly grow
        how much memory it is consuming for the rest of its existence. So
        rare large requests will tend to eventually make every worker
        unnecessarily large. If the large requests are indeed rare, then
        you can significantly reduce the total memory used by your service
        by periodically pruning the largest worker process.
        """


class PruneFloor(Setting):
    # Threshold a worker's prune score must exceed before it may be killed.
    section = "Worker Processes"
    name = "prune_floor"
    cli = ["--prune-floor"]
    meta = "INT"
    type = int
    validator = validate_pos_int
    default = 0
    desc = """\
        When the score from the prune function is at or below this value, the
        worker will not be killed even if it has the highest score.
        """


class MaxRequests(Setting):
name = "max_requests"
section = "Worker Processes"
Expand Down

0 comments on commit c122d79

Please sign in to comment.