Skip to content

Commit

Permalink
Support periodic pruning of the most expensive worker process.
Browse files Browse the repository at this point in the history
  • Loading branch information
TyeMcQueen committed Jul 30, 2024
1 parent 79b9a52 commit 00ed5a2
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 7 deletions.
33 changes: 33 additions & 0 deletions examples/example_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,39 @@
timeout = 30
keepalive = 2

#
# prune_function
# A function that is passed a process ID of a worker and returns a
# score (such as total memory used). Once every prune seconds, the
# worker with the highest score is killed (unless that score is at or
# below the prune floor).
#
# prune_seconds
# How many seconds to wait between killing the worker with the highest
# score from the prune function. If set to 0 (the default), then no
# pruning is done. The actual time waited is a random value between
# 90% and 100% of this value.
#
# prune_floor
# When the score from the prune function is at or below this value, the
# worker will not be killed even if it has the highest score.
#

import psutil

def proc_vmsize(pid):
    """Return the virtual memory size of process *pid* in MB.

    Intended for use as ``prune_function``: the value is the ``vms`` field
    of psutil's ``memory_info()`` divided by 1024 twice.

    Returns 0 when the process no longer exists, so a worker that exited
    between being listed and being measured is never picked for pruning.
    """
    try:
        return psutil.Process(pid).memory_info().vms / 1024 / 1024
    except psutil.NoSuchProcess:
        # psutil's "process is gone" exception is NoSuchProcess; there is no
        # NoSuchProcessError attribute, so naming that would itself raise
        # AttributeError exactly when this handler is needed.
        return 0

# Example pruning policy: score each worker by its virtual memory size in MB
# and, every 4.5-5 minutes, kill the largest one, unless it is using 300 MB
# of virtual memory or less.
prune_function = proc_vmsize
prune_seconds = 5 * 60
prune_floor = 300

#
# spew - Install a trace function that spews every line of Python
# that is executed when running the server. This is the
Expand Down
40 changes: 33 additions & 7 deletions gunicorn/arbiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def __init__(self, app):
self.reexec_pid = 0
self.master_pid = 0
self.master_name = "Master"
self.next_prune = None

cwd = util.getcwd()

Expand Down Expand Up @@ -203,6 +204,13 @@ def run(self):

while True:
self.maybe_promote_master()
if 0 < self.cfg.prune_seconds:
if self.next_prune is None:
self.next_prune = time.monotonic() + self.cfg.prune_seconds
elif self.next_prune <= time.monotonic():
self.prune_worker()
self.next_prune +=
self.cfg.prune_seconds*(0.9 + 0.1*random.random())

sig = self.SIG_QUEUE.pop(0) if self.SIG_QUEUE else None
if sig is None:
Expand Down Expand Up @@ -486,6 +494,22 @@ def reload(self):
# manage workers
self.manage_workers()

def prune_worker(self):
    """\
    Kill the worker with the highest prune score.

    Each pid in ``self.WORKERS`` is passed to ``self.cfg.prune_function``.
    The worker with the highest score is sent SIGTERM, but only if that
    score is strictly greater than ``self.cfg.prune_floor``. If no worker
    scores above the floor (or there are no workers), nothing is killed.
    When several workers share the highest score, the first one
    encountered is chosen.
    """
    # Work from a snapshot of the pids rather than the live mapping.
    pids = list(self.WORKERS)

    # Starting from the floor means only scores above it can ever win.
    best_score = self.cfg.prune_floor
    victim = None
    for pid in pids:
        score = self.cfg.prune_function(pid)
        if best_score < score:
            best_score = score
            victim = pid

    if victim is None:
        return

    # Report and signal the selected worker, not whichever pid/score the
    # loop happened to finish on.
    self.log.info("Pruning worker (pid: %s) with score %s",
                  victim, best_score)
    self.kill_worker(victim, signal.SIGTERM)

def murder_workers(self):
"""\
Kill unused/idle workers
Expand Down Expand Up @@ -579,16 +603,18 @@ def manage_workers(self):
active_worker_count = len(workers)
if self._last_logged_active_worker_count != active_worker_count:
self._last_logged_active_worker_count = active_worker_count
self.log.debug("{0} workers".format(active_worker_count),
extra={"metric": "gunicorn.workers",
"value": active_worker_count,
"mtype": "gauge"})
self.log.debug(
"{0} workers".format(active_worker_count),
extra={
"metric": "gunicorn.workers",
"value": active_worker_count,
"mtype": "gauge"})

def spawn_worker(self):
self.worker_age += 1
worker = self.worker_class(self.worker_age, self.pid, self.LISTENERS,
self.app, self.timeout / 2.0,
self.cfg, self.log)
worker = self.worker_class(
self.worker_age, self.pid, self.LISTENERS, self.app,
self.timeout / 2.0, self.cfg, self.log)
self.cfg.pre_fork(self, worker)
pid = os.fork()
if pid != 0:
Expand Down
48 changes: 48 additions & 0 deletions gunicorn/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,54 @@ class WorkerConnections(Setting):
"""


class PruneFunction(Setting):
    # Scoring hook for periodic worker pruning: called with one worker pid,
    # returns that worker's score.
    name = "prune_function"
    section = "Worker Processes"
    # NOTE(review): this setting has both a command-line flag and
    # ``type = callable``. If Setting hands ``type`` to the option parser,
    # the flag's string value would be turned into a bool rather than a
    # function -- confirm that --prune-function actually works from the CLI.
    cli = ["--prune-function"]
    validator = validate_callable(1)
    type = callable

    # Default scorer: every worker scores 0.
    def prune_score(pid):
        return 0
    default = staticmethod(prune_score)
    desc = """\
A function that is passed a process ID of a worker and returns a
score (such as total memory used). Once every prune seconds, the
worker with the highest score is killed (unless the score is at or
below the prune floor).
"""


class PruneSeconds(Setting):
    # Interval, in seconds, between worker-pruning passes.
    name = "prune_seconds"
    section = "Worker Processes"
    cli = ["--prune-seconds"]
    meta = "INT"
    # validate_pos_int is defined elsewhere in this module; presumably it
    # rejects negative values -- TODO confirm.
    validator = validate_pos_int
    type = int
    # 0 disables pruning: the arbiter only schedules a prune when this
    # value is greater than zero.
    default = 0
    desc = """\
How many seconds to wait between killing the worker with the highest
score from the prune function. If set to 0 (the default), then no
pruning is done. The actual time waited is a random value between
90% and 100% of this value.
"""


class PruneFloor(Setting):
    # Score threshold for worker pruning: a worker is only killed when its
    # score is strictly greater than this value.
    name = "prune_floor"
    section = "Worker Processes"
    cli = ["--prune-floor"]
    meta = "INT"
    # validate_pos_int is defined elsewhere in this module; presumably it
    # rejects negative values -- TODO confirm.
    validator = validate_pos_int
    type = int
    default = 0
    desc = """\
When the score from the prune function is at or below this value, the
worker will not be killed even if it has the highest score.
"""

class MaxRequests(Setting):
name = "max_requests"
section = "Worker Processes"
Expand Down

0 comments on commit 00ed5a2

Please sign in to comment.