From 0a0ef9d140a1eedaa03e791eca9090f7a15ced42 Mon Sep 17 00:00:00 2001
From: Ankit Patel <ankit.patel@affirm.com>
Date: Wed, 1 Feb 2023 16:06:02 -0500
Subject: [PATCH 01/28] only create sockets in forked processes not in master

---
 gunicorn/arbiter.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py
index 7b9ed76f5..f3f63d204 100644
--- a/gunicorn/arbiter.py
+++ b/gunicorn/arbiter.py
@@ -10,6 +10,7 @@
 import sys
 import time
 import traceback
+import socket
 
 from gunicorn.errors import HaltServer, AppImportError
 from gunicorn.pidfile import Pidfile
@@ -152,7 +153,8 @@ def start(self):
             for fd in os.environ.pop('GUNICORN_FD').split(','):
                 fds.append(int(fd))
 
-        self.LISTENERS = sock.create_sockets(self.cfg, self.log, fds)
+        if not (self.cfg.reuse_port and hasattr(socket, 'SO_REUSEPORT')):
+            self.LISTENERS = sock.create_sockets(self.cfg, self.log, fds)
 
         listeners_str = ",".join([str(lnr) for lnr in self.LISTENERS])
         self.log.debug("Arbiter booted")
@@ -579,6 +581,8 @@ def spawn_worker(self):
         try:
             util._setproctitle("worker [%s]" % self.proc_name)
             self.log.info("Booting worker with pid: %s", worker.pid)
+            if self.cfg.reuse_port:
+                worker.sockets = sock.create_sockets(self.cfg, self.log)
             self.cfg.post_fork(self, worker)
             worker.init_process()
             sys.exit(0)

From b0115b9c272d8d1b67a0a3efc2711c3c9f2690c5 Mon Sep 17 00:00:00 2001
From: Ankit Patel <ankit.patel@affirm.com>
Date: Wed, 1 Feb 2023 16:06:43 -0500
Subject: [PATCH 02/28] detach socket in child process

---
 gunicorn/workers/ggevent.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gunicorn/workers/ggevent.py b/gunicorn/workers/ggevent.py
index 0a844db3a..314a38ca8 100644
--- a/gunicorn/workers/ggevent.py
+++ b/gunicorn/workers/ggevent.py
@@ -41,7 +41,7 @@ def patch(self):
         sockets = []
         for s in self.sockets:
             sockets.append(socket.socket(s.FAMILY, socket.SOCK_STREAM,
-                                         fileno=s.sock.fileno()))
+                                         fileno=s.sock.detach()))
         self.sockets = sockets
 
     def notify(self):

From 16fb8546b9ce84062347fac3c16133250c615c59 Mon Sep 17 00:00:00 2001
From: Tye McQueen <tye.mcqueen@gmail.com>
Date: Mon, 29 Jul 2024 15:18:50 -0700
Subject: [PATCH 03/28] Support periodic pruning of the most expensive worker
 process.

---
 examples/example_config.py | 33 +++++++++++++++++++++++
 gunicorn/arbiter.py        | 30 ++++++++++++++++++---
 gunicorn/config.py         | 55 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 115 insertions(+), 3 deletions(-)

diff --git a/examples/example_config.py b/examples/example_config.py
index 5a399a497..064851606 100644
--- a/examples/example_config.py
+++ b/examples/example_config.py
@@ -71,6 +71,39 @@
 timeout = 30
 keepalive = 2
 
+#
+# prune_function
+#   A function that is passed a process ID of a worker and returns a
+#   score (such as total memory used). Once every prune seconds, the
+#   worker with the highest score is killed (unless the score is below
+#   the prune floor).
+#
+# prune_seconds
+#   How many seconds to wait between killing the worker with the highest
+#   score from the prune function. If set to 0 (the default), then no
+#   pruning is done. The actual time waited is a random value between
+#   95% and 105% of this value.
+#
+# prune_floor
+#   When the score from the prune function is at or below this value, the
+#   worker will not be killed even if it has the highest score.
+#
+
+import psutil
+
+def proc_vmsize(pid):
+    # Return how many MB of virtual memory is being used by a worker process
+    try:
+        p = psutil.Process(pid)
+        mb = p.memory_info().vms/1024/1024
+        return mb
+    except psutil.NoSuchProcess:
+        return 0
+
+prune_seconds = 5*60          # Prune largest worker every 4.75-5.25m
+prune_function = proc_vmsize  # Measure worker size in MB of VM
+prune_floor = 300             # Don't kill workers using <= 300 MB of VM
+
 #
 # spew - Install a trace function that spews every line of Python
 #        that is executed when running the server. This is the
diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py
index 1cf436748..b36bd17c4 100644
--- a/gunicorn/arbiter.py
+++ b/gunicorn/arbiter.py
@@ -63,6 +63,7 @@ def __init__(self, app):
         self.reexec_pid = 0
         self.master_pid = 0
         self.master_name = "Master"
+        self.next_prune = None
 
         cwd = util.getcwd()
 
@@ -203,6 +204,13 @@ def run(self):
         while True:
             self.maybe_promote_master()
 
+            if 0 < self.cfg.prune_seconds:
+                if self.next_prune is None:
+                    self.next_prune = time.monotonic() + self.cfg.prune_seconds
+                elif self.next_prune <= time.monotonic():
+                    self.prune_worker()
+                    self.next_prune += self.cfg.prune_seconds * (
+                        0.95 + 0.10 * random.random())
             sig = self.SIG_QUEUE.pop(0) if self.SIG_QUEUE else None
             if sig is None:
@@ -486,6 +494,22 @@ def reload(self):
         # manage workers
         self.manage_workers()
 
+    def prune_worker(self):
+        """\
+        Kill the worker with highest prune score
+        """
+        workers = list(self.WORKERS.items())
+        maxi = self.cfg.prune_floor
+        victim = 0
+        for pid, _ in workers:
+            score = self.cfg.prune_function(pid)
+            if maxi < score:
+                maxi = score
+                victim = pid
+        if victim != 0:
+            self.log.info(f"Pruning worker (pid: {victim}) with score {maxi}")
+            self.kill_worker(victim, signal.SIGTERM)
+
     def murder_workers(self):
         """\
         Kill unused/idle workers
@@ -586,9 +610,9 @@ def manage_workers(self):
 
     def spawn_worker(self):
         self.worker_age += 1
-        worker = self.worker_class(self.worker_age, self.pid, self.LISTENERS,
-                                   self.app, self.timeout / 2.0,
-                                   self.cfg, self.log)
+        worker = self.worker_class(
+            self.worker_age, self.pid, self.LISTENERS, self.app,
+            self.timeout / 2.0, self.cfg, self.log)
         self.cfg.pre_fork(self, worker)
         pid = os.fork()
         if pid != 0:
diff --git a/gunicorn/config.py b/gunicorn/config.py
index 144acaecc..33488ad6f 100644
--- a/gunicorn/config.py
+++ b/gunicorn/config.py
@@ -724,6 +724,61 @@ class WorkerConnections(Setting):
     """
 
 
+class PruneFunction(Setting):
+    name = "prune_function"
+    section = "Worker Processes"
+    cli = ["--prune-function"]
+    validator = validate_callable(1)
+    type = callable
+
+    def prune_score(pid):
+        return 0
+    default = staticmethod(prune_score)
+    desc = """\
+        A function that is passed a process ID of a worker and returns a
+        score (such as total memory used). Once every prune seconds, the
+        worker with the highest score is killed (unless the score is below
+        the prune floor).
+        """
+
+
+class PruneSeconds(Setting):
+    name = "prune_seconds"
+    section = "Worker Processes"
+    cli = ["--prune-seconds"]
+    meta = "INT"
+    validator = validate_pos_int
+    type = int
+    default = 0
+    desc = """\
+        How many seconds to wait between killing the worker with the highest
+        score from the prune function. If set to 0 (the default), then no
+        pruning is done. The actual time waited is a random value between
+        95% and 105% of this value.
+
+        A worker handling an unusually large request can significantly grow
+        how much memory it is consuming for the rest of its existence. So
+        rare large requests will tend to eventually make every worker
+        unnecessarily large. If the large requests are indeed rare, then
+        you can significantly reduce the total memory used by your service
+        by periodically pruning the largest worker process.
+        """
+
+
+class PruneFloor(Setting):
+    name = "prune_floor"
+    section = "Worker Processes"
+    cli = ["--prune-floor"]
+    meta = "INT"
+    validator = validate_pos_int
+    type = int
+    default = 0
+    desc = """\
+        When the score from the prune function is at or below this value, the
+        worker will not be killed even if it has the highest score.
+        """
+
+
 class MaxRequests(Setting):
     name = "max_requests"
     section = "Worker Processes"

From 6e1ca033f19a7aee33e2e684f283321b13b68f90 Mon Sep 17 00:00:00 2001
From: Tye McQueen <tye.mcqueen@gmail.com>
Date: Mon, 29 Jul 2024 16:11:59 -0700
Subject: [PATCH 04/28] Update docs based on new settings. Closes #3251.

---
 docs/source/settings.rst | 52 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/docs/source/settings.rst b/docs/source/settings.rst
index 4e0c11877..af3c6c8a6 100644
--- a/docs/source/settings.rst
+++ b/docs/source/settings.rst
@@ -1555,6 +1555,58 @@ The maximum number of simultaneous clients.
 
 This setting only affects the ``gthread``, ``eventlet`` and ``gevent``
 worker types.
 
+.. _prune-function:
+
+``prune_function``
+~~~~~~~~~~~~~~~~~~
+
+**Command line:** ``--prune-function``
+
+**Default:**
+
+.. code-block:: python
+
+    def prune_score(pid):
+        return 0
+
+A function that is passed a process ID of a worker and returns a
+score (such as total memory used). Once every prune seconds, the
+worker with the highest score is killed (unless the score is below
+the prune floor).
+
+.. _prune-seconds:
+
+``prune_seconds``
+~~~~~~~~~~~~~~~~~
+
+**Command line:** ``--prune-seconds INT``
+
+**Default:** ``0``
+
+How many seconds to wait between killing the worker with the highest
+score from the prune function. If set to 0 (the default), then no
+pruning is done. The actual time waited is a random value between
+95% and 105% of this value.
+
+A worker handling an unusually large request can significantly grow
+how much memory it is consuming for the rest of its existence. So
+rare large requests will tend to eventually make every worker
+unnecessarily large. If the large requests are indeed rare, then
+you can significantly reduce the total memory used by your service
+by periodically pruning the largest worker process.
+
+.. _prune-floor:
+
+``prune_floor``
+~~~~~~~~~~~~~~~
+
+**Command line:** ``--prune-floor INT``
+
+**Default:** ``0``
+
+When the score from the prune function is at or below this value, the
+worker will not be killed even if it has the highest score.
+
 .. _max-requests:
 
 ``max_requests``
Dorn" <pajod@users.noreply.github.com> Date: Fri, 8 Dec 2023 01:37:57 +0100 Subject: [PATCH 05/28] CI: test against Pylint 3.x Pylint 3.0 is the first version to officially support Python 12 --- tox.ini | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 9bf99e1be..6e4f9bf5e 100644 --- a/tox.ini +++ b/tox.ini @@ -43,8 +43,10 @@ commands = tests/test_systemd.py \ tests/test_util.py \ tests/test_valid_requests.py +# OK to request linter version unsupported on oldest python matrix +# linting only has to work on one (currently supported) release deps = - pylint==2.17.4 + pylint>=3.1.0 [testenv:docs-lint] no_package = true From b16b341b8db61fb97bd8c1fb008f3d4d3b219892 Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" <pajod@users.noreply.github.com> Date: Fri, 8 Dec 2023 01:49:42 +0100 Subject: [PATCH 06/28] pylint: shut up about method-scope cyclic import --- gunicorn/app/wsgiapp.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gunicorn/app/wsgiapp.py b/gunicorn/app/wsgiapp.py index 1b0ba969d..cd496b451 100644 --- a/gunicorn/app/wsgiapp.py +++ b/gunicorn/app/wsgiapp.py @@ -11,6 +11,8 @@ class WSGIApplication(Application): def init(self, parser, opts, args): + # pylint: disable=cyclic-import + self.app_uri = None if opts.paste: @@ -47,6 +49,8 @@ def load_wsgiapp(self): return util.import_app(self.app_uri) def load_pasteapp(self): + # pylint: disable=cyclic-import + from .pasterapp import get_wsgi_app return get_wsgi_app(self.app_uri, defaults=self.cfg.paste_global_conf) From 20efd2dd8dca4a14f6e9a7d11974cdddb437be71 Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" <pajod@users.noreply.github.com> Date: Fri, 8 Dec 2023 06:35:17 +0100 Subject: [PATCH 07/28] docs: remove references to EoL software Debian buster EoL since 2022-09-10 --- docs/source/install.rst | 56 +++++++++++------------------------------ 1 file changed, 15 insertions(+), 41 deletions(-) diff --git a/docs/source/install.rst b/docs/source/install.rst index 2367086df..490fa3792 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -96,59 +96,33 @@ advantages: rolled back in case of incompatibility. The package can also be purged entirely from the system in seconds. -stable ("buster") ------------------- +stable (as of 2024, "bookworm") +------------------------------- -The version of Gunicorn in the Debian_ "stable" distribution is 19.9.0 -(December 2020). You can install it using:: +The version of Gunicorn in the Debian_ "stable" distribution is 20.1.0 +(2021-04-28). You can install it using:: - $ sudo apt-get install gunicorn3 - -You can also use the most recent version 20.0.4 (December 2020) by using -`Debian Backports`_. First, copy the following line to your -``/etc/apt/sources.list``:: - - deb http://ftp.debian.org/debian buster-backports main - -Then, update your local package lists:: - - $ sudo apt-get update - -You can then install the latest version using:: - - $ sudo apt-get -t buster-backports install gunicorn - -oldstable ("stretch") ---------------------- - -While Debian releases newer than Stretch will give you gunicorn with Python 3 -support no matter if you install the gunicorn or gunicorn3 package for Stretch -you specifically have to install gunicorn3 to get Python 3 support. - -The version of Gunicorn in the Debian_ "oldstable" distribution is 19.6.0 -(December 2020). 
You can install it using:: - - $ sudo apt-get install gunicorn3 + $ sudo apt-get install gunicorn -You can also use the most recent version 19.7.1 (December 2020) by using +You may have access to a more recent packaged version by using `Debian Backports`_. First, copy the following line to your ``/etc/apt/sources.list``:: - deb http://ftp.debian.org/debian stretch-backports main + deb http://ftp.debian.org/debian bookworm-backports main Then, update your local package lists:: $ sudo apt-get update -You can then install the latest version using:: +You can then install the latest available version using:: - $ sudo apt-get -t stretch-backports install gunicorn3 + $ sudo apt-get -t bookworm-backports install gunicorn -Testing ("bullseye") / Unstable ("sid") ---------------------------------------- +Testing (as of 2024, "trixie") / Unstable ("sid") +------------------------------------------------- -"bullseye" and "sid" contain the latest released version of Gunicorn 20.0.4 -(December 2020). You can install it in the usual way:: +"trixie" and "sid" ship the most recently packaged version of Gunicorn 20.1.0 +(2021-04-28). You can install it in the usual way:: $ sudo apt-get install gunicorn @@ -156,8 +130,8 @@ Testing ("bullseye") / Unstable ("sid") Ubuntu ====== -Ubuntu_ 20.04 LTS (Focal Fossa) or later contains the Gunicorn package by -default 20.0.4 (December 2020) so that you can install it in the usual way:: +Ubuntu_ 20.04 LTS (Focal Fossa) and later ship packages similar to Debian +so that you can install it in the usual way:: $ sudo apt-get update $ sudo apt-get install gunicorn From b237685f1d893280275241b1fc7fe829740c76b0 Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" <pajod@users.noreply.github.com> Date: Wed, 31 Jul 2024 03:02:12 +0200 Subject: [PATCH 08/28] lint: let pylint see if/elif covers all cases --- gunicorn/workers/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gunicorn/workers/base.py b/gunicorn/workers/base.py index 93c465c98..5fa165e01 100644 --- a/gunicorn/workers/base.py +++ b/gunicorn/workers/base.py @@ -255,6 +255,8 @@ def handle_error(self, req, client, addr, exc): reason = "Forbidden" mesg = "'%s'" % str(exc) status_int = 403 + else: + raise AssertionError("mismatched except/elif branches") msg = "Invalid request from ip={ip}: {error}" self.log.warning(msg.format(ip=addr[0], error=str(exc))) From 980ecc4de29e78acaa3df06dfbf2391563968a04 Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" <pajod@users.noreply.github.com> Date: Wed, 31 Jul 2024 03:29:07 +0200 Subject: [PATCH 09/28] lint: ignore import/method name clash --- gunicorn/arbiter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index 1eaf453d5..d145ce848 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -42,6 +42,7 @@ class Arbiter: SIG_QUEUE = [] SIGNALS = [getattr(signal, "SIG%s" % x) for x in "HUP QUIT INT TERM TTIN TTOU USR1 USR2 WINCH".split()] + # pylint: disable=used-before-assignment SIG_NAMES = dict( (getattr(signal, name), name[3:].lower()) for name in dir(signal) if name[:3] == "SIG" and name[3] != "_" From e2d1fc3d30f0a8b3e14a55bc6cc815a17ebfd306 Mon Sep 17 00:00:00 2001 From: "Paul J. 
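The else branch added in PATCH 08 is a broadly useful pattern: closing an
if/elif ladder with a raise converts a silently skipped case into a loud
failure, and lets linters prove that every path assigns the variables used
afterwards. A minimal standalone sketch of the idea (not taken from the
gunicorn sources):

    def status_for(exc):
        # Every branch assigns 'status'; the else documents exhaustiveness.
        if isinstance(exc, ValueError):
            status = 400
        elif isinstance(exc, PermissionError):
            status = 403
        else:
            raise AssertionError("unhandled exception type: %r" % exc)
        return status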
Dorn" <pajod@users.noreply.github.com> Date: Wed, 31 Jul 2024 03:29:48 +0200 Subject: [PATCH 10/28] lint: clarify undefined access cannot happen --- gunicorn/util.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/gunicorn/util.py b/gunicorn/util.py index e66dbebf3..106919c8d 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -141,13 +141,14 @@ def set_owner_process(uid, gid, initgroups=False): """ set user and group of workers processes """ if gid: + username = None if uid: try: username = get_username(uid) except KeyError: initgroups = False - if initgroups: + if initgroups and username is not None: os.initgroups(username, gid) elif gid != os.getgid(): os.setgid(gid) @@ -161,15 +162,12 @@ def chown(path, uid, gid): if sys.platform.startswith("win"): - def _waitfor(func, pathname, waitall=False): + def _waitfor(func, pathname): # Perform the operation func(pathname) # Now setup the wait loop - if waitall: - dirname = pathname - else: - dirname, name = os.path.split(pathname) - dirname = dirname or '.' + dirname, name = os.path.split(pathname) + dirname = dirname or '.' # Check for `pathname` to be removed from the filesystem. # The exponential backoff of the timeout amounts to a total # of ~1 second after which the deletion is probably an error @@ -186,7 +184,7 @@ def _waitfor(func, pathname, waitall=False): # Other Windows APIs can fail or give incorrect results when # dealing with files that are pending deletion. L = os.listdir(dirname) - if not L if waitall else name in L: + if name in L: return # Increase the timeout and try again time.sleep(timeout) From c402cb6df9bfb42d45072af1ca442edb36c7a7dc Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" <pajod@users.noreply.github.com> Date: Wed, 31 Jul 2024 03:30:11 +0200 Subject: [PATCH 11/28] lint: ignore tornado<5 signature differences --- gunicorn/workers/gtornado.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/gunicorn/workers/gtornado.py b/gunicorn/workers/gtornado.py index 544af7d09..e0c9b2605 100644 --- a/gunicorn/workers/gtornado.py +++ b/gunicorn/workers/gtornado.py @@ -97,9 +97,12 @@ def run(self): for callback in self.callbacks: callback.start() else: - PeriodicCallback(self.watchdog, 1000, io_loop=self.ioloop).start() - PeriodicCallback(self.heartbeat, 1000, io_loop=self.ioloop).start() - + PeriodicCallback( # pylint: disable=unexpected-keyword-arg + self.watchdog, 1000, io_loop=self.ioloop + ).start() + PeriodicCallback( # pylint: disable=unexpected-keyword-arg + self.heartbeat, 1000, io_loop=self.ioloop + ).start() # Assume the app is a WSGI callable if its not an # instance of tornado.web.Application or is an # instance of tornado.wsgi.WSGIApplication @@ -132,7 +135,7 @@ def finish(other): class _HTTPServer(tornado.httpserver.HTTPServer): - def on_close(instance, server_conn): + def on_close(instance, server_conn): # pylint: disable=arguments-renamed self.handle_request() super().on_close(server_conn) From f5f41febe937cb208a867647d5e9baf1d07ef75d Mon Sep 17 00:00:00 2001 From: "Paul J. 
Dorn" <pajod@users.noreply.github.com> Date: Wed, 31 Jul 2024 03:30:37 +0200 Subject: [PATCH 12/28] lint: tightly scope ignores --- .pylintrc | 7 ++----- tests/test_config.py | 3 +++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.pylintrc b/.pylintrc index bc2046c0c..d52118173 100644 --- a/.pylintrc +++ b/.pylintrc @@ -15,9 +15,8 @@ disable= bad-mcs-classmethod-argument, bare-except, broad-except, - duplicate-bases, duplicate-code, - eval-used, + superfluous-parens, fixme, import-error, import-outside-toplevel, @@ -47,9 +46,7 @@ disable= ungrouped-imports, unused-argument, useless-object-inheritance, - useless-import-alias, - comparison-with-callable, try-except-raise, consider-using-with, consider-using-f-string, - unspecified-encoding + unspecified-encoding, diff --git a/tests/test_config.py b/tests/test_config.py index 6ca014b6a..5dce9fe22 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -2,6 +2,9 @@ # This file is part of gunicorn released under the MIT license. # See the NOTICE for more information. +# config contains callables - comparing those is intended here +# pylint: disable=comparison-with-callable + import os import re import sys From 7eadeb941716f8430f92fc0ee14597cdd66cfe13 Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" <pajod@users.noreply.github.com> Date: Mon, 12 Aug 2024 17:12:29 +0200 Subject: [PATCH 13/28] lint: split availability testing from usage --- gunicorn/reloader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gunicorn/reloader.py b/gunicorn/reloader.py index 1c67f2a7d..6cdc5438a 100644 --- a/gunicorn/reloader.py +++ b/gunicorn/reloader.py @@ -56,14 +56,15 @@ def run(self): has_inotify = False if sys.platform.startswith('linux'): try: - from inotify.adapters import Inotify - import inotify.constants + import inotify.constants as _ has_inotify = True except ImportError: pass if has_inotify: + from inotify.adapters import Inotify + import inotify.constants class InotifyReloader(threading.Thread): event_mask = (inotify.constants.IN_CREATE | inotify.constants.IN_DELETE From 0bff3a17916c80087dc4e1b520ef9767269b842f Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" <pajod@users.noreply.github.com> Date: Mon, 12 Aug 2024 19:19:29 +0200 Subject: [PATCH 14/28] lint: inotify reloader does not access undefined --- gunicorn/reloader.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gunicorn/reloader.py b/gunicorn/reloader.py index 6cdc5438a..dac29122f 100644 --- a/gunicorn/reloader.py +++ b/gunicorn/reloader.py @@ -54,17 +54,18 @@ def run(self): has_inotify = False +inotify = None # inotify.adapters +Inotify = None # inotify.adapters.Inotify if sys.platform.startswith('linux'): try: - import inotify.constants as _ + from inotify.adapters import Inotify + import inotify.constants has_inotify = True except ImportError: pass if has_inotify: - from inotify.adapters import Inotify - import inotify.constants class InotifyReloader(threading.Thread): event_mask = (inotify.constants.IN_CREATE | inotify.constants.IN_DELETE From 2096e428357fb26bed917d456a0f6ba741284cb8 Mon Sep 17 00:00:00 2001 From: "Paul J. 
Dorn" <pajod@users.noreply.github.com> Date: Tue, 13 Aug 2024 22:24:36 +0200 Subject: [PATCH 15/28] config: reload-extra without reload --- docs/source/settings.rst | 9 ++++++++- gunicorn/config.py | 9 ++++++++- gunicorn/reloader.py | 15 ++++++++++++--- gunicorn/workers/base.py | 4 ++-- 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/docs/source/settings.rst b/docs/source/settings.rst index e1e91fa76..824b42fee 100644 --- a/docs/source/settings.rst +++ b/docs/source/settings.rst @@ -82,6 +82,10 @@ The default behavior is to attempt inotify with a fallback to file system polling. Generally, inotify should be preferred if available because it consumes less system resources. +.. note:: + If the application fails to load while this option is used, + the (potentially sensitive!) traceback will be shared in + the response to subsequent HTTP requests. .. note:: In order to use the inotify reloader, you must have the ``inotify`` package installed. @@ -114,10 +118,13 @@ Valid engines are: **Default:** ``[]`` -Extends :ref:`reload` option to also watch and reload on additional files +Alternative or extension to :ref:`reload` option to (also) watch +and reload on additional files (e.g., templates, configurations, specifications, etc.). .. versionadded:: 19.8 +.. versionchanged:: 23.FIXME + Option no longer silently ignored if used without :ref:`reload`. .. _spew: diff --git a/gunicorn/config.py b/gunicorn/config.py index 402a26b68..e37238d81 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -921,6 +921,10 @@ class Reload(Setting): system polling. Generally, inotify should be preferred if available because it consumes less system resources. + .. note:: + If the application fails to load while this option is used, + the (potentially sensitive!) traceback will be shared in + the response to subsequent HTTP requests. .. note:: In order to use the inotify reloader, you must have the ``inotify`` package installed. @@ -956,10 +960,13 @@ class ReloadExtraFiles(Setting): validator = validate_list_of_existing_files default = [] desc = """\ - Extends :ref:`reload` option to also watch and reload on additional files + Alternative or extension to :ref:`reload` option to (also) watch + and reload on additional files (e.g., templates, configurations, specifications, etc.). .. versionadded:: 19.8 + .. versionchanged:: 23.FIXME + Option no longer silently ignored if used without :ref:`reload`. 
""" diff --git a/gunicorn/reloader.py b/gunicorn/reloader.py index 1c67f2a7d..51fd15c88 100644 --- a/gunicorn/reloader.py +++ b/gunicorn/reloader.py @@ -14,17 +14,21 @@ class Reloader(threading.Thread): - def __init__(self, extra_files=None, interval=1, callback=None): + def __init__(self, extra_files=None, interval=1, callback=None, auto_detect=False): super().__init__() self.daemon = True self._extra_files = set(extra_files or ()) self._interval = interval self._callback = callback + self._auto_detect = auto_detect def add_extra_file(self, filename): self._extra_files.add(filename) def get_files(self): + if not self._auto_detect: + return self._extra_files + fnames = [ COMPILED_EXT_RE.sub('py', module.__file__) for module in tuple(sys.modules.values()) @@ -71,12 +75,13 @@ class InotifyReloader(threading.Thread): | inotify.constants.IN_MOVE_SELF | inotify.constants.IN_MOVED_FROM | inotify.constants.IN_MOVED_TO) - def __init__(self, extra_files=None, callback=None): + def __init__(self, extra_files=None, callback=None, auto_detect=False): super().__init__() self.daemon = True self._callback = callback self._dirs = set() self._watcher = Inotify() + self._auto_detect = auto_detect for extra_file in extra_files: self.add_extra_file(extra_file) @@ -91,6 +96,9 @@ def add_extra_file(self, filename): self._dirs.add(dirname) def get_dirs(self): + if not self._auto_detect: + return set() + fnames = [ os.path.dirname(os.path.abspath(COMPILED_EXT_RE.sub('py', module.__file__))) for module in tuple(sys.modules.values()) @@ -100,6 +108,7 @@ def get_dirs(self): return set(fnames) def run(self): + # FIXME: _watchers/_dirs inconsistent - latter gets reset self._dirs = self.get_dirs() for dirname in self._dirs: @@ -117,7 +126,7 @@ def run(self): else: class InotifyReloader: - def __init__(self, extra_files=None, callback=None): + def __init__(self, extra_files=None, callback=None, auto_detect=False): raise ImportError('You must have the inotify module installed to ' 'use the inotify reloader') diff --git a/gunicorn/workers/base.py b/gunicorn/workers/base.py index 93c465c98..8869472dc 100644 --- a/gunicorn/workers/base.py +++ b/gunicorn/workers/base.py @@ -119,7 +119,7 @@ def init_process(self): self.init_signals() # start the reloader - if self.cfg.reload: + if self.cfg.reload or self.cfg.reload_extra_files: def changed(fname): self.log.info("Worker reloading: %s modified", fname) self.alive = False @@ -130,7 +130,7 @@ def changed(fname): reloader_cls = reloader_engines[self.cfg.reload_engine] self.reloader = reloader_cls(extra_files=self.cfg.reload_extra_files, - callback=changed) + callback=changed, auto_detect=self.cfg.reload) self.load_wsgi() if self.reloader: From 56b3e4231c1889cb4db05667c7d6dfec736284d7 Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" <pajod@users.noreply.github.com> Date: Tue, 13 Aug 2024 23:38:18 +0200 Subject: [PATCH 16/28] style: re-verbosify HTTP commentary --- gunicorn/http/message.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 59ce0bf4b..5279a07bd 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -178,8 +178,9 @@ def set_body_reader(self): elif name == "TRANSFER-ENCODING": # T-E can be a list # https://datatracker.ietf.org/doc/html/rfc9112#name-transfer-encoding - vals = [v.strip() for v in value.split(',')] - for val in vals: + te_split_at_comma = [v.strip() for v in value.split(',')] + # N.B. 
From 56b3e4231c1889cb4db05667c7d6dfec736284d7 Mon Sep 17 00:00:00 2001
From: "Paul J. Dorn" <pajod@users.noreply.github.com>
Date: Tue, 13 Aug 2024 23:38:18 +0200
Subject: [PATCH 16/28] style: re-verbosify HTTP commentary

---
 gunicorn/http/message.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py
index 59ce0bf4b..5279a07bd 100644
--- a/gunicorn/http/message.py
+++ b/gunicorn/http/message.py
@@ -178,8 +178,9 @@ def set_body_reader(self):
         elif name == "TRANSFER-ENCODING":
             # T-E can be a list
             # https://datatracker.ietf.org/doc/html/rfc9112#name-transfer-encoding
-            vals = [v.strip() for v in value.split(',')]
-            for val in vals:
+            te_split_at_comma = [v.strip() for v in value.split(',')]
+            # N.B. we might have split in the middle of quoted transfer-parameter
+            for val in te_split_at_comma:
                 if val.lower() == "chunked":
                     # DANGER: transfer codings stack, and stacked chunking is never intended
                     if chunked:
@@ -187,7 +188,7 @@ def set_body_reader(self):
                     chunked = True
                 elif val.lower() == "identity":
                     # does not do much, could still plausibly desync from what the proxy does
-                    # safe option: nuke it, it's never needed
+                    # safe option: reject, it's never needed
                     if chunked:
                         raise InvalidHeader("TRANSFER-ENCODING", req=self)
                 elif val.lower() in ('compress', 'deflate', 'gzip'):
@@ -196,6 +197,8 @@ def set_body_reader(self):
                         raise InvalidHeader("TRANSFER-ENCODING", req=self)
                     self.force_close()
                 else:
+                    # DANGER: this not only rejects unknown encodings, but also
+                    # leftovers from not splitting at transfer-coding boundary
                     raise UnsupportedTransferCoding(value)
 
             if chunked:
@@ -203,11 +206,13 @@ def set_body_reader(self):
                 # a) CL + TE (TE overrides CL.. only safe if the recipient sees it that way too)
                 # b) chunked HTTP/1.0 (always faulty)
                 if self.version < (1, 1):
-                    # framing wonky, see RFC 9112 Section 6.1
+                    # framing is faulty
+                    # https://datatracker.ietf.org/doc/html/rfc9112#section-6.1-16
                     raise InvalidHeader("TRANSFER-ENCODING", req=self)
                 if content_length is not None:
                     # we cannot be certain the message framing we understood matches proxy intent
                     # -> whatever happens next, remaining input must not be trusted
+                    # https://datatracker.ietf.org/doc/html/rfc9112#section-6.1-15
                     raise InvalidHeader("CONTENT-LENGTH", req=self)
                 self.body = Body(ChunkedReader(self, self.unreader))
             elif content_length is not None:
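The hazard named in the N.B. comment above can be seen directly; the header
value here is invented for the demonstration:

    >>> value = 'chunked;name="a,b"'
    >>> [v.strip() for v in value.split(',')]
    ['chunked;name="a', 'b"']

Neither fragment equals a known transfer coding, so parsing falls through to
the strict else branch and the request is rejected with
UnsupportedTransferCoding rather than misframed. That is the trade-off the
added DANGER comment records: unknown codings and split leftovers are
treated the same way.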
From 13f54ed1d382da8791646255d268f190594564de Mon Sep 17 00:00:00 2001
From: "Paul J. Dorn" <pajod@users.noreply.github.com>
Date: Wed, 14 Aug 2024 22:06:54 +0200
Subject: [PATCH 17/28] docs: faq: block arbiter to slow down #2719

---
 docs/source/faq.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/docs/source/faq.rst b/docs/source/faq.rst
index 8c52a4865..e51884e1d 100644
--- a/docs/source/faq.rst
+++ b/docs/source/faq.rst
@@ -109,6 +109,15 @@ threads. However `a work has been started
 .. _worker_class: settings.html#worker-class
 .. _`number of workers`: design.html#how-many-workers
 
+Why are responses delayed on startup/re-exec?
+---------------------------------------------
+
+If workers are competing for resources during WSGI import, the result may be slower
+than sequential startup. Either avoid duplicate work altogether
+via :ref:`preload-app`. Or, if that is not an option, tune the worker spawn sequence by
+adding a delay in the :ref:`pre-fork` hook to sacrifice overall startup completion time
+for reduced time to first request completion.
+
 Why I don't see any logs in the console?
 ----------------------------------------
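A concrete shape for the pre-fork delay suggested in that FAQ entry (a
sketch only; the half-second step is an arbitrary value to tune per
deployment):

    # gunicorn.conf.py
    import time

    def pre_fork(server, worker):
        # Runs in the master before each fork, so each worker starts
        # that much later than the previous one, serializing the
        # expensive import work instead of thrashing CPU and disk.
        time.sleep(0.5)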
From 99cbc818e147eb9381fd83c038beb707fd863dd9 Mon Sep 17 00:00:00 2001
From: Richard Eklycke <richard@eklycke.se>
Date: Sun, 18 Feb 2024 00:29:44 +0100
Subject: [PATCH 18/28] workers/gthread: Remove locks + one event queue +
 general cleanup

The main purpose is to remove complexity from gthread by:

* Removing the lock for handling self._keep and self.poller. This is
  possible since we now do all such manipulation on the main thread
  instead. When a connection is done, it posts a callback through the
  PollableMethodCaller which gets executed on the main thread.

* Having a single event queue (self.poller), as opposed to also
  managing a set of futures. This fixes #3146 (although there are more
  minimal ways of doing it).

There are other more minor things as well:

* Renaming some variables, e.g. self._keep to self.keepalived_conns.
* Remove self-explanatory comments (what the code does, not why).
* Just decide that socket is blocking.
* Use time.monotonic() for timeouts in gthread.

Some complexity has been added to the shutdown sequence, but hopefully
for good reason: it's to make sure that all already accepted
connections are served within the grace period.

---
 gunicorn/workers/gthread.py | 260 ++++++++++++++++--------------------
 1 file changed, 113 insertions(+), 147 deletions(-)

diff --git a/gunicorn/workers/gthread.py b/gunicorn/workers/gthread.py
index 7a23228cd..196759b81 100644
--- a/gunicorn/workers/gthread.py
+++ b/gunicorn/workers/gthread.py
@@ -13,6 +13,7 @@
 from concurrent import futures
 import errno
 import os
+import queue
 import selectors
 import socket
 import ssl
@@ -21,7 +22,6 @@
 from collections import deque
 from datetime import datetime
 from functools import partial
-from threading import RLock
 
 from . import base
 from .. import http
@@ -40,44 +40,64 @@ def __init__(self, cfg, sock, client, server):
         self.timeout = None
         self.parser = None
 
-        self.initialized = False
-
-        # set the socket to non blocking
-        self.sock.setblocking(False)
-
     def init(self):
-        self.initialized = True
-        self.sock.setblocking(True)
-
         if self.parser is None:
             # wrap the socket if needed
             if self.cfg.is_ssl:
                 self.sock = sock.ssl_wrap_socket(self.sock, self.cfg)
 
-            # initialize the parser
             self.parser = http.RequestParser(self.cfg, self.sock, self.client)
 
-    def set_timeout(self):
-        # set the timeout
-        self.timeout = time.time() + self.cfg.keepalive
+    def is_initialized(self):
+        return bool(self.parser)
+
+    def set_keepalive_timeout(self):
+        self.timeout = time.monotonic() + self.cfg.keepalive
 
     def close(self):
         util.close(self.sock)
 
 
+class PollableMethodQueue(object):
+
+    def __init__(self):
+        self.fds = []
+        self.method_queue = None
+
+    def init(self):
+        self.fds = os.pipe()
+        self.method_queue = queue.SimpleQueue()
+
+    def close(self):
+        for fd in self.fds:
+            os.close(fd)
+
+    def get_fd(self):
+        return self.fds[0]
+
+    def defer(self, callback, *args):
+        self.method_queue.put(partial(callback, *args))
+        os.write(self.fds[1], b'0')
+
+    def run_callbacks(self, max_callbacks_at_a_time=10):
+        zeroes = os.read(self.fds[0], max_callbacks_at_a_time)
+        for _ in range(0, len(zeroes)):
+            method = self.method_queue.get()
+            method()
+
+
 class ThreadWorker(base.Worker):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.worker_connections = self.cfg.worker_connections
         self.max_keepalived = self.cfg.worker_connections - self.cfg.threads
-        # initialise the pool
-        self.tpool = None
+        self.thread_pool = None
         self.poller = None
-        self._lock = None
-        self.futures = deque()
-        self._keep = deque()
+        self.keepalived_conns = deque()
         self.nr_conns = 0
+        self.method_queue = PollableMethodQueue()
 
     @classmethod
     def check_config(cls, cfg, log):
@@ -88,100 +108,66 @@ def check_config(cls, cfg, log):
             "Check the number of worker connections and threads.")
 
     def init_process(self):
-        self.tpool = self.get_thread_pool()
+        self.thread_pool = self.get_thread_pool()
         self.poller = selectors.DefaultSelector()
-        self._lock = RLock()
+        self.method_queue.init()
         super().init_process()
 
     def get_thread_pool(self):
         """Override this method to customize how the thread pool is created"""
         return futures.ThreadPoolExecutor(max_workers=self.cfg.threads)
 
-    def handle_quit(self, sig, frame):
+    def handle_exit(self, sig, frame):
         self.alive = False
-        # worker_int callback
-        self.cfg.worker_int(self)
-        self.tpool.shutdown(False)
-        time.sleep(0.1)
-        sys.exit(0)
-
-    def _wrap_future(self, fs, conn):
-        fs.conn = conn
-        self.futures.append(fs)
-        fs.add_done_callback(self.finish_request)
-
-    def enqueue_req(self, conn):
-        conn.init()
-        # submit the connection to a worker
-        fs = self.tpool.submit(self.handle, conn)
-        self._wrap_future(fs, conn)
+        self.method_queue.defer(lambda: None)  # To wake up poller.select()
+
+    def handle_quit(self, sig, frame):
+        self.thread_pool.shutdown(False)
+        super().handle_quit(sig, frame)
+
+    def set_accept_enabled(self, enabled):
+        for sock in self.sockets:
+            if enabled:
+                self.poller.register(sock, selectors.EVENT_READ, self.accept)
+            else:
+                self.poller.unregister(sock)
 
-    def accept(self, server, listener):
+    def accept(self, listener):
         try:
             sock, client = listener.accept()
-            # initialize the connection object
-            conn = TConn(self.cfg, sock, client, server)
-
             self.nr_conns += 1
-            # wait until socket is readable
-            with self._lock:
-                self.poller.register(conn.sock, selectors.EVENT_READ,
-                                     partial(self.on_client_socket_readable, conn))
+            sock.setblocking(True)  # Explicitly set behavior since it differs per OS
+            conn = TConn(self.cfg, sock, client, listener.getsockname())
+
+            self.poller.register(conn.sock, selectors.EVENT_READ,
+                                 partial(self.on_client_socket_readable, conn))
         except OSError as e:
             if e.errno not in (errno.EAGAIN, errno.ECONNABORTED,
                                errno.EWOULDBLOCK):
                 raise
 
     def on_client_socket_readable(self, conn, client):
-        with self._lock:
-            # unregister the client from the poller
-            self.poller.unregister(client)
+        self.poller.unregister(client)
 
-            if conn.initialized:
-                # remove the connection from keepalive
-                try:
-                    self._keep.remove(conn)
-                except ValueError:
-                    # race condition
-                    return
+        if conn.is_initialized():
+            self.keepalived_conns.remove(conn)
+        conn.init()
 
-        # submit the connection to a worker
-        self.enqueue_req(conn)
+        fs = self.thread_pool.submit(self.handle, conn)
+        fs.add_done_callback(
+            lambda fut: self.method_queue.defer(self.finish_request, conn, fut))
 
     def murder_keepalived(self):
-        now = time.time()
-        while True:
-            with self._lock:
-                try:
-                    # remove the connection from the queue
-                    conn = self._keep.popleft()
-                except IndexError:
-                    break
-
-            delta = conn.timeout - now
+        now = time.monotonic()
+        while self.keepalived_conns:
+            delta = self.keepalived_conns[0].timeout - now
             if delta > 0:
-                # add the connection back to the queue
-                with self._lock:
-                    self._keep.appendleft(conn)
                 break
-            else:
-                self.nr_conns -= 1
-                # remove the socket from the poller
-                with self._lock:
-                    try:
-                        self.poller.unregister(conn.sock)
-                    except OSError as e:
-                        if e.errno != errno.EBADF:
-                            raise
-                    except KeyError:
-                        # already removed by the system, continue
-                        pass
-                    except ValueError:
-                        # already removed by the system continue
-                        pass
-
-                # close the socket
-                conn.close()
+
+            conn = self.keepalived_conns.popleft()
+            self.poller.unregister(conn.sock)
+            self.nr_conns -= 1
+            conn.close()
 
     def is_parent_alive(self):
         # If our parent changed then we shut down.
@@ -190,39 +176,23 @@ def is_parent_alive(self):
             return False
         return True
 
+    def wait_for_and_dispatch_events(self, timeout):
+        for key, _ in self.poller.select(timeout):
+            callback = key.data
+            callback(key.fileobj)
+
     def run(self):
-        # init listeners, add them to the event loop
-        for sock in self.sockets:
-            sock.setblocking(False)
-
-            # a race condition during graceful shutdown may make the listener
-            # name unavailable in the request handler so capture it once here
-            server = sock.getsockname()
-            acceptor = partial(self.accept, server)
-            self.poller.register(sock, selectors.EVENT_READ, acceptor)
+        self.set_accept_enabled(True)
+        self.poller.register(self.method_queue.get_fd(),
+                             selectors.EVENT_READ,
+                             self.method_queue.run_callbacks)
 
         while self.alive:
             # notify the arbiter we are alive
             self.notify()
 
-            # can we accept more connections?
-            if self.nr_conns < self.worker_connections:
-                # wait for an event
-                events = self.poller.select(1.0)
-                for key, _ in events:
-                    callback = key.data
-                    callback(key.fileobj)
-
-                # check (but do not wait) for finished requests
-                result = futures.wait(self.futures, timeout=0,
-                                      return_when=futures.FIRST_COMPLETED)
-            else:
-                # wait for a request to finish
-                result = futures.wait(self.futures, timeout=1.0,
-                                      return_when=futures.FIRST_COMPLETED)
-
-            # clean up finished requests
-            for fut in result.done:
-                self.futures.remove(fut)
+            new_connections_accepted = self.nr_conns < self.worker_connections
+            self.wait_for_and_dispatch_events(timeout=1)
 
             if not self.is_parent_alive():
                 break
@@ -230,57 +200,53 @@ def run(self):
             # handle keepalive timeouts
             self.murder_keepalived()
 
-        self.tpool.shutdown(False)
+            new_connections_still_accepted = self.nr_conns < self.worker_connections
+            if new_connections_accepted != new_connections_still_accepted:
+                self.set_accept_enabled(new_connections_still_accepted)
+
+        # Don't accept any new connections, as we're about to shut down
+        if self.nr_conns < self.worker_connections:
+            self.set_accept_enabled(False)
+
+        # ... but try handle all already accepted connections within the grace period
+        graceful_timeout = time.monotonic() + self.cfg.graceful_timeout
+        while self.nr_conns > 0:
+            time_remaining = max(graceful_timeout - time.monotonic(), 0)
+            if time_remaining == 0:
+                break
+            self.wait_for_and_dispatch_events(timeout=time_remaining)
+
+        self.thread_pool.shutdown(wait=False)
         self.poller.close()
+        self.method_queue.close()
 
         for s in self.sockets:
             s.close()
 
-        futures.wait(self.futures, timeout=self.cfg.graceful_timeout)
-
-    def finish_request(self, fs):
-        if fs.cancelled():
-            self.nr_conns -= 1
-            fs.conn.close()
-            return
-
+    def finish_request(self, conn, fs):
         try:
-            (keepalive, conn) = fs.result()
-            # if the connection should be kept alived add it
-            # to the eventloop and record it
+            keepalive = not fs.cancelled() and fs.result()
             if keepalive and self.alive:
-                # flag the socket as non blocked
-                conn.sock.setblocking(False)
-
-                # register the connection
-                conn.set_timeout()
-                with self._lock:
-                    self._keep.append(conn)
-
-                # add the socket to the event loop
-                self.poller.register(conn.sock, selectors.EVENT_READ,
-                                     partial(self.on_client_socket_readable, conn))
+                conn.set_keepalive_timeout()
+                self.keepalived_conns.append(conn)
+                self.poller.register(conn.sock, selectors.EVENT_READ,
+                                     partial(self.on_client_socket_readable, conn))
             else:
                 self.nr_conns -= 1
                 conn.close()
         except Exception:
-            # an exception happened, make sure to close the
-            # socket.
             self.nr_conns -= 1
-            fs.conn.close()
+            conn.close()
 
     def handle(self, conn):
-        keepalive = False
         req = None
         try:
             req = next(conn.parser)
             if not req:
-                return (False, conn)
-
+                return False
             # handle the request
-            keepalive = self.handle_request(req, conn)
-            if keepalive:
-                return (keepalive, conn)
+            return self.handle_request(req, conn)
         except http.errors.NoMoreData as e:
             self.log.debug("Ignored premature client disconnection. %s", e)
@@ -307,7 +273,7 @@ def handle(self, conn):
         except Exception as e:
             self.handle_error(req, conn.sock, conn.client, e)
 
-        return (False, conn)
+        return False
 
     def handle_request(self, req, conn):
         environ = {}
@@ -327,7 +293,7 @@ def handle_request(self, req, conn):
 
         if not self.alive or not self.cfg.keepalive:
             resp.force_close()
-        elif len(self._keep) >= self.max_keepalived:
+        elif len(self.keepalived_conns) >= self.max_keepalived:
             resp.force_close()
 
         respiter = self.wsgi(environ, resp.start_response)
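The PollableMethodQueue above combines two standard ingredients: a
queue.SimpleQueue carrying callables, and a pipe whose read end sits in the
selector, so a write from any thread wakes the main loop. The trick in
isolation, as a standalone sketch independent of gunicorn:

    import os, queue, selectors, threading

    sel = selectors.DefaultSelector()
    r, w = os.pipe()
    jobs = queue.SimpleQueue()
    sel.register(r, selectors.EVENT_READ)

    def defer(fn):
        jobs.put(fn)        # hand the callable to the main thread
        os.write(w, b'0')   # one byte per job wakes select()

    threading.Timer(0.1, defer, [lambda: print('ran on main thread')]).start()

    for _ in sel.select(timeout=5):
        for _ in os.read(r, 10):  # drain up to 10 wakeup bytes
            jobs.get()()          # run the corresponding callbacks

Because only the main thread touches the selector and the keepalive deque,
no lock is needed, which is exactly what lets this patch delete the RLock.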
From 1fcadcb01670e4e4e21c93d5bc8b39897fdadff4 Mon Sep 17 00:00:00 2001
From: "Paul J. Dorn" <pajod@users.noreply.github.com>
Date: Wed, 14 Aug 2024 23:01:38 +0200
Subject: [PATCH 19/28] Revert "let's exception not bubble"

This reverts commit 40232284934c32939c0e4e78caad1987c3773e08.

We use sys.exit. On purpose. We should therefore not be catching
SystemExit.

---
 gunicorn/workers/base_async.py | 2 +-
 gunicorn/workers/sync.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gunicorn/workers/base_async.py b/gunicorn/workers/base_async.py
index 9466d6aaa..082be66fc 100644
--- a/gunicorn/workers/base_async.py
+++ b/gunicorn/workers/base_async.py
@@ -81,7 +81,7 @@ def handle(self, listener, client, addr):
                     self.log.debug("Ignoring socket not connected")
                 else:
                     self.log.debug("Ignoring EPIPE")
-        except BaseException as e:
+        except Exception as e:
             self.handle_error(req, client, addr, e)
         finally:
             util.close(client)
diff --git a/gunicorn/workers/sync.py b/gunicorn/workers/sync.py
index 4c029f912..754ae08fe 100644
--- a/gunicorn/workers/sync.py
+++ b/gunicorn/workers/sync.py
@@ -153,7 +153,7 @@ def handle(self, listener, client, addr):
                 self.log.debug("Ignoring socket not connected")
             else:
                 self.log.debug("Ignoring EPIPE")
-        except BaseException as e:
+        except Exception as e:
             self.handle_error(req, client, addr, e)
         finally:
             util.close(client)
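The distinction this revert restores, shown in isolation (a standalone
sketch): except Exception lets sys.exit() propagate, while except
BaseException would swallow it, because SystemExit derives directly from
BaseException:

    import sys

    def worker_body():
        sys.exit(0)  # how a gunicorn worker normally terminates

    try:
        worker_body()
    except Exception as e:          # does NOT catch SystemExit
        print('app error:', e)
    except BaseException as e:      # reached instead; normally left uncaught
        print('intercepted shutdown:', type(e).__name__)
        raise                       # let the exit proceed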
From 8617c39de9ec527111404d07b3ae9abd2e426d9d Mon Sep 17 00:00:00 2001
From: "Paul J. Dorn" <pajod@users.noreply.github.com>
Date: Wed, 14 Aug 2024 23:47:38 +0200
Subject: [PATCH 20/28] workaround: reintroduce gevent.Timeout handling

This is probably wrong. But less wrong than handling *all*
BaseException. Reports of this happening may be the result of some
*other* async Timeout in the wsgi app bubbling through to us.

Gevent docs promise: "[..] if *exception* is the literal ``False``, the
timeout is still raised, but the context manager suppresses it, so the
code outside the with-block won't see it."

---
 gunicorn/workers/base.py       | 2 ++
 gunicorn/workers/base_async.py | 3 +++
 gunicorn/workers/geventlet.py  | 2 ++
 gunicorn/workers/ggevent.py    | 2 ++
 gunicorn/workers/sync.py       | 2 ++
 5 files changed, 11 insertions(+)

diff --git a/gunicorn/workers/base.py b/gunicorn/workers/base.py
index 93c465c98..1a0176d17 100644
--- a/gunicorn/workers/base.py
+++ b/gunicorn/workers/base.py
@@ -29,6 +29,8 @@
 
 class Worker:
 
+    WORKAROUND_BASE_EXCEPTIONS = ()  # none
+
     SIGNALS = [getattr(signal, "SIG%s" % x) for x in (
         "ABRT HUP QUIT INT TERM USR1 USR2 WINCH CHLD".split()
     )]
diff --git a/gunicorn/workers/base_async.py b/gunicorn/workers/base_async.py
index 082be66fc..bc790912a 100644
--- a/gunicorn/workers/base_async.py
+++ b/gunicorn/workers/base_async.py
@@ -81,6 +81,9 @@ def handle(self, listener, client, addr):
                     self.log.debug("Ignoring socket not connected")
                 else:
                     self.log.debug("Ignoring EPIPE")
+        except self.WORKAROUND_BASE_EXCEPTIONS as e:
+            self.log.warning("Caught async exception (compat workaround). If this is not a bug in your app, please file a report.")
+            self.handle_error(req, client, addr, e)
         except Exception as e:
             self.handle_error(req, client, addr, e)
         finally:
diff --git a/gunicorn/workers/geventlet.py b/gunicorn/workers/geventlet.py
index 087eb61ec..87f9c4e1f 100644
--- a/gunicorn/workers/geventlet.py
+++ b/gunicorn/workers/geventlet.py
@@ -123,6 +123,8 @@ def patch_sendfile():
 
 class EventletWorker(AsyncWorker):
 
+    WORKAROUND_BASE_EXCEPTIONS = (eventlet.Timeout, )
+
     def patch(self):
         hubs.use_hub()
         eventlet.monkey_patch()
diff --git a/gunicorn/workers/ggevent.py b/gunicorn/workers/ggevent.py
index b9b9b4408..538dd13c9 100644
--- a/gunicorn/workers/ggevent.py
+++ b/gunicorn/workers/ggevent.py
@@ -31,6 +31,8 @@
 
 class GeventWorker(AsyncWorker):
 
+    WORKAROUND_BASE_EXCEPTIONS = (gevent.Timeout, )
+
     server_class = None
     wsgi_handler = None
 
diff --git a/gunicorn/workers/sync.py b/gunicorn/workers/sync.py
index 754ae08fe..e972d9c3a 100644
--- a/gunicorn/workers/sync.py
+++ b/gunicorn/workers/sync.py
@@ -110,6 +110,8 @@ def run_for_multiple(self, timeout):
                 return
 
     def run(self):
+        assert len(self.WORKAROUND_BASE_EXCEPTIONS) == 0
+
         # if no timeout is given the worker will never wait and will
         # use the CPU for nothing. This minimal timeout prevent it.
         timeout = self.timeout or 0.5

From ef94875043fea78ff158067f8d6da506fad698ca Mon Sep 17 00:00:00 2001
From: "Paul J. Dorn" <pajod@users.noreply.github.com>
Date: Wed, 14 Aug 2024 23:58:17 +0200
Subject: [PATCH 21/28] style: line break

---
 gunicorn/workers/base_async.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gunicorn/workers/base_async.py b/gunicorn/workers/base_async.py
index bc790912a..bb6d560d6 100644
--- a/gunicorn/workers/base_async.py
+++ b/gunicorn/workers/base_async.py
@@ -82,7 +82,8 @@ def handle(self, listener, client, addr):
                 else:
                     self.log.debug("Ignoring EPIPE")
         except self.WORKAROUND_BASE_EXCEPTIONS as e:
-            self.log.warning("Caught async exception (compat workaround). If this is not a bug in your app, please file a report.")
+            self.log.warning("Caught async exception (compat workaround). "
+                             "If this is not a bug in your app, please file a report.")
             self.handle_error(req, client, addr, e)
         except Exception as e:
             self.handle_error(req, client, addr, e)

From 2b2700d6acfb3b31172d607b2db895aed988fe12 Mon Sep 17 00:00:00 2001
From: "Paul J. Dorn" <pajod@users.noreply.github.com>
Date: Mon, 12 Aug 2024 15:44:03 +0200
Subject: [PATCH 22/28] packaging: include tox.ini in sdist

tox testing works without full git repo, so no obvious harm in aiding
people running it from source distribution

based on setuptools itself doing it, and suggesting so in an example in
docs:
https://setuptools.pypa.io/en/latest/userguide/miscellaneous.html#controlling-files-in-the-distribution
https://github.com/pypa/setuptools/commit/551eb7f444dea2cb15cd70093d37d49b42a49d07

beware of .gitattributes / MANIFEST.in precedence when using
setuptools-scm
---
 MANIFEST.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MANIFEST.in b/MANIFEST.in
index af8aae7e7..05ff5bd1a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -5,6 +5,7 @@ include README.rst
 include THANKS
 include requirements_dev.txt
 include requirements_test.txt
+include tox.ini
 recursive-include tests *
 recursive-include examples *
 recursive-include docs *

From 0d325bdd4038961ac4a70630be4a03986c0be83f Mon Sep 17 00:00:00 2001
From: "Paul J. Dorn" <pajod@users.noreply.github.com>
Date: Mon, 12 Aug 2024 16:00:56 +0200
Subject: [PATCH 23/28] packaging: add .pylintrc to sdist

undo when .pylintrc is moved into pyproject.toml (which is only
supported since pylint 2.5.3+ @ 2020-06-08)
---
 MANIFEST.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MANIFEST.in b/MANIFEST.in
index 05ff5bd1a..1423168b5 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -6,6 +6,7 @@ include THANKS
 include requirements_dev.txt
 include requirements_test.txt
 include tox.ini
+include .pylintrc
 recursive-include tests *
 recursive-include examples *
 recursive-include docs *

From 866fd687a4a8213b71cb8a0c81db4def273f5098 Mon Sep 17 00:00:00 2001
From: Richard Eklycke <richard@eklycke.se>
Date: Fri, 2 Feb 2024 11:28:54 +0100
Subject: [PATCH 24/28] arbiter: Handle SIGCHLD in normal/main process context

... as opposed to in signal context. This is beneficial, since it means
that we can, in a signal safe way, print messages about why e.g. a
worker stopped its execution.

And since handle_sigchld() logs what it does anyway, don't bother
printing out that we're handling SIGCHLD. If workers are killed at
rapid pace, we won't get as many SIGCHLD as there are workers killed
anyway.
--- gunicorn/arbiter.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index d145ce848..f2bd93880 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -40,13 +40,10 @@ class Arbiter: # I love dynamic languages SIG_QUEUE = [] - SIGNALS = [getattr(signal, "SIG%s" % x) - for x in "HUP QUIT INT TERM TTIN TTOU USR1 USR2 WINCH".split()] + SIGNALS = [getattr(signal.Signals, "SIG%s" % x) + for x in "CHLD HUP QUIT INT TERM TTIN TTOU USR1 USR2 WINCH".split()] # pylint: disable=used-before-assignment - SIG_NAMES = dict( - (getattr(signal, name), name[3:].lower()) for name in dir(signal) - if name[:3] == "SIG" and name[3] != "_" - ) + SIG_NAMES = dict((sig, sig.name[3:].lower()) for sig in SIGNALS) def __init__(self, app): os.environ["SERVER_SOFTWARE"] = SERVER_SOFTWARE @@ -186,7 +183,6 @@ def init_signals(self): # initialize all signals for s in self.SIGNALS: signal.signal(s, self.signal) - signal.signal(signal.SIGCHLD, self.handle_chld) def signal(self, sig, frame): if len(self.SIG_QUEUE) < 5: @@ -220,7 +216,8 @@ def run(self): if not handler: self.log.error("Unhandled signal: %s", signame) continue - self.log.info("Handling signal: %s", signame) + if sig != signal.SIGCHLD: + self.log.info("Handling signal: %s", signame) handler() self.wakeup() except (StopIteration, KeyboardInterrupt): @@ -237,10 +234,9 @@ def run(self): self.pidfile.unlink() sys.exit(-1) - def handle_chld(self, sig, frame): + def handle_chld(self): "SIGCHLD handling" self.reap_workers() - self.wakeup() def handle_hup(self): """\ @@ -392,7 +388,10 @@ def stop(self, graceful=True): # instruct the workers to exit self.kill_workers(sig) # wait until the graceful timeout - while self.WORKERS and time.time() < limit: + while True: + self.reap_workers() + if not self.WORKERS or time.time() >= limit: + break time.sleep(0.1) self.kill_workers(signal.SIGKILL) From ceec745f95da7e9c4a37b8edc828c39f50ca41cc Mon Sep 17 00:00:00 2001 From: Richard Eklycke <richard@eklycke.se> Date: Fri, 2 Feb 2024 12:36:36 +0100 Subject: [PATCH 25/28] arbiter: Remove PIPE and only use SIG_QUEUE instead Since we can use something from queue.*, we can make it blocking as well, removing the need for two different data structures. --- gunicorn/arbiter.py | 90 +++++++++++---------------------------------- 1 file changed, 22 insertions(+), 68 deletions(-) diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index f2bd93880..c6995bfdc 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -4,7 +4,6 @@ import errno import os import random -import select import signal import sys import time @@ -36,10 +35,10 @@ class Arbiter: LISTENERS = [] WORKERS = {} - PIPE = [] # I love dynamic languages - SIG_QUEUE = [] + SIG_QUEUE = queue.SimpleQueue() + # pylint: disable=used-before-assignment SIGNALS = [getattr(signal.Signals, "SIG%s" % x) for x in "CHLD HUP QUIT INT TERM TTIN TTOU USR1 USR2 WINCH".split()] # pylint: disable=used-before-assignment @@ -168,16 +167,6 @@ def init_signals(self): Initialize master signal handling. Most of the signals are queued. Child signals only wake up the master. 
""" - # close old PIPE - for p in self.PIPE: - os.close(p) - - # initialize the pipe - self.PIPE = pair = os.pipe() - for p in pair: - util.set_non_blocking(p) - util.close_on_exec(p) - self.log.close_on_exec() # initialize all signals @@ -185,9 +174,8 @@ def init_signals(self): signal.signal(s, self.signal) def signal(self, sig, frame): - if len(self.SIG_QUEUE) < 5: - self.SIG_QUEUE.append(sig) - self.wakeup() + """ Note: Signal handler! No logging allowed. """ + self.SIG_QUEUE.put(sig) def run(self): "Main master loop." @@ -200,26 +188,23 @@ def run(self): while True: self.maybe_promote_master() - sig = self.SIG_QUEUE.pop(0) if self.SIG_QUEUE else None - if sig is None: - self.sleep() - self.murder_workers() - self.manage_workers() - continue - - if sig not in self.SIG_NAMES: - self.log.info("Ignoring unknown signal: %s", sig) - continue + try: + sig = self.SIG_QUEUE.get(timeout=1) + except queue.Empty: + sig = None + + if sig: + signame = self.SIG_NAMES.get(sig) + handler = getattr(self, "handle_%s" % signame, None) + if not handler: + self.log.error("Unhandled signal: %s", signame) + continue + if sig != signal.SIGCHLD: + self.log.info("Handling signal: %s", signame) + handler() - signame = self.SIG_NAMES.get(sig) - handler = getattr(self, "handle_%s" % signame, None) - if not handler: - self.log.error("Unhandled signal: %s", signame) - continue - if sig != signal.SIGCHLD: - self.log.info("Handling signal: %s", signame) - handler() - self.wakeup() + self.murder_workers() + self.manage_workers() except (StopIteration, KeyboardInterrupt): self.halt() except HaltServer as inst: @@ -323,16 +308,6 @@ def maybe_promote_master(self): # reset proctitle util._setproctitle("master [%s]" % self.proc_name) - def wakeup(self): - """\ - Wake up the arbiter by writing to the PIPE - """ - try: - os.write(self.PIPE[1], b'.') - except OSError as e: - if e.errno not in [errno.EAGAIN, errno.EINTR]: - raise - def halt(self, reason=None, exit_status=0): """ halt arbiter """ self.stop() @@ -347,25 +322,6 @@ def halt(self, reason=None, exit_status=0): self.cfg.on_exit(self) sys.exit(exit_status) - def sleep(self): - """\ - Sleep until PIPE is readable or we timeout. - A readable PIPE means a signal occurred. - """ - try: - ready = select.select([self.PIPE[0]], [], [], 1.0) - if not ready[0]: - return - while os.read(self.PIPE[0], 1): - pass - except OSError as e: - # TODO: select.error is a subclass of OSError since Python 3.3. - error_number = getattr(e, 'errno', e.args[0]) - if error_number not in [errno.EAGAIN, errno.EINTR]: - raise - except KeyboardInterrupt: - sys.exit() - def stop(self, graceful=True): """\ Stop workers @@ -388,11 +344,9 @@ def stop(self, graceful=True): # instruct the workers to exit self.kill_workers(sig) # wait until the graceful timeout - while True: - self.reap_workers() - if not self.WORKERS or time.time() >= limit: - break + while self.WORKERS and time.time() < limit: time.sleep(0.1) + self.reap_workers() self.kill_workers(signal.SIGKILL) From 24087da59e141f87a240d80b0d0bd02900559c1a Mon Sep 17 00:00:00 2001 From: Richard Eklycke <richard@eklycke.se> Date: Thu, 1 Feb 2024 22:19:05 +0100 Subject: [PATCH 26/28] arbiter: Use waitpid() facilities to handle worker exit status This change is meant to handle the return value of waitpid() in a way that is more in line with the man page of said syscall. The changes can be summarized as follows: * Use os.WIFEXITED and os.WIFSIGNALED to determine what caused waitpid() to return, and exactly how a worker may have exited. 
* In case of normal termination, use os.WEXITSTATUS() to read the exit
  status (instead of using a hand-rolled bit shift). A redundant log
  was removed in this code path.

* In case of termination by a signal, use os.WTERMSIG() to determine
  the signal which caused the worker to terminate. This was buggy
  before, since the WCOREFLAG (0x80) could cause e.g. a SIGSEGV
  (code 11) to be reported as "code 139", meaning "code (0x80 | 11)".

* Since waitpid() isn't called with WSTOPPED or WCONTINUED, there's no
  need to have any os.WIFSTOPPED or os.WIFCONTINUED handling.
---
 gunicorn/arbiter.py | 59 +++++++++++++++++++++------------------------
 1 file changed, 27 insertions(+), 32 deletions(-)

diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py
index c6995bfdc..1bdb66902 100644
--- a/gunicorn/arbiter.py
+++ b/gunicorn/arbiter.py
@@ -8,6 +8,7 @@
 import sys
 import time
 import traceback
+import queue
 
 from gunicorn.errors import HaltServer, AppImportError
 from gunicorn.pidfile import Pidfile
@@ -471,44 +472,38 @@ def reap_workers(self):
                     break
                 if self.reexec_pid == wpid:
                     self.reexec_pid = 0
-                else:
-                    # A worker was terminated. If the termination reason was
-                    # that it could not boot, we'll shut it down to avoid
-                    # infinite start/stop cycles.
-                    exitcode = status >> 8
-                    if exitcode != 0:
-                        self.log.error('Worker (pid:%s) exited with code %s', wpid, exitcode)
+                    continue
+
+                if os.WIFEXITED(status):
+                    # A worker was normally terminated. If the termination
+                    # reason was that it could not boot, we'll halt the server
+                    # to avoid infinite start/stop cycles.
+                    exitcode = os.WEXITSTATUS(status)
+                    log = self.log.error if exitcode != 0 else self.log.debug
+                    log('Worker (pid:%s) exited with code %s', wpid, exitcode)
                     if exitcode == self.WORKER_BOOT_ERROR:
                         reason = "Worker failed to boot."
                         raise HaltServer(reason, self.WORKER_BOOT_ERROR)
                     if exitcode == self.APP_LOAD_ERROR:
                         reason = "App failed to load."
                         raise HaltServer(reason, self.APP_LOAD_ERROR)
-
-                    if exitcode > 0:
-                        # If the exit code of the worker is greater than 0,
-                        # let the user know.
-                        self.log.error("Worker (pid:%s) exited with code %s.",
-                                       wpid, exitcode)
-                    elif status > 0:
-                        # If the exit code of the worker is 0 and the status
-                        # is greater than 0, then it was most likely killed
-                        # via a signal.
-                        try:
-                            sig_name = signal.Signals(status).name
-                        except ValueError:
-                            sig_name = "code {}".format(status)
-                        msg = "Worker (pid:{}) was sent {}!".format(
-                            wpid, sig_name)
-
-                        # Additional hint for SIGKILL
-                        if status == signal.SIGKILL:
-                            msg += " Perhaps out of memory?"
-                        self.log.error(msg)
-
-                    worker = self.WORKERS.pop(wpid, None)
-                    if not worker:
-                        continue
+                elif os.WIFSIGNALED(status):
+                    # A worker was terminated by a signal.
+                    sig = os.WTERMSIG(status)
+                    try:
+                        sig_name = signal.Signals(sig).name
+                    except ValueError:
+                        sig_name = "signal {}".format(sig)
+                    msg = "Worker (pid:{}) was terminated by {}!".format(
+                        wpid, sig_name)
+
+                    # Additional hint for SIGKILL
+                    if sig == signal.SIGKILL:
+                        msg += " Perhaps out of memory?"
+                    self.log.error(msg)
+
+                worker = self.WORKERS.pop(wpid, None)
+                if worker:
                     worker.tmp.close()
                     self.cfg.child_exit(self, worker)
         except OSError as e:

From d74394b96a7a9afd1b0b318c4c5965e0930d585c Mon Sep 17 00:00:00 2001
From: Richard Eklycke <richard@eklycke.se>
Date: Sat, 3 Feb 2024 17:08:16 +0100
Subject: [PATCH 27/28] arbiter: Reinstall SIGCHLD as required by some UNIXes

According to the Python signal documentation[1], SIGCHLD is handled
differently from other signals.
Specifically, if the underlying implementation resets the SIGCHLD
signal handler, then Python won't reinstall it (as it does for other
signals). This behavior doesn't seem to occur on either Linux or Mac,
but perhaps one could argue that it's good practice anyway.

[1] https://docs.python.org/3/library/signal.html
---
 gunicorn/arbiter.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py
index 1bdb66902..871e42559 100644
--- a/gunicorn/arbiter.py
+++ b/gunicorn/arbiter.py
@@ -9,6 +9,7 @@
 import time
 import traceback
 import queue
+import socket
 
 from gunicorn.errors import HaltServer, AppImportError
 from gunicorn.pidfile import Pidfile
@@ -178,6 +179,10 @@ def signal(self, sig, frame):
         """ Note: Signal handler! No logging allowed. """
         self.SIG_QUEUE.put(sig)
 
+        # Some UNIXes require SIGCHLD to be reinstalled, see python signal docs
+        if sig == signal.SIGCHLD:
+            signal.signal(sig, self.signal)
+
     def run(self):
         "Main master loop."
         self.start()

From 717fbc2bbd9406930a804251f814f44c32495599 Mon Sep 17 00:00:00 2001
From: Richard Eklycke <richard@eklycke.se>
Date: Sun, 4 Feb 2024 23:06:54 +0100
Subject: [PATCH 28/28] arbiter: clean up main loop

* Look up handlers in __init__() to trigger a run-time error early if
  something is wrong.

* Since we now know that all handlers exist, we can simplify the main
  loop in the arbiter: signal dispatch becomes a direct dictionary
  lookup. So after this commit, the signal queue is only used to
  deliver which signal was sent.
---
 gunicorn/arbiter.py | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py
index 871e42559..344ed2d12 100644
--- a/gunicorn/arbiter.py
+++ b/gunicorn/arbiter.py
@@ -1,6 +1,7 @@
 #
 # This file is part of gunicorn released under the MIT license.
 # See the NOTICE for more information.
+import queue
 import errno
 import os
 import random
@@ -8,8 +9,6 @@
 import sys
 import time
 import traceback
-import queue
-import socket
 
 from gunicorn.errors import HaltServer, AppImportError
 from gunicorn.pidfile import Pidfile
@@ -43,8 +42,6 @@ class Arbiter:
     # pylint: disable=used-before-assignment
     SIGNALS = [getattr(signal.Signals, "SIG%s" % x)
               for x in "CHLD HUP QUIT INT TERM TTIN TTOU USR1 USR2 WINCH".split()]
-    # pylint: disable=used-before-assignment
-    SIG_NAMES = dict((sig, sig.name[3:].lower()) for sig in SIGNALS)
 
     def __init__(self, app):
         os.environ["SERVER_SOFTWARE"] = SERVER_SOFTWARE
@@ -74,6 +71,11 @@ def __init__(self, app):
             0: sys.executable
         }
 
+        self.SIG_HANDLERS = dict(
+            (sig, getattr(self, "handle_%s" % sig.name[3:].lower()))
+            for sig in self.SIGNALS
+        )
+
     def _get_num_workers(self):
         return self._num_workers
 
@@ -196,18 +198,11 @@ def run(self):
 
                 try:
                     sig = self.SIG_QUEUE.get(timeout=1)
-                except queue.Empty:
-                    sig = None
-
-                if sig:
-                    signame = self.SIG_NAMES.get(sig)
-                    handler = getattr(self, "handle_%s" % signame, None)
-                    if not handler:
-                        self.log.error("Unhandled signal: %s", signame)
-                        continue
                     if sig != signal.SIGCHLD:
-                        self.log.info("Handling signal: %s", signame)
-                    handler()
+                        self.log.info("Handling signal: %s", signal.Signals(sig).name)
+                    self.SIG_HANDLERS[sig]()
+                except queue.Empty:
+                    pass
 
                 self.murder_workers()
                 self.manage_workers()
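
A note on the queue-based delivery adopted in PATCH 25: queue.SimpleQueue
is used because its put() is documented as reentrant, making it safe to
call from inside a signal handler, unlike queue.Queue, whose put() takes
locks. A minimal self-contained sketch of the pattern outside gunicorn
(the handler and loop below are illustrative, not gunicorn API):

    import queue
    import signal

    SIG_QUEUE = queue.SimpleQueue()  # put() is reentrant: handler-safe

    def on_signal(signum, frame):
        # Signal handler: record the signal and return. No logging, no locks.
        SIG_QUEUE.put(signum)

    signal.signal(signal.SIGTERM, on_signal)  # Unix signals assumed
    signal.signal(signal.SIGHUP, on_signal)

    running = True
    while running:
        try:
            # A blocking get() with a timeout replaces select() on a wake-up
            # pipe: the loop still ticks once per second for housekeeping.
            sig = SIG_QUEUE.get(timeout=1)
            print("handling", signal.Signals(sig).name)
            if sig == signal.SIGTERM:
                running = False
        except queue.Empty:
            pass  # timed out: fall through to periodic work
        # periodic housekeeping (murder_workers/manage_workers) goes here

This is also why the patch moves murder_workers()/manage_workers() to the
bottom of the loop: they now run once per iteration whether or not a
signal arrived.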
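The os.WIF*/os.W* helpers used in PATCH 26 mirror the macros described in
waitpid(2). A small sketch of why the old `status >> 8` decoding was
misleading; the child below deliberately dies from SIGSEGV, so the raw
status holds the signal number (11), possibly OR'ed with WCOREFLAG (0x80,
giving 139) when a core file is produced:

    import os
    import signal
    import sys

    pid = os.fork()
    if pid == 0:
        os.kill(os.getpid(), signal.SIGSEGV)  # child: terminate via signal
        sys.exit(0)  # never reached

    _, status = os.waitpid(pid, 0)

    # Naive decoding: the exit-status byte is 0 for a signal death, while
    # the raw status reads 11 or 139 depending on the core-dump flag.
    print("status >> 8 =", status >> 8, "| raw status =", status)

    # waitpid(2)-style decoding, as adopted by the patch:
    if os.WIFEXITED(status):
        print("exited with code", os.WEXITSTATUS(status))
    elif os.WIFSIGNALED(status):
        print("terminated by", signal.Signals(os.WTERMSIG(status)).name,
              "| core dumped:", os.WCOREDUMP(status))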
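The reinstallation in PATCH 27 guards against System V signal semantics,
under which delivering a caught signal can reset its disposition to
SIG_DFL. A minimal sketch of the defensive pattern, with a hypothetical
reaping handler (harmless on platforms where CPython already keeps the
handler installed):

    import os
    import signal

    def on_sigchld(signum, frame):
        # Reap every child that has exited, without blocking.
        while True:
            try:
                pid, _status = os.waitpid(-1, os.WNOHANG)
            except ChildProcessError:
                break  # no children remain
            if pid == 0:
                break  # children exist, but none have exited yet
        # Re-arm the handler in case the platform reset it to SIG_DFL
        # when the signal was delivered.
        signal.signal(signal.SIGCHLD, on_sigchld)

    signal.signal(signal.SIGCHLD, on_sigchld)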
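The handler table built in __init__() by PATCH 28 trades getattr() with a
None default for a fail-fast lookup: a missing handle_* method now raises
at construction time rather than when the signal finally arrives.
Roughly, with a stripped-down stand-in class (the handle_* bodies are
placeholders, not gunicorn's real handlers):

    import signal

    class MiniArbiter:
        SIGNALS = [getattr(signal.Signals, "SIG%s" % x)
                   for x in "CHLD HUP TERM".split()]

        def __init__(self):
            # getattr() without a default raises AttributeError here,
            # immediately, if any handle_<name> method is missing.
            self.SIG_HANDLERS = {
                sig: getattr(self, "handle_%s" % sig.name[3:].lower())
                for sig in self.SIGNALS
            }

        def handle_chld(self):
            print("reap children")

        def handle_hup(self):
            print("reload configuration")

        def handle_term(self):
            print("shut down")

    MiniArbiter().SIG_HANDLERS[signal.Signals.SIGTERM]()  # prints: shut down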