diff --git a/cloudinit/config/cc_ansible.py b/cloudinit/config/cc_ansible.py index 5fb737429c3..c9d1ead23f5 100644 --- a/cloudinit/config/cc_ansible.py +++ b/cloudinit/config/cc_ansible.py @@ -9,7 +9,7 @@ from copy import deepcopy from typing import Optional -from cloudinit import lifecycle, subp +from cloudinit import lifecycle, signal_handler, subp from cloudinit.cloud import Cloud from cloudinit.config import Config from cloudinit.config.schema import MetaSchema @@ -63,7 +63,8 @@ def do_as(self, command: list, **kwargs): return self.distro.do_as(command, self.run_user, **kwargs) def subp(self, command, **kwargs): - return subp.subp(command, update_env=self.env, **kwargs) + with signal_handler.suspend_crash(): + return subp.subp(command, update_env=self.env, **kwargs) @abc.abstractmethod def is_installed(self): diff --git a/cloudinit/config/cc_bootcmd.py b/cloudinit/config/cc_bootcmd.py index b21728f73fe..4e4c113af88 100644 --- a/cloudinit/config/cc_bootcmd.py +++ b/cloudinit/config/cc_bootcmd.py @@ -11,7 +11,7 @@ import logging -from cloudinit import subp, temp_utils, util +from cloudinit import signal_handler, subp, temp_utils, util from cloudinit.cloud import Cloud from cloudinit.config import Config from cloudinit.config.schema import MetaSchema @@ -50,7 +50,10 @@ def handle(name: str, cfg: Config, cloud: Cloud, args: list) -> None: try: iid = cloud.get_instance_id() env = {"INSTANCE_ID": str(iid)} if iid else {} - subp.subp(["/bin/sh", tmpf.name], update_env=env, capture=False) + with signal_handler.suspend_crash(): + subp.subp( + ["/bin/sh", tmpf.name], update_env=env, capture=False + ) except Exception: util.logexc(LOG, "Failed to run bootcmd module %s", name) raise diff --git a/cloudinit/config/cc_package_update_upgrade_install.py b/cloudinit/config/cc_package_update_upgrade_install.py index 3aa94e0a917..0dd4f89f5f8 100644 --- a/cloudinit/config/cc_package_update_upgrade_install.py +++ b/cloudinit/config/cc_package_update_upgrade_install.py @@ -10,7 +10,7 @@ import os import time -from cloudinit import subp, util +from cloudinit import signal_handler, subp, util from cloudinit.cloud import Cloud from cloudinit.config import Config from cloudinit.config.schema import MetaSchema @@ -48,7 +48,10 @@ def _fire_reboot( wait_attempts: int = 6, initial_sleep: int = 1, backoff: int = 2 ): """Run a reboot command and panic if it doesn't happen fast enough.""" - subp.subp(REBOOT_CMD) + # systemd will kill cloud-init with a signal + # this is expected so don't behave as if this is a failure state + with signal_handler.suspend_crash(): + subp.subp(REBOOT_CMD) start = time.monotonic() wait_time = initial_sleep for _i in range(wait_attempts): @@ -106,8 +109,9 @@ def handle(name: str, cfg: Config, cloud: Cloud, args: list) -> None: break if (upgrade or pkglist) and reboot_if_required and reboot_fn_exists: try: - LOG.warning( - "Rebooting after upgrade or install per %s", reboot_marker + LOG.info( + "***WARNING*** Rebooting after upgrade or install per %s", + reboot_marker, ) # Flush the above warning + anything else out... flush_loggers(LOG) diff --git a/cloudinit/config/cc_power_state_change.py b/cloudinit/config/cc_power_state_change.py index 1e3dda1d666..d0640d5111d 100644 --- a/cloudinit/config/cc_power_state_change.py +++ b/cloudinit/config/cc_power_state_change.py @@ -13,7 +13,7 @@ import subprocess import time -from cloudinit import subp, util +from cloudinit import signal_handler, subp, util from cloudinit.cloud import Cloud from cloudinit.config import Config from cloudinit.config.schema import MetaSchema @@ -217,4 +217,7 @@ def fatal(msg): except Exception as e: fatal("Unexpected Exception when checking condition: %s" % e) - func(*args) + # systemd could kill this process with a signal before it exits + # this is expected, so don't crash + with signal_handler.suspend_crash(): + func(*args) diff --git a/cloudinit/signal_handler.py b/cloudinit/signal_handler.py index c5e490bb6b0..51a7610c008 100644 --- a/cloudinit/signal_handler.py +++ b/cloudinit/signal_handler.py @@ -5,28 +5,64 @@ # Author: Joshua Harlow # # This file is part of cloud-init. See LICENSE file for license information. +import contextlib import inspect import logging import signal import sys +import threading +import types from io import StringIO +from typing import Callable, Dict, Final, NamedTuple, Union from cloudinit import version as vr from cloudinit.log import log_util LOG = logging.getLogger(__name__) - -BACK_FRAME_TRACE_DEPTH = 3 -EXIT_FOR = { - signal.SIGINT: ("Cloud-init %(version)s received SIGINT, exiting...", 1), - signal.SIGTERM: ("Cloud-init %(version)s received SIGTERM, exiting...", 1), - # Can't be caught... - # signal.SIGKILL: ('Cloud-init killed, exiting...', 1), - signal.SIGABRT: ("Cloud-init %(version)s received SIGABRT, exiting...", 1), +SIG_MESSAGE: Final = "Cloud-init {} received {}, exiting\n" +BACK_FRAME_TRACE_DEPTH: Final = 3 +SIGNALS: Final[Dict[int, str]] = { + signal.SIGINT: "Cloud-init %(version)s received SIGINT, exiting", + signal.SIGTERM: "Cloud-init %(version)s received SIGTERM, exiting", + signal.SIGABRT: "Cloud-init %(version)s received SIGABRT, exiting", } +class ExitBehavior(NamedTuple): + exit_code: int + log_level: int + + +SIGNAL_EXIT_BEHAVIOR_CRASH: Final = ExitBehavior(1, logging.ERROR) +SIGNAL_EXIT_BEHAVIOR_QUIET: Final = ExitBehavior(0, logging.INFO) +_SIGNAL_EXIT_BEHAVIOR = SIGNAL_EXIT_BEHAVIOR_CRASH +_SUSPEND_WRITE_LOCK = threading.RLock() + + +def inspect_handler(sig: Union[int, Callable, None]) -> None: + """inspect_handler() logs signal handler state""" + if callable(sig): + # only produce a log when the signal handler isn't in the expected + # default state + if not isinstance(sig, types.BuiltinFunctionType): + LOG.info("Signal state [%s] - previously custom handler.", sig) + elif sig == signal.SIG_IGN: + LOG.info("Signal state [SIG_IGN] - previously ignored.") + elif sig is None: + LOG.info("Signal state [None] - previously not installed from Python.") + elif sig == signal.SIG_DFL: + LOG.info( + "Signal state [%s] - default way of handling signal was " + "previously in use.", + sig, + ) + else: + # this should never happen, unless something in Python changes + # https://docs.python.org/3/library/signal.html#signal.getsignal + LOG.warning("Signal state [%s(%s)] - unknown", type(sig), sig) + + def _pprint_frame(frame, depth, max_depth, contents): if depth > max_depth or not frame: return @@ -39,18 +75,40 @@ def _pprint_frame(frame, depth, max_depth, contents): def _handle_exit(signum, frame): - (msg, rc) = EXIT_FOR[signum] - msg = msg % ({"version": vr.version_string()}) - contents = StringIO() - contents.write("%s\n" % (msg)) + # in practice we always receive a Signals object but int is possible + name = signum.name if isinstance(signum, signal.Signals) else signum + contents = StringIO(SIG_MESSAGE.format(vr.version_string(), name)) _pprint_frame(frame, 1, BACK_FRAME_TRACE_DEPTH, contents) - log_util.multi_log(contents.getvalue(), log=LOG, log_level=logging.ERROR) - sys.exit(rc) + log_util.multi_log( + contents.getvalue(), log=LOG, log_level=_SIGNAL_EXIT_BEHAVIOR.log_level + ) + sys.exit(_SIGNAL_EXIT_BEHAVIOR.exit_code) def attach_handlers(): + """attach cloud-init's handlers""" sigs_attached = 0 - for signum in EXIT_FOR.keys(): - signal.signal(signum, _handle_exit) - sigs_attached += len(EXIT_FOR) + for signum in SIGNALS.keys(): + inspect_handler(signal.signal(signum, _handle_exit)) + sigs_attached += len(SIGNALS) return sigs_attached + + +@contextlib.contextmanager +def suspend_crash(): + """suspend_crash() allows signals to be received without exiting 1 + + This allow signal handling without a crash where it is expected. The + call stack is still printed if signal is received during this context, but + the return code is 0 and no traceback is printed. + + Threadsafe. + """ + global _SIGNAL_EXIT_BEHAVIOR + + # If multiple threads simultaneously were to modify this + # global state, this function would not behave as expected. + with _SUSPEND_WRITE_LOCK: + _SIGNAL_EXIT_BEHAVIOR = SIGNAL_EXIT_BEHAVIOR_QUIET + yield + _SIGNAL_EXIT_BEHAVIOR = SIGNAL_EXIT_BEHAVIOR_CRASH diff --git a/cloudinit/subp.py b/cloudinit/subp.py index eebf009d4f0..07148f17971 100644 --- a/cloudinit/subp.py +++ b/cloudinit/subp.py @@ -9,7 +9,7 @@ from io import TextIOWrapper from typing import List, Optional, Union -from cloudinit import performance +from cloudinit import performance, signal_handler LOG = logging.getLogger(__name__) @@ -368,7 +368,8 @@ def runparts(dirp, skip_no_exist=True, exe_prefix=None): if is_exe(exe_path): attempted.append(exe_path) try: - subp(prefix + [exe_path], capture=False) + with signal_handler.suspend_crash(): + subp(prefix + [exe_path], capture=False) except ProcessExecutionError as e: LOG.debug(e) failed.append(exe_name) diff --git a/tests/integration_tests/integration_settings.py b/tests/integration_tests/integration_settings.py index fb736abf5f7..300b4690cd6 100644 --- a/tests/integration_tests/integration_settings.py +++ b/tests/integration_tests/integration_settings.py @@ -81,7 +81,7 @@ # Install from a PPA. It MUST start with 'ppa:' # # A path to a valid package to be uploaded and installed -CLOUD_INIT_SOURCE = "NONE" +CLOUD_INIT_SOURCE = "IN_PLACE" # cloud-init metapackage to install # Examples: cloud-init, cloud-init-base, cloud-init-smart-os diff --git a/tests/integration_tests/modules/test_power_state_change.py b/tests/integration_tests/modules/test_power_state_change.py index 35180e223f9..4fd483fad50 100644 --- a/tests/integration_tests/modules/test_power_state_change.py +++ b/tests/integration_tests/modules/test_power_state_change.py @@ -3,6 +3,7 @@ Test that the power state config options work as expected. """ +import re import time import pytest @@ -52,7 +53,6 @@ def _can_connect(instance): # This test is marked unstable because even though it should be able to # run anywhere, I can only get it to run in an lxd container, and even then # occasionally some timing issues will crop up. -@pytest.mark.unstable @pytest.mark.skipif(not IS_UBUNTU, reason="Only ever tested on Ubuntu") @pytest.mark.skipif( PLATFORM != "lxd_container", @@ -64,7 +64,7 @@ class TestPowerChange: [ ("poweroff", "now", "10", "will execute: shutdown -P now msg"), ("reboot", "now", "0", "will execute: shutdown -r now msg"), - ("halt", "+1", "0", "will execute: shutdown -H +1 msg"), + ("halt", "+1", "0", re.escape("will execute: shutdown -H +1 msg")), ], ) def test_poweroff( diff --git a/tests/unittests/config/test_cc_package_update_upgrade_install.py b/tests/unittests/config/test_cc_package_update_upgrade_install.py index 1cc82d71fa7..64f96d6705d 100644 --- a/tests/unittests/config/test_cc_package_update_upgrade_install.py +++ b/tests/unittests/config/test_cc_package_update_upgrade_install.py @@ -121,7 +121,7 @@ def _isfile(filename: str): # subp and not really rebooting the system subp_call = ["/sbin/reboot"] - caplog.set_level(logging.WARNING) + caplog.set_level(logging.INFO) with mock.patch( "cloudinit.subp.subp", return_value=SubpResult("{}", "fakeerr") ) as m_subp: diff --git a/tests/unittests/test_signal_handler.py b/tests/unittests/test_signal_handler.py new file mode 100644 index 00000000000..bb162799f7d --- /dev/null +++ b/tests/unittests/test_signal_handler.py @@ -0,0 +1,51 @@ +"""cloudinit.signal_handler tests""" + +import inspect +import signal +from unittest.mock import Mock, patch + +import pytest + +from cloudinit import signal_handler + +REENTRANT = "reentrant" + + +class TestSignalHandler: + + @pytest.mark.parametrize( + "m_args", + [ + (signal.SIGINT, inspect.currentframe()), + (9, None), + (signal.SIGTERM, None), + (1, inspect.currentframe()), + ], + ) + @pytest.mark.parametrize( + "m_suspended", + [ + (REENTRANT, 0), + (True, 0), + (False, 1), + ], + ) + def test_suspend_signal(self, m_args, m_suspended): + """suspend_crash should prevent crashing (exit 1) on signal + + otherwise cloud-init should exit 1 + """ + sig, frame = m_args + suspended, rc = m_suspended + + with patch.object(signal_handler.sys, "exit", Mock()) as m_exit: + if suspended is True: + with signal_handler.suspend_crash(): + signal_handler._handle_exit(sig, frame) + elif suspended == REENTRANT: + with signal_handler.suspend_crash(): + with signal_handler.suspend_crash(): + signal_handler._handle_exit(sig, frame) + else: + signal_handler._handle_exit(sig, frame) + m_exit.assert_called_with(rc)