Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3006.x] Fix salt-ssh retcode reporting #64576

Merged
merged 5 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/64575.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed salt-ssh stacktrace when retcode is not an integer
1 change: 1 addition & 0 deletions changelog/64588.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed SSH shell seldomly fails to report any exit code
17 changes: 16 additions & 1 deletion salt/client/ssh/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,14 +556,26 @@ def handle_routine(self, que, opts, host, target, mine=False):
)
ret = {"id": single.id}
stdout, stderr, retcode = single.run()
try:
retcode = int(retcode)
except (TypeError, ValueError):
log.warning(f"Got an invalid retcode for host '{host}': '{retcode}'")
retcode = 1
# This job is done, yield
try:
data = salt.utils.json.find_json(stdout)
if len(data) < 2 and "local" in data:
ret["ret"] = data["local"]
try:
# Ensure a reported local retcode is kept
retcode = data["local"]["retcode"]
remote_retcode = data["local"]["retcode"]
try:
retcode = int(remote_retcode)
except (TypeError, ValueError):
log.warning(
f"Host '{host}' reported an invalid retcode: '{remote_retcode}'"
)
retcode = max(retcode, 1)
except (KeyError, TypeError):
pass
else:
Expand Down Expand Up @@ -816,6 +828,9 @@ def run(self, jid=None):
final_exit = 0
for ret, retcode in self.handle_ssh():
host = next(iter(ret))
if not isinstance(retcode, int):
log.warning(f"Host '{host}' returned an invalid retcode: {retcode}")
retcode = 1
final_exit = max(final_exit, retcode)

self.cache_job(jid, host, ret[host], fun)
Expand Down
15 changes: 14 additions & 1 deletion salt/client/ssh/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,19 @@ def _run_cmd(self, cmd, key_accept=False, passwd_retries=3):
if stdout:
old_stdout = stdout
time.sleep(0.01)
return ret_stdout, ret_stderr, term.exitstatus
finally:
term.close(terminate=True, kill=True)
# Ensure term.close is called before querying the exitstatus, otherwise
# it might still be None.
ret_status = term.exitstatus
if ret_status is None:
if term.signalstatus is not None:
# The process died because of an unhandled signal, report
# a non-zero exitcode bash-style.
ret_status = 128 + term.signalstatus
else:
log.warning(
"VT reported both exitstatus and signalstatus as None. "
"This is likely a bug."
)
return ret_stdout, ret_stderr, ret_status
48 changes: 47 additions & 1 deletion tests/pytests/unit/client/ssh/test_shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest

import salt.client.ssh.shell as shell
from tests.support.mock import patch
from tests.support.mock import MagicMock, PropertyMock, patch


@pytest.fixture
Expand Down Expand Up @@ -52,3 +52,49 @@ def test_ssh_shell_exec_cmd(caplog):
ret = _shell.exec_cmd("ls {}".format(passwd))
assert not any([x for x in ret if passwd in str(x)])
assert passwd not in caplog.text


def test_ssh_shell_exec_cmd_waits_for_term_close_before_reading_exit_status():
"""
Ensure that the terminal is always closed before accessing its exitstatus.
"""
term = MagicMock()
has_unread_data = PropertyMock(side_effect=(True, True, False))
exitstatus = PropertyMock(
side_effect=lambda *args: 0 if term._closed is True else None
)
term.close.side_effect = lambda *args, **kwargs: setattr(term, "_closed", True)
type(term).has_unread_data = has_unread_data
type(term).exitstatus = exitstatus
term.recv.side_effect = (("hi ", ""), ("there", ""), (None, None), (None, None))
shl = shell.Shell({}, "localhost")
with patch("salt.utils.vt.Terminal", autospec=True, return_value=term):
stdout, stderr, retcode = shl.exec_cmd("do something")
assert stdout == "hi there"
assert stderr == ""
assert retcode == 0


def test_ssh_shell_exec_cmd_returns_status_code_with_highest_bit_set_if_process_dies():
"""
Ensure that if a child process dies as the result of a signal instead of exiting
regularly, the shell returns the signal code encoded in the lowest seven bits with
the highest one set, not None.
"""
term = MagicMock()
term.exitstatus = None
term.signalstatus = 9
has_unread_data = PropertyMock(side_effect=(True, True, False))
type(term).has_unread_data = has_unread_data
term.recv.side_effect = (
("", "leave me alone"),
("", " please"),
(None, None),
(None, None),
)
shl = shell.Shell({}, "localhost")
with patch("salt.utils.vt.Terminal", autospec=True, return_value=term):
stdout, stderr, retcode = shl.exec_cmd("do something")
assert stdout == ""
assert stderr == "leave me alone please"
assert retcode == 137
59 changes: 58 additions & 1 deletion tests/pytests/unit/client/ssh/test_ssh.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import salt.client.ssh.client
import salt.utils.msgpack
from salt.client import ssh
from tests.support.mock import MagicMock, patch
from tests.support.mock import MagicMock, Mock, patch

pytestmark = [
pytest.mark.skip_if_binaries_missing("ssh", "ssh-keygen", check_all=True),
Expand Down Expand Up @@ -449,3 +449,60 @@ def test_key_deploy_no_permission_denied(tmp_path, opts):
ret = client.key_deploy(host, ssh_ret)
assert ret == ssh_ret
assert mock_key_run.call_count == 0


@pytest.mark.parametrize("retcode,expected", [("null", None), ('"foo"', "foo")])
def test_handle_routine_remote_invalid_retcode(opts, target, retcode, expected, caplog):
"""
Ensure that if a remote returns an invalid retcode as part of the return dict,
the final exit code is still an integer and set to 1 at least.
"""
single_ret = (f'{{"local": {{"retcode": {retcode}, "return": "foo"}}}}', "", 0)
opts["tgt"] = "localhost"
single = MagicMock(spec=ssh.Single)
single.id = "localhost"
single.run.return_value = single_ret
que = Mock()

with patch("salt.roster.get_roster_file", MagicMock(return_value="")), patch(
"salt.client.ssh.Single", autospec=True, return_value=single
):
client = ssh.SSH(opts)
client.handle_routine(que, opts, "localhost", target)
que.put.assert_called_once_with(
({"id": "localhost", "ret": {"retcode": expected, "return": "foo"}}, 1)
)
assert f"Host 'localhost' reported an invalid retcode: '{expected}'" in caplog.text


def test_handle_routine_single_run_invalid_retcode(opts, target, caplog):
"""
Ensure that if Single.run() call returns an invalid retcode,
the final exit code is still an integer and set to 1 at least.
"""
single_ret = ("", "Something went seriously wrong", None)
opts["tgt"] = "localhost"
single = MagicMock(spec=ssh.Single)
single.id = "localhost"
single.run.return_value = single_ret
que = Mock()

with patch("salt.roster.get_roster_file", MagicMock(return_value="")), patch(
"salt.client.ssh.Single", autospec=True, return_value=single
):
client = ssh.SSH(opts)
client.handle_routine(que, opts, "localhost", target)
que.put.assert_called_once_with(
(
{
"id": "localhost",
"ret": {
"stdout": "",
"stderr": "Something went seriously wrong",
"retcode": 1,
},
},
1,
)
)
assert "Got an invalid retcode for host 'localhost': 'None'" in caplog.text