From d3589a40f4f559036995e6c73fee476fd9d3cdc8 Mon Sep 17 00:00:00 2001 From: Yaqiang Zhu Date: Fri, 25 Oct 2024 12:19:17 +0800 Subject: [PATCH] [dhcp_relay] Only check parent dhcrelay process in dhcprelayd (#20551) Why I did it In master and 202405 branch, dhcp_relay container was updated to Bookworm, hence isc-dhcp-relay was updated from 4.4.1 to 4.4.3. In this version, isc-dhcp-relay would create child process to proceed when there is network io, hence dhcprelayd would get duplicated dhcrelay process when checking How I did it Only check parent dhcp_relay process How to verify it UT Run https://github.com/sonic-net/sonic-mgmt/blob/master/tests/dhcp_relay/test_dhcp_relay_stress.py with latest dhcprelayd --- .../dhcp_utilities/common/utils.py | 18 ------------- .../dhcp_utilities/dhcprelayd/dhcprelayd.py | 20 ++++++++++++-- .../tests/common_utils.py | 6 ++++- .../tests/test_dhcprelayd.py | 26 +++++++++++++------ src/sonic-dhcp-utilities/tests/test_utils.py | 15 ----------- 5 files changed, 41 insertions(+), 44 deletions(-) diff --git a/src/sonic-dhcp-utilities/dhcp_utilities/common/utils.py b/src/sonic-dhcp-utilities/dhcp_utilities/common/utils.py index 325553f536af..ee4b1815f5a0 100644 --- a/src/sonic-dhcp-utilities/dhcp_utilities/common/utils.py +++ b/src/sonic-dhcp-utilities/dhcp_utilities/common/utils.py @@ -150,24 +150,6 @@ def _parse_table_to_dict(table): return ret -def get_target_process_cmds(process_name): - """ - Get running process cmds - Args: - process_name: name of process - Returns: - List of cmds list - """ - res = [] - for proc in psutil.process_iter(): - try: - if proc.name() == process_name: - res.append(proc.cmdline()) - except psutil.NoSuchProcess: - continue - return res - - def is_smart_switch(device_metadata): """ Check in device metadata whether subtype is smartswitch diff --git a/src/sonic-dhcp-utilities/dhcp_utilities/dhcprelayd/dhcprelayd.py b/src/sonic-dhcp-utilities/dhcp_utilities/dhcprelayd/dhcprelayd.py index d1b22a3158b4..208ab9a6c934 100644 --- a/src/sonic-dhcp-utilities/dhcp_utilities/dhcprelayd/dhcprelayd.py +++ b/src/sonic-dhcp-utilities/dhcp_utilities/dhcprelayd/dhcprelayd.py @@ -8,7 +8,7 @@ import syslog import time from swsscommon import swsscommon -from dhcp_utilities.common.utils import DhcpDbConnector, terminate_proc, get_target_process_cmds, is_smart_switch +from dhcp_utilities.common.utils import DhcpDbConnector, terminate_proc, is_smart_switch from dhcp_utilities.common.dhcp_db_monitor import DhcpRelaydDbMonitor, DhcpServerTableIntfEnablementEventChecker, \ VlanTableEventChecker, VlanIntfTableEventChecker, DhcpServerFeatureStateChecker, MidPlaneTableEventChecker @@ -226,7 +226,23 @@ def _check_dhcp_relay_processes(self): """ Check whether dhcrelay running as expected, if not, dhcprelayd will exit with code 1 """ - running_cmds = get_target_process_cmds("dhcrelay") + procs = {} + for proc in psutil.process_iter(): + try: + if proc.name() != "dhcrelay": + continue + procs[proc.pid] = [proc.ppid(), proc.cmdline()] + except psutil.NoSuchProcess: + continue + + # When there is network io, dhcrelay would create child process to proceed them, psutil has chance to get + # duplicated cmdline. Hence ignore chlid process in here + running_cmds = [] + for _, (parent_pid, cmdline) in procs.items(): + if parent_pid in procs: + continue + running_cmds.append(cmdline) + running_cmds.sort() expected_cmds = [value for key, value in self.dhcp_relay_supervisor_config.items() if "isc-dhcpv4-relay" in key] expected_cmds.sort() diff --git a/src/sonic-dhcp-utilities/tests/common_utils.py b/src/sonic-dhcp-utilities/tests/common_utils.py index f5eb4e018f86..29c579545d8e 100644 --- a/src/sonic-dhcp-utilities/tests/common_utils.py +++ b/src/sonic-dhcp-utilities/tests/common_utils.py @@ -65,10 +65,11 @@ def mock_get_config_db_table(table_name): class MockProc(object): - def __init__(self, name, pid=1, exited=False): + def __init__(self, name, pid=1, exited=False, ppid=1): self.proc_name = name self.pid = pid self.exited = exited + self.parent_id = ppid def name(self): if self.exited: @@ -96,6 +97,9 @@ def wait(self): def status(self): return self.status + def ppid(self): + return self.parent_id + class MockPopen(object): def __init__(self, pid): diff --git a/src/sonic-dhcp-utilities/tests/test_dhcprelayd.py b/src/sonic-dhcp-utilities/tests/test_dhcprelayd.py index 4914ff020d63..7b542f51359d 100644 --- a/src/sonic-dhcp-utilities/tests/test_dhcprelayd.py +++ b/src/sonic-dhcp-utilities/tests/test_dhcprelayd.py @@ -195,23 +195,33 @@ def test_execute_supervisor_dhcp_relay_process(mock_swsscommon_dbconnector_init, mock_run.assert_called_once_with(["supervisorctl", op, "dhcpmon-Vlan1000"], check=True) -@pytest.mark.parametrize("target_procs_cmds", [[["dhcrelay", "-d"]], [["dhcpmon"]]]) -def test_check_dhcp_relay_process(mock_swsscommon_dbconnector_init, mock_swsscommon_table_init, target_procs_cmds): +@pytest.mark.parametrize("iter_process", [ + [ + ["dhcrelay", 2, False, 1], ["dhcrelay", 3, False, 2], ["dhcpmon", 4, False, 1], ["dhcrelay", 5, True, 1] + ], + [ + ["dhcpmon", 4, False, 1] + ]]) +def test_check_dhcp_relay_process(mock_swsscommon_dbconnector_init, mock_swsscommon_table_init, iter_process): exp_config = { - "isc-dhcpv4-relay-Vlan1000": ["dhcrelay", "-d"] + "isc-dhcpv4-relay-Vlan1000": ["/usr/sbin/dhcrelay", "-d", "-m", "discard", "-a", "%h:%p", "%P", + "--name-alias-map-file", "/tmp/port-name-alias-map.txt", "-id", "Vlan1000", + "-iu", "docker0", "240.127.1.2"], + "dhcpmon-Vlan1000": ["/usr/sbin/dhcpmon", "-id", "Vlan1000", "-iu", "docker0", "-im", "eth0"] } - with patch("dhcp_utilities.dhcprelayd.dhcprelayd.get_target_process_cmds", return_value=target_procs_cmds), \ - patch.object(DhcpRelayd, "dhcp_relay_supervisor_config", + process_iter_ret = [MockProc(name=item[0], pid=item[1], exited=item[2], ppid=item[3]) for item in iter_process] + with patch.object(DhcpRelayd, "dhcp_relay_supervisor_config", return_value=exp_config, new_callable=PropertyMock), \ - patch.object(sys, "exit", mock_exit_func): + patch.object(sys, "exit", mock_exit_func), \ + patch.object(psutil, "process_iter", return_value=process_iter_ret): dhcp_db_connector = DhcpDbConnector() dhcprelayd = DhcpRelayd(dhcp_db_connector, None) try: dhcprelayd._check_dhcp_relay_processes() except SystemExit: - assert target_procs_cmds[0] != exp_config["isc-dhcpv4-relay-Vlan1000"] + assert all(process[0] != "dhcrelay" for process in iter_process) else: - assert target_procs_cmds[0] == exp_config["isc-dhcpv4-relay-Vlan1000"] + assert any(process[0] == "dhcrelay" for process in iter_process) def test_get_dhcp_relay_config(mock_swsscommon_dbconnector_init, mock_swsscommon_table_init): diff --git a/src/sonic-dhcp-utilities/tests/test_utils.py b/src/sonic-dhcp-utilities/tests/test_utils.py index 4a41049fe06e..88d07c648d87 100644 --- a/src/sonic-dhcp-utilities/tests/test_utils.py +++ b/src/sonic-dhcp-utilities/tests/test_utils.py @@ -141,21 +141,6 @@ def test_validate_ttr_type(test_data): assert res == test_data[2] -def test_get_target_process_cmds(): - with patch.object(psutil, "process_iter", return_value=[MockProc("dhcrelay", 1), - MockProc("dhcrelay", 1, exited=True), - MockProc("dhcpmon", 2)], - new_callable=PropertyMock): - res = utils.get_target_process_cmds("dhcrelay") - expected_res = [ - [ - "/usr/sbin/dhcrelay", "-d", "-m", "discard", "-a", "%h:%p", "%P", "--name-alias-map-file", - "/tmp/port-name-alias-map.txt", "-id", "Vlan1000", "-iu", "docker0", "240.127.1.2" - ] - ] - assert res == expected_res - - @pytest.mark.parametrize("is_smart_switch", [True, False]) def test_is_smart_switch(is_smart_switch): device_metadata = {"localhost": {"subtype": "SmartSwitch"}} if is_smart_switch else {"localhost": {}}