Skip to content

Commit

Permalink
Retry gnmi_get calls for potential server startup timing issue (#16910)
Browse files Browse the repository at this point in the history
What is the motivation for this PR?
There are cases where gnmi_get is called before the server is fully ready after certificate rotation; this is a timing issue that lasts only a few seconds. If we retry the gnmi_get call, it will succeed. Add wait_until to retry the client calls for up to 30 seconds.

How did you do it?
Add wait_until to retry for 30 seconds

How did you verify/test it?
Manual test/pipeline
  • Loading branch information
zbud-msft authored Feb 12, 2025
1 parent 639ff7c commit ce2f9b2
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 14 deletions.
5 changes: 5 additions & 0 deletions tests/telemetry/telemetry_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,8 @@ def rotate_telemetry_certs(duthost, localhost):
duthost.copy(src="streamingtelemetryserver.key", dest=path)
duthost.copy(src="dsmsroot.cer", dest=path)
duthost.copy(src="dsmsroot.key", dest=path)


def execute_ptf_gnmi_cli(ptfhost, cmd):
    """Run ``cmd`` through ``ptfhost.shell`` and report whether it succeeded.

    Intended for use as a polling predicate (e.g. with ``wait_until``), so a
    failing command yields ``False`` instead of propagating an error.

    Args:
        ptfhost: Host object exposing a ``shell(cmd, ...)`` method whose
            result is indexable by ``'rc'``.
        cmd: Command line to execute on the host.

    Returns:
        bool: ``True`` if the reported return code is 0, otherwise ``False``.
    """
    # module_ignore_errors=True mirrors the failure-tolerant shell call used
    # by the cert-rotation tests; presumably the host wrapper raises on a
    # non-zero exit without it (confirm against the host implementation),
    # which would make the ``False`` result below unreachable.
    result = ptfhost.shell(cmd, module_ignore_errors=True)
    return result['rc'] == 0
24 changes: 10 additions & 14 deletions tests/telemetry/test_telemetry_cert_rotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from tests.common.helpers.gnmi_utils import GNMIEnvironment
from telemetry_utils import generate_client_cli
from telemetry_utils import archive_telemetry_certs, unarchive_telemetry_certs, rotate_telemetry_certs
from telemetry_utils import execute_ptf_gnmi_cli


pytestmark = [
pytest.mark.topology('any', 't1-multi-asic')
Expand Down Expand Up @@ -74,15 +76,13 @@ def test_telemetry_post_cert_del(duthosts, enum_rand_one_per_hwsku_hostname, ptf
# Initial request should pass with certs
cmd = generate_client_cli(duthost=duthost, gnxi_path=gnxi_path, method=METHOD_GET,
target="OTHERS", xpath="proc/uptime")
ret = ptfhost.shell(cmd)['rc']
assert ret == 0, "Telemetry server request should complete with certs"
pytest_assert(wait_until(30, 5, 0, execute_ptf_gnmi_cli, ptfhost, cmd),
"Telemetry server request should complete with certs")

# Remove certs
archive_telemetry_certs(duthost)

# Requests should fail without certs
cmd = generate_client_cli(duthost=duthost, gnxi_path=gnxi_path, method=METHOD_GET,
target="OTHERS", xpath="proc/uptime")
ret = ptfhost.shell(cmd, module_ignore_errors=True)['rc']
assert ret != 0, "Telemetry server request should fail without certs"

Expand Down Expand Up @@ -122,10 +122,8 @@ def test_telemetry_post_cert_add(duthosts, enum_rand_one_per_hwsku_hostname, ptf
wait_tcp_connection(localhost, dut_ip, env.gnmi_port, timeout_s=60)

# Requests should successfully complete with certs
cmd = generate_client_cli(duthost=duthost, gnxi_path=gnxi_path, method=METHOD_GET,
target="OTHERS", xpath="proc/uptime")
ret = ptfhost.shell(cmd)['rc']
assert ret == 0, "Telemetry server request should complete with certs"
pytest_assert(wait_until(30, 5, 0, execute_ptf_gnmi_cli, ptfhost, cmd),
"Telemetry server request should complete with certs")


@pytest.mark.parametrize('setup_streaming_telemetry', [False], indirect=True)
Expand All @@ -142,8 +140,8 @@ def test_telemetry_cert_rotate(duthosts, enum_rand_one_per_hwsku_hostname, ptfho
# Initial request should complete with certs
cmd = generate_client_cli(duthost=duthost, gnxi_path=gnxi_path, method=METHOD_GET,
target="OTHERS", xpath="proc/uptime")
ret = ptfhost.shell(cmd)['rc']
assert ret == 0, "Telemetry server request should fail without certs"
pytest_assert(wait_until(30, 5, 0, execute_ptf_gnmi_cli, ptfhost, cmd),
"Telemetry server request should complete with certs")

# Rotate certs
rotate_telemetry_certs(duthost, localhost)
Expand All @@ -153,7 +151,5 @@ def test_telemetry_cert_rotate(duthosts, enum_rand_one_per_hwsku_hostname, ptfho
wait_tcp_connection(localhost, dut_ip, env.gnmi_port, timeout_s=60)

# Requests should successfully complete with certs
cmd = generate_client_cli(duthost=duthost, gnxi_path=gnxi_path, method=METHOD_GET,
target="OTHERS", xpath="proc/uptime")
ret = ptfhost.shell(cmd)['rc']
assert ret == 0, "Telemetry server request should complete with certs"
pytest_assert(wait_until(30, 5, 0, execute_ptf_gnmi_cli, ptfhost, cmd),
"Telemetry server request should complete with certs")

0 comments on commit ce2f9b2

Please sign in to comment.