Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pmon] Update the platform_wait script to wait on asic init #79

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
url = https://github.com/sonic-net/sonic-swss-common
[submodule "sonic-linux-kernel"]
path = src/sonic-linux-kernel
url = https://github.com/sonic-net/sonic-linux-kernel
url = https://github.com/vivekrnv/sonic-linux-kernel
[submodule "sonic-sairedis"]
path = src/sonic-sairedis
url = https://github.com/sonic-net/sonic-sairedis
Expand Down
1 change: 1 addition & 0 deletions build_debian.sh
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,7 @@ sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT apt-get -y in
resolvconf \
lsof \
sysstat \
xxd \
zstd

# Have systemd create the auditd log directory
Expand Down
33 changes: 16 additions & 17 deletions device/mellanox/x86_64-mlnx_msn2700-r0/platform_wait
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ declare -r SYSLOG_INFO="info"

declare -r HW_MGMT_CONFIG="/var/run/hw-management/config"

declare -r MODULE_COUNTER="${HW_MGMT_CONFIG}/module_counter"
declare -r SFP_COUNTER="${HW_MGMT_CONFIG}/sfp_counter"
declare -r ASIC_INIT_DONE="${HW_MGMT_CONFIG}/asics_init_done"
declare -r NUM_ASICS="${HW_MGMT_CONFIG}/asic_num"
declare -r ASIC_CHIPUP_COMPLETED="${HW_MGMT_CONFIG}/asic_chipup_completed"

declare -r EXIT_SUCCESS="0"
declare -r EXIT_TIMEOUT="1"
Expand All @@ -26,44 +27,42 @@ function log_info() {
eval "${SYSLOG_LOGGER} -t ${SYSLOG_IDENTIFIER} -p ${SYSLOG_INFO} $@"
}

function wait_for_sfp() {
local -r _NUM_MATCH="^[0-9]+$"
local -r _NUM_ZERO="0"
function wait_for_asic_chipup() {

local _MODULE_CNT="0"
local _SFP_CNT="0"
local _ASIC_INIT="0"
local _ASIC_COUNT="0"
local _ASICS_CHIPUP="0"

local -i _WDOG_CNT="1"
local -ir _WDOG_MAX="300"

local -r _TIMEOUT="1s"

while [[ "${_WDOG_CNT}" -le "${_WDOG_MAX}" ]]; do
_MODULE_CNT="$(cat ${MODULE_COUNTER} 2>&1)"
_SFP_CNT="$(cat ${SFP_COUNTER} 2>&1)"
_ASIC_INIT="$(cat ${ASIC_INIT_DONE} 2>&1)"
_ASIC_COUNT="$(cat ${NUM_ASICS} 2>&1)"
_ASICS_CHIPUP="$(cat ${ASIC_CHIPUP_COMPLETED} 2>&1)"

if [[ "${_MODULE_CNT}" =~ ${_NUM_MATCH} && "${_SFP_CNT}" =~ ${_NUM_MATCH} ]]; then
if [[ "${_SFP_CNT}" -gt "${_NUM_ZERO}" && "${_MODULE_CNT}" -eq "${_SFP_CNT}" ]]; then
return "${EXIT_SUCCESS}"
fi
if [[ "${_ASIC_INIT}" -eq 1 && "${_ASIC_COUNT}" -eq "${_ASICS_CHIPUP}" ]]; then
return "${EXIT_SUCCESS}"
fi

let "_WDOG_CNT++"
sleep "${_TIMEOUT}"
done

log_error "Mellanox ASIC is not ready: INIT: ${_ASIC_INIT}, NUM_ASIC: ${_ASIC_COUNT}, CHIPUP: ${_ASICS_CHIPUP} timeout...."
return "${EXIT_TIMEOUT}"
}

log_info "Wait for SFP interfaces to be ready"
log_info "Wait for Mellanox ASIC to be ready"

wait_for_sfp
wait_for_asic_chipup
EXIT_CODE="$?"
if [[ "${EXIT_CODE}" != "${EXIT_SUCCESS}" ]]; then
log_error "SFP interfaces are not ready: timeout"
exit "${EXIT_CODE}"
fi

log_info "SFP interfaces are ready"
log_notice "Mellanox ASIC is ready"

exit "${EXIT_SUCCESS}"
1 change: 0 additions & 1 deletion files/build_templates/lldp.timer.j2

This file was deleted.

11 changes: 0 additions & 11 deletions files/scripts/syncd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -105,17 +105,6 @@ function waitplatform() {
debug "Started pmon service"
fi
fi
if [[ x"$BOOT_TYPE" = @(x"fast"|x"warm"|x"fastfast") ]]; then
debug "LLDP service is delayed by a timer for better fast/warm boot performance"
else
lldp_state=$(systemctl is-enabled lldp.timer)
if [[ $lldp_state == "enabled" ]]
then
debug "Starting lldp service..."
/bin/systemctl start lldp
debug "Started lldp service"
fi
fi
}

function stopplatform1() {
Expand Down
15 changes: 14 additions & 1 deletion platform/mellanox/integration-scripts/sdk_kernel_patches.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def get_kernel_dir(self):
major_kernel_path = os.path.join(KERNEL_BACKPORTS, "{}.{}".format(kernel, major))

# if the k_dir with actual minor doesn't exist, use the closest minor version
for minor_i in range(minor_int, 0, -1):
for minor_i in range(minor_int, -1, -1):
path = os.path.join(major_kernel_path, "{}.{}.{}".format(kernel, major, minor_i))
if os.path.exists(os.path.join(self.args.patches, path)):
minor = str(minor_i)
Expand Down Expand Up @@ -137,6 +137,18 @@ def get_new_patches(self):
Data.new_patches = FileHandler.read_dir(patches_path, "*.patch")
Data.new_patches.sort()

def cleanup_old_patches(self):
patches_del = copy.deepcopy(Data.old_patches)
for patch in Data.new_patches:
if patch in Data.old_patches:
patches_del.remove(patch)
print(f" -> Patches to be removed are : {patches_del}")
for patch in patches_del:
file_n = os.path.join(self.args.build_root, os.path.join(SLK_PATCH_LOC, patch))
if os.path.exists(file_n):
print(f"{file_n} is deleted")
os.remove(file_n)

def refresh_markers(self):
print("-> INFO Refreshing Markers ")
(Data.i_sdk_start, Data.i_sdk_end) = FileHandler.find_marker_indices(Data.old_series, SDK_MARKER)
Expand Down Expand Up @@ -187,6 +199,7 @@ def perform(self):
self.refresh_markers()
self.add_new_patch_series()
self.process_update()
self.cleanup_old_patches()
patch_table = self.fetch_patch_table(os.path.join(self.args.patches, Data.k_dir))
slk_msg = self.create_commit_msg(patch_table)
if self.args.slk_msg:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def is_psu_hotswapable(cls):
@classmethod
@utils.read_only_cache()
def get_sfp_count(cls):
return utils.read_int_from_file('/run/hw-management/config/sfp_counter')
return utils.read_int_from_file('/run/hw-management/config/module_counter')

@classmethod
def get_linecard_sfp_count(cls, lc_index):
Expand Down
5 changes: 4 additions & 1 deletion platform/mellanox/mlnx-platform-api/sonic_platform/fan.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,10 @@ def get_target_speed(self):
"""
try:
# Get PSU fan target speed according to current system cooling level
cooling_level = utils.read_int_from_file('/run/hw-management/thermal/cooling_cur_state', log_func=None)
pwm = utils.read_int_from_file('/run/hw-management/thermal/pwm1', log_func=None)
if pwm >= PWM_MAX:
pwm = PWM_MAX - 1
cooling_level = int(pwm / PWM_MAX * 10)
return int(self.PSU_FAN_SPEED[cooling_level], 16)
except Exception:
return self.get_speed()
Expand Down
4 changes: 2 additions & 2 deletions platform/mellanox/mlnx-platform-api/sonic_platform/psu.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@ def __init__(self, psu_index):

self.psu_power_max_capacity = os.path.join(PSU_PATH, "config/psu{}_power_capacity".format(self.index))

self.psu_temp = os.path.join(PSU_PATH, 'thermal/psu{}_temp'.format(self.index))
self.psu_temp_threshold = os.path.join(PSU_PATH, 'thermal/psu{}_temp_max'.format(self.index))
self.psu_temp = os.path.join(PSU_PATH, 'thermal/psu{}_temp1'.format(self.index))
self.psu_temp_threshold = os.path.join(PSU_PATH, 'thermal/psu{}_temp1_max'.format(self.index))

self.psu_power_slope = os.path.join(PSU_PATH, self.PSU_POWER_SLOPE.format(self.index))

Expand Down
26 changes: 8 additions & 18 deletions platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,16 @@
"psu thermals":
{
"name": "PSU-{} Temp",
"temperature": "psu{}_temp",
"high_threshold": "psu{}_temp_max",
"temperature": "psu{}_temp1",
"high_threshold": "psu{}_temp1_max",
"type": "indexable"
},
"chassis thermals": [
{
"name": "ASIC",
"temperature": "asic",
"high_threshold": "mlxsw/temp_trip_hot",
"high_critical_threshold": "mlxsw/temp_trip_crit"
"high_threshold": "asic_temp_emergency",
"high_critical_threshold": "asic_temp_trip_crit"
},
{
"name": "Ambient Port Side Temp",
Expand Down Expand Up @@ -105,8 +105,8 @@
{
"name": "Gearbox {} Temp",
"temperature": "gearbox{}_temp_input",
"high_threshold": "mlxsw-gearbox{}/temp_trip_hot",
"high_critical_threshold": "mlxsw-gearbox{}/temp_trip_crit",
"high_threshold": "gearbox{}_temp_emergency",
"high_critical_threshold": "gearbox{}_temp_trip_crit",
"type": "indexable"
},
{
Expand Down Expand Up @@ -135,8 +135,8 @@
'linecard thermals': {
"name": "Gearbox {} Temp",
"temperature": "gearbox{}_temp_input",
"high_threshold": "mlxsw-gearbox{}/temp_trip_hot",
"high_critical_threshold": "mlxsw-gearbox{}/temp_trip_crit",
"high_threshold": "gearbox{}_temp_emergency",
"high_critical_threshold": "gearbox{}_temp_trip_crit",
"type": "indexable"
}
}
Expand Down Expand Up @@ -268,16 +268,6 @@ def _check_thermal_sysfs_existence(file_path):


class Thermal(ThermalBase):
thermal_algorithm_status = False
# Expect cooling level, used for caching the cooling level value before committing to hardware
expect_cooling_level = None
# Expect cooling state
expect_cooling_state = None
# Last committed cooling level
last_set_cooling_level = None
last_set_cooling_state = None
last_set_psu_cooling_level = None

def __init__(self, name, temp_file, high_th_file, high_crit_th_file, position):
"""
index should be a string for category ambient and int for other categories
Expand Down
13 changes: 1 addition & 12 deletions platform/mellanox/mlnx-platform-api/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES.
# Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES.
# Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -42,14 +42,3 @@ def auto_recover_mock():
utils.read_str_from_file = origin_read_str_from_file
utils.write_file = origin_write_file
utils.read_float_from_file = origin_read_float_from_file


@pytest.fixture(scope='function', autouse=True)
def auto_reset_cooling_level():
from sonic_platform.thermal import Thermal
yield
Thermal.expect_cooling_level = None
Thermal.expect_cooling_state = None
Thermal.last_set_cooling_level = None
Thermal.last_set_cooling_state = None
Thermal.last_set_psu_cooling_level = None
4 changes: 2 additions & 2 deletions platform/mellanox/mlnx-platform-api/tests/test_fan_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ def test_psu_fan_basic(self, mock_path_exists, mock_powergood, mock_presence, mo
assert fan.get_presence() is False
mock_path_exists.return_value = True
assert fan.get_presence() is True
mock_read_int.return_value = 7
assert fan.get_target_speed() == 70
mock_read_int.return_value = int(255 / 10 * 7)
assert fan.get_target_speed() == 60
mock_read_int.return_value = FAN_DIR_VALUE_INTAKE
assert fan.get_direction() == Fan.FAN_DIRECTION_INTAKE
mock_read_int.return_value = FAN_DIR_VALUE_EXHAUST
Expand Down
2 changes: 1 addition & 1 deletion platform/mellanox/rules.mk
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ include $(PLATFORM_PATH)/docker-saiserver-mlnx.mk
include $(PLATFORM_PATH)/one-image.mk
include $(PLATFORM_PATH)/libsaithrift-dev.mk
include $(PLATFORM_PATH)/mlnx-ffb.mk
#include $(PLATFORM_PATH)/issu-version.mk
include $(PLATFORM_PATH)/issu-version.mk
include $(PLATFORM_PATH)/mlnx-onie-fw-update.mk
include $(PLATFORM_PATH)/mlnx-ssd-fw-update.mk
include $(PLATFORM_PATH)/iproute2.mk
Expand Down
15 changes: 7 additions & 8 deletions platform/mellanox/sdk.dep
Original file line number Diff line number Diff line change
Expand Up @@ -210,14 +210,13 @@ endif

# SX_KERNEL

# TODO: enable for bookworm
#SPATH := $($(SX_KERNEL)_SRC_PATH)
#SLINKS := $(shell find $(SPATH) -type l -exec echo {} \; | grep -Ev ' ')
#SMDEP_PATHS := $(shell git submodule status --recursive -- $(SPATH) | awk '{print $$2}' | grep -Ev ' ')
#SMDEP_FILES := $(foreach path,$(SMDEP_PATHS),$(filter-out $(SMDEP_PATHS),$(addprefix $(path)/,$(shell cd $(path) && git ls-files | grep -Ev ' '))))
#DEP_FILES := $(SONIC_COMMON_FILES_LIST) $(PLATFORM_PATH)/sdk.mk $(PLATFORM_PATH)/sdk.dep
#DEP_FILES += $(SONIC_COMMON_BASE_FILES_LIST)
#DEP_FILES += $(filter-out $(SMDEP_PATHS),$(shell git ls-files -- $(SPATH) | grep -Ev ' '))
SPATH := $($(SX_KERNEL)_SRC_PATH)
SLINKS := $(shell find $(SPATH) -type l -exec echo {} \; | grep -Ev ' ')
SMDEP_PATHS := $(shell git submodule status --recursive -- $(SPATH) | awk '{print $$2}' | grep -Ev ' ')
SMDEP_FILES := $(foreach path,$(SMDEP_PATHS),$(filter-out $(SMDEP_PATHS),$(addprefix $(path)/,$(shell cd $(path) && git ls-files | grep -Ev ' '))))
DEP_FILES := $(SONIC_COMMON_FILES_LIST) $(PLATFORM_PATH)/sdk.mk $(PLATFORM_PATH)/sdk.dep
DEP_FILES += $(SONIC_COMMON_BASE_FILES_LIST)
DEP_FILES += $(filter-out $(SMDEP_PATHS),$(shell git ls-files -- $(SPATH) | grep -Ev ' '))

$(SX_KERNEL)_CACHE_MODE := GIT_CONTENT_SHA
$(SX_KERNEL)_DEP_FLAGS := $(MLNX_SDK_COMMON_FLAGS_LIST)
Expand Down
11 changes: 5 additions & 6 deletions platform/mellanox/sdk.mk
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,11 @@ ifeq ($(SDK_FROM_SRC),y)
$(eval $(call add_derived_package,$(SX_HASH_CALC),$(SX_HASH_CALC_DBGSYM)))
endif

# TODO: enable for bookworm
#SX_KERNEL = sx-kernel_1.mlnx.$(MLNX_SDK_DEB_VERSION)_$(CONFIGURED_ARCH).deb
#$(SX_KERNEL)_DEPENDS += $(LINUX_HEADERS) $(LINUX_HEADERS_COMMON)
#$(SX_KERNEL)_SRC_PATH = $(PLATFORM_PATH)/sdk-src/sx-kernel
#SX_KERNEL_DEV = sx-kernel-dev_1.mlnx.$(MLNX_SDK_DEB_VERSION)_$(CONFIGURED_ARCH).deb
#$(eval $(call add_derived_package,$(SX_KERNEL),$(SX_KERNEL_DEV)))
SX_KERNEL = sx-kernel_1.mlnx.$(MLNX_SDK_DEB_VERSION)_$(CONFIGURED_ARCH).deb
$(SX_KERNEL)_DEPENDS += $(LINUX_HEADERS) $(LINUX_HEADERS_COMMON)
$(SX_KERNEL)_SRC_PATH = $(PLATFORM_PATH)/sdk-src/sx-kernel
SX_KERNEL_DEV = sx-kernel-dev_1.mlnx.$(MLNX_SDK_DEB_VERSION)_$(CONFIGURED_ARCH).deb
$(eval $(call add_derived_package,$(SX_KERNEL),$(SX_KERNEL_DEV)))

define make_url
$(1)_URL = $(MLNX_SDK_ASSETS_URL)/$(1)
Expand Down
4 changes: 2 additions & 2 deletions rules/linux-kernel.dep
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ SPATH := $($(LINUX_HEADERS_COMMON)_SRC_PATH)
DEP_FILES := rules/linux-kernel.mk rules/linux-kernel.dep
SMDEP_FILES := $(addprefix $(SPATH)/,$(shell cd $(SPATH) && git ls-files))

DEP_FLAGS := $(SONIC_COMMON_FLAGS_LIST) \
$(KERNEL_PROCURE_METHOD) $(KERNEL_CACHE_PATH) $(SECURE_UPGRADE_MODE) $(SECURE_UPGRADE_SIGNING_CERT)
DEP_FLAGS := $(SONIC_COMMON_FLAGS_LIST) $(INCLUDE_EXTERNAL_PATCHES) \
$(KERNEL_PROCURE_METHOD) $(KERNEL_CACHE_PATH) $(SECURE_UPGRADE_MODE) $(SECURE_UPGRADE_SIGNING_CERT)

$(LINUX_HEADERS_COMMON)_CACHE_MODE := GIT_CONTENT_SHA
$(LINUX_HEADERS_COMMON)_DEP_FLAGS := $(DEP_FLAGS)
Expand Down
2 changes: 1 addition & 1 deletion rules/linux-kernel.mk
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ endif
# Place a URL to a .tar.gz file here if you want to include those patches
EXTERNAL_KERNEL_PATCH_URL =
# Set y to include non upstream patches tarball provided by the corresponding platform
INCLUDE_EXTERNAL_PATCHES = n
INCLUDE_EXTERNAL_PATCHES ?= n
# platforms should override this and provide an absolute location to the patches
EXTERNAL_KERNEL_PATCH_LOC =

Expand Down
1 change: 1 addition & 0 deletions slave.mk
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,7 @@ endif
$(info "CROSS_BUILD_ENVIRON" : "$(CROSS_BUILD_ENVIRON)")
$(info "GZ_COMPRESS_PROGRAM" : "$(GZ_COMPRESS_PROGRAM)")
$(info "LEGACY_SONIC_MGMT_DOCKER" : "$(LEGACY_SONIC_MGMT_DOCKER)")
$(info "INCLUDE_EXTERNAL_PATCHES" : "$(INCLUDE_EXTERNAL_PATCHES)")
$(info )
else
$(info SONiC Build System for $(CONFIGURED_PLATFORM):$(CONFIGURED_ARCH))
Expand Down
2 changes: 1 addition & 1 deletion src/sonic-linux-kernel
Submodule sonic-linux-kernel updated 35 files
+15 −17 patch/0001-hwmon-ltc2978-Add-support-for-LTC3888.patch
+50 −56 patch/0002-i2c-designware-Recover-from-a-stuck-SDA-line.patch
+14 −12 patch/0003-mmc-sdhci-cadence-Add-AMD-Pensando-Elba-SoC-support.patch
+0 −52 patch/0004-arm64-traps-Call-platform-handler-for-do_serror.patch
+25 −20 patch/0004-spi-dw-Add-support-for-AMD-Pensando-Elba-SoC.patch
+29 −0 patch/0005-spi-spidev-Add-pensando-cpld-compat-entry.patch
+57 −34 patch/0006-arch-arm64-Initial-support-for-the-Elba-SoC.patch
+12 −17 patch/0007-arm64-dts-pensando-Elba-flash-partitions.patch
+822 −0 patch/0008-soc-pensando-Add-capmem-driver.patch
+22 −52 patch/0009-irqchip-pensando-Interrupt-domain-controllers.patch
+103 −163 patch/0010-i2c-rd1173-Add-RD1173-I2C-controller-driver.patch
+39 −39 patch/0011-uio-pensando-UIO-drivers-for-Elba.patch
+0 −435 patch/0012-drivers-soc-pensando-dev-capmem-driver.patch
+14 −13 patch/0012-soc-pensando-Add-Boot-State-Machine.patch
+37 −137 patch/0013-soc-pensando-Add-crash-dump-driver.patch
+16 −17 patch/0014-soc-pensando-Add-Reset-Cause-driver.patch
+71 −69 patch/0015-soc-pensando-Add-pcie-driver.patch
+9 −11 patch/0016-arm64-dts-pensando-add-mnet-mcrypt-devices.patch
+144 −0 patch/0017-soc-pensando-refactor-pciep_regrd32-for-kpcimgr.patch
+36 −45 patch/0018-soc-pensando-Add-kpcimgr-driver.patch
+79 −0 patch/0019-mmc-sdhci-cadence-Support-mmc-hardware-reset.patch
+71 −121 patch/0020-reset-elba-Add-mmc-hw-reset-driver.patch
+15 −16 patch/0021-soc-pensando-Add-sbus-driver.patch
+62 −32 patch/0022-soc-pensando-Add-boot_count-to-sysfs.patch
+0 −94 patch/0023-arch-arm64-boot-dts-psci-support.patch
+254 −0 patch/0023-i2c-rd1173-Reset-RD1173-master-when-i2c_busy-set.patch
+99 −121 patch/0024-soc-pensando-Add-penfw-driver.patch
+284 −0 patch/0025-arm64-dts-pensando-psci-support.patch
+105 −0 patch/0026-i2c-rd1173-Fix-Lattice-RD1173-interrupt-handling.patch
+18 −21 patch/0027-hwmon-pmbus-tps53679-Add-support-for-TI-TPS53659.patch
+42 −33 patch/0028-EDAC-elba-Add-Elba-EDAC-support.patch
+53 −0 patch/0029-arm64-traps-Handle-SError-interrupt.patch
+23 −23 patch/0030-quirks-for-the-Pensando-qspi-controller.patch
+2 −0 patch/kconfig-inclusions
+33 −27 patch/series