Skip to content

Commit

Permalink
[mlnx|ffb]: Add fast-fast flow in fast(warm)-reboot script (sonic-net…
Browse files Browse the repository at this point in the history
…#390)

* [mlnx|ffb] Add fast-fast flow in fast(warm)-reboot script

Signed-off-by: Stepan Blyschak <stepanb@mellanox.com>

* [Mellanox|FFB]: Fix review comments

* Change naming convention from "fast-fast" to "fastfast"

Signed-off-by: Volodymyr Samotiy <volodymyrs@mellanox.com>
  • Loading branch information
Volodymyr Samotiy authored and yxieca committed Nov 30, 2018
1 parent c541aad commit 97d73da
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 14 deletions.
102 changes: 92 additions & 10 deletions scripts/fast-reboot
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
REBOOT_USER=$(logname)
REBOOT_TIME=$(date)
REBOOT_CAUSE_FILE="/var/cache/sonic/reboot-cause.txt"
REBOOT_TYPE=$(basename $0)
WARM_DIR=/host/warmboot
REDIS_FILE=dump.rdb
REBOOT_SCRIPT_NAME=$(basename $0)
REBOOT_TYPE="${REBOOT_SCRIPT_NAME}"

# Check root privileges
if [[ "$EUID" -ne 0 ]]
Expand All @@ -14,6 +15,8 @@ then
exit 1
fi

sonic_asic_type=$(sonic-cfggen -y /etc/sonic/sonic_version.yml -v asic_type)

function clear_warm_boot()
{
config warm_restart disable || /bin/true
Expand All @@ -25,6 +28,19 @@ function clear_warm_boot()
fi
}

# Delete every key in a Redis database except those whose name contains
# the given table name.
#
# Arguments:
#   $1 - Redis database number, passed to `redis-cli -n`
#   $2 - table name to keep; matched as a literal substring of each key
#
# The table name is handed to the Lua script through ARGV instead of being
# spliced into the script text, so a quote in the name cannot break the
# script. It is compared with a plain-text string.find (4th argument 'true'),
# so characters such as '-' or '.' are not interpreted as Lua pattern magic.
function cleanup_except_table()
{
    local REDIS_DB_NUMBER="$1"
    local TABLE_PREFIX="$2"
    redis-cli -n "${REDIS_DB_NUMBER}" eval "
        for _, k in ipairs(redis.call('keys', '*')) do
            if not string.find(k, ARGV[1], 1, true) then
                redis.call('del', k)
            end
        end
    " 0 "${TABLE_PREFIX}"
}

function initialize_pre_shutdown()
{
TABLE="WARM_RESTART_TABLE|warm-shutdown"
Expand Down Expand Up @@ -86,9 +102,27 @@ case "$REBOOT_TYPE" in
BOOT_TYPE_ARG=$REBOOT_TYPE
;;
"warm-reboot")
BOOT_TYPE_ARG="warm"
trap clear_warm_boot EXIT HUP INT QUIT TERM KILL ABRT ALRM
config warm_restart enable system
if [[ "$sonic_asic_type" == "mellanox" ]]; then
REBOOT_TYPE="fastfast-reboot"
BOOT_TYPE_ARG="fastfast"
# source mlnx-ffb.sh file with
# functions to check ISSU upgrade/do ISSU start
source mlnx-ffb.sh

trap clear_warm_boot EXIT HUP INT QUIT TERM KILL ABRT ALRM

# Set warm reboot flag for some components.
# In fastfast boot flow, only APPL layer dockers
# are enabled to perform warm restart
config warm_restart disable system
config warm_restart disable swss
config warm_restart enable bgp
config warm_restart enable teamd
else
BOOT_TYPE_ARG="warm"
trap clear_warm_boot EXIT HUP INT QUIT TERM KILL ABRT ALRM
config warm_restart enable system
fi
;;
*)
echo "Not supported reboot type: $REBOOT_TYPE" >&2
Expand Down Expand Up @@ -118,11 +152,22 @@ else
fi
INITRD=$(echo $KERNEL_IMAGE | sed 's/vmlinuz/initrd.img/g')

sonic_asic_type=$(sonic-cfggen -y /etc/sonic/sonic_version.yml -v asic_type)

# Install new FW for mellanox platforms before control plane goes down
# So on boot switch will not spend time to upgrade FW increasing the CP downtime
if [[ "$sonic_asic_type" == "mellanox" ]]; then

if [[ "$REBOOT_TYPE" = "fastfast-reboot" ]]; then
check_issu_enabled || {
echo "Warm reboot is not supported by this HWSKU"
exit 1
}

check_sdk_upgrade || {
echo "Warm reboot is not supported"
exit 1
}
fi

echo "Prepare MLNX ASIC to ${REBOOT_TYPE}: install new FW if required"

MLNX_EXIT_SUCCESS="0"
Expand All @@ -136,12 +181,20 @@ if [[ "$sonic_asic_type" == "mellanox" ]]; then
echo "Failed to burn MLNX FW: errno=${MLNX_EXIT_CODE}"
exit "${MLNX_EXIT_ERROR}"
fi

if [[ "$REBOOT_TYPE" = "fastfast-reboot" ]]; then
issu_start || {
echo "ISSU start failed"
echo "Cold reboot may be requiered to recover"
exit 1
}
fi
fi

# Load kernel into the memory
/sbin/kexec -l "$KERNEL_IMAGE" --initrd="$INITRD" --append="$BOOT_OPTIONS"

if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then
if [[ "$REBOOT_TYPE" = "fast-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; then
# Dump the ARP and FDB tables, as well as the default routes for both IPv4 and IPv6,
# to files in /host/fast-reboot
mkdir -p /host/fast-reboot
Expand Down Expand Up @@ -180,7 +233,28 @@ fi
# Kill swss dockers
docker kill swss

# Pre-shutdown syncd and stop teamd gracefully

# Warm reboot: dump state to host disk
if [[ "$REBOOT_TYPE" = "fastfast-reboot" ]]; then
mkdir -p $WARM_DIR

# Dump route table from APPL DB.
# This route table will be used by fpmsyncd
# reconciliation logic
cleanup_except_table 0 'ROUTE_TABLE'
cleanup_except_table 4 'WARM_RESTART_TABLE'
cleanup_except_table 6 'WARM_RESTART_TABLE'

redis-cli -n 1 FLUSHDB
redis-cli -n 2 FLUSHDB
redis-cli -n 5 FLUSHDB

redis-cli save
docker cp database:/var/lib/redis/$REDIS_FILE $WARM_DIR
docker exec -i database rm /var/lib/redis/$REDIS_FILE
fi

# Pre-shutdown syncd
if [[ "$REBOOT_TYPE" = "warm-reboot" ]]; then
initialize_pre_shutdown

Expand All @@ -189,15 +263,23 @@ if [[ "$REBOOT_TYPE" = "warm-reboot" ]]; then
wait_for_pre_shutdown_complete_or_fail

backup_datebase
fi

# Stop teamd gracefully
if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; then
# Send USR1 signal to all teamd instances to stop them
# It will prepare teamd for warm-reboot
# Note: We must send USR1 signal before syncd, because it will send the last packet through CPU port
docker exec -i teamd pkill -USR1 teamd > /dev/null
fi

# syncd service stop is capable of handling both warm/fast/cold shutdown
systemctl stop syncd
if [[ "$sonic_asic_type" = "mellanox" ]]; then
docker kill syncd
else
# syncd service stop is capable of handling both warm/fast/cold shutdown
systemctl stop syncd
fi

# Kill other containers to make the reboot faster
docker ps -q | xargs docker kill > /dev/null
Expand All @@ -223,7 +305,7 @@ fi
# Update the reboot cause file to reflect that user issued this script
# Upon next boot, the contents of this file will be used to determine the
# cause of the previous reboot
echo "User issued '${REBOOT_TYPE}' command [User: ${REBOOT_USER}, Time: ${REBOOT_TIME}]" > ${REBOOT_CAUSE_FILE}
echo "User issued '${REBOOT_SCRIPT_NAME}' command [User: ${REBOOT_USER}, Time: ${REBOOT_TIME}]" > ${REBOOT_CAUSE_FILE}

# Wait until all buffers synced with disk
sync
Expand Down
75 changes: 71 additions & 4 deletions show/mlnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
import sys
import subprocess
import click
import sonic_platform
from swsssdk import ConfigDBConnector
import xml.etree.ElementTree as ET
except ImportError as e:
raise ImportError("%s - required module not found" % str(e))

Expand All @@ -18,9 +21,12 @@
SNIFFER_CONF_FILE_IN_CONTAINER = CONTAINER_NAME + ':' + SNIFFER_CONF_FILE
TMP_SNIFFER_CONF_FILE = '/tmp/tmp.conf'

HWSKU_PATH = '/usr/share/sonic/hwsku/'

SAI_PROFILE_DELIMITER = '='

# run command
def run_command(command, display_cmd=False, ignore_error=False):
def run_command(command, display_cmd=False, ignore_error=False, print_to_console=True):
"""Run bash command and print output to stdout
"""
if display_cmd == True:
Expand All @@ -29,12 +35,14 @@ def run_command(command, display_cmd=False, ignore_error=False):
proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
(out, err) = proc.communicate()

if len(out) > 0:
if len(out) > 0 and print_to_console:
click.echo(out)

if proc.returncode != 0 and not ignore_error:
sys.exit(proc.returncode)

return out, err


# 'mlnx' group
@click.group()
Expand All @@ -61,8 +69,57 @@ def sniffer_status_get(env_variable_name):
return enabled


@mlnx.command()
def sniffer():
def is_issu_status_enabled():
    """ This function parses the SAI XML profile used for mlnx to
    get whether ISSU is enabled or disabled

    The sai.profile file under HWSKU_PATH is read from inside the
    CONTAINER_NAME container to find the SAI XML file path (the
    SAI_INIT_CONFIG_FILE key). That XML file is then read from the same
    container and searched for the 'issu-enabled' node under
    'platform_info'.

    Exits the process with status 1 if sai.profile has no
    SAI_INIT_CONFIG_FILE entry or if the SAI XML cannot be parsed.

    @return: True/False
    """

    # ISSU disabled if node in XML config wasn't found
    issu_enabled = False

    # Get the SAI XML path from sai.profile
    # (drop the trailing '/' of HWSKU_PATH so the path has no doubled slashes)
    sai_profile_path = '{}/sai.profile'.format(HWSKU_PATH.rstrip('/'))

    # No '-t'/'-i' here: the output is captured through a pipe, and asking
    # docker for a TTY fails when the caller itself has no terminal
    DOCKER_CAT_COMMAND = 'docker exec {container_name} cat {path}'

    command = DOCKER_CAT_COMMAND.format(container_name=CONTAINER_NAME, path=sai_profile_path)
    sai_profile_content, _ = run_command(command, print_to_console=False)

    sai_profile_kvs = {}

    for line in sai_profile_content.split('\n'):
        if SAI_PROFILE_DELIMITER not in line:
            continue
        # Split on the first delimiter only, so a value that itself
        # contains the delimiter does not break the unpacking
        key, value = line.split(SAI_PROFILE_DELIMITER, 1)
        sai_profile_kvs[key.strip()] = value.strip()

    try:
        sai_xml_path = sai_profile_kvs['SAI_INIT_CONFIG_FILE']
    except KeyError:
        sys.stderr.write("Failed to get SAI XML from sai profile\n")
        sys.exit(1)

    # Get ISSU from SAI XML
    command = DOCKER_CAT_COMMAND.format(container_name=CONTAINER_NAME, path=sai_xml_path)
    sai_xml_content, _ = run_command(command, print_to_console=False)

    try:
        root = ET.fromstring(sai_xml_content)
    except ET.ParseError:
        sys.stderr.write("Failed to parse SAI xml\n")
        sys.exit(1)

    # Either node may be absent; in that case ISSU stays disabled
    platform_info = root.find('platform_info')
    el = platform_info.find('issu-enabled') if platform_info is not None else None

    if el is not None:
        issu_enabled = int(el.text) == 1

    return issu_enabled


@mlnx.command('sniffer')
def sniffer_status():
""" Show sniffer status """
components = ['sdk']
env_variable_strings = [ENV_VARIABLE_SX_SNIFFER]
Expand All @@ -72,3 +129,13 @@ def sniffer():
print components[index] + " sniffer is enabled"
else:
print components[index] + " sniffer is disabled"


@mlnx.command('issu')
def issu_status():
    """ Show ISSU status """
    # NOTE: the docstring above doubles as the click help text for this
    # command, so its wording is kept as-is.

    # Pick the message first, then emit it in one place
    if is_issu_status_enabled():
        status_line = 'ISSU is enabled'
    else:
        status_line = 'ISSU is disabled'

    print(status_line)

0 comments on commit 97d73da

Please sign in to comment.