diff --git a/scripts/generate_dump b/scripts/generate_dump index 0862bd3b67..7096c7c04a 100755 --- a/scripts/generate_dump +++ b/scripts/generate_dump @@ -37,6 +37,8 @@ HOME=${HOME:-/root} USER=${USER:-root} TIMEOUT_MIN="5" SKIP_BCMCMD=0 +SAVE_STDERR=true +RETURN_CODE=0 handle_signal() { @@ -46,7 +48,15 @@ handle_signal() } trap 'handle_signal' SIGINT +handle_error() { + if [ "$1" != "0" ]; then + echo "ERR: RC:-$1 observed on line $2" >&2 + RETURN_CODE=1 + fi +} + save_bcmcmd() { + trap 'handle_error $? $LINENO' ERR local start_t=$(date +%s%3N) local end_t=0 local cmd="$1" @@ -106,6 +116,7 @@ save_bcmcmd() { # None ############################################################################### save_bcmcmd_all_ns() { + trap 'handle_error $? $LINENO' ERR local do_gzip=${3:-false} if [[ ( "$NUM_ASICS" > 1 ) ]]; then @@ -142,6 +153,7 @@ save_bcmcmd_all_ns() { # None ############################################################################### save_cmd() { + trap 'handle_error $? $LINENO' ERR local start_t=$(date +%s%3N) local end_t=0 local cmd="$1" @@ -150,6 +162,15 @@ save_cmd() { local do_gzip=${3:-false} local tarpath="${BASE}/dump/$filename" local timeout_cmd="timeout --foreground ${TIMEOUT_MIN}m" + local redirect='&>' + local redirect_eval='2>&1' + + if ! $SAVE_STDERR + then + redirect=">" + redirect_eval="" + fi + [ ! -d $LOGDIR ] && $MKDIR $V -p $LOGDIR # eval required here to re-evaluate the $cmd properly at runtime @@ -159,7 +180,7 @@ save_cmd() { if $do_gzip; then tarpath="${tarpath}.gz" filepath="${filepath}.gz" - local cmds="$cmd 2>&1 | gzip -c > '${filepath}'" + local cmds="$cmd $redirect_eval | gzip -c > '${filepath}'" if $NOOP; then echo "${timeout_cmd} bash -c \"${cmds}\"" else @@ -170,9 +191,9 @@ save_cmd() { fi else if $NOOP; then - echo "${timeout_cmd} $cmd &> '$filepath'" + echo "${timeout_cmd} $cmd $redirect '$filepath'" else - eval "${timeout_cmd} $cmd" &> "$filepath" + eval "${timeout_cmd} $cmd" "$redirect" "$filepath" if [ $? 
-ne 0 ]; then echo "Command: $cmd timedout after ${TIMEOUT_MIN} minutes." fi @@ -198,6 +219,7 @@ save_cmd() { # None ############################################################################### save_cmd_all_ns() { + trap 'handle_error $? $LINENO' ERR local do_zip=${3:-false} # host or default namespace @@ -226,6 +248,7 @@ save_cmd_all_ns() { # None ############################################################################### copy_from_docker() { + trap 'handle_error $? $LINENO' ERR local start_t=$(date +%s%3N) local end_t=0 local docker=$1 @@ -233,7 +256,7 @@ copy_from_docker() { local dstpath=$3 local timeout_cmd="timeout --foreground ${TIMEOUT_MIN}m" - local touch_cmd="sudo docker exec -i ${docker} touch ${filename}" + local touch_cmd="sudo docker exec ${docker} touch ${filename}" local cp_cmd="sudo docker cp ${docker}:${filename} ${dstpath}" if $NOOP; then @@ -267,6 +290,7 @@ copy_from_docker() { # None ############################################################################### copy_from_masic_docker() { + trap 'handle_error $? $LINENO' ERR local docker=$1 local filename=$2 local dstpath=$3 @@ -292,6 +316,7 @@ copy_from_masic_docker() { # vtysh namespace option ############################################################################### get_vtysh_namespace() { + trap 'handle_error $? $LINENO' ERR local asic_id=${1:-""} local ns="" if [[ ( $asic_id = "" ) ]] ; then @@ -316,6 +341,7 @@ get_vtysh_namespace() { # None ############################################################################### save_vtysh() { + trap 'handle_error $? $LINENO' ERR local vtysh_cmd=$1 local filename=$2 local do_gzip=${3:-false} @@ -345,6 +371,7 @@ save_vtysh() { # None ############################################################################### save_ip() { + trap 'handle_error $? 
$LINENO' ERR local ip_args=$1 local filename="ip.$2" local do_gzip=${3:-false} @@ -363,6 +390,7 @@ save_ip() { # None ############################################################################### save_bridge() { + trap 'handle_error $? $LINENO' ERR local br_args=$1 local filename="bridge.$2" local do_gzip=${3:-false} @@ -379,6 +407,7 @@ save_bridge() { # None ############################################################################### save_bridge_info() { + trap 'handle_error $? $LINENO' ERR save_bridge "fdb show" "fdb" save_bridge "vlan show" "vlan" } @@ -395,6 +424,7 @@ save_bridge_info() { # None ############################################################################### save_bgp_neighbor() { + trap 'handle_error $? $LINENO' ERR local timeout_cmd="timeout --foreground ${TIMEOUT_MIN}m" local asic_id=${1:-""} local ns=$(get_vtysh_namespace $asic_id) @@ -431,6 +461,7 @@ save_bgp_neighbor() { # None ############################################################################### save_bgp_neighbor_all_ns() { + trap 'handle_error $? $LINENO' ERR if [[ ( "$NUM_ASICS" == 1 ) ]] ; then save_bgp_neighbor else @@ -451,6 +482,7 @@ save_bgp_neighbor_all_ns() { # None ############################################################################### save_nat_info() { + trap 'handle_error $? $LINENO' ERR save_cmd_all_ns "iptables -t nat -nv -L" "nat.iptables" save_cmd_all_ns "conntrack -j -L" "nat.conntrack" save_cmd_all_ns "conntrack -j -L | wc" "nat.conntrackcount" @@ -469,6 +501,7 @@ save_nat_info() { # None ############################################################################### save_bfd_info() { + trap 'handle_error $? $LINENO' ERR save_vtysh "show bfd peers" "frr.bfd.peers" save_vtysh "show bfd peers counters" "frr.bfd.peers.counters" save_vtysh "show bfd peers json" "frr.bfd.peers.json" @@ -485,6 +518,7 @@ save_bfd_info() { # None ############################################################################### save_ip_info() { + trap 'handle_error $? 
$LINENO' ERR save_ip "link" "link" save_ip "addr" "addr" save_ip "rule" "rule" @@ -503,6 +537,7 @@ save_ip_info() { # None ############################################################################### save_bgp_info() { + trap 'handle_error $? $LINENO' ERR save_vtysh "show ip bgp summary" "bgp.summary" save_vtysh "show ip bgp neighbors" "bgp.neighbors" save_vtysh "show ip bgp" "bgp.table" @@ -522,6 +557,7 @@ save_bgp_info() { # None ############################################################################### save_frr_info() { + trap 'handle_error $? $LINENO' ERR save_vtysh "show running-config" "frr.running_config" save_vtysh "show ip route vrf all" "frr.ip_route" save_vtysh "show ipv6 route vrf all" "frr.ip6_route" @@ -541,6 +577,7 @@ save_frr_info() { # None ############################################################################### save_redis_info() { + trap 'handle_error $? $LINENO' ERR save_redis "APPL_DB" save_redis "ASIC_DB" save_redis "COUNTERS_DB" @@ -568,6 +605,7 @@ save_redis_info() { # None ############################################################################### save_proc() { + trap 'handle_error $? $LINENO' ERR local procfiles="$@" $MKDIR $V -p $TARDIR/proc for f in $procfiles @@ -593,6 +631,7 @@ save_proc() { # None ############################################################################### save_redis() { + trap 'handle_error $? $LINENO' ERR local db_name=$1 if [ $# -ge 2 ] && [ -n "$2" ]; then local dest_file_name=$2 @@ -612,6 +651,7 @@ save_redis() { # None ############################################################################### save_saidump() { + trap 'handle_error $? $LINENO' ERR if [[ ( "$NUM_ASICS" == 1 ) ]] ; then save_cmd "docker exec -it syncd saidump" "saidump" else @@ -632,6 +672,7 @@ save_saidump() { # None ############################################################################### save_platform_info() { + trap 'handle_error $? 
$LINENO' ERR save_cmd "show platform syseeprom" "syseeprom" save_cmd "show platform psustatus" "psustatus" save_cmd "show platform ssdhealth" "ssdhealth" @@ -659,6 +700,7 @@ save_platform_info() { # None ############################################################################### save_file() { + trap 'handle_error $? $LINENO' ERR local start_t=$(date +%s%3N) local end_t=0 local orig_path=$1 @@ -705,6 +747,7 @@ save_file() { # None ############################################################################### find_files() { + trap 'handle_error $? $LINENO' ERR local -r directory=$1 $TOUCH --date="${SINCE_DATE}" "${REFERENCE_FILE}" local -r find_command="find -L $directory -type f -newer ${REFERENCE_FILE}" @@ -749,6 +792,7 @@ enable_logrotate() { # None ############################################################################### collect_mellanox() { + trap 'handle_error $? $LINENO' ERR local sai_dump_filename="/tmp/sai_sdk_dump_$(date +"%m_%d_%Y_%I_%M_%p")" ${CMD_PREFIX}docker exec -it syncd saisdkdump -f $sai_dump_filename ${CMD_PREFIX}docker exec syncd tar Ccf $(dirname $sai_dump_filename) - $(basename $sai_dump_filename) | tar Cxf /tmp/ - @@ -763,9 +807,9 @@ collect_mellanox() { # Save SDK error dumps local sdk_dump_path=`${CMD_PREFIX}docker exec syncd cat /tmp/sai.profile|grep "SAI_DUMP_STORE_PATH"|cut -d = -f2` - if [[ $sdk_dump_path ]]; then + if [[ -d $sdk_dump_path ]]; then copy_from_docker syncd $sdk_dump_path /tmp/sdk-dumps - for file in $(find /tmp/sdk-dumps); do + for file in $(find /tmp/sdk-dumps -type f); do save_file ${file} sai_sdk_dump false done rm -rf /tmp/sdk-dumps @@ -782,6 +826,7 @@ collect_mellanox() { # None ############################################################################### collect_broadcom() { + trap 'handle_error $? 
$LINENO' ERR local platform=$(show platform summary --json | python -c 'import sys, json; \ print(json.load(sys.stdin)["platform"])') local hwsku=$(show platform summary --json | python -c 'import sys, json; \ @@ -868,6 +913,7 @@ collect_broadcom() { # None ############################################################################### save_log_files() { + trap 'handle_error $? $LINENO' ERR disable_logrotate trap enable_logrotate HUP INT QUIT TERM KILL ABRT ALRM @@ -907,6 +953,7 @@ save_log_files() { # None ############################################################################### save_warmboot_files() { + trap 'handle_error $? $LINENO' ERR # Copy the warmboot files start_t=$(date +%s%3N) if $NOOP; then @@ -935,6 +982,7 @@ save_warmboot_files() { ############################################################################### save_crash_files() { # archive core dump files + trap 'handle_error $? $LINENO' ERR for file in $(find_files "/var/core/"); do # don't gzip already-gzipped log files :) if [ -z "${file##*.gz}" ]; then @@ -967,9 +1015,15 @@ save_crash_files() { # ASIC Count ############################################################################### get_asic_count() { + trap 'handle_error $? $LINENO' ERR + local redirect_eval="2>&1" + if ! $SAVE_STDERR + then + redirect_eval="" + fi local cmd="show platform summary --json | python -c 'import sys, json; \ print(json.load(sys.stdin)[\"asic_count\"])'" - echo `eval ${cmd} 2>&1` + echo `eval ${cmd} ${redirect_eval}` } ############################################################################### @@ -983,6 +1037,7 @@ get_asic_count() { # None ############################################################################### save_counter_snapshot() { + trap 'handle_error $? 
$LINENO' ERR local asic_name="$1" local idx=$2 counter_t=$(date +'%d/%m/%Y %H:%M:%S:%6N') @@ -1016,6 +1071,7 @@ save_counter_snapshot() { # None ############################################################################### main() { + trap 'handle_error $? $LINENO' ERR local start_t=0 local end_t=0 if [ `whoami` != root ] && ! $NOOP; @@ -1094,12 +1150,12 @@ main() { if [[ ( "$NUM_ASICS" > 1 ) ]]; then for (( i=0; i<$NUM_ASICS; i++ )) do - save_cmd "docker exec -it lldp$i lldpcli show statistics" "lldp$i.statistics" + save_cmd "docker exec lldp$i lldpcli show statistics" "lldp$i.statistics" save_cmd "docker logs bgp$i" "docker.bgp$i.log" save_cmd "docker logs swss$i" "docker.swss$i.log" done else - save_cmd "docker exec -it lldp lldpcli show statistics" "lldp.statistics" + save_cmd "docker exec lldp lldpcli show statistics" "lldp.statistics" save_cmd "docker logs bgp" "docker.bgp.log" save_cmd "docker logs swss" "docker.swss.log" fi @@ -1192,6 +1248,7 @@ main() { fi echo ${TARFILE} + exit $RETURN_CODE } ############################################################################### @@ -1247,11 +1304,13 @@ OPTIONS "24 March", "yesterday", etc. -t TIMEOUT_MINS Command level timeout in minutes + -r + Redirect any intermediate errors to STDERR EOF } -while getopts ":xnvhzas:t:" opt; do +while getopts ":xnvhzas:t:r" opt; do case $opt in x) # enable bash debugging @@ -1293,6 +1352,9 @@ while getopts ":xnvhzas:t:" opt; do t) TIMEOUT_MIN="${OPTARG}" ;; + r) + SAVE_STDERR=false + ;; /?) 
echo "Invalid option: -$OPTARG" >&2 exit 1 diff --git a/show/main.py b/show/main.py index 672ef2127d..c4cec8df8e 100644 --- a/show/main.py +++ b/show/main.py @@ -1213,7 +1213,8 @@ def users(verbose): @click.option('--verbose', is_flag=True, help="Enable verbose output") @click.option('--allow-process-stop', is_flag=True, help="Dump additional data which may require system interruption") @click.option('--silent', is_flag=True, help="Run techsupport in silent mode") -def techsupport(since, global_timeout, cmd_timeout, verbose, allow_process_stop, silent): +@click.option('--redirect-stderr', '-r', is_flag=True, help="Redirect any intermediate errors to STDERR") +def techsupport(since, global_timeout, cmd_timeout, verbose, allow_process_stop, silent, redirect_stderr): """Gather information for troubleshooting""" cmd = "sudo timeout -s SIGTERM --foreground {}m".format(global_timeout) @@ -1229,6 +1230,8 @@ def techsupport(since, global_timeout, cmd_timeout, verbose, allow_process_stop, if since: cmd += " -s '{}'".format(since) cmd += " -t {}".format(cmd_timeout) + if redirect_stderr: + cmd += " -r" run_command(cmd, display_cmd=verbose)