-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunner.sh
333 lines (228 loc) · 13.3 KB
/
runner.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
#!/bin/bash
#****************************************************************************************************************\
# *
# Script Name : runner.sh *
# *
# Purpose : Get an input command and optional flags , and print a summary of the command executions *
# that include return-codes summary *
# *
# Inputs : Command for executions *
# Number of times to run the command *
# *
# Optional flags : Number of allowed failed command invocation *
# creating network traffic capture - for failed execution *
# crating system/resources metrics measured during command execution - for failed execution *
# creating log for all system calls ran by the command - for failed execution *
# creating command output logs (stdout , stderr) - for failed execution *
# Running with debug mode that show each instruction executed by the script *
# *
# *
# *
# Written by : Mohamad Abo-Ras (mohamad.abo.ras@gmail.com) *
# *
# Date : Jan01,2018 *
# *
# Changes History: *
# *
# Date | By | Changes/New features *
# ----------+------------+----------------------------------------------- * *
# 01.01.2018 | Mohamad | script created for 1st version . * *
# 8 | Abo-Ras | * *
# ----------+------------+----------------------------------------------- * *
#****************************************************************************************************************/
## define global variables
export DEBUG="set +x" ## disabled debug
export execution_count=0 ## total executions counter
export DEFAULT_EXIT_CODE=255 ## default exit-code
export EXEC_NAME=$(basename $0) ## script name
export OUTPUT_DIR="`pwd`" ## path for output & generated files
export TIME_SIGNUTURE="date +"%Y%m%d_%H%M%S"" ## current date&time
## define local functions/methods
## print the summary report
# And exit the program with the most frequent return-code .
# in case no returned-codes (such case if the script killed while the first invocation) , then it will exit with default-return-code (predefined as 255)
finalize() {
frequent_rc=${DEFAULT_EXIT_CODE}
if [[ ! -z ${list_rc[@]} ]] ; then
printf "|------Return Codes Summary-----|\n"
printf "| Return-Code | Occurrence |\n"
printf "|-------------------------------|\n"
echo ${list_rc[@]} | tr ' ' '\n' | sort | uniq -c | sort -rnk 1 | awk '{ printf "|\t%2d\t|\t%2d\t|\n", $2, $1 }'
printf "|-------------------------------|\n"
frequent_rc=$(echo ${list_rc[@]} |tr ' ' '\n' | sort | uniq -c | sort -rnk 1 | head -1 | awk '{print $2}')
fi
exit ${frequent_rc}
}
## signal handler
# catch system signals (SIGINT , SIGTERM)
# handler : call finalize
# note : trap will not catch SIGKILL signal as OS doesn't allow it
trap finalize SIGINT SIGTERM
## print script usage
usage() {
cat <<EOF
Usage: ${EXEC_NAME} [OPTION].... [command [arg ...]]
-c COUNT Number of times to run the given command
--failed-count N Number of allowed failed command invocation attempts before giving up
--net-trace For each failed execution, create a 'pcap' file with the network traffic during the execution.
--sys-trace For each failed execution, create a log for each of the following values, measured during command execution
(disk io,+memory,processes/threads,cpu usage of the command network card package counters)
--call-trace For each failed execution, add also a log with all the system calls ran by the command
--log-trace For each failed execution, add also the command output logs (stdout, stderr)
--debug Debug mode, show each instruction executed by the script
EOF
exit ${DEFAULT_EXIT_CODE}
}
# generate network traffic capture by calling to tcpdump
# default : collect 4000 packets for all the network interfaces formatted with default timestamp with extra verbose output
# note : those values is configurable and could be any value depend on the local machine/server and which details needed to capture from network traffic
exec_net_trace () {
export NET_PCAP="${OUTPUT_DIR}/net_trace_`${TIME_SIGNUTURE}`.pcap"
[[ ${NET_TRACE} ]] && /usr/sbin/tcpdump -i any -vvv -nnN -tttt -s0 -c 4000 -w ${NET_PCAP}
}
# generate system/resources metrics measured by using OPVA-HP extract utility : disk io , memory , processes/threads and cpu usage of the command,network card package counters
# time frame to generate from start to end date with format : %m/%d/%Y %H:%M:%S" , e.g. 01/01/2018 12:15:00
# all the extracted data saved to common file "SYS_TRACE_OUT"
# + alternative utility is to use named sar "/usr/bin/sar" - Collect, report, or save system activity information
# time-frame(start to end) time format is "HH:MM:SS"
# example : sar -n ALL -P ALL -r -u -s ${START_TIME} -e ${END_TIME}
exec_sys_trace() {
export SYS_TRACE_OUT="${OUTPUT_DIR}/sys_trace_`${TIME_SIGNUTURE}`.out"
[[ ${SYS_TRACE} ]] && /opt/perf/bin/extract -GDNUY -xp -b \"${START_DATE}\" -e \"${END_DATE}\" -f ${SYS_TRACE_OUT}
}
#************************************************************************************************************
# *
# m a i n p r o g r a m f l o w *
# *
#************************************************************************************************************
# Test getopt : run getopt for test , if fail then exit
getopt --test > /dev/null
if [[ $? -ne 4 ]]; then
echo "getopt Test : `getopt --test` failed in this environment."
exit ${DEFAULT_EXIT_CODE}
fi
# setting the valid options : short&long options
# -c => mandatory flag & input argument required
# --failed-count => optional flag & input argument required
# --net-trace => optional flag & input argument not required
# --sys-trace => .... same as previous
# --call-trace => .... same as previous
# --log-trace => .... same as previous
# --debug => .... same as previous
SHORT_OPTIONS=c:
LONG_OPTIONS=failed-count:,net-trace,sys-trace,call-trace,log-trace,debug
OPTS=$(getopt --options=${SHORT_OPTIONS} --longoptions=${LONG_OPTIONS} --name "$0" -- "$@")
if [[ $? -ne 0 ]]; then
# e.g. $? == 1
# then getopt has complained about wrong arguments to stdout
exit 2
fi
# read getopt's output this way to handle the quoting right:
eval set -- "${OPTS}"
# parse each options from argument list until getting separate --
while true; do
case "$1" in
-c)
# Getting & store the execution count/times
export COUNT="$2"
shift 2
;;
--failed-count)
# Getting & store the failed execution count
export FAIL_COUNT="$2"
shift 2
;;
--debug)
# Setting the debug options
export DEBUG="set -xvf"
shift
;;
--net-trace)
# trigger tcpdump to generate network traffic : we used - /usr/sbin/tcpdump
export NET_TRACE="Y"
shift
;;
--sys-trace)
# triggering OPVA Extract utility to generate system resources measured matrices : we used /opt/perf/bin/extract
export SYS_TRACE="Y"
shift
;;
--call-trace)
# trigger the strace to generate call trace - we used /usr/bin/strace
export CALL_TRACE="Y"
shift
;;
--log-trace)
# trigger the log trace by printing the stdout & stderr to separate log files
export LOG_TRACE="Y"
export LOG_OUT="${OUTPUT_DIR}/log_trace_`${TIME_SIGNUTURE}`.out"
export LOG_ERR="${OUTPUT_DIR}/log_trace_`${TIME_SIGNUTURE}`.err"
shift
;;
--)
# once we arrived to -- this end of arguments parsing
shift
break
;;
*)
# when the input from OPTS no matching any of above , then it means the input not valid/or something wrong happened
usage
;;
esac
done
# enable/disable the debug option depend on the input options (if --debug was passed)
${DEBUG}
# handle non-option arguments , check if the input argument(not for the OPTS) mount is only 1 argument(one argument for the command parameter)
if [[ $# -ne 1 ]]; then
usage
fi
# getting the command to run from command-line argument
COMMAND="$1"
# declare array to store all the returned value for each command execution
set -a list_rc
# command executions loop - main invocations steps
# Test and check the executions time and allowed fails counters
while [[ ${execution_count} != ${COUNT} ]] && [[ ${failed_executions} != ${FAIL_COUNT} ]] ; do
export START_DATE=$(date +"%m/%d/%Y %H:%M:%S")
export CALL_TRACE_OUT="${OUTPUT_DIR}/call_trace_`${TIME_SIGNUTURE}`.out"
# if --net-trace enabled then trigger net traffic monitor
[[ ${NET_TRACE} ]] && exec_net_trace 1>/dev/null 2>&1 &
wait_pids="${wait_pids} $!"
# --log-trace enable
if [[ ${LOG_TRACE} ]] ; then
# --call-trace
if [[ ${CALL_TRACE} ]] ; then
/usr/bin/strace -ttT -o ${CALL_TRACE_OUT} ${COMMAND} 1>>${LOG_OUT} 2>>${LOG_ERR}
# getting return code from strace.out file 'exit_group' printted in the last line in strace.out and include exit-code exit_group(exit-code)
this_rc="`grep exit_group ${CALL_TRACE_OUT} | tail -1 | cut -d'(' -f2 | cut -d')' -f1`"
else
# redirect the stdout & stderr to separate file in filesystem
${COMMAND} 1>>${LOG_OUT} 2>>${LOG_ERR}
this_rc=$?
fi
else
# --call-trace
if [[ ${CALL_TRACE} ]] ; then
/usr/bin/strace -ttT -o ${CALL_TRACE_OUT} ${COMMAND}
this_rc="`grep exit_group ${CALL_TRACE_OUT} | tail -1 | cut -d'(' -f2 | cut -d')' -f1`"
else
${COMMAND}
this_rc=$?
fi
fi
wait ${wait_pids}
export END_DATE=$(date +"%m/%d/%Y %H:%M:%S")
# save the returned-code of the command execution into the list
list_rc[execution_count]=${this_rc}
# return-code handler in two cases success/fail
# failure case : increase the failed execution counter to be updated for the next execution in while loop
# success case : delete the pcap file that the generated only if net-trace is enabled
[[ ${this_rc} != 0 ]] && ((failed_executions++))
[[ ${this_rc} == 0 ]] && [[ ${NET_TRACE} ]] && rm -f ${NET_PCAP}
# update the loop counter in any case
((execution_count++))
# another case : if the execution failed - then trigger the sys-trace generation
[[ ${this_rc} != 0 ]] && exec_sys_trace 1>/dev/null 2>&1
done
# calling to finalize to print the summary , when the script done
finalize