-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmonitor.py
executable file
·190 lines (153 loc) · 5.24 KB
/
monitor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/usr/bin/python
# Author: Prem Karat [prem.karat@gmail.com]
# MIT License
#
# Reference:
# The daemonize() code is from python-cookbook3 by David Beazley
from fabric.api import env, run, get
from fabric.context_managers import hide
import atexit
import datetime
import os
import re
import sys
import signal
import socket
import tempfile
import time
# Globals
# Module-level Fabric settings; they apply to every run()/get() call made
# from main(). No credentials are hard-coded here.
# NOTE(review): with user/password left as None, authentication presumably
# relies on the SSH config and keys (see use_ssh_config below) - confirm.
env.user = None
env.password = None
# or, specify path to server public key here:
# env.key_filename = ''
env.sudo_password = None
# NOTE(review): presumably the SSH keepalive interval in seconds - confirm
# against the Fabric env documentation.
env.keepalive = 1
env.use_ssh_config = True
# NOTE(review): presumably makes Fabric abort instead of prompting for input,
# which would otherwise hang a detached daemon - confirm.
env.abort_on_prompts = True
def usage():
    """Print the command-line usage help for this script to stdout."""
    print("Usage:")
    # Program name fixed: the help text previously said "montior.py".
    print("\t monitor.py <host-ip> <interval> [start|stop]\n")
    print("\t <host-ip>: Valid IPv4 address")
    print("\t <interval>: in seconds. Minimum 1 sec")
    print("\t start: Run as daemon")
    print("\t stop: Stop daemon")
def getargs():
    """Validate the command-line arguments and return the monitoring target.

    Expects ``sys.argv`` to be ``[prog, <host-ip>, <interval>, start|stop]``.

    Returns:
        A ``(ipaddr, interval)`` tuple: the IPv4 address exactly as given on
        the command line and the polling interval converted to ``int``.

    Raises:
        SystemExit: after printing the usage text, when the argument count is
            wrong, the address is not a valid IPv4 address, the interval is
            not an integer >= 1, or the action is neither ``start`` nor
            ``stop``.
    """
    if len(sys.argv) != 4:
        usage()
        raise SystemExit("Invalid Usage")

    ipaddr, interval, action = sys.argv[1:4]

    try:
        socket.inet_pton(socket.AF_INET, ipaddr)
    except socket.error:
        usage()
        raise SystemExit('Invalid IP address')

    try:
        interval = int(interval)
    except ValueError:
        usage()
        raise SystemExit('Invalid interval')

    # Reject zero *and* negative values. The interval is later handed to
    # time.sleep() and used as the Fabric command timeout, neither of which
    # makes sense for a value below one second.
    if interval < 1:
        usage()
        raise SystemExit('Interval should be minimum 1 second')

    if action not in ('start', 'stop'):
        usage()
        raise SystemExit('Unknown argument %s' % action)

    return (ipaddr, interval)
def daemonize(pidfile, stdin='/dev/null', stdout='/dev/null',
              stderr='/dev/null'):
    """Detach the current process and keep running in the background.

    Performs two forks with a setsid() in between, redirects the standard
    streams to the given files, writes the PID of the surviving process to
    ``pidfile`` and arranges for that file to be removed on exit.

    The working directory is not changed here, so relative ``pidfile`` /
    stream paths resolve against the directory the script was started from.

    Args:
        pidfile: path of the PID file; its existence is treated as
            "already running".
        stdin: file opened for reading and duplicated onto stdin.
        stdout: file opened for appending and duplicated onto stdout.
        stderr: file opened for appending and duplicated onto stderr.

    Raises:
        RuntimeError: if ``pidfile`` already exists or a fork fails.
        SystemExit: with status 0 in each parent process after a
            successful fork; only the final child returns from this call.
    """
    if os.path.exists(pidfile):
        raise RuntimeError('Already Running')
    # First fork: the original process exits, the child carries on.
    try:
        if os.fork() > 0:
            raise SystemExit(0)
    except OSError:
        raise RuntimeError('fork 1 failed')
    # NOTE(review): with a zero umask, files created below (PID file, log
    # files) are not restricted by the mask - confirm this is intended.
    os.umask(0)
    # Start a new session for the child.
    os.setsid()
    # Second fork: the session leader exits, the grandchild carries on.
    try:
        if os.fork() > 0:
            raise SystemExit(0)
    except OSError:
        raise RuntimeError('fork 2 failed')
    # Replace file descriptors for stdin, stdout, and stderr
    # (buffering argument 0 = unbuffered binary files).
    with open(stdin, 'rb', 0) as f:
        os.dup2(f.fileno(), sys.stdin.fileno())
    with open(stdout, 'ab', 0) as f:
        os.dup2(f.fileno(), sys.stdout.fileno())
    with open(stderr, 'ab', 0) as f:
        os.dup2(f.fileno(), sys.stderr.fileno())
    # Write the PID file
    with open(pidfile, 'w') as f:
        f.write(str(os.getpid()))
    # Arrange to have the PID file removed on exit/signal
    atexit.register(lambda: os.remove(pidfile))

    # Signal handler for termination (required): turning SIGTERM into
    # SystemExit lets the interpreter shut down normally, so the atexit
    # hook above gets a chance to remove the PID file.
    def sigterm_handler(signo, frame):
        raise SystemExit(1)
    signal.signal(signal.SIGTERM, sigterm_handler)
# First "<digits>%" token of a df output line, i.e. the whole usage figure.
_DISK_USAGE_RE = re.compile(r'(\d+)%')


def _parse_disk_usage(df_line):
    """Return the usage percentage found in a ``df`` line as int, or None."""
    found = _DISK_USAGE_RE.search(df_line)
    return int(found.group(1)) if found else None


def _report_new_syslog_errors(logfile, filepos):
    """Write lines containing 'error' located after ``filepos``.

    ``logfile`` is a seekable file object holding a full copy of the log.
    Returns the offset (the copy's size) to resume from on the next call.
    """
    logfile.seek(0, os.SEEK_END)
    size = logfile.tell()
    if size < filepos:
        # The log shrank (rotated or truncated), so the saved offset points
        # past the end; rescan from the start instead of skipping new data.
        filepos = 0
    logfile.seek(filepos)
    for raw in logfile:
        # The temporary file is binary; decode when it yields bytes that are
        # not already ``str`` so matching and output work on text.
        line = raw if isinstance(raw, str) else raw.decode('utf-8', 'replace')
        if 'error' in line.lower():
            sys.stdout.write('%s\n' % line.strip())
    return size


def main(host, interval):
    """Poll ``host`` over SSH forever, reporting every ``interval`` seconds.

    Each cycle writes to stdout: a timestamp, the process count and its
    change since the previous cycle, the top five processes by memory use,
    the /var disk usage percentage and its change, and any syslog lines
    containing "error" (case-insensitive) that appeared since the previous
    cycle. On the first cycle the changes are relative to zero.

    Args:
        host: address assigned to Fabric's ``env.host_string``.
        interval: seconds to sleep between cycles; also assigned to
            ``env.command_timeout``.

    This function does not return. Errors raised by the Fabric calls are
    not handled here and propagate to the caller.
    """
    env.host_string = host
    env.command_timeout = interval

    prev_nprocs = 0
    prev_dusage = 0
    filepos = 0

    PROCCMD = "sudo ps --no-headers -ef | wc -l"
    MEMCMD = "sudo ps --no-headers -eo pid,pmem,comm | sort -rk2 | head -n5"
    DISKCMD = "sudo df -h /var | tail -n1"
    SYSLOG = '/var/log/syslog'

    while True:
        with hide('running', 'stdout', 'stderr'):
            ts = datetime.datetime.now().strftime("%Y-%m-%d-%H:%M:%S")
            sys.stdout.write('%s\n' % ts)
            sys.stdout.write('-------------------\n')

            # Get # of process information
            output = run(PROCCMD)
            cur_nprocs = int(output.stdout)
            diff = cur_nprocs - prev_nprocs
            sys.stdout.write('Number of process: %s and %d\n'
                             % (cur_nprocs, diff))
            prev_nprocs = cur_nprocs

            # Get top 5 pmem process
            res = run(MEMCMD)
            sys.stdout.write('Top 5 process by memory usage:\t\n')
            for line in res.splitlines():
                sys.stdout.write('\t%s\n' % line.strip())

            # Get /var disk usage information. The previous greedy pattern
            # '.*(\d+)%.*' captured only the last digit (48% -> 8).
            res = run(DISKCMD)
            cur_dusage = _parse_disk_usage(res.stdout)
            if cur_dusage is not None:
                diff = cur_dusage - prev_dusage
                sys.stdout.write('Disk space usage in /var partion: %s%% '
                                 'and %d%%\n' % (cur_dusage, diff))
                prev_dusage = cur_dusage

            # Incremental check for ERROR (case insensitive) in syslog:
            # fetch a copy of the log and scan only the part not seen yet.
            with tempfile.TemporaryFile() as f:
                get(SYSLOG, f)
                filepos = _report_new_syslog_errors(f, filepos)
            sys.stdout.write('\n')

            # Flush I/O buffers so the report reaches the log promptly
            sys.stdout.flush()
            sys.stderr.flush()
        time.sleep(interval)
if __name__ == '__main__':
    # The PID file and monitor.log are relative paths, so "start" and "stop"
    # must be issued from the same working directory.
    PIDFILE = 'monitor.pid'
    ipaddr, interval = getargs()
    action = sys.argv[3]  # already validated by getargs()

    if action == 'start':
        try:
            daemonize(PIDFILE, stdout='monitor.log',
                      stderr='monitor.log')
        except RuntimeError as err:
            # Keep the underlying reason (already running / fork failure).
            raise SystemExit('Failed to run as daemon: %s' % err)
        main(ipaddr, interval)
    elif action == 'stop':
        if not os.path.exists(PIDFILE):
            raise SystemExit('monitor daemon not running')
        with open(PIDFILE) as f:
            pid_text = f.read().strip()
        # A corrupt PID file or a PID that can no longer be signalled
        # (e.g. stale file after a crash) is reported as a clean error
        # message instead of an unhandled traceback.
        try:
            os.kill(int(pid_text), signal.SIGTERM)
        except ValueError:
            raise SystemExit('Invalid PID in %s: %r' % (PIDFILE, pid_text))
        except OSError as err:
            raise SystemExit('Could not stop monitor daemon (pid %s): %s'
                             % (pid_text, err))