-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcheck.go
105 lines (91 loc) · 2.79 KB
/
check.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
package main
import (
"sync"
"time"
log "github.com/sirupsen/logrus"
)
// check holds the runtime state of a single named check together with the
// instructions that describe how it is run and what happens on each outcome.
type check struct {
// Name identifies the check; it is attached to log entries as the "check" field.
Name string
// ExecLock is held for the whole duration of a run started by check(), so the
// same check never executes concurrently with itself.
ExecLock *sync.Mutex
// Instructions supplies the check/ok/warning/critical/recover/fix instructions
// as well as the Try(), Reset, Muted, every() and id() settings read by the
// methods of this type.
Instructions *instructions
// LastChecked is set to the current time at the start of every run.
LastChecked time.Time
// LastCritical is set to the current time when Status is upgraded to
// "Critical"; together with Instructions.Reset it decides when Attempts is
// reset after the check has stayed failed.
LastCritical time.Time
// Attempts counts failed runs: incremented on every failure and set back to 0
// on success or when the reset interval has elapsed.
Attempts int
// Status is set by check() to "OK", "Warning" or "Critical".
Status string
}
// isMuted reports the value of the Muted flag on the check's instructions.
func (c *check) isMuted() bool {
	muted := c.Instructions.Muted
	return muted
}
// every returns the duration reported by the every method of the check's
// instructions.
func (c *check) every() time.Duration {
	ins := c.Instructions
	return ins.every()
}
// id returns the identifier reported by the id method of the check's
// instructions.
func (c *check) id() string {
	ins := c.Instructions
	return ins.id()
}
// check runs the check in a new goroutine and updates the check's state from
// the result. It returns immediately without waiting for the run to finish.
//
// The goroutine holds c.ExecLock for its whole duration, so runs of the same
// check are serialized: a run started while another is in flight blocks until
// the earlier one releases the lock.
//
// When the "check" instruction succeeds:
//   - the "recover" instruction is executed if Attempts had reached Try()
//     (and Try() is non-zero);
//   - Attempts is reset to 0, Status becomes "OK" and the "ok" instruction is
//     executed.
//
// When it fails, Attempts is incremented and then:
//   - Attempts < Try(): Status becomes "Warning" and the "warn" instruction is
//     executed;
//   - Attempts == Try(): Status becomes "Critical", LastCritical is set to the
//     current time and the "critical" instruction is executed;
//   - in both cases above the "fix" instruction is executed as well once
//     Attempts >= Fix.After and Fix.okToExec() returns true;
//   - Attempts > Try(): no instruction is executed. If both Try() and Reset
//     are non-zero and more than Reset has elapsed since LastCritical,
//     Attempts is reset to 0.
//
// A follow-up instruction whose result is not ok() has its Error logged; it
// does not abort the rest of the run.
func (c *check) check() {
	go func() {
		// Serialize runs of this check; the lock is released when the run ends.
		c.ExecLock.Lock()
		defer c.ExecLock.Unlock()

		c.LastChecked = time.Now()

		// Built before the counter changes so the success path logs how many
		// failed attempts preceded it.
		cLog := log.WithFields(log.Fields{"check": c.Name, "attempt": c.Attempts})

		passed := c.Instructions.Check.exec("check", c).ok()
		// Read the threshold once per run instead of on every comparison.
		tries := c.Instructions.Try()

		if passed {
			// Attempts at or above a non-zero threshold means the failure path
			// had escalated this check to critical, so run the recovery step.
			if tries != 0 && c.Attempts >= tries {
				cLog.Info("Recovering")
				if !c.Instructions.Recover.exec("recover", c).ok() {
					cLog.WithFields(log.Fields{"instruction": "recover"}).Error(c.Instructions.Recover.Error.Error())
				}
			}

			c.Attempts = 0
			c.Status = "OK"
			cLog.Info("Ok")
			if !c.Instructions.OK.exec("ok", c).ok() {
				cLog.WithFields(log.Fields{"instruction": "ok"}).Error(c.Instructions.OK.Error.Error())
			}
			return
		}

		c.Attempts++
		// Refresh the "attempt" field so failure messages report the attempt
		// that just failed rather than the count from before the increment.
		cLog = cLog.WithFields(log.Fields{"attempt": c.Attempts})

		if c.Attempts > tries {
			// Already past the threshold (or the threshold is 0): stay quiet
			// until the reset interval since the last critical has elapsed.
			reset := c.Instructions.Reset
			if tries != 0 && reset != 0 && time.Since(c.LastCritical) > reset {
				c.Attempts = 0
				cLog.Info("Reset check")
			}
			return
		}

		// From here on Attempts <= tries.
		if c.Attempts == tries {
			cLog.Error("Check failed. Upgrading status to critical")
			c.Status = "Critical"
			c.LastCritical = time.Now()
			if !c.Instructions.Critical.exec("critical", c).ok() {
				cLog.WithFields(log.Fields{"instruction": "critical"}).Error(c.Instructions.Critical.Error.Error())
			}
		} else {
			cLog.Warn("Check failed")
			c.Status = "Warning"
			if !c.Instructions.Warning.exec("warn", c).ok() {
				cLog.WithFields(log.Fields{"instruction": "warning"}).Error(c.Instructions.Warning.Error.Error())
			}
		}

		// Attempt a fix once enough failures have accumulated; okToExec is
		// only consulted when the attempt threshold for fixing is met.
		if c.Attempts >= c.Instructions.Fix.After && c.Instructions.Fix.okToExec() {
			cLog.Info("Attempting fix")
			if !c.Instructions.Fix.exec("fix", c).ok() {
				cLog.WithFields(log.Fields{"instruction": "fix"}).Error(c.Instructions.Fix.Error.Error())
			}
		}
	}()
}