diff --git a/cmd/backrest/backrest.go b/cmd/backrest/backrest.go
index f08a5acb..b6e0f08d 100644
--- a/cmd/backrest/backrest.go
+++ b/cmd/backrest/backrest.go
@@ -21,6 +21,7 @@ import (
 	"github.com/garethgeorge/backrest/internal/config"
 	"github.com/garethgeorge/backrest/internal/env"
 	"github.com/garethgeorge/backrest/internal/logwriter"
+	"github.com/garethgeorge/backrest/internal/metric"
 	"github.com/garethgeorge/backrest/internal/oplog"
 	"github.com/garethgeorge/backrest/internal/oplog/bboltstore"
 	"github.com/garethgeorge/backrest/internal/orchestrator"
@@ -116,6 +117,7 @@ func main() {
 	mux.Handle(backrestHandlerPath, auth.RequireAuthentication(backrestHandler, authenticator))
 	mux.Handle("/", webui.Handler())
 	mux.Handle("/download/", http.StripPrefix("/download", api.NewDownloadHandler(oplog)))
+	mux.Handle("/metrics", auth.RequireAuthentication(metric.GetRegistry().Handler(), authenticator))
 
 	// Serve the HTTP gateway
 	server := &http.Server{
diff --git a/go.mod b/go.mod
index 3fb6be18..7ae0756f 100644
--- a/go.mod
+++ b/go.mod
@@ -32,6 +32,8 @@ require (
 
 require (
 	github.com/akavel/rsrc v0.10.2 // indirect
+	github.com/beorn7/perks v1.0.1 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
 	github.com/dchest/jsmin v0.0.0-20220218165748-59f39799265f // indirect
 	github.com/fatih/color v1.17.0 // indirect
 	github.com/getlantern/context v0.0.0-20220418194847-3d5e7a086201 // indirect
@@ -45,8 +47,14 @@ require (
 	github.com/go-stack/stack v1.8.1 // indirect
 	github.com/hashicorp/errwrap v1.1.0 // indirect
 	github.com/josephspurrier/goversioninfo v1.4.0 // indirect
+	github.com/klauspost/compress v1.17.9 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
 	github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c // indirect
+	github.com/prometheus/client_golang v1.20.3 // indirect
+	github.com/prometheus/client_model v0.6.1 // indirect
+	github.com/prometheus/common v0.55.0 // indirect
+	github.com/prometheus/procfs v0.15.1 // indirect
 	github.com/randall77/makefat v0.0.0-20210315173500-7ddd0e42c844 // indirect
 	go.opentelemetry.io/otel v1.27.0 // indirect
 	go.opentelemetry.io/otel/metric v1.27.0 // indirect
diff --git a/go.sum b/go.sum
index e12d5ede..0d875748 100644
--- a/go.sum
+++ b/go.sum
@@ -5,6 +5,10 @@ github.com/akavel/rsrc v0.10.2/go.mod h1:uLoCtb9J+EyAqh+26kdrTgmzRBFPGOolLWKpdxk
 github.com/alessio/shellescape v1.4.2 h1:MHPfaU+ddJ0/bYWpgIeUnQUqKrlJ1S7BfEYPM4uEoM0=
 github.com/alessio/shellescape v1.4.2/go.mod h1:PZAiSCk0LJaZkiCSkPv8qIobYglO3FPpyFjDCtHLS30=
 github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
+github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
+github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/containrrr/shoutrrr v0.8.0 h1:mfG2ATzIS7NR2Ec6XL+xyoHzN97H8WPjir8aYzJUSec=
 github.com/containrrr/shoutrrr v0.8.0/go.mod h1:ioyQAyu1LJY6sILuNyKaQaw+9Ttik5QePU8atnAdO2o=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -77,6 +81,8 @@ github.com/jarcoal/httpmock v1.3.0 h1:2RJ8GP0IIaWwcC9Fp2BmVi8Kog3v2Hn7VXM3fTd+nu
 github.com/jarcoal/httpmock v1.3.0/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg=
 github.com/josephspurrier/goversioninfo v1.4.0 h1:Puhl12NSHUSALHSuzYwPYQkqa2E1+7SrtAPJorKK0C8=
 github.com/josephspurrier/goversioninfo v1.4.0/go.mod h1:JWzv5rKQr+MmW+LvM412ToT/IkYDZjaclF2pKDss8IY=
+github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
+github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
 github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
 github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
@@ -87,6 +93,8 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk
 github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
 github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
 github.com/natefinch/atomic v1.0.1 h1:ZPYKxkqQOx3KZ+RsbnP/YsgvxWQPGxjC0oBt2AhwV0A=
 github.com/natefinch/atomic v1.0.1/go.mod h1:N/D/ELrljoqDyT3rZrsUmtsuzvHkeB/wWjHV22AZRbM=
 github.com/ncruces/zenity v0.10.12 h1:o4SErDa0kQijlqG6W4OYYzO6kA0fGu34uegvJGcMLBI=
@@ -100,6 +108,14 @@ github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c/go.mod h1:X07ZCGwU
 github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prometheus/client_golang v1.20.3 h1:oPksm4K8B+Vt35tUhw6GbSNSgVlVSBH0qELP/7u83l4=
+github.com/prometheus/client_golang v1.20.3/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
+github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
+github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
+github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
+github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
+github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
+github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
 github.com/randall77/makefat v0.0.0-20210315173500-7ddd0e42c844 h1:GranzK4hv1/pqTIhMTXt2X8MmMOuH3hMeUR0o9SP5yc=
 github.com/randall77/makefat v0.0.0-20210315173500-7ddd0e42c844/go.mod h1:T1TLSfyWVBRXVGzWd0o9BI4kfoO9InEgfQe4NV3mLz8=
 github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966/go.mod h1:sUM3LWHvSMaG192sy56D9F7CNvL7jUJVXoqM1QKLnog=
diff --git a/internal/hook/hook.go b/internal/hook/hook.go
index 0a8ca179..aafabdf6 100644
--- a/internal/hook/hook.go
+++ b/internal/hook/hook.go
@@ -66,6 +66,7 @@ func newOneoffRunHookTask(title, instanceID, repoID, planID string, parentOp *v1
 	return &tasks.GenericOneoffTask{
 		OneoffTask: tasks.OneoffTask{
 			BaseTask: tasks.BaseTask{
+				TaskType:   "hook",
 				TaskName:   fmt.Sprintf("run hook %v", title),
 				TaskRepoID: repoID,
 				TaskPlanID: planID,
diff --git a/internal/metric/metric.go b/internal/metric/metric.go
new file mode 100644
index 00000000..30855f35
--- /dev/null
+++ b/internal/metric/metric.go
@@ -0,0 +1,102 @@
+// Package metric exposes Backrest's Prometheus metrics.
+package metric
+
+import (
+	"net/http"
+	"slices"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+)
+
+// unassociatedLabel is the label value used when a task has no repo or plan ID.
+const unassociatedLabel = "_unassociated_"
+
+var (
+	globalRegistry = initRegistry()
+)
+
+// initRegistry creates the Registry, building every collector and registering
+// it on a fresh Prometheus registry.
+func initRegistry() *Registry {
+	commonDims := []string{"repo_id", "plan_id"}
+
+	registry := &Registry{
+		reg: prometheus.NewRegistry(),
+		backupBytesProcessed: prometheus.NewSummaryVec(prometheus.SummaryOpts{
+			Name: "backrest_backup_bytes_processed",
+			Help: "The total number of bytes processed during a backup",
+		}, commonDims),
+		backupBytesAdded: prometheus.NewSummaryVec(prometheus.SummaryOpts{
+			Name: "backrest_backup_bytes_added",
+			Help: "The total number of bytes added during a backup",
+		}, commonDims),
+		backupFileWarnings: prometheus.NewSummaryVec(prometheus.SummaryOpts{
+			Name: "backrest_backup_file_warnings",
+			Help: "The total number of file warnings during a backup",
+		}, commonDims),
+		tasksDuration: prometheus.NewSummaryVec(prometheus.SummaryOpts{
+			Name: "backrest_tasks_duration_secs",
+			Help: "The duration of a task in seconds",
+		}, append(slices.Clone(commonDims), "task_type")),
+		tasksRun: prometheus.NewCounterVec(prometheus.CounterOpts{
+			Name: "backrest_tasks_run_total",
+			Help: "The total number of tasks run",
+		}, append(slices.Clone(commonDims), "task_type", "status")),
+	}
+
+	registry.reg.MustRegister(registry.backupBytesProcessed)
+	registry.reg.MustRegister(registry.backupBytesAdded)
+	registry.reg.MustRegister(registry.backupFileWarnings)
+	registry.reg.MustRegister(registry.tasksDuration)
+	registry.reg.MustRegister(registry.tasksRun)
+
+	return registry
+}
+
+// GetRegistry returns the process-wide metrics Registry.
+func GetRegistry() *Registry {
+	return globalRegistry
+}
+
+// Registry holds Backrest's Prometheus collectors together with the registry
+// they are registered on.
+type Registry struct {
+	reg                  *prometheus.Registry
+	backupBytesProcessed *prometheus.SummaryVec
+	backupBytesAdded     *prometheus.SummaryVec
+	backupFileWarnings   *prometheus.SummaryVec
+	tasksDuration        *prometheus.SummaryVec
+	tasksRun             *prometheus.CounterVec
+}
+
+// Handler returns an HTTP handler that serves the metrics held by r.
+func (r *Registry) Handler() http.Handler {
+	return promhttp.HandlerFor(r.reg, promhttp.HandlerOpts{})
+}
+
+// RecordTaskRun counts one task run under the given status and observes its
+// duration in seconds. Empty repo or plan IDs are recorded as "_unassociated_".
+func (r *Registry) RecordTaskRun(repoID, planID, taskType string, durationSecs float64, status string) {
+	repoID, planID = labelOrUnassociated(repoID), labelOrUnassociated(planID)
+	r.tasksRun.WithLabelValues(repoID, planID, taskType, status).Inc()
+	r.tasksDuration.WithLabelValues(repoID, planID, taskType).Observe(durationSecs)
+}
+
+// RecordBackupSummary observes the bytes processed, bytes added and file
+// warning count of one backup. Empty repo or plan IDs are recorded as
+// "_unassociated_", matching RecordTaskRun.
+func (r *Registry) RecordBackupSummary(repoID, planID string, bytesProcessed, bytesAdded int64, fileWarnings int64) {
+	repoID, planID = labelOrUnassociated(repoID), labelOrUnassociated(planID)
+	r.backupBytesProcessed.WithLabelValues(repoID, planID).Observe(float64(bytesProcessed))
+	r.backupBytesAdded.WithLabelValues(repoID, planID).Observe(float64(bytesAdded))
+	r.backupFileWarnings.WithLabelValues(repoID, planID).Observe(float64(fileWarnings))
+}
+
+// labelOrUnassociated returns id, or unassociatedLabel when id is empty.
+func labelOrUnassociated(id string) string {
+	if id == "" {
+		return unassociatedLabel
+	}
+	return id
+}
diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go
index e418701b..da36e19f 100644
--- a/internal/orchestrator/orchestrator.go
+++ b/internal/orchestrator/orchestrator.go
@@ -12,6 +12,7 @@ import (
 	v1 "github.com/garethgeorge/backrest/gen/go/v1"
 	"github.com/garethgeorge/backrest/internal/config"
 	"github.com/garethgeorge/backrest/internal/logwriter"
+	"github.com/garethgeorge/backrest/internal/metric"
 	"github.com/garethgeorge/backrest/internal/oplog"
 	"github.com/garethgeorge/backrest/internal/orchestrator/logging"
 	"github.com/garethgeorge/backrest/internal/orchestrator/repo"
@@ -426,6 +427,8 @@ func (o *Orchestrator) RunTask(ctx context.Context, st tasks.ScheduledTask) erro
 		runner.Logger(ctx).Error("task failed", zap.Error(err), zap.Duration("duration", time.Since(start)))
+		metric.GetRegistry().RecordTaskRun(st.Task.RepoID(), st.Task.PlanID(), st.Task.Type(), time.Since(start).Seconds(), "failed")
 	} else {
 		runner.Logger(ctx).Info("task finished", zap.Duration("duration", time.Since(start)))
+		metric.GetRegistry().RecordTaskRun(st.Task.RepoID(), st.Task.PlanID(), st.Task.Type(), time.Since(start).Seconds(), "success")
 	}
 
 	if op != nil {
diff --git a/internal/orchestrator/tasks/task.go b/internal/orchestrator/tasks/task.go
index 9df8d4ee..b3488639 100644
--- a/internal/orchestrator/tasks/task.go
+++ b/internal/orchestrator/tasks/task.go
@@ -86,6 +86,7 @@ func (s ScheduledTask) Less(other ScheduledTask) bool {
 // Task is a task that can be scheduled to run at a specific time.
 type Task interface {
 	Name() string                                                       // human readable name for this task.
+	Type() string                                                       // simple string 'type' for this task.
 	Next(now time.Time, runner TaskRunner) (ScheduledTask, error)       // returns the next scheduled task.
 	Run(ctx context.Context, st ScheduledTask, runner TaskRunner) error // run the task.
 	PlanID() string                                                     // the ID of the plan this task is associated with.
@@ -93,11 +94,17 @@ type Task interface {
 }
 
 type BaseTask struct {
+	TaskType   string
 	TaskName   string
 	TaskPlanID string
 	TaskRepoID string
 }
 
+// Type returns the task's type identifier, which is used as the task_type metric label.
+func (b BaseTask) Type() string {
+	return b.TaskType
+}
+
 func (b BaseTask) Name() string {
 	return b.TaskName
 }
@@ -164,7 +171,7 @@ type testTaskRunner struct {
 
 var _ TaskRunner = &testTaskRunner{}
 
-func newTestTaskRunner(t testing.TB, config *v1.Config, oplog *oplog.OpLog) *testTaskRunner {
+func newTestTaskRunner(_ testing.TB, config *v1.Config, oplog *oplog.OpLog) *testTaskRunner {
 	return &testTaskRunner{
 		config: config,
 		oplog:  oplog,
diff --git a/internal/orchestrator/tasks/taskbackup.go b/internal/orchestrator/tasks/taskbackup.go
index 9ba4971d..bc7dc51d 100644
--- a/internal/orchestrator/tasks/taskbackup.go
+++ b/internal/orchestrator/tasks/taskbackup.go
@@ -9,6 +9,7 @@ import (
 	"time"
 
 	v1 "github.com/garethgeorge/backrest/gen/go/v1"
+	"github.com/garethgeorge/backrest/internal/metric"
 	"github.com/garethgeorge/backrest/internal/oplog"
 	"github.com/garethgeorge/backrest/internal/protoutil"
 	"github.com/garethgeorge/backrest/pkg/restic"
@@ -29,6 +30,7 @@ var _ Task = &BackupTask{}
 func NewScheduledBackupTask(plan *v1.Plan) *BackupTask {
 	return &BackupTask{
 		BaseTask: BaseTask{
+			TaskType:   "backup",
 			TaskName:   fmt.Sprintf("backup for plan %q", plan.Id),
 			TaskRepoID: plan.Repo,
 			TaskPlanID: plan.Id,
@@ -39,6 +41,7 @@ func NewScheduledBackupTask(plan *v1.Plan) *BackupTask {
 func NewOneoffBackupTask(plan *v1.Plan, at time.Time) *BackupTask {
 	return &BackupTask{
 		BaseTask: BaseTask{
+			TaskType:   "backup",
 			TaskName:   fmt.Sprintf("backup for plan %q", plan.Id),
 			TaskRepoID: plan.Repo,
 			TaskPlanID: plan.Id,
@@ -132,6 +135,7 @@ func (t *BackupTask) Run(ctx context.Context, st ScheduledTask, runner TaskRunne
 	var sendWg sync.WaitGroup
 	lastSent := time.Now() // debounce progress updates, these can endup being very frequent.
 	var lastFiles []string
+	fileErrorCount := 0
 	summary, err := repo.Backup(ctx, plan, func(entry *restic.BackupProgressEntry) {
 		sendWg.Wait()
 		if entry.MessageType == "status" {
@@ -145,6 +149,7 @@ func (t *BackupTask) Run(ctx context.Context, st ScheduledTask, runner TaskRunne
 			backupOp.OperationBackup.LastStatus = protoutil.BackupProgressEntryToProto(entry)
 		} else if entry.MessageType == "error" {
 			l.Sugar().Warnf("an unknown error was encountered in processing item: %v", entry.Item)
+			fileErrorCount++
 			backupError, err := protoutil.BackupProgressEntryToBackupError(entry)
 			if err != nil {
 				l.Sugar().Errorf("failed to convert backup progress entry to backup error: %v", err)
@@ -180,6 +185,8 @@ func (t *BackupTask) Run(ctx context.Context, st ScheduledTask, runner TaskRunne
 		summary = &restic.BackupProgressEntry{}
 	}
 
+	metric.GetRegistry().RecordBackupSummary(t.RepoID(), t.PlanID(), summary.TotalBytesProcessed, summary.DataAdded, int64(fileErrorCount))
+
 	vars := HookVars{
 		Task:          t.Name(),
 		SnapshotStats: summary,
diff --git a/internal/orchestrator/tasks/taskcheck.go b/internal/orchestrator/tasks/taskcheck.go
index 4455438e..753f6aee 100644
--- a/internal/orchestrator/tasks/taskcheck.go
+++ b/internal/orchestrator/tasks/taskcheck.go
@@ -20,6 +20,7 @@ type CheckTask struct {
 func NewCheckTask(repoID, planID string, force bool) Task {
 	return &CheckTask{
 		BaseTask: BaseTask{
+			TaskType:   "check",
 			TaskName:   fmt.Sprintf("check for repo %q", repoID),
 			TaskRepoID: repoID,
 			TaskPlanID: planID,
diff --git a/internal/orchestrator/tasks/taskcollectgarbage.go b/internal/orchestrator/tasks/taskcollectgarbage.go
index 5be5b10e..29e6573d 100644
--- a/internal/orchestrator/tasks/taskcollectgarbage.go
+++ b/internal/orchestrator/tasks/taskcollectgarbage.go
@@ -35,6 +35,7 @@ type CollectGarbageTask struct {
 func NewCollectGarbageTask() *CollectGarbageTask {
 	return &CollectGarbageTask{
 		BaseTask: BaseTask{
+			TaskType: "collect_garbage",
 			TaskName: "collect garbage",
 		},
 	}
diff --git a/internal/orchestrator/tasks/taskforget.go b/internal/orchestrator/tasks/taskforget.go
index af27ba2f..f72b7968 100644
--- a/internal/orchestrator/tasks/taskforget.go
+++ b/internal/orchestrator/tasks/taskforget.go
@@ -16,6 +16,7 @@ func NewOneoffForgetTask(repoID, planID string, flowID int64, at time.Time) Task
 	return &GenericOneoffTask{
 		OneoffTask: OneoffTask{
 			BaseTask: BaseTask{
-				TaskName:   fmt.Sprintf("forget for plan %q in repo %q", repoID, planID),
+				TaskType:   "forget",
+				TaskName:   fmt.Sprintf("forget for plan %q in repo %q", planID, repoID),
 				TaskRepoID: repoID,
 				TaskPlanID: planID,
diff --git a/internal/orchestrator/tasks/taskforgetsnapshot.go b/internal/orchestrator/tasks/taskforgetsnapshot.go
index 46c4351d..ef3174f4 100644
--- a/internal/orchestrator/tasks/taskforgetsnapshot.go
+++ b/internal/orchestrator/tasks/taskforgetsnapshot.go
@@ -12,6 +12,7 @@ func NewOneoffForgetSnapshotTask(repoID, planID string, flowID int64, at time.Ti
 	return &GenericOneoffTask{
 		OneoffTask: OneoffTask{
 			BaseTask: BaseTask{
+				TaskType:   "forget_snapshot",
 				TaskName:   fmt.Sprintf("forget snapshot %q for plan %q in repo %q", snapshotID, planID, repoID),
 				TaskRepoID: repoID,
 				TaskPlanID: planID,
diff --git a/internal/orchestrator/tasks/taskindexsnapshots.go b/internal/orchestrator/tasks/taskindexsnapshots.go
index 27f909e0..a9f9eabc 100644
--- a/internal/orchestrator/tasks/taskindexsnapshots.go
+++ b/internal/orchestrator/tasks/taskindexsnapshots.go
@@ -19,6 +19,7 @@ func NewOneoffIndexSnapshotsTask(repoID string, at time.Time) Task {
 	return &GenericOneoffTask{
 		OneoffTask: OneoffTask{
 			BaseTask: BaseTask{
+				TaskType:   "index_snapshots",
 				TaskName:   fmt.Sprintf("index snapshots for repo %q", repoID),
 				TaskRepoID: repoID,
 			},
diff --git a/internal/orchestrator/tasks/taskprune.go b/internal/orchestrator/tasks/taskprune.go
index 0350b67b..63eba4cd 100644
--- a/internal/orchestrator/tasks/taskprune.go
+++ b/internal/orchestrator/tasks/taskprune.go
@@ -21,6 +21,7 @@ type PruneTask struct {
 func NewPruneTask(repoID, planID string, force bool) Task {
 	return &PruneTask{
 		BaseTask: BaseTask{
+			TaskType:   "prune",
 			TaskName:   fmt.Sprintf("prune repo %q", repoID),
 			TaskRepoID: repoID,
 			TaskPlanID: planID,
diff --git a/internal/orchestrator/tasks/taskrestore.go b/internal/orchestrator/tasks/taskrestore.go
index 11d70510..db3f331e 100644
--- a/internal/orchestrator/tasks/taskrestore.go
+++ b/internal/orchestrator/tasks/taskrestore.go
@@ -15,6 +15,7 @@ func NewOneoffRestoreTask(repoID, planID string, flowID int64, at time.Time, sna
 	return &GenericOneoffTask{
 		OneoffTask: OneoffTask{
 			BaseTask: BaseTask{
+				TaskType:   "restore",
 				TaskName:   fmt.Sprintf("restore snapshot %q in repo %q", snapshotID, repoID),
 				TaskRepoID: repoID,
 				TaskPlanID: planID,
diff --git a/internal/orchestrator/tasks/taskstats.go b/internal/orchestrator/tasks/taskstats.go
index 35fad6a0..6a541300 100644
--- a/internal/orchestrator/tasks/taskstats.go
+++ b/internal/orchestrator/tasks/taskstats.go
@@ -18,6 +18,7 @@ type StatsTask struct {
 func NewStatsTask(repoID, planID string, force bool) Task {
 	return &StatsTask{
 		BaseTask: BaseTask{
+			TaskType:   "stats",
 			TaskName:   fmt.Sprintf("stats for repo %q", repoID),
 			TaskRepoID: repoID,
 			TaskPlanID: planID,