Files
backrest/internal/metric/metric.go
Thibault Cohen 8bafe7ea35
Some checks failed
Build Snapshot Release / build (push) Has been cancelled
Release Please / release-please (push) Has been cancelled
Test / test-nix (push) Has been cancelled
Test / test-win (push) Has been cancelled
fix: improve exported prometheus metrics for task execution and status (#684)
2025-02-25 22:55:22 -08:00

99 lines
3.4 KiB
Go

package metric
import (
"net/http"
"slices"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// globalRegistry is the package-level metrics registry. It is built by
// initRegistry when the package's variables are initialized and is handed out
// by GetRegistry.
var globalRegistry = initRegistry()
// initRegistry creates a dedicated Prometheus registry, constructs every
// backrest collector, and registers all of them before returning the wrapper.
//
// Backup gauges are labelled by repo_id and plan_id. Task metrics add a
// task_type label, and the run counter and last-status gauge add a status
// label on top of that. Registration goes through MustRegister, which panics
// if a collector cannot be registered.
func initRegistry() *Registry {
	baseLabels := []string{"repo_id", "plan_id"}

	// labelsWith returns a fresh slice holding the base labels followed by
	// extra; baseLabels itself is never modified.
	labelsWith := func(extra ...string) []string {
		return append(slices.Clone(baseLabels), extra...)
	}

	bytesProcessed := prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "backrest_backup_bytes_processed",
		Help: "The total number of bytes processed during a backup",
	}, baseLabels)

	bytesAdded := prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "backrest_backup_bytes_added",
		Help: "The total number of bytes added during a backup",
	}, baseLabels)

	fileWarnings := prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "backrest_backup_file_warnings",
		Help: "The total number of file warnings during a backup",
	}, baseLabels)

	duration := prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "backrest_tasks_duration_secs",
		Help: "The duration of a task in seconds",
	}, labelsWith("task_type"))

	runs := prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "backrest_tasks_run_total",
		Help: "The total number of tasks run",
	}, labelsWith("task_type", "status"))

	lastStatus := prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "backrest_last_task_status",
		Help: "The status of the last task",
	}, labelsWith("task_type", "status"))

	out := &Registry{
		reg:                  prometheus.NewRegistry(),
		backupBytesProcessed: bytesProcessed,
		backupBytesAdded:     bytesAdded,
		backupFileWarnings:   fileWarnings,
		tasksDuration:        duration,
		tasksRun:             runs,
		lastTaskStatus:       lastStatus,
	}

	// Register in the same order the fields are declared.
	out.reg.MustRegister(
		out.backupBytesProcessed,
		out.backupBytesAdded,
		out.backupFileWarnings,
		out.tasksDuration,
		out.tasksRun,
		out.lastTaskStatus,
	)
	return out
}
// GetRegistry returns the shared package-level Registry held in
// globalRegistry.
func GetRegistry() *Registry {
	return globalRegistry
}
// Registry bundles a Prometheus registry with the backrest collectors that
// initRegistry creates and registers on it.
type Registry struct {
	// reg is the underlying Prometheus registry that Handler serves from.
	reg *prometheus.Registry

	// Backup gauges, created by initRegistry with repo_id and plan_id labels.
	backupBytesProcessed *prometheus.GaugeVec // bytes processed during a backup
	backupBytesAdded     *prometheus.GaugeVec // bytes added during a backup
	backupFileWarnings   *prometheus.GaugeVec // file warnings during a backup

	// Task metrics, created by initRegistry with an extra task_type label;
	// tasksRun and lastTaskStatus additionally carry a status label.
	tasksDuration  *prometheus.GaugeVec   // duration of a task in seconds
	tasksRun       *prometheus.CounterVec // number of tasks run
	lastTaskStatus *prometheus.GaugeVec   // status of the last task
}
// Handler returns an http.Handler that exposes the metrics gathered from this
// Registry's Prometheus registry, using zero-value promhttp handler options.
func (r *Registry) Handler() http.Handler {
	var opts promhttp.HandlerOpts
	return promhttp.HandlerFor(r.reg, opts)
}
// RecordTaskRun records the outcome of one task run.
//
// An empty repoID or planID is replaced with "_unassociated_". The method
// then:
//   - deletes any existing backrest_last_task_status series matching the
//     repo, plan and task type, and sets a new one labelled with status whose
//     value is 0 for "success", 1 for "failed" and -1 for any other status;
//   - increments backrest_tasks_run_total for the repo, plan, task type and
//     status;
//   - sets backrest_tasks_duration_secs for the repo, plan and task type to
//     duration_secs.
func (r *Registry) RecordTaskRun(repoID, planID, taskType string, duration_secs float64, status string) {
	const unassociated = "_unassociated_"
	if repoID == "" {
		repoID = unassociated
	}
	if planID == "" {
		planID = unassociated
	}

	// Map the textual status onto the numeric value exported by the gauge.
	var statusValue float64
	switch status {
	case "success":
		statusValue = 0
	case "failed":
		statusValue = 1
	default:
		statusValue = -1
	}

	// Drop series left over from earlier runs so that only the series for the
	// latest status remains for this repo/plan/task type.
	stale := prometheus.Labels{
		"repo_id":   repoID,
		"plan_id":   planID,
		"task_type": taskType,
	}
	r.lastTaskStatus.DeletePartialMatch(stale)
	r.lastTaskStatus.WithLabelValues(repoID, planID, taskType, status).Set(statusValue)

	r.tasksRun.WithLabelValues(repoID, planID, taskType, status).Inc()
	r.tasksDuration.WithLabelValues(repoID, planID, taskType).Set(duration_secs)
}
// RecordBackupSummary publishes the summary figures of a backup on the
// backup gauges for the given repo and plan.
//
// bytesProcessed, bytesAdded and fileWarnings are stored as-is (converted to
// float64) in backrest_backup_bytes_processed, backrest_backup_bytes_added and
// backrest_backup_file_warnings respectively.
//
// An empty repoID or planID is replaced with "_unassociated_", the same
// placeholder RecordTaskRun uses. Prometheus treats an empty label value as a
// missing label, so without this the backup series could not be matched
// against the task series for the same run.
func (r *Registry) RecordBackupSummary(repoID, planID string, bytesProcessed, bytesAdded int64, fileWarnings int64) {
	const unassociated = "_unassociated_"
	if repoID == "" {
		repoID = unassociated
	}
	if planID == "" {
		planID = unassociated
	}

	r.backupBytesProcessed.WithLabelValues(repoID, planID).Set(float64(bytesProcessed))
	r.backupBytesAdded.WithLabelValues(repoID, planID).Set(float64(bytesAdded))
	r.backupFileWarnings.WithLabelValues(repoID, planID).Set(float64(fileWarnings))
}