package tasks

import (
	"context"
	"fmt"
	"reflect"
	"time"

	v1 "github.com/garethgeorge/backrest/gen/go/v1"
	"github.com/garethgeorge/backrest/internal/logstore"
	"github.com/garethgeorge/backrest/internal/oplog"
	"go.uber.org/zap"
)

// gcSettingsForType is the retention policy for one group of operations.
//
// gcOperations also keeps a per-group copy of this struct and decrements
// keepMin/keepMax for every operation it visits, so in that copy the two
// fields act as "remaining budget" counters.
type gcSettingsForType struct {
	maxAge  time.Duration // operations older than this are removed once keepMin is exhausted
	keepMin int           // this many operations per group are always kept by the retention rules
	keepMax int           // operations beyond this count per group are always removed
}

// groupByKey identifies the group of operations that share one retention
// budget: same repo, plan, instance and concrete operation type.
type groupByKey struct {
	RepoID     string
	RepoGUID   string
	PlanID     string
	InstanceID string
	Type       reflect.Type
}

const (
	// gcStartupDelay is the delay used the first time the task is scheduled.
	gcStartupDelay = 1 * time.Second
	// gcInterval is the delay used for every subsequent scheduling.
	gcInterval = 24 * time.Hour
)

// gcSettings holds per-operation-type retention overrides; types that are not
// listed here fall back to defaultGcSettings.
var gcSettings = map[reflect.Type]gcSettingsForType{
	reflect.TypeOf(&v1.Operation_OperationStats{}): {
		maxAge:  365 * 24 * time.Hour,
		keepMin: 1,
		keepMax: 100,
	},
	reflect.TypeOf(&v1.Operation_OperationCheck{}): {
		maxAge:  365 * 24 * time.Hour,
		keepMin: 1,
		keepMax: 12,
	},
	reflect.TypeOf(&v1.Operation_OperationPrune{}): {
		maxAge:  365 * 24 * time.Hour,
		keepMin: 1,
		keepMax: 12,
	},
}

// defaultGcSettings is the retention policy for operation types without an
// entry in gcSettings.
var defaultGcSettings = gcSettingsForType{
	maxAge:  30 * 24 * time.Hour,
	keepMin: 1,
	keepMax: 100,
}

// CollectGarbageTask removes operations that are no longer worth retaining
// and the logstore entries whose parent operation no longer exists.
type CollectGarbageTask struct {
	BaseTask
	// firstRun is false until Next has been called once; despite the name it
	// is set to true as soon as the first run has been scheduled.
	firstRun bool
	logstore *logstore.LogStore
}

// NewCollectGarbageTask returns a garbage collection task that cleans up
// entries of the given log store in addition to operations.
func NewCollectGarbageTask(logstore *logstore.LogStore) *CollectGarbageTask {
	return &CollectGarbageTask{
		BaseTask: BaseTask{
			TaskType: "collect_garbage",
			TaskName: "collect garbage",
		},
		logstore: logstore,
	}
}

var _ Task = &CollectGarbageTask{}

// Next schedules the task gcStartupDelay after now on the first call and
// gcInterval after now on every later call. It never returns an error.
func (t *CollectGarbageTask) Next(now time.Time, runner TaskRunner) (ScheduledTask, error) {
	delay := gcInterval
	if !t.firstRun {
		t.firstRun = true
		delay = gcStartupDelay
	}
	return ScheduledTask{
		Task:  t,
		RunAt: now.Add(delay),
	}, nil
}

// Run performs one garbage collection pass and wraps any failure with
// "collecting garbage".
func (t *CollectGarbageTask) Run(ctx context.Context, st ScheduledTask, runner TaskRunner) error {
	if err := t.gcOperations(runner); err != nil {
		return fmt.Errorf("collecting garbage: %w", err)
	}
	return nil
}

// gcOperations deletes garbage operations through runner and then deletes
// logstore entries whose parent operation ID is no longer valid.
//
// An operation is removed when:
//   - its flow contains an index-snapshot operation marked as forgotten, or
//   - its flow has no index-snapshot operation and, within its group (see
//     groupByKey), it is past the keepMin budget and either exceeds keepMax
//     or is older than maxAge.
//
// Operations in a flow whose snapshot is not forgotten are always kept.
// Failures to delete individual logstore entries are logged and do not fail
// the pass; every other error is returned wrapped.
func (t *CollectGarbageTask) gcOperations(runner TaskRunner) error {
	// snapshotForgottenForFlow records, for every flow that has an
	// index-snapshot operation, whether that snapshot is forgotten.
	snapshotForgottenForFlow := make(map[int64]bool)
	if err := runner.QueryOperations(oplog.SelectAll, func(op *v1.Operation) error {
		if snapshotOp, ok := op.Op.(*v1.Operation_OperationIndexSnapshot); ok {
			// The generated getter is nil-safe, so a wrapper without a payload
			// is treated as "not forgotten" instead of panicking.
			snapshotForgottenForFlow[op.FlowId] = snapshotOp.OperationIndexSnapshot.GetForgot()
		}
		return nil
	}); err != nil {
		return fmt.Errorf("identifying forgotten snapshots: %w", err)
	}

	// typeName is a nil-safe reflect.Type.String: reflect.TypeOf returns nil
	// for an operation whose Op is unset, and calling String on it would panic.
	typeName := func(rt reflect.Type) string {
		if rt == nil {
			return "<nil>"
		}
		return rt.String()
	}

	// Keep track of IDs that are still valid and of the IDs being forgotten.
	validIDs := make(map[int64]struct{})
	forgetIDs := []int64{}
	curTime := curTimeMillis()
	var deletedByMaxAge, deletedByMaxCount, deletedByForgottenSnapshot int
	deletedByType := make(map[string]int)
	// stats holds the remaining retention budget for each group seen so far.
	stats := make(map[groupByKey]gcSettingsForType)

	if err := runner.QueryOperations(oplog.Query{}.SetReversed(true), func(op *v1.Operation) error {
		validIDs[op.Id] = struct{}{}
		opType := reflect.TypeOf(op.Op)

		if forgot, ok := snapshotForgottenForFlow[op.FlowId]; ok {
			// The flow is tied to a snapshot: its fate follows the snapshot and
			// the count/age based retention below does not apply.
			if forgot {
				forgetIDs = append(forgetIDs, op.Id)
				deletedByForgottenSnapshot++
				deletedByType[typeName(opType)]++
			}
			return nil
		}

		key := groupByKey{
			RepoGUID:   op.RepoGuid,
			RepoID:     op.RepoId,
			PlanID:     op.PlanId,
			InstanceID: op.InstanceId,
			Type:       opType,
		}

		st, ok := stats[key]
		if !ok {
			// First operation of this group: start from the configured policy.
			if st, ok = gcSettings[opType]; !ok {
				st = defaultGcSettings
			}
		}

		st.keepMax-- // when this drops below 0 the operation must be gc'd
		st.keepMin-- // when this drops below 0 we may start gc'ing
		stats[key] = st

		switch {
		case st.keepMin >= 0:
			// Within the minimum retention count; never delete.
		case st.keepMax < 0:
			// Maximum retention count exceeded; must be gc'd.
			forgetIDs = append(forgetIDs, op.Id)
			deletedByMaxCount++
			deletedByType[typeName(key.Type)]++
		case curTime-op.UnixTimeStartMs > st.maxAge.Milliseconds():
			// Old enough to be gc'd.
			forgetIDs = append(forgetIDs, op.Id)
			deletedByMaxAge++
			deletedByType[typeName(key.Type)]++
		}
		return nil
	}); err != nil {
		return fmt.Errorf("identifying gc eligible operations: %w", err)
	}

	if err := runner.DeleteOperation(forgetIDs...); err != nil {
		return fmt.Errorf("removing gc eligible operations: %w", err)
	}
	// Update validIDs with respect to the just deleted operations.
	for _, id := range forgetIDs {
		delete(validIDs, id)
	}

	zap.L().Info("collecting garbage operations",
		zap.Int("operations_removed", len(forgetIDs)),
		zap.Int("removed_by_age", deletedByMaxAge),
		zap.Int("removed_by_limit", deletedByMaxCount),
		zap.Int("removed_by_snapshot_forgotten", deletedByForgottenSnapshot),
		zap.Any("removed_by_type", deletedByType))

	// Clean up the logstore: collect first, delete afterwards, so entries are
	// not removed while SelectAll is still iterating.
	var toDelete []string
	if err := t.logstore.SelectAll(func(id string, parentID int64) {
		if parentID == 0 {
			// Entries with a zero parent ID are never collected here.
			return
		}
		if _, ok := validIDs[parentID]; !ok {
			toDelete = append(toDelete, id)
		}
	}); err != nil {
		return fmt.Errorf("selecting all logstore entries: %w", err)
	}

	// Count only successful deletions so the summary log is accurate.
	removed := 0
	for _, id := range toDelete {
		if err := t.logstore.Delete(id); err != nil {
			zap.L().Error("deleting logstore entry", zap.String("id", id), zap.Error(err))
			continue
		}
		removed++
	}
	zap.L().Info("collecting garbage logs", zap.Int("logs_removed", removed))

	return nil
}