mirror of https://github.com/garethgeorge/backrest.git
synced 2025-12-13 17:25:38 +00:00
feat: support restic check operation (#303)
@@ -166,9 +166,15 @@ func (o *Orchestrator) ScheduleDefaultTasks(config *v1.Config) error {
 	for _, repo := range config.Repos {
 		// Schedule a prune task for the repo
 		t := tasks.NewPruneTask(repo.GetId(), tasks.PlanForSystemTasks, false)
-		if err := o.ScheduleTask(t, tasks.TaskPriorityDefault); err != nil {
+		if err := o.ScheduleTask(t, tasks.TaskPriorityPrune); err != nil {
 			return fmt.Errorf("schedule prune task for repo %q: %w", repo.GetId(), err)
 		}
+
+		// Schedule a check task for the repo
+		t = tasks.NewCheckTask(repo.GetId(), tasks.PlanForSystemTasks, false)
+		if err := o.ScheduleTask(t, tasks.TaskPriorityCheck); err != nil {
+			return fmt.Errorf("schedule check task for repo %q: %w", repo.GetId(), err)
+		}
 	}
 
 	return nil
@@ -263,6 +263,32 @@ func (r *RepoOrchestrator) Prune(ctx context.Context, output io.Writer) error {
 	return nil
 }
 
+func (r *RepoOrchestrator) Check(ctx context.Context, output io.Writer) error {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	ctx, flush := forwardResticLogs(ctx)
+	defer flush()
+
+	var opts []restic.GenericOption
+	if r.repoConfig.CheckPolicy != nil {
+		switch m := r.repoConfig.CheckPolicy.Mode.(type) {
+		case *v1.CheckPolicy_ReadDataSubsetPercent:
+			if m.ReadDataSubsetPercent > 0 {
+				opts = append(opts, restic.WithFlags(fmt.Sprintf("--read-data-subset=%v%%", m.ReadDataSubsetPercent)))
+			}
+		case *v1.CheckPolicy_StructureOnly:
+		default:
+		}
+	}
+
+	r.l.Debug("checking repo")
+	err := r.repo.Check(ctx, output, opts...)
+	if err != nil {
+		return fmt.Errorf("check repo %v: %w", r.repoConfig.Id, err)
+	}
+	return nil
+}
+
 func (r *RepoOrchestrator) Restore(ctx context.Context, snapshotId string, path string, target string, progressCallback func(event *v1.RestoreProgressEntry)) (*v1.RestoreProgressEntry, error) {
 	r.mu.Lock()
 	defer r.mu.Unlock()
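The switch above is the whole policy-to-flag mapping: a structure-only policy adds no extra flags (plain `restic check`), while a positive read-data percentage adds `--read-data-subset=N%`. A self-contained sketch of that mapping, using plain flag strings instead of backrest's `restic.GenericOption` values (the helper name is invented for illustration):

```go
package main

import "fmt"

// checkFlags mirrors the switch in RepoOrchestrator.Check above, but returns
// plain flag strings instead of restic.GenericOption values (illustrative only).
func checkFlags(readDataSubsetPercent float64) []string {
	var flags []string
	if readDataSubsetPercent > 0 {
		// restic reads and verifies the given percentage of pack data.
		flags = append(flags, fmt.Sprintf("--read-data-subset=%v%%", readDataSubsetPercent))
	}
	// A structure-only check adds no extra flags; `restic check` already
	// verifies the repository structure by default.
	return flags
}

func main() {
	fmt.Println(checkFlags(0))  // []                       -> structure-only check
	fmt.Println(checkFlags(50)) // [--read-data-subset=50%] -> also read 50% of pack data
}
```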
@@ -1,6 +1,7 @@
 package repo
 
 import (
+	"bytes"
 	"context"
 	"os"
 	"slices"
@@ -189,3 +190,59 @@ func TestEnvVarPropagation(t *testing.T) {
 		t.Fatal("expected snapshot id")
 	}
 }
+
+func TestCheck(t *testing.T) {
+	t.Parallel()
+
+	tcs := []struct {
+		name string
+		repo *v1.Repo
+	}{
+		{
+			name: "check structure",
+			repo: &v1.Repo{
+				Id:       "test",
+				Uri:      t.TempDir(),
+				Password: "test",
+				CheckPolicy: &v1.CheckPolicy{
+					Mode: nil,
+				},
+			},
+		},
+		{
+			name: "read data percent",
+			repo: &v1.Repo{
+				Id:       "test",
+				Uri:      t.TempDir(),
+				Password: "test",
+				CheckPolicy: &v1.CheckPolicy{
+					Mode: &v1.CheckPolicy_ReadDataSubsetPercent{
+						ReadDataSubsetPercent: 50,
+					},
+				},
+			},
+		},
+	}
+
+	for _, tc := range tcs {
+		t.Run(tc.name, func(t *testing.T) {
+			orchestrator, err := NewRepoOrchestrator(configForTest, tc.repo, helpers.ResticBinary(t))
+			if err != nil {
+				t.Fatalf("failed to create repo orchestrator: %v", err)
+			}
+
+			buf := bytes.NewBuffer(nil)
+
+			err = orchestrator.Init(context.Background())
+			if err != nil {
+				t.Fatalf("init error: %v", err)
+			}
+
+			err = orchestrator.Check(context.Background(), buf)
+			if err != nil {
+				t.Errorf("check error: %v", err)
+			}
+			t.Logf("check output: %s", buf.String())
+		})
+	}
+}
@@ -84,11 +84,7 @@ func (t *taskRunnerImpl) ExecuteHooks(events []v1.Hook_Condition, vars hook.HookVars) error {
 		}
 	}
 	if planID != "" {
-		var err error
-		plan, err = t.FindPlan()
-		if err != nil {
-			return err
-		}
+		plan, _ = t.FindPlan()
 	}
 	var flowID int64
 	if t.op != nil {
@@ -17,12 +17,13 @@ const (
 	PlanForUnassociatedOperations = "_unassociated_"
 	PlanForSystemTasks            = "_system_" // plan for system tasks e.g. garbage collection, prune, stats, etc.
 
-	TaskPriorityStats       = -1
-	TaskPriorityDefault     = 0
-	TaskPriorityInteractive = 1 << 1
+	TaskPriorityStats          = 0
+	TaskPriorityDefault        = 1 << 1 // default priority
 	TaskPriorityForget         = 1 << 2
 	TaskPriorityIndexSnapshots = 1 << 3
-	TaskPriorityPrune          = 1 << 4
+	TaskPriorityCheck          = 1 << 4 // check should always run after prune.
+	TaskPriorityPrune          = 1 << 5
+	TaskPriorityInteractive    = 1 << 6 // highest priority
 )
 
 // TaskRunner is an interface for running tasks. It is used by tasks to create operations and write logs.
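The reshuffled constants give each system task its own rung on the priority ladder. Assuming the scheduler dequeues the highest value first, which the "check should always run after prune" and "highest priority" comments imply, the ordering works out as in this small sketch (the queue here is a stand-in, not backrest's actual task queue):

```go
package main

import (
	"fmt"
	"sort"
)

// Same values as the new constants above.
const (
	TaskPriorityStats          = 0
	TaskPriorityDefault        = 1 << 1
	TaskPriorityForget         = 1 << 2
	TaskPriorityIndexSnapshots = 1 << 3
	TaskPriorityCheck          = 1 << 4
	TaskPriorityPrune          = 1 << 5
	TaskPriorityInteractive    = 1 << 6
)

func main() {
	type queued struct {
		name     string
		priority int
	}
	// Tasks that might all become ready at the same instant for one repo.
	ready := []queued{
		{"stats", TaskPriorityStats},
		{"check", TaskPriorityCheck},
		{"prune", TaskPriorityPrune},
	}
	// Assumption: higher priority value is dequeued first.
	sort.Slice(ready, func(i, j int) bool { return ready[i].priority > ready[j].priority })
	for _, t := range ready {
		fmt.Println(t.name, t.priority) // prune 32, check 16, stats 0
	}
}
```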
internal/orchestrator/tasks/taskcheck.go (new file, 185 lines)
@@ -0,0 +1,185 @@
package tasks

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"time"

	v1 "github.com/garethgeorge/backrest/gen/go/v1"
	"github.com/garethgeorge/backrest/internal/hook"
	"github.com/garethgeorge/backrest/internal/ioutil"
	"github.com/garethgeorge/backrest/internal/oplog"
	"github.com/garethgeorge/backrest/internal/oplog/indexutil"
	"github.com/garethgeorge/backrest/internal/protoutil"
	"go.uber.org/zap"
)

type CheckTask struct {
	BaseTask
	force  bool
	didRun bool
}

func NewCheckTask(repoID, planID string, force bool) Task {
	return &CheckTask{
		BaseTask: BaseTask{
			TaskName:   fmt.Sprintf("prune repo %q", repoID),
			TaskRepoID: repoID,
			TaskPlanID: planID,
		},
		force: force,
	}
}

func (t *CheckTask) Next(now time.Time, runner TaskRunner) (ScheduledTask, error) {
	if t.force {
		if t.didRun {
			return NeverScheduledTask, nil
		}
		t.didRun = true
		return ScheduledTask{
			Task:  t,
			RunAt: now,
			Op: &v1.Operation{
				Op: &v1.Operation_OperationCheck{},
			},
		}, nil
	}

	repo, err := runner.GetRepo(t.RepoID())
	if err != nil {
		return ScheduledTask{}, fmt.Errorf("get repo %v: %w", t.RepoID(), err)
	}

	if repo.CheckPolicy.GetSchedule() == nil {
		return NeverScheduledTask, nil
	}

	var lastRan time.Time
	var foundBackup bool
	if err := runner.OpLog().ForEach(oplog.Query{RepoId: t.RepoID()}, indexutil.Reversed(indexutil.CollectAll()), func(op *v1.Operation) error {
		if _, ok := op.Op.(*v1.Operation_OperationCheck); ok {
			lastRan = time.Unix(0, op.UnixTimeEndMs*int64(time.Millisecond))
			return oplog.ErrStopIteration
		}
		if _, ok := op.Op.(*v1.Operation_OperationBackup); ok {
			foundBackup = true
		}
		return nil
	}); err != nil {
		return NeverScheduledTask, fmt.Errorf("finding last check run time: %w", err)
	} else if !foundBackup {
		return NeverScheduledTask, nil
	}

	zap.L().Debug("last prune time", zap.Time("time", lastRan), zap.String("repo", t.RepoID()))

	runAt, err := protoutil.ResolveSchedule(repo.CheckPolicy.GetSchedule(), lastRan)
	if errors.Is(err, protoutil.ErrScheduleDisabled) {
		return NeverScheduledTask, nil
	} else if err != nil {
		return NeverScheduledTask, fmt.Errorf("resolve schedule: %w", err)
	}

	return ScheduledTask{
		Task:  t,
		RunAt: runAt,
		Op: &v1.Operation{
			Op: &v1.Operation_OperationCheck{},
		},
	}, nil
}

func (t *CheckTask) Run(ctx context.Context, st ScheduledTask, runner TaskRunner) error {
	op := st.Op

	repo, err := runner.GetRepoOrchestrator(t.RepoID())
	if err != nil {
		return fmt.Errorf("couldn't get repo %q: %w", t.RepoID(), err)
	}

	if err := runner.ExecuteHooks([]v1.Hook_Condition{
		v1.Hook_CONDITION_CHECK_START,
	}, hook.HookVars{}); err != nil {
		// TODO: generalize this logic
		op.DisplayMessage = err.Error()
		var cancelErr *hook.HookErrorRequestCancel
		if errors.As(err, &cancelErr) {
			op.Status = v1.OperationStatus_STATUS_USER_CANCELLED // user visible cancelled status
			return nil
		}
		op.Status = v1.OperationStatus_STATUS_ERROR
		return fmt.Errorf("execute check start hooks: %w", err)
	}

	err = repo.UnlockIfAutoEnabled(ctx)
	if err != nil {
		return fmt.Errorf("auto unlock repo %q: %w", t.RepoID(), err)
	}

	opCheck := &v1.Operation_OperationCheck{
		OperationCheck: &v1.OperationCheck{},
	}
	op.Op = opCheck

	ctx, cancel := context.WithCancel(ctx)
	interval := time.NewTicker(1 * time.Second)
	defer interval.Stop()
	buf := ioutil.HeadWriter{Limit: 16 * 1024}
	bufWriter := ioutil.SynchronizedWriter{W: &buf}
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		for {
			select {
			case <-interval.C:
				bufWriter.Mu.Lock()
				output := string(buf.Bytes())
				bufWriter.Mu.Unlock()

				if opCheck.OperationCheck.Output != string(output) {
					opCheck.OperationCheck.Output = string(output)

					if err := runner.OpLog().Update(op); err != nil {
						zap.L().Error("update prune operation with status output", zap.Error(err))
					}
				}
			case <-ctx.Done():
				return
			}
		}
	}()

	if err := repo.Check(ctx, &bufWriter); err != nil {
		cancel()

		runner.ExecuteHooks([]v1.Hook_Condition{
			v1.Hook_CONDITION_CHECK_ERROR,
			v1.Hook_CONDITION_ANY_ERROR,
		}, hook.HookVars{
			Error: err.Error(),
		})

		return fmt.Errorf("prune: %w", err)
	}
	cancel()
	wg.Wait()

	opCheck.OperationCheck.Output = string(buf.Bytes())

	// Run a stats task after a successful prune
	if err := runner.ScheduleTask(NewStatsTask(t.RepoID(), PlanForSystemTasks, false), TaskPriorityStats); err != nil {
		zap.L().Error("schedule stats task", zap.Error(err))
	}

	if err := runner.ExecuteHooks([]v1.Hook_Condition{
		v1.Hook_CONDITION_CHECK_SUCCESS,
	}, hook.HookVars{}); err != nil {
		return fmt.Errorf("execute prune success hooks: %w", err)
	}

	return nil
}
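CheckTask.Next above has two modes: a forced (user-requested) check runs exactly once, immediately, and never again; a policy-driven check is scheduled relative to the last completed check via protoutil.ResolveSchedule. A stripped-down sketch of that contract, with a fixed interval standing in for the schedule and invented local types in place of ScheduledTask/NeverScheduledTask:

```go
package main

import (
	"fmt"
	"time"
)

// Illustrative stand-ins; backrest's real types are ScheduledTask / NeverScheduledTask.
type scheduledRun struct {
	runAt time.Time
	never bool
}

type checkTask struct {
	force  bool
	didRun bool
}

// next mirrors the shape of CheckTask.Next: force => run once immediately,
// then never again; otherwise derive the next run from the last completed
// check and the policy's schedule (approximated here by a fixed interval).
func (t *checkTask) next(now, lastRan time.Time, interval time.Duration) scheduledRun {
	if t.force {
		if t.didRun {
			return scheduledRun{never: true}
		}
		t.didRun = true
		return scheduledRun{runAt: now}
	}
	return scheduledRun{runAt: lastRan.Add(interval)}
}

func main() {
	now := time.Now()

	forced := &checkTask{force: true}
	fmt.Println(forced.next(now, time.Time{}, 0).runAt.Equal(now)) // true: runs right away
	fmt.Println(forced.next(now, time.Time{}, 0).never)            // true: one-shot, never rescheduled

	periodic := &checkTask{}
	lastRan := now.Add(-20 * 24 * time.Hour)
	r := periodic.next(now, lastRan, 30*24*time.Hour)
	fmt.Println(r.runAt.Sub(now)) // ~240h: next check is due in about 10 days
}
```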
@@ -11,7 +11,7 @@ import (
 )
 
 const (
-	gcStartupDelay = 5 * time.Second
+	gcStartupDelay = 60 * time.Second
 	gcInterval     = 24 * time.Hour
 	// keep operations that are eligible for gc for 30 days OR up to a limit of 100 for any one plan.
 	// an operation is eligible for gc if:
@@ -1,7 +1,6 @@
 package tasks
 
 import (
-	"bytes"
 	"context"
 	"errors"
 	"fmt"
@@ -59,14 +58,20 @@ func (t *PruneTask) Next(now time.Time, runner TaskRunner) (ScheduledTask, error) {
 	}
 
 	var lastRan time.Time
+	var foundBackup bool
 	if err := runner.OpLog().ForEach(oplog.Query{RepoId: t.RepoID()}, indexutil.Reversed(indexutil.CollectAll()), func(op *v1.Operation) error {
 		if _, ok := op.Op.(*v1.Operation_OperationPrune); ok {
 			lastRan = time.Unix(0, op.UnixTimeEndMs*int64(time.Millisecond))
 			return oplog.ErrStopIteration
 		}
+		if _, ok := op.Op.(*v1.Operation_OperationBackup); ok {
+			foundBackup = true
+		}
 		return nil
 	}); err != nil {
-		return NeverScheduledTask, fmt.Errorf("finding last backup run time: %w", err)
+		return NeverScheduledTask, fmt.Errorf("finding last prune run time: %w", err)
+	} else if !foundBackup {
+		return NeverScheduledTask, nil
 	}
 
 	zap.L().Debug("last prune time", zap.Time("time", lastRan), zap.String("repo", t.RepoID()))
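Both PruneTask.Next above and the new CheckTask.Next walk the operation log newest-first: they stop at the most recent prune (or check) to record when it finished, and note whether a backup has run since then (or at all, if there has been no prune yet); without such a backup the task is not scheduled. A self-contained sketch of that scan over a plain slice, standing in for runner.OpLog().ForEach with a reversed index:

```go
package main

import (
	"fmt"
	"time"
)

type opKind int

const (
	opBackup opKind = iota
	opPrune
)

type op struct {
	kind  opKind
	ended time.Time
}

// lastPruneAndBackupSeen mirrors the scan above: iterate newest-first, stop at
// the first (most recent) prune, and note whether a backup was seen on the way.
func lastPruneAndBackupSeen(ops []op) (lastRan time.Time, foundBackup bool) {
	for i := len(ops) - 1; i >= 0; i-- { // ops are ordered oldest to newest, so walk backwards
		switch ops[i].kind {
		case opPrune:
			return ops[i].ended, foundBackup
		case opBackup:
			foundBackup = true
		}
	}
	return time.Time{}, foundBackup
}

func main() {
	now := time.Now()
	ops := []op{
		{opPrune, now.Add(-72 * time.Hour)},
		{opBackup, now.Add(-48 * time.Hour)},
		{opBackup, now.Add(-24 * time.Hour)},
	}
	lastRan, foundBackup := lastPruneAndBackupSeen(ops)
	fmt.Println(lastRan.Equal(now.Add(-72*time.Hour)), foundBackup) // true true
}
```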
@@ -95,6 +100,20 @@ func (t *PruneTask) Run(ctx context.Context, st ScheduledTask, runner TaskRunner) error {
 		return fmt.Errorf("couldn't get repo %q: %w", t.RepoID(), err)
 	}
 
+	if err := runner.ExecuteHooks([]v1.Hook_Condition{
+		v1.Hook_CONDITION_PRUNE_START,
+	}, hook.HookVars{}); err != nil {
+		op.DisplayMessage = err.Error()
+		// TODO: generalize this logic
+		var cancelErr *hook.HookErrorRequestCancel
+		if errors.As(err, &cancelErr) {
+			op.Status = v1.OperationStatus_STATUS_USER_CANCELLED // user visible cancelled status
+			return nil
+		}
+		op.Status = v1.OperationStatus_STATUS_ERROR
+		return fmt.Errorf("execute prune start hooks: %w", err)
+	}
+
 	err = repo.UnlockIfAutoEnabled(ctx)
 	if err != nil {
 		return fmt.Errorf("auto unlock repo %q: %w", t.RepoID(), err)
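The hook-failure handling added above (and mirrored in the new check task) distinguishes a hook that asks for cancellation from a hook that simply fails: errors.As walks the wrapped error chain looking for hook.HookErrorRequestCancel, which maps to a user-visible cancelled status with no error propagated, while any other hook error marks the operation as failed. A self-contained sketch of that pattern with a stand-in error type:

```go
package main

import (
	"errors"
	"fmt"
)

// requestCancel stands in for hook.HookErrorRequestCancel.
type requestCancel struct{ reason string }

func (e *requestCancel) Error() string { return "hook requested cancel: " + e.reason }

type status string

const (
	statusUserCancelled status = "USER_CANCELLED"
	statusError         status = "ERROR"
)

// classifyHookErr mirrors the branch above: a cancel request becomes a
// user-visible cancelled status (and swallows the error); anything else fails.
func classifyHookErr(err error) (status, error) {
	var cancelErr *requestCancel
	if errors.As(err, &cancelErr) {
		return statusUserCancelled, nil
	}
	return statusError, fmt.Errorf("execute start hooks: %w", err)
}

func main() {
	// A hook script decided the operation should not run.
	st, err := classifyHookErr(fmt.Errorf("running hook: %w", &requestCancel{reason: "on battery"}))
	fmt.Println(st, err) // USER_CANCELLED <nil>

	// Any other hook failure is surfaced as an error.
	st, err = classifyHookErr(errors.New("hook exited with status 1"))
	fmt.Println(st, err) // ERROR execute start hooks: hook exited with status 1
}
```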
@@ -108,7 +127,7 @@ func (t *PruneTask) Run(ctx context.Context, st ScheduledTask, runner TaskRunner) error {
 	ctx, cancel := context.WithCancel(ctx)
 	interval := time.NewTicker(1 * time.Second)
 	defer interval.Stop()
-	var buf bytes.Buffer
+	buf := ioutil.HeadWriter{Limit: 16 * 1024}
 	bufWriter := ioutil.SynchronizedWriter{W: &buf}
 	var wg sync.WaitGroup
 	wg.Add(1)
@@ -118,14 +137,11 @@ func (t *PruneTask) Run(ctx context.Context, st ScheduledTask, runner TaskRunner) error {
 		select {
 		case <-interval.C:
 			bufWriter.Mu.Lock()
-			output := buf.String()
+			output := string(buf.Bytes())
 			bufWriter.Mu.Unlock()
-			if len(output) > 8*1024 { // only provide live status upto the first 8K of output.
-				output = output[:len(output)-8*1024]
-			}
 
-			if opPrune.OperationPrune.Output != output {
-				opPrune.OperationPrune.Output = buf.String()
+			if opPrune.OperationPrune.Output != string(output) {
+				opPrune.OperationPrune.Output = string(output)
 
 				if err := runner.OpLog().Update(op); err != nil {
 					zap.L().Error("update prune operation with status output", zap.Error(err))
@@ -151,17 +167,18 @@ func (t *PruneTask) Run(ctx context.Context, st ScheduledTask, runner TaskRunner) error {
 	cancel()
 	wg.Wait()
 
-	output := buf.String()
-	if len(output) > 8*1024 { // only save the first 4K of output.
-		output = output[:len(output)-8*1024]
-	}
-
-	opPrune.OperationPrune.Output = output
+	opPrune.OperationPrune.Output = string(buf.Bytes())
+
+	// Run a stats task after a successful prune
+	if err := runner.ScheduleTask(NewStatsTask(t.RepoID(), PlanForSystemTasks, false), TaskPriorityStats); err != nil {
+		zap.L().Error("schedule stats task", zap.Error(err))
+	}
+
 	if err := runner.ExecuteHooks([]v1.Hook_Condition{
 		v1.Hook_CONDITION_PRUNE_SUCCESS,
 	}, hook.HookVars{}); err != nil {
 		return fmt.Errorf("execute prune end hooks: %w", err)
 	}
 
 	return nil
 }
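The manual truncation removed above had drifted from its comment (it sliced off the last 8 KiB rather than keeping the first 8 KiB); the new code instead caps retention at write time with ioutil.HeadWriter, wrapped in ioutil.SynchronizedWriter so the ticker goroutine can lock and snapshot the buffer. Those helpers are not shown in this diff; a minimal stand-in with the same shape might look like:

```go
package main

import (
	"fmt"
	"strings"
	"sync"
)

// headWriter keeps only the first Limit bytes written; later writes are
// accepted but silently dropped. A stand-in for backrest's ioutil.HeadWriter.
type headWriter struct {
	Limit int
	buf   []byte
}

func (w *headWriter) Write(p []byte) (int, error) {
	if remaining := w.Limit - len(w.buf); remaining > 0 {
		if len(p) > remaining {
			w.buf = append(w.buf, p[:remaining]...)
		} else {
			w.buf = append(w.buf, p...)
		}
	}
	return len(p), nil // report a full write so the producer keeps going
}

func (w *headWriter) Bytes() []byte { return w.buf }

// synchronizedWriter serializes writes so another goroutine can lock Mu and
// snapshot the underlying buffer, as the status ticker does above.
type synchronizedWriter struct {
	Mu sync.Mutex
	W  *headWriter
}

func (w *synchronizedWriter) Write(p []byte) (int, error) {
	w.Mu.Lock()
	defer w.Mu.Unlock()
	return w.W.Write(p)
}

func main() {
	buf := headWriter{Limit: 16}
	bw := synchronizedWriter{W: &buf}
	fmt.Fprint(&bw, strings.Repeat("x", 10))
	fmt.Fprint(&bw, strings.Repeat("y", 10)) // only 6 of these are kept

	bw.Mu.Lock()
	fmt.Printf("%d bytes kept: %q\n", len(buf.Bytes()), string(buf.Bytes())) // 16 bytes kept: "xxxxxxxxxxyyyyyy"
	bw.Mu.Unlock()
}
```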