From fb80bc10aecee973d773ad248a005197b25e11b8 Mon Sep 17 00:00:00 2001 From: Ben Johnson Date: Thu, 20 May 2021 18:37:54 -0600 Subject: [PATCH] Refactor replica system --- .github/workflows/test.yml | 10 +- cmd/litestream/generations.go | 22 +- cmd/litestream/main.go | 113 +- cmd/litestream/main_test.go | 54 +- cmd/litestream/replicate.go | 17 +- cmd/litestream/restore.go | 16 +- cmd/litestream/snapshots.go | 36 +- cmd/litestream/wal.go | 67 +- db.go | 633 ++--- db_test.go | 4 +- file/replica_client.go | 380 +++ file/replica_client_test.go | 680 ++++++ go.mod | 2 +- internal/internal.go | 94 + .../internal_unix.go | 9 +- .../internal_windows.go | 6 +- internal/metrics.go | 44 - litestream.go | 321 ++- mock/replica_client.go | 65 + replica.go | 2042 +++++++++-------- replica_test.go | 202 +- s3/replica_client.go | 746 ++++++ s3/replica_client_test.go | 605 +++++ s3/s3.go | 1148 --------- 24 files changed, 4338 insertions(+), 2978 deletions(-) create mode 100644 file/replica_client.go create mode 100644 file/replica_client_test.go rename litestream_unix.go => internal/internal_unix.go (59%) rename litestream_windows.go => internal/internal_windows.go (74%) delete mode 100644 internal/metrics.go create mode 100644 mock/replica_client.go create mode 100644 s3/replica_client.go create mode 100644 s3/replica_client_test.go diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5c220e1..93a9e19 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,7 +6,7 @@ jobs: steps: - uses: actions/setup-go@v2 with: - go-version: '1.15' + go-version: '1.16' - uses: actions/checkout@v2 @@ -19,3 +19,11 @@ jobs: - name: Run unit tests run: go test -v ./... + + - name: Run s3 integration tests + run: go test -v ./s3 -integration + env: + LITESTREAM_S3_ACCESS_KEY_ID: ${{ secrets.LITESTREAM_S3_ACCESS_KEY_ID }} + LITESTREAM_S3_SECRET_ACCESS_KEY: ${{ secrets.LITESTREAM_S3_SECRET_ACCESS_KEY }} + LITESTREAM_S3_REGION: ${{ secrets.LITESTREAM_S3_REGION }} + LITESTREAM_S3_BUCKET: ${{ secrets.LITESTREAM_S3_BUCKET }} diff --git a/cmd/litestream/generations.go b/cmd/litestream/generations.go index 57a778e..fefa40c 100644 --- a/cmd/litestream/generations.go +++ b/cmd/litestream/generations.go @@ -30,8 +30,8 @@ func (c *GenerationsCommand) Run(ctx context.Context, args []string) (err error) } var db *litestream.DB - var r litestream.Replica - updatedAt := time.Now() + var r *litestream.Replica + dbUpdatedAt := time.Now() if isURL(fs.Arg(0)) { if *configPath != "" { return fmt.Errorf("cannot specify a replica URL and the -config flag") @@ -67,14 +67,14 @@ func (c *GenerationsCommand) Run(ctx context.Context, args []string) (err error) } // Determine last time database or WAL was updated. 
- if updatedAt, err = db.UpdatedAt(); err != nil { + if dbUpdatedAt, err = db.UpdatedAt(); err != nil { return err } } - var replicas []litestream.Replica + var replicas []*litestream.Replica if r != nil { - replicas = []litestream.Replica{r} + replicas = []*litestream.Replica{r} } else { replicas = db.Replicas } @@ -85,7 +85,7 @@ func (c *GenerationsCommand) Run(ctx context.Context, args []string) (err error) fmt.Fprintln(w, "name\tgeneration\tlag\tstart\tend") for _, r := range replicas { - generations, err := r.Generations(ctx) + generations, err := r.Client.Generations(ctx) if err != nil { log.Printf("%s: cannot list generations: %s", r.Name(), err) continue @@ -93,18 +93,18 @@ func (c *GenerationsCommand) Run(ctx context.Context, args []string) (err error) // Iterate over each generation for the replica. for _, generation := range generations { - stats, err := r.GenerationStats(ctx, generation) + createdAt, updatedAt, err := r.GenerationTimeBounds(ctx, generation) if err != nil { - log.Printf("%s: cannot find generation stats: %s", r.Name(), err) + log.Printf("%s: cannot determine generation time bounds: %s", r.Name(), err) continue } fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\n", r.Name(), generation, - truncateDuration(updatedAt.Sub(stats.UpdatedAt)).String(), - stats.CreatedAt.Format(time.RFC3339), - stats.UpdatedAt.Format(time.RFC3339), + truncateDuration(dbUpdatedAt.Sub(updatedAt)).String(), + createdAt.Format(time.RFC3339), + updatedAt.Format(time.RFC3339), ) } } diff --git a/cmd/litestream/main.go b/cmd/litestream/main.go index 4ff0085..bf98a82 100644 --- a/cmd/litestream/main.go +++ b/cmd/litestream/main.go @@ -19,6 +19,7 @@ import ( "time" "github.com/benbjohnson/litestream" + "github.com/benbjohnson/litestream/file" "github.com/benbjohnson/litestream/s3" _ "github.com/mattn/go-sqlite3" "gopkg.in/yaml.v2" @@ -272,15 +273,15 @@ func NewDBFromConfig(dbc *DBConfig) (*litestream.DB, error) { // ReplicaConfig represents the configuration for a single replica in a database. type ReplicaConfig struct { - Type string `yaml:"type"` // "file", "s3" - Name string `yaml:"name"` // name of replica, optional. - Path string `yaml:"path"` - URL string `yaml:"url"` - Retention time.Duration `yaml:"retention"` - RetentionCheckInterval time.Duration `yaml:"retention-check-interval"` - SyncInterval time.Duration `yaml:"sync-interval"` // s3 only - SnapshotInterval time.Duration `yaml:"snapshot-interval"` - ValidationInterval time.Duration `yaml:"validation-interval"` + Type string `yaml:"type"` // "file", "s3" + Name string `yaml:"name"` // name of replica, optional. + Path string `yaml:"path"` + URL string `yaml:"url"` + Retention *time.Duration `yaml:"retention"` + RetentionCheckInterval *time.Duration `yaml:"retention-check-interval"` + SyncInterval *time.Duration `yaml:"sync-interval"` + SnapshotInterval *time.Duration `yaml:"snapshot-interval"` + ValidationInterval *time.Duration `yaml:"validation-interval"` // S3 settings AccessKeyID string `yaml:"access-key-id"` @@ -293,24 +294,51 @@ type ReplicaConfig struct { } // NewReplicaFromConfig instantiates a replica for a DB based on a config. -func NewReplicaFromConfig(c *ReplicaConfig, db *litestream.DB) (litestream.Replica, error) { +func NewReplicaFromConfig(c *ReplicaConfig, db *litestream.DB) (_ *litestream.Replica, err error) { // Ensure user did not specify URL in path. if isURL(c.Path) { return nil, fmt.Errorf("replica path cannot be a url, please use the 'url' field instead: %s", c.Path) } + // Build replica. 
+ r := litestream.NewReplica(db, c.Name) + if v := c.Retention; v != nil { + r.Retention = *v + } + if v := c.RetentionCheckInterval; v != nil { + r.RetentionCheckInterval = *v + } + if v := c.SyncInterval; v != nil { + r.SyncInterval = *v + } else if c.ReplicaType() == "s3" { + r.SyncInterval = 10 * time.Second // default s3 to 10s for configs + } + if v := c.SnapshotInterval; v != nil { + r.SnapshotInterval = *v + } + if v := c.ValidationInterval; v != nil { + r.ValidationInterval = *v + } + + // Build and set client on replica. switch c.ReplicaType() { case "file": - return newFileReplicaFromConfig(c, db) + if r.Client, err = newFileReplicaClientFromConfig(c, r); err != nil { + return nil, err + } case "s3": - return newS3ReplicaFromConfig(c, db) + if r.Client, err = newS3ReplicaClientFromConfig(c, r); err != nil { + return nil, err + } default: return nil, fmt.Errorf("unknown replica type in config: %q", c.Type) } + + return r, nil } -// newFileReplicaFromConfig returns a new instance of FileReplica build from config. -func newFileReplicaFromConfig(c *ReplicaConfig, db *litestream.DB) (_ *litestream.FileReplica, err error) { +// newFileReplicaClientFromConfig returns a new instance of file.ReplicaClient built from config. +func newFileReplicaClientFromConfig(c *ReplicaConfig, r *litestream.Replica) (_ *file.ReplicaClient, err error) { // Ensure URL & path are not both specified. if c.URL != "" && c.Path != "" { return nil, fmt.Errorf("cannot specify url & path for file replica") @@ -335,24 +363,13 @@ func newFileReplicaFromConfig(c *ReplicaConfig, db *litestream.DB) (_ *litestrea } // Instantiate replica and apply time fields, if set. - r := litestream.NewFileReplica(db, c.Name, path) - if v := c.Retention; v > 0 { - r.Retention = v - } - if v := c.RetentionCheckInterval; v > 0 { - r.RetentionCheckInterval = v - } - if v := c.SnapshotInterval; v > 0 { - r.SnapshotInterval = v - } - if v := c.ValidationInterval; v > 0 { - r.ValidationInterval = v - } - return r, nil + client := file.NewReplicaClient(path) + client.Replica = r + return client, nil } -// newS3ReplicaFromConfig returns a new instance of S3Replica build from config. -func newS3ReplicaFromConfig(c *ReplicaConfig, db *litestream.DB) (_ *s3.Replica, err error) { +// newS3ReplicaClientFromConfig returns a new instance of s3.ReplicaClient built from config. +func newS3ReplicaClientFromConfig(c *ReplicaConfig, r *litestream.Replica) (_ *s3.ReplicaClient, err error) { // Ensure URL & constituent parts are not both specified. if c.URL != "" && c.Path != "" { return nil, fmt.Errorf("cannot specify url & path for s3 replica") @@ -402,32 +419,16 @@ func newS3ReplicaFromConfig(c *ReplicaConfig, db *litestream.DB) (_ *s3.Replica, } // Build replica. 
- r := s3.NewReplica(db, c.Name)
- r.AccessKeyID = c.AccessKeyID
- r.SecretAccessKey = c.SecretAccessKey
- r.Bucket = bucket
- r.Path = path
- r.Region = region
- r.Endpoint = endpoint
- r.ForcePathStyle = forcePathStyle
- r.SkipVerify = skipVerify
-
- if v := c.Retention; v > 0 {
- r.Retention = v
- }
- if v := c.RetentionCheckInterval; v > 0 {
- r.RetentionCheckInterval = v
- }
- if v := c.SyncInterval; v > 0 {
- r.SyncInterval = v
- }
- if v := c.SnapshotInterval; v > 0 {
- r.SnapshotInterval = v
- }
- if v := c.ValidationInterval; v > 0 {
- r.ValidationInterval = v
- }
- return r, nil
+ client := s3.NewReplicaClient()
+ client.AccessKeyID = c.AccessKeyID
+ client.SecretAccessKey = c.SecretAccessKey
+ client.Bucket = bucket
+ client.Path = path
+ client.Region = region
+ client.Endpoint = endpoint
+ client.ForcePathStyle = forcePathStyle
+ client.SkipVerify = skipVerify
+ return client, nil
 }

 // applyLitestreamEnv copies "LITESTREAM" prefixed environment variables to
diff --git a/cmd/litestream/main_test.go b/cmd/litestream/main_test.go
index bd30c28..feded64 100644
--- a/cmd/litestream/main_test.go
+++ b/cmd/litestream/main_test.go
@@ -6,8 +6,8 @@ import (
 "path/filepath"
 "testing"

- "github.com/benbjohnson/litestream"
 main "github.com/benbjohnson/litestream/cmd/litestream"
+ "github.com/benbjohnson/litestream/file"
 "github.com/benbjohnson/litestream/s3"
 )

@@ -96,9 +96,9 @@ func TestNewFileReplicaFromConfig(t *testing.T) {
 r, err := main.NewReplicaFromConfig(&main.ReplicaConfig{Path: "/foo"}, nil)
 if err != nil {
 t.Fatal(err)
- } else if r, ok := r.(*litestream.FileReplica); !ok {
+ } else if client, ok := r.Client.(*file.ReplicaClient); !ok {
 t.Fatal("unexpected replica type")
- } else if got, want := r.Path(), "/foo"; got != want {
+ } else if got, want := client.Path(), "/foo"; got != want {
 t.Fatalf("Path=%s, want %s", got, want)
 }
 }

@@ -108,17 +108,17 @@
 r, err := main.NewReplicaFromConfig(&main.ReplicaConfig{URL: "s3://foo/bar"}, nil)
 if err != nil {
 t.Fatal(err)
- } else if r, ok := r.(*s3.Replica); !ok {
+ } else if client, ok := r.Client.(*s3.ReplicaClient); !ok {
 t.Fatal("unexpected replica type")
- } else if got, want := r.Bucket, "foo"; got != want {
+ } else if got, want := client.Bucket, "foo"; got != want {
 t.Fatalf("Bucket=%s, want %s", got, want)
- } else if got, want := r.Path, "bar"; got != want {
+ } else if got, want := client.Path, "bar"; got != want {
 t.Fatalf("Path=%s, want %s", got, want)
- } else if got, want := r.Region, ""; got != want {
+ } else if got, want := client.Region, ""; got != want {
 t.Fatalf("Region=%s, want %s", got, want)
- } else if got, want := r.Endpoint, ""; got != want {
+ } else if got, want := client.Endpoint, ""; got != want {
 t.Fatalf("Endpoint=%s, want %s", got, want)
- } else if got, want := r.ForcePathStyle, false; got != want {
+ } else if got, want := client.ForcePathStyle, false; got != want {
 t.Fatalf("ForcePathStyle=%v, want %v", got, want)
 }
 })

@@ -127,17 +127,17 @@
 r, err := main.NewReplicaFromConfig(&main.ReplicaConfig{URL: "s3://foo.localhost:9000/bar"}, nil)
 if err != nil {
 t.Fatal(err)
- } else if r, ok := r.(*s3.Replica); !ok {
+ } else if client, ok := r.Client.(*s3.ReplicaClient); !ok {
 t.Fatal("unexpected replica type")
- } else if got, want := r.Bucket, "foo"; got != want {
+ } else if got, want := client.Bucket, "foo"; got != want {
 t.Fatalf("Bucket=%s, want %s", got, want)
- } else if got, want := r.Path, "bar"; got != want {
+ } else if got, want := client.Path, "bar"; got != want {
 t.Fatalf("Path=%s, want %s", got, want)
- } else if got, want := r.Region, "us-east-1"; got != want {
+ } else if got, want := client.Region, "us-east-1"; got != want {
 t.Fatalf("Region=%s, want %s", got, want)
- } else if got, want := r.Endpoint, "http://localhost:9000"; got != want {
+ } else if got, want := client.Endpoint, "http://localhost:9000"; got != want {
 t.Fatalf("Endpoint=%s, want %s", got, want)
- } else if got, want := r.ForcePathStyle, true; got != want {
+ } else if got, want := client.ForcePathStyle, true; got != want {
 t.Fatalf("ForcePathStyle=%v, want %v", got, want)
 }
 })

@@ -146,17 +146,17 @@
 r, err := main.NewReplicaFromConfig(&main.ReplicaConfig{URL: "s3://foo.s3.us-west-000.backblazeb2.com/bar"}, nil)
 if err != nil {
 t.Fatal(err)
- } else if r, ok := r.(*s3.Replica); !ok {
+ } else if client, ok := r.Client.(*s3.ReplicaClient); !ok {
 t.Fatal("unexpected replica type")
- } else if got, want := r.Bucket, "foo"; got != want {
+ } else if got, want := client.Bucket, "foo"; got != want {
 t.Fatalf("Bucket=%s, want %s", got, want)
- } else if got, want := r.Path, "bar"; got != want {
+ } else if got, want := client.Path, "bar"; got != want {
 t.Fatalf("Path=%s, want %s", got, want)
- } else if got, want := r.Region, "us-west-000"; got != want {
+ } else if got, want := client.Region, "us-west-000"; got != want {
 t.Fatalf("Region=%s, want %s", got, want)
- } else if got, want := r.Endpoint, "https://s3.us-west-000.backblazeb2.com"; got != want {
+ } else if got, want := client.Endpoint, "https://s3.us-west-000.backblazeb2.com"; got != want {
 t.Fatalf("Endpoint=%s, want %s", got, want)
- } else if got, want := r.ForcePathStyle, true; got != want {
+ } else if got, want := client.ForcePathStyle, true; got != want {
 t.Fatalf("ForcePathStyle=%v, want %v", got, want)
 }
 })

@@ -165,17 +165,17 @@
 r, err := main.NewReplicaFromConfig(&main.ReplicaConfig{URL: "s3://foo.storage.googleapis.com/bar"}, nil)
 if err != nil {
 t.Fatal(err)
- } else if r, ok := r.(*s3.Replica); !ok {
+ } else if client, ok := r.Client.(*s3.ReplicaClient); !ok {
 t.Fatal("unexpected replica type")
- } else if got, want := r.Bucket, "foo"; got != want {
+ } else if got, want := client.Bucket, "foo"; got != want {
 t.Fatalf("Bucket=%s, want %s", got, want)
- } else if got, want := r.Path, "bar"; got != want {
+ } else if got, want := client.Path, "bar"; got != want {
 t.Fatalf("Path=%s, want %s", got, want)
- } else if got, want := r.Region, "us-east-1"; got != want {
+ } else if got, want := client.Region, "us-east-1"; got != want {
 t.Fatalf("Region=%s, want %s", got, want)
- } else if got, want := r.Endpoint, "https://storage.googleapis.com"; got != want {
+ } else if got, want := client.Endpoint, "https://storage.googleapis.com"; got != want {
 t.Fatalf("Endpoint=%s, want %s", got, want)
- } else if got, want := r.ForcePathStyle, true; got != want {
+ } else if got, want := client.ForcePathStyle, true; got != want {
 t.Fatalf("ForcePathStyle=%v, want %v", got, want)
 }
 })
diff --git a/cmd/litestream/replicate.go b/cmd/litestream/replicate.go
index 512ecba..aa78d2b 100644
--- a/cmd/litestream/replicate.go
+++ b/cmd/litestream/replicate.go
@@ -9,9 +9,9 @@ import (
 "net/http"
 _ "net/http/pprof"
 "os"
- "time"

 "github.com/benbjohnson/litestream"
+ "github.com/benbjohnson/litestream/file"
 "github.com/benbjohnson/litestream/s3"
"github.com/prometheus/client_golang/prometheus/promhttp" ) @@ -48,9 +48,10 @@ func (c *ReplicateCommand) ParseFlags(ctx context.Context, args []string) (err e dbConfig := &DBConfig{Path: fs.Arg(0)} for _, u := range fs.Args()[1:] { + syncInterval := litestream.DefaultSyncInterval dbConfig.Replicas = append(dbConfig.Replicas, &ReplicaConfig{ URL: u, - SyncInterval: 1 * time.Second, + SyncInterval: &syncInterval, }) } c.Config.DBs = []*DBConfig{dbConfig} @@ -102,13 +103,13 @@ func (c *ReplicateCommand) Run(ctx context.Context) (err error) { for _, db := range c.DBs { log.Printf("initialized db: %s", db.Path()) for _, r := range db.Replicas { - switch r := r.(type) { - case *litestream.FileReplica: - log.Printf("replicating to: name=%q type=%q path=%q", r.Name(), r.Type(), r.Path()) - case *s3.Replica: - log.Printf("replicating to: name=%q type=%q bucket=%q path=%q region=%q endpoint=%q sync-interval=%s", r.Name(), r.Type(), r.Bucket, r.Path, r.Region, r.Endpoint, r.SyncInterval) + switch client := r.Client.(type) { + case *file.ReplicaClient: + log.Printf("replicating to: name=%q type=%q path=%q", r.Name(), client.Type(), client.Path()) + case *s3.ReplicaClient: + log.Printf("replicating to: name=%q type=%q bucket=%q path=%q region=%q endpoint=%q sync-interval=%s", r.Name(), client.Type(), client.Bucket, client.Path, client.Region, client.Endpoint, r.SyncInterval) default: - log.Printf("replicating to: name=%q type=%q", r.Name(), r.Type()) + log.Printf("replicating to: name=%q type=%q", r.Name(), client.Type()) } } } diff --git a/cmd/litestream/restore.go b/cmd/litestream/restore.go index 4fa78e7..85f64f5 100644 --- a/cmd/litestream/restore.go +++ b/cmd/litestream/restore.go @@ -53,7 +53,7 @@ func (c *RestoreCommand) Run(ctx context.Context, args []string) (err error) { } // Determine replica & generation to restore from. - var r litestream.Replica + var r *litestream.Replica if isURL(fs.Arg(0)) { if *configPath != "" { return fmt.Errorf("cannot specify a replica URL and the -config flag") @@ -80,21 +80,25 @@ func (c *RestoreCommand) Run(ctx context.Context, args []string) (err error) { return fmt.Errorf("no matching backups found") } - return litestream.RestoreReplica(ctx, r, opt) + return r.Restore(ctx, opt) } // loadFromURL creates a replica & updates the restore options from a replica URL. -func (c *RestoreCommand) loadFromURL(ctx context.Context, replicaURL string, opt *litestream.RestoreOptions) (litestream.Replica, error) { - r, err := NewReplicaFromConfig(&ReplicaConfig{URL: replicaURL}, nil) +func (c *RestoreCommand) loadFromURL(ctx context.Context, replicaURL string, opt *litestream.RestoreOptions) (*litestream.Replica, error) { + syncInterval := litestream.DefaultSyncInterval + r, err := NewReplicaFromConfig(&ReplicaConfig{ + URL: replicaURL, + SyncInterval: &syncInterval, + }, nil) if err != nil { return nil, err } - opt.Generation, _, err = litestream.CalcReplicaRestoreTarget(ctx, r, *opt) + opt.Generation, _, err = r.CalcRestoreTarget(ctx, *opt) return r, err } // loadFromConfig returns a replica & updates the restore options from a DB reference. -func (c *RestoreCommand) loadFromConfig(ctx context.Context, dbPath, configPath string, expandEnv bool, opt *litestream.RestoreOptions) (litestream.Replica, error) { +func (c *RestoreCommand) loadFromConfig(ctx context.Context, dbPath, configPath string, expandEnv bool, opt *litestream.RestoreOptions) (*litestream.Replica, error) { // Load configuration. 
config, err := ReadConfigFile(configPath, expandEnv) if err != nil { diff --git a/cmd/litestream/snapshots.go b/cmd/litestream/snapshots.go index 920ddb5..72e67a5 100644 --- a/cmd/litestream/snapshots.go +++ b/cmd/litestream/snapshots.go @@ -4,6 +4,7 @@ import ( "context" "flag" "fmt" + "log" "os" "text/tabwriter" "time" @@ -29,7 +30,7 @@ func (c *SnapshotsCommand) Run(ctx context.Context, args []string) (err error) { } var db *litestream.DB - var r litestream.Replica + var r *litestream.Replica if isURL(fs.Arg(0)) { if *configPath != "" { return fmt.Errorf("cannot specify a replica URL and the -config flag") @@ -66,15 +67,11 @@ func (c *SnapshotsCommand) Run(ctx context.Context, args []string) (err error) { } // Find snapshots by db or replica. - var infos []*litestream.SnapshotInfo + var replicas []*litestream.Replica if r != nil { - if infos, err = r.Snapshots(ctx); err != nil { - return err - } + replicas = []*litestream.Replica{r} } else { - if infos, err = db.Snapshots(ctx); err != nil { - return err - } + replicas = db.Replicas } // List all snapshots. @@ -82,14 +79,21 @@ func (c *SnapshotsCommand) Run(ctx context.Context, args []string) (err error) { defer w.Flush() fmt.Fprintln(w, "replica\tgeneration\tindex\tsize\tcreated") - for _, info := range infos { - fmt.Fprintf(w, "%s\t%s\t%d\t%d\t%s\n", - info.Replica, - info.Generation, - info.Index, - info.Size, - info.CreatedAt.Format(time.RFC3339), - ) + for _, r := range replicas { + infos, err := r.Snapshots(ctx) + if err != nil { + log.Printf("cannot determine snapshots: %s", err) + continue + } + for _, info := range infos { + fmt.Fprintf(w, "%s\t%s\t%d\t%d\t%s\n", + r.Name(), + info.Generation, + info.Index, + info.Size, + info.CreatedAt.Format(time.RFC3339), + ) + } } return nil diff --git a/cmd/litestream/wal.go b/cmd/litestream/wal.go index bd60a28..9b7b9ef 100644 --- a/cmd/litestream/wal.go +++ b/cmd/litestream/wal.go @@ -4,6 +4,7 @@ import ( "context" "flag" "fmt" + "log" "os" "text/tabwriter" "time" @@ -30,7 +31,7 @@ func (c *WALCommand) Run(ctx context.Context, args []string) (err error) { } var db *litestream.DB - var r litestream.Replica + var r *litestream.Replica if isURL(fs.Arg(0)) { if *configPath != "" { return fmt.Errorf("cannot specify a replica URL and the -config flag") @@ -67,15 +68,11 @@ func (c *WALCommand) Run(ctx context.Context, args []string) (err error) { } // Find WAL files by db or replica. - var infos []*litestream.WALInfo + var replicas []*litestream.Replica if r != nil { - if infos, err = r.WALs(ctx); err != nil { - return err - } + replicas = []*litestream.Replica{r} } else { - if infos, err = db.WALs(ctx); err != nil { - return err - } + replicas = db.Replicas } // List all WAL files. 
@@ -83,19 +80,43 @@ func (c *WALCommand) Run(ctx context.Context, args []string) (err error) { defer w.Flush() fmt.Fprintln(w, "replica\tgeneration\tindex\toffset\tsize\tcreated") - for _, info := range infos { - if *generation != "" && info.Generation != *generation { - continue + for _, r := range replicas { + var generations []string + if *generation != "" { + generations = []string{*generation} + } else { + if generations, err = r.Client.Generations(ctx); err != nil { + log.Printf("%s: cannot determine generations: %s", r.Name(), err) + continue + } } - fmt.Fprintf(w, "%s\t%s\t%d\t%d\t%d\t%s\n", - info.Replica, - info.Generation, - info.Index, - info.Offset, - info.Size, - info.CreatedAt.Format(time.RFC3339), - ) + for _, generation := range generations { + if err := func() error { + itr, err := r.Client.WALSegments(ctx, generation) + if err != nil { + return err + } + defer itr.Close() + + for itr.Next() { + info := itr.WALSegment() + + fmt.Fprintf(w, "%s\t%s\t%d\t%d\t%d\t%s\n", + r.Name(), + info.Generation, + info.Index, + info.Offset, + info.Size, + info.CreatedAt.Format(time.RFC3339), + ) + } + return itr.Close() + }(); err != nil { + log.Printf("%s: cannot fetch wal segments: %s", r.Name(), err) + continue + } + } } return nil @@ -104,7 +125,7 @@ func (c *WALCommand) Run(ctx context.Context, args []string) (err error) { // Usage prints the help screen to STDOUT. func (c *WALCommand) Usage() { fmt.Printf(` -The wal command lists all wal files available for a database. +The wal command lists all wal segments available for a database. Usage: @@ -129,13 +150,13 @@ Arguments: Examples: - # List all WAL files for a database. + # List all WAL segments for a database. $ litestream wal /path/to/db - # List all WAL files on S3 for a specific generation. + # List all WAL segments on S3 for a specific generation. $ litestream wal -replica s3 -generation xxxxxxxx /path/to/db - # List all WAL files for replica URL. + # List all WAL segments for replica URL. $ litestream wal s3://mybkt/db `[1:], diff --git a/db.go b/db.go index d7a7d6e..e71d6b4 100644 --- a/db.go +++ b/db.go @@ -16,14 +16,13 @@ import ( "math/rand" "os" "path/filepath" - "sort" "strings" "sync" "time" + "github.com/benbjohnson/litestream/internal" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "golang.org/x/sync/errgroup" ) // Default DB settings. @@ -51,10 +50,8 @@ type DB struct { pageSize int // page size, in bytes notify chan struct{} // closes on WAL change - uid, gid int // db user/group obtained on init - mode os.FileMode - diruid, dirgid int // db parent user/group obtained on init - dirmode os.FileMode + fileInfo os.FileInfo // db info cached during init + dirInfo os.FileInfo // parent dir info cached during init ctx context.Context cancel func() @@ -96,7 +93,7 @@ type DB struct { // List of replicas for the database. // Must be set before calling Open(). - Replicas []Replica + Replicas []*Replica } // NewDB returns a new instance of DB for a given path. @@ -104,8 +101,6 @@ func NewDB(path string) *DB { db := &DB{ path: path, notify: make(chan struct{}), - uid: -1, gid: -1, mode: 0600, - diruid: -1, dirgid: -1, dirmode: 0700, MinCheckpointPageN: DefaultMinCheckpointPageN, MaxCheckpointPageN: DefaultMaxCheckpointPageN, @@ -196,10 +191,7 @@ func (db *DB) CurrentShadowWALIndex(generation string) (index int, size int64, e // Find highest wal index. 
for _, fi := range fis { - if !strings.HasSuffix(fi.Name(), WALExt) { - continue - } - if v, _, _, err := ParseWALPath(fi.Name()); err != nil { + if v, err := ParseWALPath(fi.Name()); err != nil { continue // invalid wal filename } else if v > index { index = v @@ -210,8 +202,18 @@ func (db *DB) CurrentShadowWALIndex(generation string) (index int, size int64, e return index, size, nil } +// FileInfo returns the cached file stats for the database file when it was initialized. +func (db *DB) FileInfo() os.FileInfo { + return db.fileInfo +} + +// DirInfo returns the cached file stats for the parent directory of the database file when it was initialized. +func (db *DB) DirInfo() os.FileInfo { + return db.dirInfo +} + // Replica returns a replica by name. -func (db *DB) Replica(name string) Replica { +func (db *DB) Replica(name string) *Replica { for _, r := range db.Replicas { if r.Name() == name { return r @@ -364,42 +366,6 @@ func (db *DB) UpdatedAt() (time.Time, error) { return t, nil } -// Snapshots returns a list of all snapshots across all replicas. -func (db *DB) Snapshots(ctx context.Context) ([]*SnapshotInfo, error) { - var infos []*SnapshotInfo - for _, r := range db.Replicas { - a, err := r.Snapshots(ctx) - if err != nil { - return nil, err - } - infos = append(infos, a...) - } - - // Sort in order by time. - sort.Slice(infos, func(i, j int) bool { - return infos[i].CreatedAt.Before(infos[j].CreatedAt) - }) - return infos, nil -} - -// WALs returns a list of all WAL files across all replicas. -func (db *DB) WALs(ctx context.Context) ([]*WALInfo, error) { - var infos []*WALInfo - for _, r := range db.Replicas { - a, err := r.WALs(ctx) - if err != nil { - return nil, err - } - infos = append(infos, a...) - } - - // Sort in order by time. - sort.Slice(infos, func(i, j int) bool { - return infos[i].CreatedAt.Before(infos[j].CreatedAt) - }) - return infos, nil -} - // init initializes the connection to the database. // Skipped if already initialized or if the database file does not exist. func (db *DB) init() (err error) { @@ -415,15 +381,13 @@ func (db *DB) init() (err error) { } else if err != nil { return err } - db.uid, db.gid = fileinfo(fi) - db.mode = fi.Mode() + db.fileInfo = fi // Obtain permissions for parent directory. if fi, err = os.Stat(filepath.Dir(db.path)); err != nil { return err } - db.diruid, db.dirgid = fileinfo(fi) - db.dirmode = fi.Mode() + db.dirInfo = fi dsn := db.path dsn += fmt.Sprintf("?_busy_timeout=%d", BusyTimeout.Milliseconds()) @@ -489,7 +453,7 @@ func (db *DB) init() (err error) { } // Ensure meta directory structure exists. - if err := mkdirAll(db.MetaPath(), db.dirmode, db.diruid, db.dirgid); err != nil { + if err := internal.MkdirAll(db.MetaPath(), db.dirInfo); err != nil { return err } @@ -596,7 +560,7 @@ func (db *DB) cleanWAL() error { // Determine lowest index that's been replicated to all replicas. min := -1 for _, r := range db.Replicas { - pos := r.LastPos() + pos := r.Pos() if pos.Generation != generation { pos = Pos{} // different generation, reset index to zero } @@ -620,7 +584,7 @@ func (db *DB) cleanWAL() error { return err } for _, fi := range fis { - if idx, _, _, err := ParseWALPath(fi.Name()); err != nil || idx >= min { + if idx, err := ParseWALPath(fi.Name()); err != nil || idx >= min { continue } if err := os.Remove(filepath.Join(dir, fi.Name())); err != nil { @@ -696,7 +660,7 @@ func (db *DB) createGeneration() (string, error) { // Generate new directory. 
dir := filepath.Join(db.MetaPath(), "generations", generation) - if err := mkdirAll(dir, db.dirmode, db.diruid, db.dirgid); err != nil { + if err := internal.MkdirAll(dir, db.dirInfo); err != nil { return "", err } @@ -707,10 +671,15 @@ func (db *DB) createGeneration() (string, error) { // Atomically write generation name as current generation. generationNamePath := db.GenerationNamePath() - if err := ioutil.WriteFile(generationNamePath+".tmp", []byte(generation+"\n"), db.mode); err != nil { + mode := os.FileMode(0600) + if db.fileInfo != nil { + mode = db.fileInfo.Mode() + } + if err := ioutil.WriteFile(generationNamePath+".tmp", []byte(generation+"\n"), mode); err != nil { return "", fmt.Errorf("write generation temp file: %w", err) } - _ = os.Chown(generationNamePath+".tmp", db.uid, db.gid) + uid, gid := internal.Fileinfo(db.fileInfo) + _ = os.Chown(generationNamePath+".tmp", uid, gid) if err := os.Rename(generationNamePath+".tmp", generationNamePath); err != nil { return "", fmt.Errorf("rename generation file: %w", err) } @@ -801,7 +770,7 @@ func (db *DB) Sync(ctx context.Context) (err error) { if checkpoint { changed = true - if err := db.checkpointAndInit(ctx, info.generation, checkpointMode); err != nil { + if err := db.checkpoint(ctx, info.generation, checkpointMode); err != nil { return fmt.Errorf("checkpoint: mode=%v err=%w", checkpointMode, err) } } @@ -959,7 +928,7 @@ func (db *DB) syncWAL(info syncInfo) (newSize int64, err error) { // Parse index of current shadow WAL file. dir, base := filepath.Split(info.shadowWALPath) - index, _, _, err := ParseWALPath(base) + index, err := ParseWALPath(base) if err != nil { return 0, fmt.Errorf("cannot parse shadow wal filename: %s", base) } @@ -993,12 +962,17 @@ func (db *DB) initShadowWALFile(filename string) (int64, error) { } // Write header to new WAL shadow file. - if err := mkdirAll(filepath.Dir(filename), db.dirmode, db.diruid, db.dirgid); err != nil { + mode := os.FileMode(0600) + if fi := db.fileInfo; fi != nil { + mode = fi.Mode() + } + if err := internal.MkdirAll(filepath.Dir(filename), db.dirInfo); err != nil { return 0, err - } else if err := ioutil.WriteFile(filename, hdr, db.mode); err != nil { + } else if err := ioutil.WriteFile(filename, hdr, mode); err != nil { return 0, err } - _ = os.Chown(filename, db.uid, db.gid) + uid, gid := internal.Fileinfo(db.fileInfo) + _ = os.Chown(filename, uid, gid) // Copy as much shadow WAL as available. newSize, err := db.copyToShadowWAL(filename) @@ -1155,7 +1129,7 @@ func (db *DB) shadowWALReader(pos Pos) (r *ShadowWALReader, err error) { // Ensure file is closed if any error occurs. defer func() { if err != nil { - r.Close() + f.Close() } }() @@ -1258,13 +1232,91 @@ func readLastChecksumFrom(f *os.File, pageSize int) (uint32, uint32, error) { } // Checkpoint performs a checkpoint on the WAL file. -func (db *DB) Checkpoint(mode string) (err error) { +func (db *DB) Checkpoint(ctx context.Context, mode string) (err error) { db.mu.Lock() defer db.mu.Unlock() - return db.checkpoint(mode) + + generation, err := db.CurrentGeneration() + if err != nil { + return fmt.Errorf("cannot determine generation: %w", err) + } + return db.checkpoint(ctx, generation, mode) } -func (db *DB) checkpoint(mode string) (err error) { +// checkpointAndInit performs a checkpoint on the WAL file and initializes a +// new shadow WAL file. 
+func (db *DB) checkpoint(ctx context.Context, generation, mode string) error { + shadowWALPath, err := db.CurrentShadowWALPath(generation) + if err != nil { + return err + } + + // Read WAL header before checkpoint to check if it has been restarted. + hdr, err := readWALHeader(db.WALPath()) + if err != nil { + return err + } + + // Copy shadow WAL before checkpoint to copy as much as possible. + if _, err := db.copyToShadowWAL(shadowWALPath); err != nil { + return fmt.Errorf("cannot copy to end of shadow wal before checkpoint: %w", err) + } + + // Execute checkpoint and immediately issue a write to the WAL to ensure + // a new page is written. + if err := db.execCheckpoint(mode); err != nil { + return err + } else if _, err = db.db.Exec(`INSERT INTO _litestream_seq (id, seq) VALUES (1, 1) ON CONFLICT (id) DO UPDATE SET seq = seq + 1`); err != nil { + return err + } + + // If WAL hasn't been restarted, exit. + if other, err := readWALHeader(db.WALPath()); err != nil { + return err + } else if bytes.Equal(hdr, other) { + return nil + } + + // Start a transaction. This will be promoted immediately after. + tx, err := db.db.Begin() + if err != nil { + return fmt.Errorf("begin: %w", err) + } + defer func() { _ = rollback(tx) }() + + // Insert into the lock table to promote to a write tx. The lock table + // insert will never actually occur because our tx will be rolled back, + // however, it will ensure our tx grabs the write lock. Unfortunately, + // we can't call "BEGIN IMMEDIATE" as we are already in a transaction. + if _, err := tx.ExecContext(ctx, `INSERT INTO _litestream_lock (id) VALUES (1);`); err != nil { + return fmt.Errorf("_litestream_lock: %w", err) + } + + // Copy the end of the previous WAL before starting a new shadow WAL. + if _, err := db.copyToShadowWAL(shadowWALPath); err != nil { + return fmt.Errorf("cannot copy to end of shadow wal: %w", err) + } + + // Parse index of current shadow WAL file. + index, err := ParseWALPath(shadowWALPath) + if err != nil { + return fmt.Errorf("cannot parse shadow wal filename: %s", shadowWALPath) + } + + // Start a new shadow WAL file with next index. + newShadowWALPath := filepath.Join(filepath.Dir(shadowWALPath), FormatWALPath(index+1)) + if _, err := db.initShadowWALFile(newShadowWALPath); err != nil { + return fmt.Errorf("cannot init shadow wal file: name=%s err=%w", newShadowWALPath, err) + } + + // Release write lock before checkpointing & exiting. + if err := tx.Rollback(); err != nil { + return fmt.Errorf("rollback post-checkpoint tx: %w", err) + } + return nil +} + +func (db *DB) execCheckpoint(mode string) (err error) { // Ignore if there is no underlying database. if db.db == nil { return nil @@ -1310,79 +1362,6 @@ func (db *DB) checkpoint(mode string) (err error) { return nil } -// checkpointAndInit performs a checkpoint on the WAL file and initializes a -// new shadow WAL file. -func (db *DB) checkpointAndInit(ctx context.Context, generation, mode string) error { - shadowWALPath, err := db.CurrentShadowWALPath(generation) - if err != nil { - return err - } - - // Read WAL header before checkpoint to check if it has been restarted. - hdr, err := readWALHeader(db.WALPath()) - if err != nil { - return err - } - - // Copy shadow WAL before checkpoint to copy as much as possible. - if _, err := db.copyToShadowWAL(shadowWALPath); err != nil { - return fmt.Errorf("cannot copy to end of shadow wal before checkpoint: %w", err) - } - - // Execute checkpoint and immediately issue a write to the WAL to ensure - // a new page is written. 
- if err := db.checkpoint(mode); err != nil { - return err - } else if _, err = db.db.Exec(`INSERT INTO _litestream_seq (id, seq) VALUES (1, 1) ON CONFLICT (id) DO UPDATE SET seq = seq + 1`); err != nil { - return err - } - - // If WAL hasn't been restarted, exit. - if other, err := readWALHeader(db.WALPath()); err != nil { - return err - } else if bytes.Equal(hdr, other) { - return nil - } - - // Start a transaction. This will be promoted immediately after. - tx, err := db.db.Begin() - if err != nil { - return fmt.Errorf("begin: %w", err) - } - defer func() { _ = rollback(tx) }() - - // Insert into the lock table to promote to a write tx. The lock table - // insert will never actually occur because our tx will be rolled back, - // however, it will ensure our tx grabs the write lock. Unfortunately, - // we can't call "BEGIN IMMEDIATE" as we are already in a transaction. - if _, err := tx.ExecContext(ctx, `INSERT INTO _litestream_lock (id) VALUES (1);`); err != nil { - return fmt.Errorf("_litestream_lock: %w", err) - } - - // Copy the end of the previous WAL before starting a new shadow WAL. - if _, err := db.copyToShadowWAL(shadowWALPath); err != nil { - return fmt.Errorf("cannot copy to end of shadow wal: %w", err) - } - - // Parse index of current shadow WAL file. - index, _, _, err := ParseWALPath(shadowWALPath) - if err != nil { - return fmt.Errorf("cannot parse shadow wal filename: %s", shadowWALPath) - } - - // Start a new shadow WAL file with next index. - newShadowWALPath := filepath.Join(filepath.Dir(shadowWALPath), FormatWALPath(index+1)) - if _, err := db.initShadowWALFile(newShadowWALPath); err != nil { - return fmt.Errorf("cannot init shadow wal file: name=%s err=%w", newShadowWALPath, err) - } - - // Release write lock before checkpointing & exiting. - if err := tx.Rollback(); err != nil { - return fmt.Errorf("rollback post-checkpoint tx: %w", err) - } - return nil -} - // monitor runs in a separate goroutine and monitors the database & WAL. func (db *DB) monitor() { ticker := time.NewTicker(db.MonitorInterval) @@ -1403,189 +1382,12 @@ func (db *DB) monitor() { } } -// RestoreReplica restores the database from a replica based on the options given. -// This method will restore into opt.OutputPath, if specified, or into the -// DB's original database path. It can optionally restore from a specific -// replica or generation or it will automatically choose the best one. Finally, -// a timestamp can be specified to restore the database to a specific -// point-in-time. -func RestoreReplica(ctx context.Context, r Replica, opt RestoreOptions) (err error) { - // Validate options. - if opt.OutputPath == "" { - return fmt.Errorf("output path required") - } else if opt.Generation == "" && opt.Index != math.MaxInt32 { - return fmt.Errorf("must specify generation when restoring to index") - } else if opt.Index != math.MaxInt32 && !opt.Timestamp.IsZero() { - return fmt.Errorf("cannot specify index & timestamp to restore") - } - - // Ensure logger exists. - logger := opt.Logger - if logger == nil { - logger = log.New(ioutil.Discard, "", 0) - } - - logPrefix := r.Name() - if db := r.DB(); db != nil { - logPrefix = fmt.Sprintf("%s(%s)", db.Path(), r.Name()) - } - - // Ensure output path does not already exist. - if _, err := os.Stat(opt.OutputPath); err == nil { - return fmt.Errorf("cannot restore, output path already exists: %s", opt.OutputPath) - } else if err != nil && !os.IsNotExist(err) { - return err - } - - // Find lastest snapshot that occurs before timestamp or index. 
- var minWALIndex int
- if opt.Index < math.MaxInt32 {
- if minWALIndex, err = SnapshotIndexByIndex(ctx, r, opt.Generation, opt.Index); err != nil {
- return fmt.Errorf("cannot find snapshot index: %w", err)
- }
- } else {
- if minWALIndex, err = SnapshotIndexAt(ctx, r, opt.Generation, opt.Timestamp); err != nil {
- return fmt.Errorf("cannot find snapshot index by timestamp: %w", err)
- }
- }
-
- // Find the maximum WAL index that occurs before timestamp.
- maxWALIndex, err := WALIndexAt(ctx, r, opt.Generation, opt.Index, opt.Timestamp)
- if err != nil {
- return fmt.Errorf("cannot find max wal index for restore: %w", err)
- }
- snapshotOnly := maxWALIndex == -1
-
- // Initialize starting position.
- pos := Pos{Generation: opt.Generation, Index: minWALIndex}
- tmpPath := opt.OutputPath + ".tmp"
-
- // Copy snapshot to output path.
- logger.Printf("%s: restoring snapshot %s/%08x to %s", logPrefix, opt.Generation, minWALIndex, tmpPath)
- if err := restoreSnapshot(ctx, r, pos.Generation, pos.Index, tmpPath); err != nil {
- return fmt.Errorf("cannot restore snapshot: %w", err)
- }
-
- // If no WAL files available, move snapshot to final path & exit early.
- if snapshotOnly {
- logger.Printf("%s: snapshot only, finalizing database", logPrefix)
- return os.Rename(tmpPath, opt.OutputPath)
- }
-
- // Begin processing WAL files.
- logger.Printf("%s: restoring wal files: generation=%s index=[%08x,%08x]", logPrefix, opt.Generation, minWALIndex, maxWALIndex)
-
- // Fill input channel with all WAL indexes to be loaded in order.
- ch := make(chan int, maxWALIndex-minWALIndex+1)
- for index := minWALIndex; index <= maxWALIndex; index++ {
- ch <- index
- }
- close(ch)
-
- // Track load state for each WAL.
- var mu sync.Mutex
- cond := sync.NewCond(&mu)
- walStates := make([]walRestoreState, maxWALIndex-minWALIndex+1)
-
- parallelism := opt.Parallelism
- if parallelism < 1 {
- parallelism = 1
- }
-
- // Download WAL files to disk in parallel.
- g, ctx := errgroup.WithContext(ctx)
- for i := 0; i < parallelism; i++ {
- g.Go(func() error {
- for {
- select {
- case <-ctx.Done():
- cond.Broadcast()
- return err
- case index, ok := <-ch:
- if !ok {
- cond.Broadcast()
- return nil
- }
-
- startTime := time.Now()
-
- err := downloadWAL(ctx, r, opt.Generation, index, tmpPath)
- if err != nil {
- err = fmt.Errorf("cannot download wal %s/%08x: %w", opt.Generation, index, err)
- }
-
- // Mark index as ready-to-apply and notify applying code.
- mu.Lock()
- walStates[index-minWALIndex] = walRestoreState{ready: true, err: err}
- mu.Unlock()
- cond.Broadcast()
-
- // Returning the error here will cancel the other goroutines.
- if err != nil {
- return err
- }
-
- logger.Printf("%s: downloaded wal %s/%08x elapsed=%s",
- logPrefix, opt.Generation, index,
- time.Since(startTime).String(),
- )
- }
- }
- })
- }
-
- // Apply WAL files in order as they are ready.
- for index := minWALIndex; index <= maxWALIndex; index++ {
- // Wait until next WAL file is ready to apply.
- mu.Lock()
- for !walStates[index-minWALIndex].ready {
- if err := ctx.Err(); err != nil {
- return err
- }
- cond.Wait()
- }
- if err := walStates[index-minWALIndex].err; err != nil {
- return err
- }
- mu.Unlock()
-
- // Apply WAL to database file.
- startTime := time.Now()
- if err = applyWAL(ctx, index, tmpPath); err != nil {
- return fmt.Errorf("cannot apply wal: %w", err)
- }
- logger.Printf("%s: applied wal %s/%08x elapsed=%s",
- logPrefix, opt.Generation, index,
- time.Since(startTime).String(),
- )
- }
-
- // Ensure all goroutines finish. All errors should have been handled during
- // the processing of WAL files but this ensures that all processing is done.
- if err := g.Wait(); err != nil {
- return err
- }
-
- // Copy file to final location.
- logger.Printf("%s: renaming database from temporary location", logPrefix)
- if err := os.Rename(tmpPath, opt.OutputPath); err != nil {
- return err
- }
-
- return nil
-}
-
-type walRestoreState struct {
- ready bool
- err error
-}
-
 // CalcRestoreTarget returns a replica & generation to restore from based on opt criteria.
-func (db *DB) CalcRestoreTarget(ctx context.Context, opt RestoreOptions) (Replica, string, error) {
+func (db *DB) CalcRestoreTarget(ctx context.Context, opt RestoreOptions) (*Replica, string, error) {
 var target struct {
- replica Replica
+ replica *Replica
 generation string
- stats GenerationStats
+ updatedAt time.Time
 }

 for _, r := range db.Replicas {
@@ -1594,134 +1396,21 @@ func (db *DB) CalcRestoreTarget(ctx context.Context, opt RestoreOptions) (Replic
 continue
 }

- generation, stats, err := CalcReplicaRestoreTarget(ctx, r, opt)
+ generation, updatedAt, err := r.CalcRestoreTarget(ctx, opt)
 if err != nil {
 return nil, "", err
 }

 // Use the latest replica if we have multiple candidates.
- if !stats.UpdatedAt.After(target.stats.UpdatedAt) {
+ if !updatedAt.After(target.updatedAt) {
 continue
 }

- target.replica, target.generation, target.stats = r, generation, stats
+ target.replica, target.generation, target.updatedAt = r, generation, updatedAt
 }

 return target.replica, target.generation, nil
}

-// CalcReplicaRestoreTarget returns a generation to restore from.
-func CalcReplicaRestoreTarget(ctx context.Context, r Replica, opt RestoreOptions) (generation string, stats GenerationStats, err error) {
- var target struct {
- generation string
- stats GenerationStats
- }
-
- generations, err := r.Generations(ctx)
- if err != nil {
- return "", stats, fmt.Errorf("cannot fetch generations: %w", err)
- }
-
- // Search generations for one that contains the requested timestamp.
- for _, generation := range generations {
- // Skip generation if it does not match filter.
- if opt.Generation != "" && generation != opt.Generation {
- continue
- }
-
- // Fetch stats for generation.
- stats, err := r.GenerationStats(ctx, generation)
- if err != nil {
- return "", stats, fmt.Errorf("cannot determine stats for generation (%s/%s): %s", r.Name(), generation, err)
- }
-
- // Skip if it does not contain timestamp.
- if !opt.Timestamp.IsZero() {
- if opt.Timestamp.Before(stats.CreatedAt) || opt.Timestamp.After(stats.UpdatedAt) {
- continue
- }
- }
-
- // Use the latest replica if we have multiple candidates.
- if !stats.UpdatedAt.After(target.stats.UpdatedAt) {
- continue
- }
-
- target.generation = generation
- target.stats = stats
- }
-
- return target.generation, target.stats, nil
-}
-
-// restoreSnapshot copies a snapshot from the replica to a file.
-func restoreSnapshot(ctx context.Context, r Replica, generation string, index int, filename string) error {
- // Determine the user/group & mode based on the DB, if available. 
- uid, gid, mode := -1, -1, os.FileMode(0600) - diruid, dirgid, dirmode := -1, -1, os.FileMode(0700) - if db := r.DB(); db != nil { - uid, gid, mode = db.uid, db.gid, db.mode - diruid, dirgid, dirmode = db.diruid, db.dirgid, db.dirmode - } - - if err := mkdirAll(filepath.Dir(filename), dirmode, diruid, dirgid); err != nil { - return err - } - - f, err := createFile(filename, mode, uid, gid) - if err != nil { - return err - } - defer f.Close() - - rd, err := r.SnapshotReader(ctx, generation, index) - if err != nil { - return err - } - defer rd.Close() - - if _, err := io.Copy(f, rd); err != nil { - return err - } - - if err := f.Sync(); err != nil { - return err - } - return f.Close() -} - -// downloadWAL copies a WAL file from the replica to a local copy next to the DB. -// The WAL is later applied by applyWAL(). This function can be run in parallel -// to download multiple WAL files simultaneously. -func downloadWAL(ctx context.Context, r Replica, generation string, index int, dbPath string) error { - // Determine the user/group & mode based on the DB, if available. - uid, gid, mode := -1, -1, os.FileMode(0600) - if db := r.DB(); db != nil { - uid, gid, mode = db.uid, db.gid, db.mode - } - - // Open WAL file from replica. - rd, err := r.WALReader(ctx, generation, index) - if err != nil { - return err - } - defer rd.Close() - - // Open handle to destination WAL path. - f, err := createFile(fmt.Sprintf("%s-%08x-wal", dbPath, index), mode, uid, gid) - if err != nil { - return err - } - defer f.Close() - - // Copy WAL to target path. - if _, err := io.Copy(f, rd); err != nil { - return err - } else if err := f.Close(); err != nil { - return err - } - return nil -} - // applyWAL performs a truncating checkpoint on the given database. func applyWAL(ctx context.Context, index int, dbPath string) error { // Copy WAL file from it's staging path to the correct "-wal" location. @@ -1770,7 +1459,7 @@ func (db *DB) CRC64(ctx context.Context) (uint64, Pos, error) { } // Force a RESTART checkpoint to ensure the database is at the start of the WAL. - if err := db.checkpointAndInit(ctx, generation, CheckpointModeRestart); err != nil { + if err := db.checkpoint(ctx, generation, CheckpointModeRestart); err != nil { return 0, Pos{}, err } @@ -1836,80 +1525,58 @@ func NewRestoreOptions() RestoreOptions { // Database metrics. 
var ( dbSizeGaugeVec = promauto.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "litestream", - Subsystem: "db", - Name: "db_size", - Help: "The current size of the real DB", + Name: "litestream_db_size", + Help: "The current size of the real DB", }, []string{"db"}) walSizeGaugeVec = promauto.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "litestream", - Subsystem: "db", - Name: "wal_size", - Help: "The current size of the real WAL", + Name: "litestream_wal_size", + Help: "The current size of the real WAL", }, []string{"db"}) totalWALBytesCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "litestream", - Subsystem: "db", - Name: "total_wal_bytes", - Help: "Total number of bytes written to shadow WAL", + Name: "litestream_total_wal_bytes", + Help: "Total number of bytes written to shadow WAL", }, []string{"db"}) shadowWALIndexGaugeVec = promauto.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "litestream", - Subsystem: "db", - Name: "shadow_wal_index", - Help: "The current index of the shadow WAL", + Name: "litestream_shadow_wal_index", + Help: "The current index of the shadow WAL", }, []string{"db"}) shadowWALSizeGaugeVec = promauto.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "litestream", - Subsystem: "db", - Name: "shadow_wal_size", - Help: "Current size of shadow WAL, in bytes", + Name: "litestream_shadow_wal_size", + Help: "Current size of shadow WAL, in bytes", }, []string{"db"}) syncNCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "litestream", - Subsystem: "db", - Name: "sync_count", - Help: "Number of sync operations performed", + Name: "litestream_sync_count", + Help: "Number of sync operations performed", }, []string{"db"}) syncErrorNCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "litestream", - Subsystem: "db", - Name: "sync_error_count", - Help: "Number of sync errors that have occurred", + Name: "litestream_sync_error_count", + Help: "Number of sync errors that have occurred", }, []string{"db"}) syncSecondsCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "litestream", - Subsystem: "db", - Name: "sync_seconds", - Help: "Time spent syncing shadow WAL, in seconds", + Name: "litestream_sync_seconds", + Help: "Time spent syncing shadow WAL, in seconds", }, []string{"db"}) checkpointNCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "litestream", - Subsystem: "db", - Name: "checkpoint_count", - Help: "Number of checkpoint operations performed", + Name: "litestream_checkpoint_count", + Help: "Number of checkpoint operations performed", }, []string{"db", "mode"}) checkpointErrorNCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "litestream", - Subsystem: "db", - Name: "checkpoint_error_count", - Help: "Number of checkpoint errors that have occurred", + Name: "litestream_checkpoint_error_count", + Help: "Number of checkpoint errors that have occurred", }, []string{"db", "mode"}) checkpointSecondsCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "litestream", - Subsystem: "db", - Name: "checkpoint_seconds", - Help: "Time spent checkpointing WAL, in seconds", + Name: "litestream_checkpoint_seconds", + Help: "Time spent checkpointing WAL, in seconds", }, []string{"db", "mode"}) ) diff --git a/db_test.go b/db_test.go index 35b9951..b7eb54b 100644 --- a/db_test.go +++ b/db_test.go @@ -151,7 +151,7 @@ func TestDB_CRC64(t *testing.T) { } // Checkpoint change into database. Checksum should change. 
- if _, err := sqldb.Exec(`PRAGMA wal_checkpoint(TRUNCATE);`); err != nil { + if err := db.Checkpoint(context.Background(), litestream.CheckpointModeTruncate); err != nil { t.Fatal(err) } @@ -262,7 +262,7 @@ func TestDB_Sync(t *testing.T) { } // Checkpoint & fully close which should close WAL file. - if err := db.Checkpoint(litestream.CheckpointModeTruncate); err != nil { + if err := db.Checkpoint(context.Background(), litestream.CheckpointModeTruncate); err != nil { t.Fatal(err) } else if err := db.Close(); err != nil { t.Fatal(err) diff --git a/file/replica_client.go b/file/replica_client.go new file mode 100644 index 0000000..4254c46 --- /dev/null +++ b/file/replica_client.go @@ -0,0 +1,380 @@ +package file + +import ( + "context" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "sort" + + "github.com/benbjohnson/litestream" + "github.com/benbjohnson/litestream/internal" +) + +var _ litestream.ReplicaClient = (*ReplicaClient)(nil) + +// ReplicaClient is a client for writing snapshots & WAL segments to disk. +type ReplicaClient struct { + path string // destination path + + Replica *litestream.Replica +} + +// NewReplicaClient returns a new instance of ReplicaClient. +func NewReplicaClient(path string) *ReplicaClient { + return &ReplicaClient{ + path: path, + } +} + +// db returns the database, if available. +func (c *ReplicaClient) db() *litestream.DB{ + if c.Replica == nil { + return nil + } + return c.Replica.DB() +} + +// Type returns "file" as the client type. +func (c *ReplicaClient) Type() string { + return "file" +} + +// Path returns the destination path to replicate the database to. +func (c *ReplicaClient) Path() string { + return c.path +} + +// GenerationsDir returns the path to a generation root directory. +func (c *ReplicaClient) GenerationsDir() (string, error) { + if c.path == "" { + return "", fmt.Errorf("file replica path required") + } + return filepath.Join(c.path, "generations"), nil +} + +// GenerationDir returns the path to a generation's root directory. +func (c *ReplicaClient) GenerationDir(generation string) (string, error) { + dir, err := c.GenerationsDir() + if err != nil { + return "", err + } else if generation == "" { + return "", fmt.Errorf("generation required") + } + return filepath.Join(dir, generation), nil +} + +// SnapshotsDir returns the path to a generation's snapshot directory. +func (c *ReplicaClient) SnapshotsDir(generation string) (string, error) { + dir, err := c.GenerationDir(generation) + if err != nil { + return "", err + } + return filepath.Join(dir, "snapshots"), nil +} + +// SnapshotPath returns the path to an uncompressed snapshot file. +func (c *ReplicaClient) SnapshotPath(generation string, index int) (string, error) { + dir, err := c.SnapshotsDir(generation) + if err != nil { + return "", err + } + return filepath.Join(dir, litestream.FormatSnapshotPath(index)), nil +} + +// WALDir returns the path to a generation's WAL directory +func (c *ReplicaClient) WALDir(generation string) (string, error) { + dir, err := c.GenerationDir(generation) + if err != nil { + return "", err + } + return filepath.Join(dir, "wal"), nil +} + +// WALSegmentPath returns the path to a WAL segment file. +func (c *ReplicaClient) WALSegmentPath(generation string, index int, offset int64) (string, error) { + dir, err := c.WALDir(generation) + if err != nil { + return "", err + } + return filepath.Join(dir, litestream.FormatWALSegmentPath(index, offset)), nil +} + +// Generations returns a list of available generation names. 
+func (c *ReplicaClient) Generations(ctx context.Context) ([]string, error) { + root, err := c.GenerationsDir() + if err != nil { + return nil, fmt.Errorf("cannot determine generations path: %w", err) + } + + fis, err := ioutil.ReadDir(root) + if os.IsNotExist(err) { + return nil, nil + } else if err != nil { + return nil, err + } + + var generations []string + for _, fi := range fis { + if !litestream.IsGenerationName(fi.Name()) { + continue + } else if !fi.IsDir() { + continue + } + generations = append(generations, fi.Name()) + } + return generations, nil +} + +// DeleteGeneration deletes all snapshots & WAL segments within a generation. +func (c *ReplicaClient) DeleteGeneration(ctx context.Context, generation string) error { + dir, err := c.GenerationDir(generation) + if err != nil { + return fmt.Errorf("cannot determine generation directory: %w", err) + } + + if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) { + return err + } + return nil +} + +// Snapshots returns an iterator over all available snapshots for a generation. +func (c *ReplicaClient) Snapshots(ctx context.Context, generation string) (litestream.SnapshotIterator, error) { + dir, err := c.SnapshotsDir(generation) + if err != nil { + return nil, fmt.Errorf("cannot determine snapshot directory: %w", err) + } + + f, err := os.Open(dir) + if os.IsNotExist(err) { + return litestream.NewSnapshotInfoSliceIterator(nil), nil + } else if err != nil { + return nil, err + } + defer f.Close() + + fis, err := f.Readdir(-1) + if err != nil { + return nil, err + } + + // Iterate over every file and convert to metadata. + infos := make([]litestream.SnapshotInfo, 0, len(fis)) + for _, fi := range fis { + // Parse index from filename. + index, err := litestream.ParseSnapshotPath(fi.Name()) + if err != nil { + continue + } + + infos = append(infos, litestream.SnapshotInfo{ + Generation: generation, + Index: index, + Size: fi.Size(), + CreatedAt: fi.ModTime().UTC(), + }) + } + + sort.Sort(litestream.SnapshotInfoSlice(infos)) + + return litestream.NewSnapshotInfoSliceIterator(infos), nil +} + +// WriteSnapshot writes LZ4 compressed data from rd into a file on disk. +func (c *ReplicaClient) WriteSnapshot(ctx context.Context, generation string, index int, rd io.Reader) (info litestream.SnapshotInfo, err error) { + filename, err := c.SnapshotPath(generation, index) + if err != nil { + return info, fmt.Errorf("cannot determine snapshot path: %w", err) + } + + var fileInfo, dirInfo os.FileInfo + if db := c.db(); db != nil { + fileInfo, dirInfo = db.FileInfo(), db.DirInfo() + } + + + // Ensure parent directory exists. + if err := internal.MkdirAll(filepath.Dir(filename), dirInfo); err != nil { + return info, err + } + + // Write snapshot to temporary file next to destination path. + f, err := internal.CreateFile(filename+".tmp", fileInfo) + if err != nil { + return info, err + } + defer f.Close() + + if _, err := io.Copy(f, rd); err != nil { + return info, err + } else if err := f.Sync(); err != nil { + return info, err + } else if err := f.Close(); err != nil { + return info, err + } + + // Build metadata. + fi, err := os.Stat(filename + ".tmp") + if err != nil { + return info, err + } + info = litestream.SnapshotInfo{ + Generation: generation, + Index: index, + Size: fi.Size(), + CreatedAt: fi.ModTime().UTC(), + } + + // Move snapshot to final path when it has been fully written & synced to disk. 
+ if err := os.Rename(filename+".tmp", filename); err != nil { + return info, err + } + + return info, nil +} + +// SnapshotReader returns a reader for snapshot data at the given generation/index. +// Returns os.ErrNotExist if no matching index is found. +func (c *ReplicaClient) SnapshotReader(ctx context.Context, generation string, index int) (io.ReadCloser, error) { + filename, err := c.SnapshotPath(generation, index) + if err != nil { + return nil, fmt.Errorf("cannot determine snapshot path: %w", err) + } + return os.Open(filename) +} + +// DeleteSnapshot deletes a snapshot with the given generation & index. +func (c *ReplicaClient) DeleteSnapshot(ctx context.Context, generation string, index int) error { + filename, err := c.SnapshotPath(generation, index) + if err != nil { + return fmt.Errorf("cannot determine snapshot path: %w", err) + } + if err := os.Remove(filename); err != nil && !os.IsNotExist(err) { + return err + } + return nil +} + +// WALSegments returns an iterator over all available WAL files for a generation. +func (c *ReplicaClient) WALSegments(ctx context.Context, generation string) (litestream.WALSegmentIterator, error) { + dir, err := c.WALDir(generation) + if err != nil { + return nil, fmt.Errorf("cannot determine wal directory: %w", err) + } + + f, err := os.Open(dir) + if os.IsNotExist(err) { + return litestream.NewWALSegmentInfoSliceIterator(nil), nil + } else if err != nil { + return nil, err + } + defer f.Close() + + fis, err := f.Readdir(-1) + if err != nil { + return nil, err + } + + // Iterate over every file and convert to metadata. + infos := make([]litestream.WALSegmentInfo, 0, len(fis)) + for _, fi := range fis { + // Parse index from filename. + index, offset, err := litestream.ParseWALSegmentPath(fi.Name()) + if err != nil { + continue + } + + infos = append(infos, litestream.WALSegmentInfo{ + Generation: generation, + Index: index, + Offset: offset, + Size: fi.Size(), + CreatedAt: fi.ModTime().UTC(), + }) + } + + sort.Sort(litestream.WALSegmentInfoSlice(infos)) + + return litestream.NewWALSegmentInfoSliceIterator(infos), nil +} + +// WriteWALSegment writes LZ4 compressed data from rd into a file on disk. +func (c *ReplicaClient) WriteWALSegment(ctx context.Context, pos litestream.Pos, rd io.Reader) (info litestream.WALSegmentInfo, err error) { + filename, err := c.WALSegmentPath(pos.Generation, pos.Index, pos.Offset) + if err != nil { + return info, fmt.Errorf("cannot determine wal segment path: %w", err) + } + + var fileInfo, dirInfo os.FileInfo + if db := c.db(); db != nil { + fileInfo, dirInfo = db.FileInfo(), db.DirInfo() + } + + // Ensure parent directory exists. + if err := internal.MkdirAll(filepath.Dir(filename), dirInfo); err != nil { + return info, err + } + + // Write WAL segment to temporary file next to destination path. + f, err := internal.CreateFile(filename+".tmp", fileInfo) + if err != nil { + return info, err + } + defer f.Close() + + if _, err := io.Copy(f, rd); err != nil { + return info, err + } else if err := f.Sync(); err != nil { + return info, err + } else if err := f.Close(); err != nil { + return info, err + } + + // Build metadata. + fi, err := os.Stat(filename + ".tmp") + if err != nil { + return info, err + } + info = litestream.WALSegmentInfo{ + Generation: pos.Generation, + Index: pos.Index, + Offset: pos.Offset, + Size: fi.Size(), + CreatedAt: fi.ModTime().UTC(), + } + + // Move WAL segment to final path when it has been written & synced to disk. 
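+ // (A leftover ".tmp" file from a crash is harmless: the next attempt
+ // re-creates it with O_TRUNC via internal.CreateFile and overwrites it
+ // before renaming.)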
+ if err := os.Rename(filename+".tmp", filename); err != nil { + return info, err + } + + return info, nil +} + +// WALSegmentReader returns a reader for a section of WAL data at the given position. +// Returns os.ErrNotExist if no matching index/offset is found. +func (c *ReplicaClient) WALSegmentReader(ctx context.Context, pos litestream.Pos) (io.ReadCloser, error) { + filename, err := c.WALSegmentPath(pos.Generation, pos.Index, pos.Offset) + if err != nil { + return nil, fmt.Errorf("cannot determine wal segment path: %w", err) + } + return os.Open(filename) +} + +// DeleteWALSegments deletes WAL segments at the given positions. +func (c *ReplicaClient) DeleteWALSegments(ctx context.Context, a []litestream.Pos) error { + for _, pos := range a { + filename, err := c.WALSegmentPath(pos.Generation, pos.Index, pos.Offset) + if err != nil { + return fmt.Errorf("cannot determine wal segment path: %w", err) + } + if err := os.Remove(filename); err != nil && !os.IsNotExist(err) { + return err + } + } + return nil +} + diff --git a/file/replica_client_test.go b/file/replica_client_test.go new file mode 100644 index 0000000..0d24979 --- /dev/null +++ b/file/replica_client_test.go @@ -0,0 +1,680 @@ +package file_test + +import ( + "bytes" + "context" + "io/ioutil" + "os" + "path/filepath" + "reflect" + "sort" + "testing" + + "github.com/benbjohnson/litestream" + "github.com/benbjohnson/litestream/file" + "github.com/pierrec/lz4/v4" +) + +func TestReplicaClient_Path(t *testing.T) { + c := file.NewReplicaClient("/foo/bar") + if got, want := c.Path(), "/foo/bar"; got != want { + t.Fatalf("Path()=%v, want %v", got, want) + } +} + +func TestReplicaClient_Type(t *testing.T) { + if got, want := file.NewReplicaClient("").Type(), "file"; got != want { + t.Fatalf("Type()=%v, want %v", got, want) + } +} + +func TestReplicaClient_GenerationsDir(t *testing.T) { + t.Run("OK", func(t *testing.T) { + if got, err := file.NewReplicaClient("/foo").GenerationsDir(); err != nil { + t.Fatal(err) + } else if want := "/foo/generations"; got != want { + t.Fatalf("GenerationsDir()=%v, want %v", got, want) + } + }) + t.Run("ErrNoPath", func(t *testing.T) { + if _, err := file.NewReplicaClient("").GenerationsDir(); err == nil || err.Error() != `file replica path required` { + t.Fatalf("unexpected error: %v", err) + } + }) +} + +func TestReplicaClient_GenerationDir(t *testing.T) { + t.Run("OK", func(t *testing.T) { + if got, err := file.NewReplicaClient("/foo").GenerationDir("0123456701234567"); err != nil { + t.Fatal(err) + } else if want := "/foo/generations/0123456701234567"; got != want { + t.Fatalf("GenerationDir()=%v, want %v", got, want) + } + }) + t.Run("ErrNoPath", func(t *testing.T) { + if _, err := file.NewReplicaClient("").GenerationDir("0123456701234567"); err == nil || err.Error() != `file replica path required` { + t.Fatalf("expected error: %v", err) + } + }) + t.Run("ErrNoGeneration", func(t *testing.T) { + if _, err := file.NewReplicaClient("/foo").GenerationDir(""); err == nil || err.Error() != `generation required` { + t.Fatalf("expected error: %v", err) + } + }) +} + +func TestReplicaClient_SnapshotsDir(t *testing.T) { + t.Run("OK", func(t *testing.T) { + if got, err := file.NewReplicaClient("/foo").SnapshotsDir("0123456701234567"); err != nil { + t.Fatal(err) + } else if want := "/foo/generations/0123456701234567/snapshots"; got != want { + t.Fatalf("SnapshotsDir()=%v, want %v", got, want) + } + }) + t.Run("ErrNoPath", func(t *testing.T) { + if _, err := 
file.NewReplicaClient("").SnapshotsDir("0123456701234567"); err == nil || err.Error() != `file replica path required` { + t.Fatalf("unexpected error: %v", err) + } + }) + t.Run("ErrNoGeneration", func(t *testing.T) { + if _, err := file.NewReplicaClient("/foo").SnapshotsDir(""); err == nil || err.Error() != `generation required` { + t.Fatalf("unexpected error: %v", err) + } + }) +} + +func TestReplicaClient_SnapshotPath(t *testing.T) { + t.Run("OK", func(t *testing.T) { + if got, err := file.NewReplicaClient("/foo").SnapshotPath("0123456701234567", 1000); err != nil { + t.Fatal(err) + } else if want := "/foo/generations/0123456701234567/snapshots/000003e8.snapshot.lz4"; got != want { + t.Fatalf("SnapshotPath()=%v, want %v", got, want) + } + }) + t.Run("ErrNoPath", func(t *testing.T) { + if _, err := file.NewReplicaClient("").SnapshotPath("0123456701234567", 1000); err == nil || err.Error() != `file replica path required` { + t.Fatalf("unexpected error: %v", err) + } + }) + t.Run("ErrNoGeneration", func(t *testing.T) { + if _, err := file.NewReplicaClient("/foo").SnapshotPath("", 1000); err == nil || err.Error() != `generation required` { + t.Fatalf("unexpected error: %v", err) + } + }) +} + +func TestReplicaClient_WALDir(t *testing.T) { + t.Run("OK", func(t *testing.T) { + if got, err := file.NewReplicaClient("/foo").WALDir("0123456701234567"); err != nil { + t.Fatal(err) + } else if want := "/foo/generations/0123456701234567/wal"; got != want { + t.Fatalf("WALDir()=%v, want %v", got, want) + } + }) + t.Run("ErrNoPath", func(t *testing.T) { + if _, err := file.NewReplicaClient("").WALDir("0123456701234567"); err == nil || err.Error() != `file replica path required` { + t.Fatalf("unexpected error: %v", err) + } + }) + t.Run("ErrNoGeneration", func(t *testing.T) { + if _, err := file.NewReplicaClient("/foo").WALDir(""); err == nil || err.Error() != `generation required` { + t.Fatalf("unexpected error: %v", err) + } + }) +} + +func TestReplicaClient_WALSegmentPath(t *testing.T) { + t.Run("OK", func(t *testing.T) { + if got, err := file.NewReplicaClient("/foo").WALSegmentPath("0123456701234567", 1000, 1001); err != nil { + t.Fatal(err) + } else if want := "/foo/generations/0123456701234567/wal/000003e8_000003e9.wal.lz4"; got != want { + t.Fatalf("WALPath()=%v, want %v", got, want) + } + }) + t.Run("ErrNoPath", func(t *testing.T) { + if _, err := file.NewReplicaClient("").WALSegmentPath("0123456701234567", 1000, 0); err == nil || err.Error() != `file replica path required` { + t.Fatalf("unexpected error: %v", err) + } + }) + t.Run("ErrNoGeneration", func(t *testing.T) { + if _, err := file.NewReplicaClient("/foo").WALSegmentPath("", 1000, 0); err == nil || err.Error() != `generation required` { + t.Fatalf("unexpected error: %v", err) + } + }) +} + +func TestReplicaClient_Generations(t *testing.T) { + t.Run("OK", func(t *testing.T) { + dir := t.TempDir() + if err := os.MkdirAll(filepath.Join(dir, "generations", "5efbd8d042012dca"), 0777); err != nil { + t.Fatal(err) + } else if err := os.MkdirAll(filepath.Join(dir, "generations", "b16ddcf5c697540f"), 0777); err != nil { + t.Fatal(err) + } else if err := os.MkdirAll(filepath.Join(dir, "generations", "155fe292f8333c72"), 0777); err != nil { + t.Fatal(err) + } + + c := file.NewReplicaClient(dir) + if got, err := c.Generations(context.Background()); err != nil { + t.Fatal(err) + } else if want := []string{"155fe292f8333c72", "5efbd8d042012dca", "b16ddcf5c697540f"}; !reflect.DeepEqual(got, want) { + t.Fatalf("Generations()=%v, want %v", got, want) + 
} + }) + + t.Run("WithInvalidEntries", func(t *testing.T) { + dir := t.TempDir() + if err := os.MkdirAll(filepath.Join(dir, "generations", "5efbd8d042012dca"), 0777); err != nil { + t.Fatal(err) + } else if err := os.MkdirAll(filepath.Join(dir, "generations", "not_a_generation"), 0777); err != nil { + t.Fatal(err) + } else if err := ioutil.WriteFile(filepath.Join(dir, "generations", "0000000000000000"), nil, 0666); err != nil { + t.Fatal(err) + } + + c := file.NewReplicaClient(dir) + if got, err := c.Generations(context.Background()); err != nil { + t.Fatal(err) + } else if want := []string{"5efbd8d042012dca"}; !reflect.DeepEqual(got, want) { + t.Fatalf("Generations()=%v, want %v", got, want) + } + }) + + t.Run("NoGenerationsDir", func(t *testing.T) { + c := file.NewReplicaClient(t.TempDir()) + if generations, err := c.Generations(context.Background()); err != nil { + t.Fatal(err) + } else if got, want := len(generations), 0; got != want { + t.Fatalf("len(Generations())=%v, want %v", got, want) + } + }) + + t.Run("ErrNoPath", func(t *testing.T) { + if _, err := file.NewReplicaClient("").Generations(context.Background()); err == nil || err.Error() != `cannot determine generations path: file replica path required` { + t.Fatalf("unexpected error: %v", err) + } + }) +} + +func TestReplicaClient_Snapshots(t *testing.T) { + t.Run("OK", func(t *testing.T) { + dir := t.TempDir() + if err := os.MkdirAll(filepath.Join(dir, "generations", "5efbd8d042012dca", "snapshots"), 0777); err != nil { + t.Fatal(err) + } else if err := ioutil.WriteFile(filepath.Join(dir, "generations", "5efbd8d042012dca", "snapshots", "00000001.snapshot.lz4"), nil, 0666); err != nil { + t.Fatal(err) + } + + if err := os.MkdirAll(filepath.Join(dir, "generations", "b16ddcf5c697540f", "snapshots"), 0777); err != nil { + t.Fatal(err) + } else if err := ioutil.WriteFile(filepath.Join(dir, "generations", "b16ddcf5c697540f", "snapshots", "00000005.snapshot.lz4"), []byte("x"), 0666); err != nil { + t.Fatal(err) + } else if err := ioutil.WriteFile(filepath.Join(dir, "generations", "b16ddcf5c697540f", "snapshots", "0000000a.snapshot.lz4"), []byte("xyz"), 0666); err != nil { + t.Fatal(err) + } else if err := ioutil.WriteFile(filepath.Join(dir, "generations", "b16ddcf5c697540f", "snapshots", "not_a_snapshot.snapshot.lz4"), nil, 0666); err != nil { + t.Fatal(err) + } + + c := file.NewReplicaClient(dir) + itr, err := c.Snapshots(context.Background(), "b16ddcf5c697540f") + if err != nil { + t.Fatal(err) + } + defer itr.Close() + + // Read all snapshots into a slice so they can be sorted. + a, err := litestream.SliceSnapshotIterator(itr) + if err != nil { + t.Fatal(err) + } else if got, want := len(a), 2; got != want { + t.Fatalf("len=%v, want %v", got, want) + } + sort.Sort(litestream.SnapshotInfoSlice(a)) + + // Verify first snapshot metadata. + if got, want := a[0].Generation, "b16ddcf5c697540f"; got != want { + t.Fatalf("Generation=%v, want %v", got, want) + } else if got, want := a[0].Index, 5; got != want { + t.Fatalf("Index=%v, want %v", got, want) + } else if got, want := a[0].Size, int64(1); got != want { + t.Fatalf("Size=%v, want %v", got, want) + } else if a[0].CreatedAt.IsZero() { + t.Fatalf("expected CreatedAt") + } + + // Verify second snapshot metadata. 
+ if got, want := a[1].Generation, "b16ddcf5c697540f"; got != want { + t.Fatalf("Generation=%v, want %v", got, want) + } else if got, want := a[1].Index, 0xA; got != want { + t.Fatalf("Index=%v, want %v", got, want) + } else if got, want := a[1].Size, int64(3); got != want { + t.Fatalf("Size=%v, want %v", got, want) + } else if a[1].CreatedAt.IsZero() { + t.Fatalf("expected CreatedAt") + } + + // Ensure close is clean. + if err := itr.Close(); err != nil { + t.Fatal(err) + } + }) + + t.Run("NoGenerationDir", func(t *testing.T) { + c := file.NewReplicaClient(t.TempDir()) + itr, err := c.Snapshots(context.Background(), "5efbd8d042012dca") + if err != nil { + t.Fatal(err) + } + defer itr.Close() + + if itr.Next() { + t.Fatal("expected no snapshots") + } + }) + + t.Run("NoSnapshots", func(t *testing.T) { + dir := t.TempDir() + if err := os.MkdirAll(filepath.Join(dir, "generations", "5efbd8d042012dca", "snapshots"), 0777); err != nil { + t.Fatal(err) + } + + c := file.NewReplicaClient(dir) + itr, err := c.Snapshots(context.Background(), "5efbd8d042012dca") + if err != nil { + t.Fatal(err) + } + defer itr.Close() + + if itr.Next() { + t.Fatal("expected no snapshots") + } + }) + + t.Run("ErrNoPath", func(t *testing.T) { + if _, err := file.NewReplicaClient("").Snapshots(context.Background(), "b16ddcf5c697540f"); err == nil || err.Error() != `cannot determine snapshot directory: file replica path required` { + t.Fatalf("unexpected error: %v", err) + } + }) + t.Run("ErrNoGeneration", func(t *testing.T) { + if _, err := file.NewReplicaClient(t.TempDir()).Snapshots(context.Background(), ""); err == nil || err.Error() != `cannot determine snapshot directory: generation required` { + t.Fatalf("unexpected error: %v", err) + } + }) +} + +func TestReplicaClient_WriteSnapshot(t *testing.T) { + t.Run("OK", func(t *testing.T) { + data := []byte("foobar") + + c := file.NewReplicaClient(t.TempDir()) + if _, err := c.WriteSnapshot(context.Background(), "b16ddcf5c697540f", 1000, bytes.NewReader(compress(t, data))); err != nil { + t.Fatal(err) + } + + if r, err := c.SnapshotReader(context.Background(), "b16ddcf5c697540f", 1000); err != nil { + t.Fatal(err) + } else if buf, err := ioutil.ReadAll(lz4.NewReader(r)); err != nil { + t.Fatal(err) + } else if err := r.Close(); err != nil { + t.Fatal(err) + } else if got, want := buf, data; !bytes.Equal(got, want) { + t.Fatalf("data=%q, want %q", got, want) + } + }) + + t.Run("ErrNoPath", func(t *testing.T) { + if _, err := file.NewReplicaClient("").WriteSnapshot(context.Background(), "b16ddcf5c697540f", 0, nil); err == nil || err.Error() != `cannot determine snapshot path: file replica path required` { + t.Fatalf("unexpected error: %v", err) + } + }) + t.Run("ErrNoGeneration", func(t *testing.T) { + if _, err := file.NewReplicaClient(t.TempDir()).WriteSnapshot(context.Background(), "", 0, nil); err == nil || err.Error() != `cannot determine snapshot path: generation required` { + t.Fatalf("unexpected error: %v", err) + } + }) +} + +func TestReplicaClient_SnapshotReader(t *testing.T) { + t.Run("OK", func(t *testing.T) { + dir := t.TempDir() + if err := os.MkdirAll(filepath.Join(dir, "generations", "5efbd8d042012dca", "snapshots"), 0777); err != nil { + t.Fatal(err) + } else if err := ioutil.WriteFile(filepath.Join(dir, "generations", "5efbd8d042012dca", "snapshots", "0000000a.snapshot.lz4"), compress(t, []byte("foo")), 0666); err != nil { + t.Fatal(err) + } + + c := file.NewReplicaClient(dir) + r, err := c.SnapshotReader(context.Background(), "5efbd8d042012dca", 10) + if 
err != nil {
+ t.Fatal(err)
+ }
+ defer r.Close()
+
+ if buf, err := ioutil.ReadAll(lz4.NewReader(r)); err != nil {
+ t.Fatal(err)
+ } else if got, want := buf, []byte("foo"); !bytes.Equal(got, want) {
+ t.Fatalf("ReadAll=%v, want %v", got, want)
+ }
+ })
+
+ t.Run("ErrNotFound", func(t *testing.T) {
+ dir := t.TempDir()
+ if err := os.MkdirAll(filepath.Join(dir, "generations", "5efbd8d042012dca", "snapshots"), 0777); err != nil {
+ t.Fatal(err)
+ }
+
+ c := file.NewReplicaClient(dir)
+ if _, err := c.SnapshotReader(context.Background(), "5efbd8d042012dca", 1); !os.IsNotExist(err) {
+ t.Fatalf("expected not exist, got %#v", err)
+ }
+ })
+
+ t.Run("ErrNoPath", func(t *testing.T) {
+ c := file.NewReplicaClient("")
+ if _, err := c.SnapshotReader(context.Background(), "5efbd8d042012dca", 1); err == nil || err.Error() != `cannot determine snapshot path: file replica path required` {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ })
+
+ t.Run("ErrGeneration", func(t *testing.T) {
+ dir := t.TempDir()
+ c := file.NewReplicaClient(dir)
+ if _, err := c.SnapshotReader(context.Background(), "", 1); err == nil || err.Error() != `cannot determine snapshot path: generation required` {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ })
+}
+
+func TestReplicaClient_WALs(t *testing.T) {
+ t.Run("OK", func(t *testing.T) {
+ dir := t.TempDir()
+ if err := os.MkdirAll(filepath.Join(dir, "generations", "5efbd8d042012dca", "wal"), 0777); err != nil {
+ t.Fatal(err)
+ } else if err := ioutil.WriteFile(filepath.Join(dir, "generations", "5efbd8d042012dca", "wal", "00000001_00000000.wal.lz4"), nil, 0666); err != nil {
+ t.Fatal(err)
+ }
+
+ if err := os.MkdirAll(filepath.Join(dir, "generations", "b16ddcf5c697540f", "wal"), 0777); err != nil {
+ t.Fatal(err)
+ } else if err := ioutil.WriteFile(filepath.Join(dir, "generations", "b16ddcf5c697540f", "wal", "00000002_00000000.wal.lz4"), []byte("12345"), 0666); err != nil {
+ t.Fatal(err)
+ } else if err := ioutil.WriteFile(filepath.Join(dir, "generations", "b16ddcf5c697540f", "wal", "00000002_00000005.wal.lz4"), []byte("67"), 0666); err != nil {
+ t.Fatal(err)
+ } else if err := ioutil.WriteFile(filepath.Join(dir, "generations", "b16ddcf5c697540f", "wal", "00000003_00000000.wal.lz4"), []byte("xyz"), 0666); err != nil {
+ t.Fatal(err)
+ }
+
+ c := file.NewReplicaClient(dir)
+ itr, err := c.WALSegments(context.Background(), "b16ddcf5c697540f")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer itr.Close()
+
+ // Read all WAL segment files into a slice so they can be sorted.
+ a, err := litestream.SliceWALSegmentIterator(itr)
+ if err != nil {
+ t.Fatal(err)
+ } else if got, want := len(a), 3; got != want {
+ t.Fatalf("len=%v, want %v", got, want)
+ }
+ sort.Sort(litestream.WALSegmentInfoSlice(a))
+
+ // Verify first WAL segment metadata.
+ if got, want := a[0].Generation, "b16ddcf5c697540f"; got != want {
+ t.Fatalf("Generation=%v, want %v", got, want)
+ } else if got, want := a[0].Index, 2; got != want {
+ t.Fatalf("Index=%v, want %v", got, want)
+ } else if got, want := a[0].Offset, int64(0); got != want {
+ t.Fatalf("Offset=%v, want %v", got, want)
+ } else if got, want := a[0].Size, int64(5); got != want {
+ t.Fatalf("Size=%v, want %v", got, want)
+ } else if a[0].CreatedAt.IsZero() {
+ t.Fatalf("expected CreatedAt")
+ }
+
+ // Verify second WAL segment metadata.
+ if got, want := a[1].Generation, "b16ddcf5c697540f"; got != want {
+ t.Fatalf("Generation=%v, want %v", got, want)
+ } else if got, want := a[1].Index, 2; got != want {
+ t.Fatalf("Index=%v, want %v", got, want)
+ } else if got, want := a[1].Offset, int64(5); got != want {
+ t.Fatalf("Offset=%v, want %v", got, want)
+ } else if got, want := a[1].Size, int64(2); got != want {
+ t.Fatalf("Size=%v, want %v", got, want)
+ } else if a[1].CreatedAt.IsZero() {
+ t.Fatalf("expected CreatedAt")
+ }
+
+ // Verify third WAL segment metadata.
+ if got, want := a[2].Generation, "b16ddcf5c697540f"; got != want {
+ t.Fatalf("Generation=%v, want %v", got, want)
+ } else if got, want := a[2].Index, 3; got != want {
+ t.Fatalf("Index=%v, want %v", got, want)
+ } else if got, want := a[2].Offset, int64(0); got != want {
+ t.Fatalf("Offset=%v, want %v", got, want)
+ } else if got, want := a[2].Size, int64(3); got != want {
+ t.Fatalf("Size=%v, want %v", got, want)
+ } else if a[2].CreatedAt.IsZero() {
+ t.Fatalf("expected CreatedAt")
+ }
+
+ // Ensure close is clean.
+ if err := itr.Close(); err != nil {
+ t.Fatal(err)
+ }
+ })
+
+ t.Run("NoGenerationDir", func(t *testing.T) {
+ c := file.NewReplicaClient(t.TempDir())
+ itr, err := c.WALSegments(context.Background(), "5efbd8d042012dca")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer itr.Close()
+
+ if itr.Next() {
+ t.Fatal("expected no wal files")
+ }
+ })
+
+ t.Run("NoWALs", func(t *testing.T) {
+ dir := t.TempDir()
+ if err := os.MkdirAll(filepath.Join(dir, "generations", "5efbd8d042012dca", "wal"), 0777); err != nil {
+ t.Fatal(err)
+ }
+
+ c := file.NewReplicaClient(dir)
+ itr, err := c.WALSegments(context.Background(), "5efbd8d042012dca")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer itr.Close()
+
+ if itr.Next() {
+ t.Fatal("expected no wal files")
+ }
+ })
+
+ t.Run("ErrNoPath", func(t *testing.T) {
+ if _, err := file.NewReplicaClient("").WALSegments(context.Background(), "b16ddcf5c697540f"); err == nil || err.Error() != `cannot determine wal directory: file replica path required` {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ })
+ t.Run("ErrNoGeneration", func(t *testing.T) {
+ if _, err := file.NewReplicaClient(t.TempDir()).WALSegments(context.Background(), ""); err == nil || err.Error() != `cannot determine wal directory: generation required` {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ })
+}
+
+func TestReplicaClient_WriteWALSegment(t *testing.T) {
+ t.Run("OK", func(t *testing.T) {
+ data := []byte("foobar")
+
+ c := file.NewReplicaClient(t.TempDir())
+ if _, err := c.WriteWALSegment(context.Background(), litestream.Pos{Generation: "b16ddcf5c697540f", Index: 1000, Offset: 2000}, bytes.NewReader(compress(t, data))); err != nil {
+ t.Fatal(err)
+ }
+
+ if r, err := c.WALSegmentReader(context.Background(), litestream.Pos{Generation: "b16ddcf5c697540f", Index: 1000, Offset: 2000}); err != nil {
+ t.Fatal(err)
+ } else if buf, err := ioutil.ReadAll(lz4.NewReader(r)); err != nil {
+ t.Fatal(err)
+ } else if err := r.Close(); err != nil {
+ t.Fatal(err)
+ } else if got, want := buf, data; !bytes.Equal(got, want) {
+ t.Fatalf("data=%q, want %q", got, want)
+ }
+ })
+
+ t.Run("ErrNoPath", func(t *testing.T) {
+ if _, err := file.NewReplicaClient("").WriteWALSegment(context.Background(), litestream.Pos{Generation: "b16ddcf5c697540f", Index: 0, Offset: 0}, nil); err == nil || err.Error() != `cannot determine wal segment path: file replica path required` {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ })
+ t.Run("ErrNoGeneration", func(t *testing.T) {
+ if _, err := file.NewReplicaClient(t.TempDir()).WriteWALSegment(context.Background(), litestream.Pos{Generation: "", Index: 0, Offset: 0}, nil); err == nil || err.Error() != `cannot determine wal segment path: generation required` {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ })
+}
+
+func TestReplicaClient_WALReader(t *testing.T) {
+ dir := t.TempDir()
+ if err := os.MkdirAll(filepath.Join(dir, "generations", "5efbd8d042012dca", "wal"), 0777); err != nil {
+ t.Fatal(err)
+ } else if err := ioutil.WriteFile(filepath.Join(dir, "generations", "5efbd8d042012dca", "wal", "0000000a_00000005.wal.lz4"), compress(t, []byte("foobar")), 0666); err != nil {
+ t.Fatal(err)
+ }
+
+ c := file.NewReplicaClient(dir)
+
+ t.Run("OK", func(t *testing.T) {
+ r, err := c.WALSegmentReader(context.Background(), litestream.Pos{Generation: "5efbd8d042012dca", Index: 10, Offset: 5})
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer r.Close()
+
+ if buf, err := ioutil.ReadAll(lz4.NewReader(r)); err != nil {
+ t.Fatal(err)
+ } else if got, want := buf, []byte("foobar"); !bytes.Equal(got, want) {
+ t.Fatalf("ReadAll=%v, want %v", got, want)
+ }
+ })
+
+ t.Run("ErrNotFound", func(t *testing.T) {
+ dir := t.TempDir()
+ if err := os.MkdirAll(filepath.Join(dir, "generations", "5efbd8d042012dca", "wal"), 0777); err != nil {
+ t.Fatal(err)
+ }
+
+ c := file.NewReplicaClient(dir)
+ if _, err := c.WALSegmentReader(context.Background(), litestream.Pos{Generation: "5efbd8d042012dca", Index: 1, Offset: 0}); !os.IsNotExist(err) {
+ t.Fatalf("expected not exist, got %#v", err)
+ }
+ })
+}
+
+/*
+func TestReplica_Sync(t *testing.T) {
+ // Ensure replica can successfully sync after DB has sync'd.
+ t.Run("InitialSync", func(t *testing.T) {
+ db, sqldb := MustOpenDBs(t)
+ defer MustCloseDBs(t, db, sqldb)
+
+ r := litestream.NewReplica(db, "", file.NewReplicaClient(t.TempDir()))
+ r.MonitorEnabled = false
+ db.Replicas = []*litestream.Replica{r}
+
+ // Sync database & then sync replica.
+ if err := db.Sync(context.Background()); err != nil {
+ t.Fatal(err)
+ } else if err := r.Sync(context.Background()); err != nil {
+ t.Fatal(err)
+ }
+
+ // Ensure positions match.
+ if want, err := db.Pos(); err != nil {
+ t.Fatal(err)
+ } else if got, err := r.Pos(context.Background()); err != nil {
+ t.Fatal(err)
+ } else if got != want {
+ t.Fatalf("Pos()=%v, want %v", got, want)
+ }
+ })
+
+ // Ensure replica can successfully sync multiple times.
+ t.Run("MultiSync", func(t *testing.T) {
+ db, sqldb := MustOpenDBs(t)
+ defer MustCloseDBs(t, db, sqldb)
+
+ r := litestream.NewReplica(db, "", file.NewReplicaClient(t.TempDir()))
+ r.MonitorEnabled = false
+ db.Replicas = []*litestream.Replica{r}
+
+ if _, err := sqldb.Exec(`CREATE TABLE foo (bar TEXT);`); err != nil {
+ t.Fatal(err)
+ }
+
+ // Write to the database multiple times and sync after each write.
+ for i, n := 0, db.MinCheckpointPageN*2; i < n; i++ {
+ if _, err := sqldb.Exec(`INSERT INTO foo (bar) VALUES ('baz')`); err != nil {
+ t.Fatal(err)
+ }
+
+ // Sync periodically.
+ if i%100 == 0 || i == n-1 {
+ if err := db.Sync(context.Background()); err != nil {
+ t.Fatal(err)
+ } else if err := r.Sync(context.Background()); err != nil {
+ t.Fatal(err)
+ }
+ }
+ }
+
+ // Ensure positions match.
+ pos, err := db.Pos() + if err != nil { + t.Fatal(err) + } else if got, want := pos.Index, 2; got != want { + t.Fatalf("Index=%v, want %v", got, want) + } + + if want, err := r.Pos(context.Background()); err != nil { + t.Fatal(err) + } else if got := pos; got != want { + t.Fatalf("Pos()=%v, want %v", got, want) + } + }) + + // Ensure replica returns an error if there is no generation available from the DB. + t.Run("ErrNoGeneration", func(t *testing.T) { + db, sqldb := MustOpenDBs(t) + defer MustCloseDBs(t, db, sqldb) + + r := litestream.NewReplica(db, "", file.NewReplicaClient(t.TempDir())) + r.MonitorEnabled = false + db.Replicas = []*litestream.Replica{r} + + if err := r.Sync(context.Background()); err == nil || err.Error() != `no generation, waiting for data` { + t.Fatal(err) + } + }) +} +*/ + +// compress compresses b using LZ4. +func compress(tb testing.TB, b []byte) []byte { + tb.Helper() + + var buf bytes.Buffer + zw := lz4.NewWriter(&buf) + if _, err := zw.Write(b); err != nil { + tb.Fatal(err) + } else if err := zw.Close(); err != nil { + tb.Fatal(err) + } + return buf.Bytes() +} diff --git a/go.mod b/go.mod index 8c21364..5413de6 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/benbjohnson/litestream -go 1.15 +go 1.16 require ( github.com/aws/aws-sdk-go v1.27.0 diff --git a/internal/internal.go b/internal/internal.go index 45b7d8c..4b4febc 100644 --- a/internal/internal.go +++ b/internal/internal.go @@ -2,6 +2,8 @@ package internal import ( "io" + "os" + "syscall" ) // ReadCloser wraps a reader to also attach a separate closer. @@ -30,3 +32,95 @@ func (r *ReadCloser) Close() error { } return r.c.Close() } + +// ReadCounter wraps an io.Reader and counts the total number of bytes read. +type ReadCounter struct { + r io.Reader + n int64 +} + +// NewReadCounter returns a new instance of ReadCounter that wraps r. +func NewReadCounter(r io.Reader) *ReadCounter { + return &ReadCounter{r: r} +} + +// Read reads from the underlying reader into p and adds the bytes read to the counter. +func (r *ReadCounter) Read(p []byte) (int, error) { + n, err := r.r.Read(p) + r.n += int64(n) + return n, err +} + +// N returns the total number of bytes read. +func (r *ReadCounter) N() int64 { return r.n } + +// CreateFile creates the file and matches the mode & uid/gid of fi. +func CreateFile(filename string, fi os.FileInfo) (*os.File, error) { + mode := os.FileMode(0600) + if fi != nil { + mode = fi.Mode() + } + + f, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode) + if err != nil { + return nil, err + } + + uid, gid := Fileinfo(fi) + _ = f.Chown(uid, gid) + return f, nil +} + +// MkdirAll is a copy of os.MkdirAll() except that it attempts to set the +// mode/uid/gid to match fi for each created directory. +func MkdirAll(path string, fi os.FileInfo) error { + uid, gid := Fileinfo(fi) + + // Fast path: if we can tell whether path is a directory or file, stop with success or error. + dir, err := os.Stat(path) + if err == nil { + if dir.IsDir() { + return nil + } + return &os.PathError{Op: "mkdir", Path: path, Err: syscall.ENOTDIR} + } + + // Slow path: make sure parent exists and then call Mkdir for path. + i := len(path) + for i > 0 && os.IsPathSeparator(path[i-1]) { // Skip trailing path separator. + i-- + } + + j := i + for j > 0 && !os.IsPathSeparator(path[j-1]) { // Scan backward over element. + j-- + } + + if j > 1 { + // Create parent. 
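+ // Each recursive call strips one trailing path element, so the
+ // recursion bottoms out at the filesystem root or the first
+ // directory that already exists.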
+ err = MkdirAll(fixRootDirectory(path[:j-1]), fi) + if err != nil { + return err + } + } + + // Parent now exists; invoke Mkdir and use its result. + mode := os.FileMode(0700) + if fi != nil { + mode = fi.Mode() + } + err = os.Mkdir(path, mode) + if err != nil { + // Handle arguments like "foo/." by + // double-checking that directory doesn't exist. + dir, err1 := os.Lstat(path) + if err1 == nil && dir.IsDir() { + _ = os.Chown(path, uid, gid) + return nil + } + return err + } + _ = os.Chown(path, uid, gid) + return nil +} + diff --git a/litestream_unix.go b/internal/internal_unix.go similarity index 59% rename from litestream_unix.go rename to internal/internal_unix.go index 7ec7618..cedc947 100644 --- a/litestream_unix.go +++ b/internal/internal_unix.go @@ -1,14 +1,17 @@ // +build aix darwin dragonfly freebsd linux netbsd openbsd solaris -package litestream +package internal import ( "os" "syscall" ) -// fileinfo returns syscall fields from a FileInfo object. -func fileinfo(fi os.FileInfo) (uid, gid int) { +// Fileinfo returns syscall fields from a FileInfo object. +func Fileinfo(fi os.FileInfo) (uid, gid int) { + if fi == nil { + return -1, -1 + } stat := fi.Sys().(*syscall.Stat_t) return int(stat.Uid), int(stat.Gid) } diff --git a/litestream_windows.go b/internal/internal_windows.go similarity index 74% rename from litestream_windows.go rename to internal/internal_windows.go index 67457ef..1853164 100644 --- a/litestream_windows.go +++ b/internal/internal_windows.go @@ -1,13 +1,13 @@ // +build windows -package litestream +package internal import ( "os" ) -// fileinfo returns syscall fields from a FileInfo object. -func fileinfo(fi os.FileInfo) (uid, gid int) { +// Fileinfo returns syscall fields from a FileInfo object. +func Fileinfo(fi os.FileInfo) (uid, gid int) { return -1, -1 } diff --git a/internal/metrics.go b/internal/metrics.go deleted file mode 100644 index 098f392..0000000 --- a/internal/metrics.go +++ /dev/null @@ -1,44 +0,0 @@ -package internal - -import ( - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" -) - -// Shared replica metrics. 
-var (
- ReplicaSnapshotTotalGaugeVec = promauto.NewGaugeVec(prometheus.GaugeOpts{
- Namespace: "litestream",
- Subsystem: "replica",
- Name: "snapshot_total",
- Help: "The current number of snapshots",
- }, []string{"db", "name"})
-
- ReplicaWALBytesCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{
- Namespace: "litestream",
- Subsystem: "replica",
- Name: "wal_bytes",
- Help: "The number wal bytes written",
- }, []string{"db", "name"})
-
- ReplicaWALIndexGaugeVec = promauto.NewGaugeVec(prometheus.GaugeOpts{
- Namespace: "litestream",
- Subsystem: "replica",
- Name: "wal_index",
- Help: "The current WAL index",
- }, []string{"db", "name"})
-
- ReplicaWALOffsetGaugeVec = promauto.NewGaugeVec(prometheus.GaugeOpts{
- Namespace: "litestream",
- Subsystem: "replica",
- Name: "wal_offset",
- Help: "The current WAL offset",
- }, []string{"db", "name"})
-
- ReplicaValidationTotalCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{
- Namespace: "litestream",
- Subsystem: "replica",
- Name: "validation_total",
- Help: "The number of validations performed",
- }, []string{"db", "name", "status"})
-)
diff --git a/litestream.go b/litestream.go
index 3b3952b..8e39cbc 100644
--- a/litestream.go
+++ b/litestream.go
@@ -11,7 +11,6 @@ import (
 "regexp"
 "strconv"
 "strings"
- "syscall"
 "time"
)
@@ -19,9 +18,10 @@ import (
const (
 MetaDirSuffix = "-litestream"
- WALDirName = "wal"
- WALExt = ".wal"
- SnapshotExt = ".snapshot"
+ WALDirName = "wal"
+ WALExt = ".wal"
+ WALSegmentExt = ".wal.lz4"
+ SnapshotExt = ".snapshot.lz4"
 GenerationNameLen = 16
)
@@ -41,19 +41,160 @@ var (
 ErrChecksumMismatch = errors.New("invalid replica, checksum mismatch")
)
+// SnapshotIterator represents an iterator over a collection of snapshot metadata.
+type SnapshotIterator interface {
+ io.Closer
+
+ // Prepares the next snapshot for reading with the Snapshot() method.
+ // Returns true if another snapshot is available. Returns false if no more
+ // snapshots are available or if an error occurred.
+ Next() bool
+
+ // Returns an error that occurred during iteration.
+ Err() error
+
+ // Returns metadata for the currently positioned snapshot.
+ Snapshot() SnapshotInfo
+}
+
+// SliceSnapshotIterator returns all snapshots from an iterator as a slice.
+func SliceSnapshotIterator(itr SnapshotIterator) ([]SnapshotInfo, error) {
+ var a []SnapshotInfo
+ for itr.Next() {
+ a = append(a, itr.Snapshot())
+ }
+ return a, itr.Close()
+}
+
+var _ SnapshotIterator = (*SnapshotInfoSliceIterator)(nil)
+
+// SnapshotInfoSliceIterator represents an iterator for iterating over a slice of snapshots.
+type SnapshotInfoSliceIterator struct {
+ init bool
+ a []SnapshotInfo
+}
+
+// NewSnapshotInfoSliceIterator returns a new instance of SnapshotInfoSliceIterator.
+func NewSnapshotInfoSliceIterator(a []SnapshotInfo) *SnapshotInfoSliceIterator {
+ return &SnapshotInfoSliceIterator{a: a}
+}
+
+// Close always returns nil.
+func (itr *SnapshotInfoSliceIterator) Close() error { return nil }
+
+// Next moves to the next snapshot. Returns true if another snapshot is available.
+func (itr *SnapshotInfoSliceIterator) Next() bool {
+ if !itr.init {
+ itr.init = true
+ return len(itr.a) > 0
+ }
+ itr.a = itr.a[1:]
+ return len(itr.a) > 0
+}
+
+// Err always returns nil.
+func (itr *SnapshotInfoSliceIterator) Err() error { return nil }
+
+// Snapshot returns the metadata from the currently positioned snapshot.
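+//
+// For exposition only (not part of the original patch): a typical consumer
+// pairs Next with Snapshot and checks Err once iteration stops:
+//
+//	for itr.Next() {
+//		info := itr.Snapshot()
+//		// ... use info ...
+//	}
+//	if err := itr.Err(); err != nil {
+//		// handle error
+//	}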
+func (itr *SnapshotInfoSliceIterator) Snapshot() SnapshotInfo {
+ if len(itr.a) == 0 {
+ return SnapshotInfo{}
+ }
+ return itr.a[0]
+}
+
+// WALSegmentIterator represents an iterator over a collection of WAL segments.
+type WALSegmentIterator interface {
+ io.Closer
+
+ // Prepares the next WAL segment for reading with the WALSegment() method.
+ // Returns true if another segment is available. Returns false if no more
+ // WAL segments are available or if an error occurred.
+ Next() bool
+
+ // Returns an error that occurred during iteration.
+ Err() error
+
+ // Returns metadata for the currently positioned WAL segment file.
+ WALSegment() WALSegmentInfo
+}
+
+// SliceWALSegmentIterator returns all WAL segment files from an iterator as a slice.
+func SliceWALSegmentIterator(itr WALSegmentIterator) ([]WALSegmentInfo, error) {
+ var a []WALSegmentInfo
+ for itr.Next() {
+ a = append(a, itr.WALSegment())
+ }
+ return a, itr.Close()
+}
+
+var _ WALSegmentIterator = (*WALSegmentInfoSliceIterator)(nil)
+
+// WALSegmentInfoSliceIterator represents an iterator for iterating over a slice of wal segments.
+type WALSegmentInfoSliceIterator struct {
+ init bool
+ a []WALSegmentInfo
+}
+
+// NewWALSegmentInfoSliceIterator returns a new instance of WALSegmentInfoSliceIterator.
+func NewWALSegmentInfoSliceIterator(a []WALSegmentInfo) *WALSegmentInfoSliceIterator {
+ return &WALSegmentInfoSliceIterator{a: a}
+}
+
+// Close always returns nil.
+func (itr *WALSegmentInfoSliceIterator) Close() error { return nil }
+
+// Next moves to the next wal segment. Returns true if another segment is available.
+func (itr *WALSegmentInfoSliceIterator) Next() bool {
+ if !itr.init {
+ itr.init = true
+ return len(itr.a) > 0
+ }
+ itr.a = itr.a[1:]
+ return len(itr.a) > 0
+}
+
+// Err always returns nil.
+func (itr *WALSegmentInfoSliceIterator) Err() error { return nil }
+
+// WALSegment returns the metadata from the currently positioned wal segment.
+func (itr *WALSegmentInfoSliceIterator) WALSegment() WALSegmentInfo {
+ if len(itr.a) == 0 {
+ return WALSegmentInfo{}
+ }
+ return itr.a[0]
+}
+
 // SnapshotInfo represents file information about a snapshot.
 type SnapshotInfo struct {
- Name string
- Replica string
 Generation string
 Index int
 Size int64
 CreatedAt time.Time
}
+// Pos returns the WAL position when the snapshot was made.
+func (info *SnapshotInfo) Pos() Pos {
+ return Pos{Generation: info.Generation, Index: info.Index}
+}
+
+// SnapshotInfoSlice represents a slice of snapshot metadata.
+type SnapshotInfoSlice []SnapshotInfo
+
+func (a SnapshotInfoSlice) Len() int { return len(a) }
+
+func (a SnapshotInfoSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+
+func (a SnapshotInfoSlice) Less(i, j int) bool {
+ if a[i].Generation != a[j].Generation {
+ return a[i].Generation < a[j].Generation
+ }
+ return a[i].Index < a[j].Index
+}
+
 // FilterSnapshotsAfter returns all snapshots that were created on or after t.
-func FilterSnapshotsAfter(a []*SnapshotInfo, t time.Time) []*SnapshotInfo {
- other := make([]*SnapshotInfo, 0, len(a))
+func FilterSnapshotsAfter(a []SnapshotInfo, t time.Time) []SnapshotInfo {
+ other := make([]SnapshotInfo, 0, len(a))
 for _, snapshot := range a {
 if !snapshot.CreatedAt.Before(t) {
 other = append(other, snapshot)
@@ -63,13 +204,13 @@ func FilterSnapshotsAfter(a []*SnapshotInfo, t time.Time) []*SnapshotInfo {
}
// FindMinSnapshotByGeneration finds the snapshot with the lowest index in a generation.
-func FindMinSnapshotByGeneration(a []*SnapshotInfo, generation string) *SnapshotInfo {
+func FindMinSnapshotByGeneration(a []SnapshotInfo, generation string) *SnapshotInfo {
 var min *SnapshotInfo
 for _, snapshot := range a {
 if snapshot.Generation != generation {
 continue
 } else if min == nil || snapshot.Index < min.Index {
- min = snapshot
+ snapshot := snapshot // copy: &snapshot would otherwise alias the shared loop variable
+ min = &snapshot
 }
 }
 return min
@@ -77,8 +218,27 @@ func FindMinSnapshotByGeneration(a []SnapshotInfo, generation string) *Snapshot
 // WALInfo represents file information about a WAL file.
 type WALInfo struct {
- Name string
- Replica string
+ Generation string
+ Index int
+ CreatedAt time.Time
+}
+
+// WALInfoSlice represents a slice of WAL metadata.
+type WALInfoSlice []WALInfo
+
+func (a WALInfoSlice) Len() int { return len(a) }
+
+func (a WALInfoSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+
+func (a WALInfoSlice) Less(i, j int) bool {
+ if a[i].Generation != a[j].Generation {
+ return a[i].Generation < a[j].Generation
+ }
+ return a[i].Index < a[j].Index
+}
+
+// WALSegmentInfo represents file information about a WAL segment file.
+type WALSegmentInfo struct {
 Generation string
 Index int
 Offset int64
@@ -86,6 +246,27 @@ type WALInfo struct {
 CreatedAt time.Time
}
+// Pos returns the WAL position when the segment was made.
+func (info *WALSegmentInfo) Pos() Pos {
+ return Pos{Generation: info.Generation, Index: info.Index, Offset: info.Offset}
+}
+
+// WALSegmentInfoSlice represents a slice of WAL segment metadata.
+type WALSegmentInfoSlice []WALSegmentInfo
+
+func (a WALSegmentInfoSlice) Len() int { return len(a) }
+
+func (a WALSegmentInfoSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+
+func (a WALSegmentInfoSlice) Less(i, j int) bool {
+ if a[i].Generation != a[j].Generation {
+ return a[i].Generation < a[j].Generation
+ } else if a[i].Index != a[j].Index {
+ return a[i].Index < a[j].Index
+ }
+ return a[i].Offset < a[j].Offset
+}
+
 // Pos is a position in the WAL for a generation.
 type Pos struct {
 Generation string // generation name
@@ -106,6 +287,11 @@ func (p Pos) IsZero() bool {
 return p == (Pos{})
}
+// Truncate returns p with the offset truncated to zero.
+func (p Pos) Truncate() Pos {
+ return Pos{Generation: p.Generation, Index: p.Index}
+}
+
 // Checksum computes a running SQLite checksum over a byte slice.
 func Checksum(bo binary.ByteOrder, s0, s1 uint32, b []byte) (uint32, uint32) {
 assert(len(b)%8 == 0, "misaligned checksum byte slice")
@@ -204,38 +390,43 @@ func IsSnapshotPath(s string) bool {
// ParseSnapshotPath returns the index for the snapshot.
// Returns an error if the path is not a valid snapshot path.
-func ParseSnapshotPath(s string) (index int, ext string, err error) {
+func ParseSnapshotPath(s string) (index int, err error) {
 s = filepath.Base(s)
 a := snapshotPathRegex.FindStringSubmatch(s)
 if a == nil {
- return 0, "", fmt.Errorf("invalid snapshot path: %s", s)
+ return 0, fmt.Errorf("invalid snapshot path: %s", s)
 }
 i64, _ := strconv.ParseUint(a[1], 16, 64)
- return int(i64), a[2], nil
+ return int(i64), nil
}
-var snapshotPathRegex = regexp.MustCompile(`^([0-9a-f]{8})(.snapshot(?:.lz4)?)$`)
+// FormatSnapshotPath formats a snapshot filename with a given index.
+func FormatSnapshotPath(index int) string {
+ assert(index >= 0, "snapshot index must be non-negative")
+ return fmt.Sprintf("%08x%s", index, SnapshotExt)
+}
+
+var snapshotPathRegex = regexp.MustCompile(`^([0-9a-f]{8})\.snapshot\.lz4$`)
// IsWALPath returns true if s is a path to a WAL file.
func IsWALPath(s string) bool { return walPathRegex.MatchString(s) } -// ParseWALPath returns the index & offset for the WAL file. -// Returns an error if the path is not a valid snapshot path. -func ParseWALPath(s string) (index int, offset int64, ext string, err error) { +// ParseWALPath returns the index for the WAL file. +// Returns an error if the path is not a valid WAL path. +func ParseWALPath(s string) (index int, err error) { s = filepath.Base(s) a := walPathRegex.FindStringSubmatch(s) if a == nil { - return 0, 0, "", fmt.Errorf("invalid wal path: %s", s) + return 0, fmt.Errorf("invalid wal path: %s", s) } i64, _ := strconv.ParseUint(a[1], 16, 64) - off64, _ := strconv.ParseUint(a[2], 16, 64) - return int(i64), int64(off64), a[3], nil + return int(i64), nil } // FormatWALPath formats a WAL filename with a given index. @@ -244,77 +435,37 @@ func FormatWALPath(index int) string { return fmt.Sprintf("%08x%s", index, WALExt) } -// FormatWALPathWithOffset formats a WAL filename with a given index & offset. -func FormatWALPathWithOffset(index int, offset int64) string { - assert(index >= 0, "wal index must be non-negative") - assert(offset >= 0, "wal offset must be non-negative") - return fmt.Sprintf("%08x_%08x%s", index, offset, WALExt) +var walPathRegex = regexp.MustCompile(`^([0-9a-f]{8})\.wal$`) + +// ParseWALSegmentPath returns the index & offset for the WAL segment file. +// Returns an error if the path is not a valid wal segment path. +func ParseWALSegmentPath(s string) (index int, offset int64, err error) { + s = filepath.Base(s) + + a := walSegmentPathRegex.FindStringSubmatch(s) + if a == nil { + return 0, 0, fmt.Errorf("invalid wal segment path: %s", s) + } + + i64, _ := strconv.ParseUint(a[1], 16, 64) + off64, _ := strconv.ParseUint(a[2], 16, 64) + return int(i64), int64(off64), nil } -var walPathRegex = regexp.MustCompile(`^([0-9a-f]{8})(?:_([0-9a-f]{8}))?(.wal(?:.lz4)?)$`) +// FormatWALSegmentPath formats a WAL segment filename with a given index & offset. +func FormatWALSegmentPath(index int, offset int64) string { + assert(index >= 0, "wal index must be non-negative") + assert(offset >= 0, "wal offset must be non-negative") + return fmt.Sprintf("%08x_%08x%s", index, offset, WALSegmentExt) +} + +var walSegmentPathRegex = regexp.MustCompile(`^([0-9a-f]{8})(?:_([0-9a-f]{8}))\.wal\.lz4$`) // isHexChar returns true if ch is a lowercase hex character. func isHexChar(ch rune) bool { return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') } -// createFile creates the file and attempts to set the UID/GID. -func createFile(filename string, perm os.FileMode, uid, gid int) (*os.File, error) { - f, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_TRUNC, perm) - if err != nil { - return nil, err - } - _ = f.Chown(uid, gid) - return f, nil -} - -// mkdirAll is a copy of os.MkdirAll() except that it attempts to set the -// uid/gid for each created directory. -func mkdirAll(path string, perm os.FileMode, uid, gid int) error { - // Fast path: if we can tell whether path is a directory or file, stop with success or error. - dir, err := os.Stat(path) - if err == nil { - if dir.IsDir() { - return nil - } - return &os.PathError{Op: "mkdir", Path: path, Err: syscall.ENOTDIR} - } - - // Slow path: make sure parent exists and then call Mkdir for path. - i := len(path) - for i > 0 && os.IsPathSeparator(path[i-1]) { // Skip trailing path separator. - i-- - } - - j := i - for j > 0 && !os.IsPathSeparator(path[j-1]) { // Scan backward over element. 
- j-- - } - - if j > 1 { - // Create parent. - err = mkdirAll(fixRootDirectory(path[:j-1]), perm, uid, gid) - if err != nil { - return err - } - } - - // Parent now exists; invoke Mkdir and use its result. - err = os.Mkdir(path, perm) - if err != nil { - // Handle arguments like "foo/." by - // double-checking that directory doesn't exist. - dir, err1 := os.Lstat(path) - if err1 == nil && dir.IsDir() { - _ = os.Chown(path, uid, gid) - return nil - } - return err - } - _ = os.Chown(path, uid, gid) - return nil -} - // Tracef is used for low-level tracing. var Tracef = func(format string, a ...interface{}) {} diff --git a/mock/replica_client.go b/mock/replica_client.go new file mode 100644 index 0000000..a8bd998 --- /dev/null +++ b/mock/replica_client.go @@ -0,0 +1,65 @@ +package mock + +import ( + "context" + "io" + + "github.com/benbjohnson/litestream" +) + +var _ litestream.ReplicaClient = (*ReplicaClient)(nil) + +type ReplicaClient struct { + GenerationsFunc func(ctx context.Context) ([]string, error) + DeleteGenerationFunc func(ctx context.Context, generation string) error + SnapshotsFunc func(ctx context.Context, generation string) (litestream.SnapshotIterator, error) + WriteSnapshotFunc func(ctx context.Context, generation string, index int, r io.Reader) (litestream.SnapshotInfo, error) + DeleteSnapshotFunc func(ctx context.Context, generation string, index int) error + SnapshotReaderFunc func(ctx context.Context, generation string, index int) (io.ReadCloser, error) + WALSegmentsFunc func(ctx context.Context, generation string) (litestream.WALSegmentIterator, error) + WriteWALSegmentFunc func(ctx context.Context, pos litestream.Pos, r io.Reader) (litestream.WALSegmentInfo, error) + DeleteWALSegmentsFunc func(ctx context.Context, a []litestream.Pos) error + WALSegmentReaderFunc func(ctx context.Context, pos litestream.Pos) (io.ReadCloser, error) +} + +func (c *ReplicaClient) Type() string { return "mock" } + +func (c *ReplicaClient) Generations(ctx context.Context) ([]string, error) { + return c.GenerationsFunc(ctx) +} + +func (c *ReplicaClient) DeleteGeneration(ctx context.Context, generation string) error { + return c.DeleteGenerationFunc(ctx, generation) +} + +func (c *ReplicaClient) Snapshots(ctx context.Context, generation string) (litestream.SnapshotIterator, error) { + return c.SnapshotsFunc(ctx, generation) +} + +func (c *ReplicaClient) WriteSnapshot(ctx context.Context, generation string, index int, r io.Reader) (litestream.SnapshotInfo, error) { + return c.WriteSnapshotFunc(ctx, generation, index, r) +} + +func (c *ReplicaClient) DeleteSnapshot(ctx context.Context, generation string, index int) error { + return c.DeleteSnapshotFunc(ctx, generation, index) +} + +func (c *ReplicaClient) SnapshotReader(ctx context.Context, generation string, index int) (io.ReadCloser, error) { + return c.SnapshotReaderFunc(ctx, generation, index) +} + +func (c *ReplicaClient) WALSegments(ctx context.Context, generation string) (litestream.WALSegmentIterator, error) { + return c.WALSegmentsFunc(ctx, generation) +} + +func (c *ReplicaClient) WriteWALSegment(ctx context.Context, pos litestream.Pos, r io.Reader) (litestream.WALSegmentInfo, error) { + return c.WriteWALSegmentFunc(ctx, pos, r) +} + +func (c *ReplicaClient) DeleteWALSegments(ctx context.Context, a []litestream.Pos) error { + return c.DeleteWALSegmentsFunc(ctx, a) +} + +func (c *ReplicaClient) WALSegmentReader(ctx context.Context, pos litestream.Pos) (io.ReadCloser, error) { + return c.WALSegmentReaderFunc(ctx, pos) +} diff --git 
a/replica.go b/replica.go
index 74015e8..1d08e96 100644
--- a/replica.go
+++ b/replica.go
@@ -18,81 +18,26 @@ import (
 "github.com/benbjohnson/litestream/internal"
 "github.com/pierrec/lz4/v4"
 "github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/client_golang/prometheus/promauto"
+ "golang.org/x/sync/errgroup"
)
-// Replica represents a remote destination to replicate the database & WAL.
-type Replica interface {
- // The name of the replica. Defaults to type if no name specified.
- Name() string
-
- // String identifier for the type of replica ("file", "s3", etc).
- Type() string
-
- // The parent database.
- DB() *DB
-
- // Starts replicating in a background goroutine.
- Start(ctx context.Context) error
-
- // Stops all replication processing. Blocks until processing stopped.
- Stop(hard bool) error
-
- // Performs a backup of outstanding WAL frames to the replica.
- Sync(ctx context.Context) error
-
- // Returns the last replication position.
- LastPos() Pos
-
- // Returns the computed position of the replica for a given generation.
- CalcPos(ctx context.Context, generation string) (Pos, error)
-
- // Returns a list of generation names for the replica.
- Generations(ctx context.Context) ([]string, error)
-
- // Returns basic information about a generation including the number of
- // snapshot & WAL files as well as the time range covered.
- GenerationStats(ctx context.Context, generation string) (GenerationStats, error)
-
- // Returns a list of available snapshots in the replica.
- Snapshots(ctx context.Context) ([]*SnapshotInfo, error)
-
- // Returns a list of available WAL files in the replica.
- WALs(ctx context.Context) ([]*WALInfo, error)
-
- // Returns a reader for snapshot data at the given generation/index.
- SnapshotReader(ctx context.Context, generation string, index int) (io.ReadCloser, error)
-
- // Returns a reader for WAL data at the given position.
- WALReader(ctx context.Context, generation string, index int) (io.ReadCloser, error)
-}
-
-// GenerationStats represents high level stats for a single generation.
-type GenerationStats struct {
- // Count of snapshot & WAL files.
- SnapshotN int
- WALN int
-
- // Time range for the earliest snapshot & latest WAL file update.
- CreatedAt time.Time
- UpdatedAt time.Time
-}
-
-// Default file replica settings.
+// Default replica settings.
const (
+ DefaultSyncInterval = 1 * time.Second
 DefaultRetention = 24 * time.Hour
 DefaultRetentionCheckInterval = 1 * time.Hour
)
-var _ Replica = (*FileReplica)(nil)
-
-// FileReplica is a replica that replicates a DB to a local file path.
-type FileReplica struct {
- db *DB // source database
- name string // replica name, optional
- dst string // destination path
+// Replica connects a database to a replication destination via a ReplicaClient.
+// The replica manages periodic synchronization and maintains the current
+// replica position.
+type Replica struct {
+ db *DB
+ name string
 mu sync.RWMutex
- pos Pos // last position
+ pos Pos // current replicated position
 muf sync.Mutex
 f *os.File // long-running file descriptor to avoid non-OFD lock issues
@@ -100,10 +45,11 @@ type FileReplica struct {
 wg sync.WaitGroup
 cancel func()
- snapshotTotalGauge prometheus.Gauge
- walBytesCounter prometheus.Counter
- walIndexGauge prometheus.Gauge
- walOffsetGauge prometheus.Gauge
+ // Client used to connect to the remote replica.
+ Client ReplicaClient
+
+ // Time between syncs with the shadow WAL.
+ SyncInterval time.Duration
 // Frequency to create new snapshots.
SnapshotInterval time.Duration @@ -123,283 +69,34 @@ type FileReplica struct { MonitorEnabled bool } -// NewFileReplica returns a new instance of FileReplica. -func NewFileReplica(db *DB, name, dst string) *FileReplica { - r := &FileReplica{ +func NewReplica(db *DB, name string) *Replica { + r := &Replica{ db: db, name: name, - dst: dst, cancel: func() {}, + SyncInterval: DefaultSyncInterval, Retention: DefaultRetention, RetentionCheckInterval: DefaultRetentionCheckInterval, MonitorEnabled: true, } - var dbPath string - if db != nil { - dbPath = db.Path() - } - r.snapshotTotalGauge = internal.ReplicaSnapshotTotalGaugeVec.WithLabelValues(dbPath, r.Name()) - r.walBytesCounter = internal.ReplicaWALBytesCounterVec.WithLabelValues(dbPath, r.Name()) - r.walIndexGauge = internal.ReplicaWALIndexGaugeVec.WithLabelValues(dbPath, r.Name()) - r.walOffsetGauge = internal.ReplicaWALOffsetGaugeVec.WithLabelValues(dbPath, r.Name()) - return r } -// Name returns the name of the replica. Returns the type if no name set. -func (r *FileReplica) Name() string { - if r.name != "" { - return r.name +// Name returns the name of the replica. +func (r *Replica) Name() string { + if r.name == "" && r.Client != nil { + return r.Client.Type() } - return r.Type() + return r.name } -// Type returns the type of replica. -func (r *FileReplica) Type() string { - return "file" -} +// DB returns a reference to the database the replica is attached to, if any. +func (r *Replica) DB() *DB { return r.db } -// DB returns the parent database reference. -func (r *FileReplica) DB() *DB { - return r.db -} - -// Path returns the path the replica was initialized with. -func (r *FileReplica) Path() string { - return r.dst -} - -// LastPos returns the last successfully replicated position. -func (r *FileReplica) LastPos() Pos { - r.mu.RLock() - defer r.mu.RUnlock() - return r.pos -} - -// GenerationDir returns the path to a generation's root directory. -func (r *FileReplica) GenerationDir(generation string) string { - return filepath.Join(r.dst, "generations", generation) -} - -// SnapshotDir returns the path to a generation's snapshot directory. -func (r *FileReplica) SnapshotDir(generation string) string { - return filepath.Join(r.GenerationDir(generation), "snapshots") -} - -// SnapshotPath returns the path to a snapshot file. -func (r *FileReplica) SnapshotPath(generation string, index int) string { - return filepath.Join(r.SnapshotDir(generation), fmt.Sprintf("%08x.snapshot.lz4", index)) -} - -// MaxSnapshotIndex returns the highest index for the snapshots. -func (r *FileReplica) MaxSnapshotIndex(generation string) (int, error) { - fis, err := ioutil.ReadDir(r.SnapshotDir(generation)) - if err != nil { - return 0, err - } - - index := -1 - for _, fi := range fis { - if idx, _, err := ParseSnapshotPath(fi.Name()); err != nil { - continue - } else if index == -1 || idx > index { - index = idx - } - } - if index == -1 { - return 0, fmt.Errorf("no snapshots found") - } - return index, nil -} - -// WALDir returns the path to a generation's WAL directory -func (r *FileReplica) WALDir(generation string) string { - return filepath.Join(r.GenerationDir(generation), "wal") -} - -// WALPath returns the path to a WAL file. -func (r *FileReplica) WALPath(generation string, index int) string { - return filepath.Join(r.WALDir(generation), fmt.Sprintf("%08x.wal", index)) -} - -// Generations returns a list of available generation names. 
-func (r *FileReplica) Generations(ctx context.Context) ([]string, error) { - fis, err := ioutil.ReadDir(filepath.Join(r.dst, "generations")) - if os.IsNotExist(err) { - return nil, nil - } else if err != nil { - return nil, err - } - - var generations []string - for _, fi := range fis { - if !IsGenerationName(fi.Name()) { - continue - } else if !fi.IsDir() { - continue - } - generations = append(generations, fi.Name()) - } - return generations, nil -} - -// GenerationStats returns stats for a generation. -func (r *FileReplica) GenerationStats(ctx context.Context, generation string) (stats GenerationStats, err error) { - // Determine stats for all snapshots. - n, min, max, err := r.snapshotStats(generation) - if err != nil { - return stats, err - } - stats.SnapshotN = n - stats.CreatedAt, stats.UpdatedAt = min, max - - // Update stats if we have WAL files. - n, min, max, err = r.walStats(generation) - if err != nil { - return stats, err - } else if n == 0 { - return stats, nil - } - - stats.WALN = n - if stats.CreatedAt.IsZero() || min.Before(stats.CreatedAt) { - stats.CreatedAt = min - } - if stats.UpdatedAt.IsZero() || max.After(stats.UpdatedAt) { - stats.UpdatedAt = max - } - return stats, nil -} - -func (r *FileReplica) snapshotStats(generation string) (n int, min, max time.Time, err error) { - fis, err := ioutil.ReadDir(r.SnapshotDir(generation)) - if os.IsNotExist(err) { - return n, min, max, nil - } else if err != nil { - return n, min, max, err - } - - for _, fi := range fis { - if !IsSnapshotPath(fi.Name()) { - continue - } - modTime := fi.ModTime().UTC() - - n++ - if min.IsZero() || modTime.Before(min) { - min = modTime - } - if max.IsZero() || modTime.After(max) { - max = modTime - } - } - return n, min, max, nil -} - -func (r *FileReplica) walStats(generation string) (n int, min, max time.Time, err error) { - fis, err := ioutil.ReadDir(r.WALDir(generation)) - if os.IsNotExist(err) { - return n, min, max, nil - } else if err != nil { - return n, min, max, err - } - - for _, fi := range fis { - if !IsWALPath(fi.Name()) { - continue - } - modTime := fi.ModTime().UTC() - - n++ - if min.IsZero() || modTime.Before(min) { - min = modTime - } - if max.IsZero() || modTime.After(max) { - max = modTime - } - } - return n, min, max, nil -} - -// Snapshots returns a list of available snapshots in the replica. -func (r *FileReplica) Snapshots(ctx context.Context) ([]*SnapshotInfo, error) { - generations, err := r.Generations(ctx) - if err != nil { - return nil, err - } - - var infos []*SnapshotInfo - for _, generation := range generations { - fis, err := ioutil.ReadDir(r.SnapshotDir(generation)) - if os.IsNotExist(err) { - continue - } else if err != nil { - return nil, err - } - - for _, fi := range fis { - index, _, err := ParseSnapshotPath(fi.Name()) - if err != nil { - continue - } - - infos = append(infos, &SnapshotInfo{ - Name: fi.Name(), - Replica: r.Name(), - Generation: generation, - Index: index, - Size: fi.Size(), - CreatedAt: fi.ModTime().UTC(), - }) - } - } - - return infos, nil -} - -// WALs returns a list of available WAL files in the replica. -func (r *FileReplica) WALs(ctx context.Context) ([]*WALInfo, error) { - generations, err := r.Generations(ctx) - if err != nil { - return nil, err - } - - var infos []*WALInfo - for _, generation := range generations { - // Find a list of all WAL files. - fis, err := ioutil.ReadDir(r.WALDir(generation)) - if os.IsNotExist(err) { - continue - } else if err != nil { - return nil, err - } - - // Iterate over each WAL file. 
-		for _, fi := range fis {
-			index, offset, _, err := ParseWALPath(fi.Name())
-			if err != nil {
-				continue
-			}
-
-			infos = append(infos, &WALInfo{
-				Name:       fi.Name(),
-				Replica:    r.Name(),
-				Generation: generation,
-				Index:      index,
-				Offset:     offset,
-				Size:       fi.Size(),
-				CreatedAt:  fi.ModTime().UTC(),
-			})
-		}
-	}
-
-	return infos, nil
-}
-
-// Start starts replication for a given generation.
-func (r *FileReplica) Start(ctx context.Context) (err error) {
+// Start begins replicating in a background goroutine.
+func (r *Replica) Start(ctx context.Context) error {
 	// Ignore if replica is being used sychronously.
 	if !r.MonitorEnabled {
 		return nil
@@ -426,7 +123,7 @@ func (r *FileReplica) Start(ctx context.Context) (err error) {
 // Performing a hard stop will close the DB file descriptor which could release
 // locks on per-process locks. Hard stops should only be performed when
 // stopping the entire process.
-func (r *FileReplica) Stop(hard bool) (err error) {
+func (r *Replica) Stop(hard bool) (err error) {
 	r.cancel()
 	r.wg.Wait()
@@ -440,19 +137,482 @@ func (r *FileReplica) Stop(hard bool) (err error) {
 	return err
 }
 
-// monitor runs in a separate goroutine and continuously replicates the DB.
-func (r *FileReplica) monitor(ctx context.Context) {
-	// Clear old temporary files that my have been left from a crash.
-	if err := removeTmpFiles(r.dst); err != nil {
-		log.Printf("%s(%s): monitor: cannot remove tmp files: %s", r.db.Path(), r.Name(), err)
+// Sync copies new WAL frames from the shadow WAL to the replica client.
+func (r *Replica) Sync(ctx context.Context) (err error) {
+	// Clear last position if an error occurs during sync.
+	defer func() {
+		if err != nil {
+			r.mu.Lock()
+			r.pos = Pos{}
+			r.mu.Unlock()
+		}
+	}()
+
+	// Find current position of database.
+	dpos, err := r.db.Pos()
+	if err != nil {
+		return fmt.Errorf("cannot determine current generation: %w", err)
+	} else if dpos.IsZero() {
+		return fmt.Errorf("no generation, waiting for data")
 	}
+	generation := dpos.Generation
+
+	Tracef("%s(%s): replica sync: db.pos=%s", r.db.Path(), r.Name(), dpos)
+
+	// Create snapshot if no snapshots exist for generation.
+	snapshotN, err := r.snapshotN(generation)
+	if err != nil {
+		return err
+	} else if snapshotN == 0 {
+		if info, err := r.Snapshot(ctx); err != nil {
+			return err
+		} else if info.Generation != generation {
+			return fmt.Errorf("generation changed during snapshot, exiting sync")
+		}
+		snapshotN = 1
+	}
+	replicaSnapshotTotalGaugeVec.WithLabelValues(r.db.Path(), r.Name()).Set(float64(snapshotN))
+
+	// Determine position, if necessary.
+	if r.Pos().Generation != generation {
+		pos, err := r.calcPos(ctx, generation)
+		if err != nil {
+			return fmt.Errorf("cannot determine replica position: %s", err)
+		}
+
+		Tracef("%s(%s): replica sync: calc new pos: %s", r.db.Path(), r.Name(), pos)
+		r.mu.Lock()
+		r.pos = pos
+		r.mu.Unlock()
+	}
+
+	// Read all WAL files since the last position.
+	for {
+		if err = r.syncWAL(ctx); err == io.EOF {
+			break
+		} else if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func (r *Replica) syncWAL(ctx context.Context) (err error) {
+	rd, err := r.db.ShadowWALReader(r.Pos())
+	if err == io.EOF {
+		return err
+	} else if err != nil {
+		return fmt.Errorf("replica wal reader: %w", err)
+	}
+	defer rd.Close()
+
+	// Copy shadow WAL to the client writer via io.Pipe().
+	pr, pw := io.Pipe()
+	defer func() { _ = pw.CloseWithError(err) }()
+
+	// Obtain initial position from shadow reader.
+	// It may have moved to the next index if previous position was at the end.
+ pos := rd.Pos() + + // Copy through pipe into client from the starting position. + var g errgroup.Group + g.Go(func() error { + _, err := r.Client.WriteWALSegment(ctx, pos, pr) + return err + }) + + // Wrap writer to LZ4 compress. + zw := lz4.NewWriter(pw) + + // Track total WAL bytes written to replica client. + walBytesCounter := replicaWALBytesCounterVec.WithLabelValues(r.db.Path(), r.Name()) + + // Copy header if at offset zero. + var psalt uint64 // previous salt value + if pos := rd.Pos(); pos.Offset == 0 { + buf := make([]byte, WALHeaderSize) + if _, err := io.ReadFull(rd, buf); err != nil { + return err + } + + psalt = binary.BigEndian.Uint64(buf[16:24]) + + n, err := zw.Write(buf) + if err != nil { + return err + } + walBytesCounter.Add(float64(n)) + } + + // Copy frames. + for { + pos := rd.Pos() + assert(pos.Offset == frameAlign(pos.Offset, r.db.pageSize), "shadow wal reader not frame aligned") + + buf := make([]byte, WALFrameHeaderSize+r.db.pageSize) + if _, err := io.ReadFull(rd, buf); err == io.EOF { + break + } else if err != nil { + return err + } + + // Verify salt matches the previous frame/header read. + salt := binary.BigEndian.Uint64(buf[8:16]) + if psalt != 0 && psalt != salt { + return fmt.Errorf("replica salt mismatch: %s", pos.String()) + } + psalt = salt + + n, err := zw.Write(buf) + if err != nil { + return err + } + walBytesCounter.Add(float64(n)) + } + + // Flush LZ4 writer and close pipe. + if err := zw.Close(); err != nil { + return err + } else if err := pw.Close(); err != nil { + return err + } + + // Wait for client to finish write. + if err := g.Wait(); err != nil { + return fmt.Errorf("client write: %w", err) + } + + // Save last replicated position. + r.mu.Lock() + r.pos = rd.Pos() + r.mu.Unlock() + + // Track current position + replicaWALIndexGaugeVec.WithLabelValues(r.db.Path(), r.Name()).Set(float64(rd.Pos().Index)) + replicaWALOffsetGaugeVec.WithLabelValues(r.db.Path(), r.Name()).Set(float64(rd.Pos().Offset)) + + return nil +} + +// snapshotN returns the number of snapshots for a generation. +func (r *Replica) snapshotN(generation string) (int, error) { + itr, err := r.Client.Snapshots(context.Background(), generation) + if err != nil { + return 0, err + } + defer itr.Close() + + var n int + for itr.Next() { + n++ + } + return n, itr.Close() +} + +// calcPos returns the last position for the given generation. +func (r *Replica) calcPos(ctx context.Context, generation string) (pos Pos, err error) { + // Fetch last snapshot. Return error if no snapshots exist. + snapshot, err := r.maxSnapshot(ctx, generation) + if err != nil { + return pos, fmt.Errorf("max snapshot: %w", err) + } else if snapshot == nil { + return pos, fmt.Errorf("no snapshot available: generation=%s", generation) + } + + // Determine last WAL segment available. Use snapshot if none exist. + segment, err := r.maxWALSegment(ctx, generation) + if err != nil { + return pos, fmt.Errorf("max wal segment: %w", err) + } else if segment == nil { + return Pos{Generation: snapshot.Generation, Index: snapshot.Index}, nil + } + + // Read segment to determine size to add to offset. + rd, err := r.Client.WALSegmentReader(ctx, segment.Pos()) + if err != nil { + return pos, fmt.Errorf("wal segment reader: %w", err) + } + defer rd.Close() + + n, err := io.Copy(ioutil.Discard, lz4.NewReader(rd)) + if err != nil { + return pos, err + } + + // Return the position at the end of the last WAL segment. 
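The pipe-based streaming in `syncWAL` is worth isolating. The sketch below distills the pattern: compress into one end of an `io.Pipe` while an errgroup goroutine drains the other end to its destination. Here `ioutil.Discard` stands in for `Client.WriteWALSegment`; this is illustrative only, not part of the patch.

```go
package main

import (
	"io"
	"io/ioutil"

	"github.com/pierrec/lz4/v4"
	"golang.org/x/sync/errgroup"
)

// streamCompressed mirrors syncWAL's write path: the producer compresses
// into a pipe while a goroutine streams the other end to a consumer.
func streamCompressed(data []byte) error {
	pr, pw := io.Pipe()

	// Consumer: stands in for the replica client's segment writer.
	var g errgroup.Group
	g.Go(func() error {
		_, err := io.Copy(ioutil.Discard, pr)
		return err
	})

	// Producer: compress into the pipe.
	zw := lz4.NewWriter(pw)
	if _, err := zw.Write(data); err != nil {
		_ = pw.CloseWithError(err)
		return err
	}

	// Flush compressed output, then close the pipe so the reader sees EOF.
	if err := zw.Close(); err != nil {
		_ = pw.CloseWithError(err)
		return err
	} else if err := pw.Close(); err != nil {
		return err
	}

	// Wait for the consumer to finish.
	return g.Wait()
}
```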
+ return Pos{ + Generation: segment.Generation, + Index: segment.Index, + Offset: segment.Offset + n, + }, nil +} + +// maxSnapshot returns the last snapshot in a generation. +func (r *Replica) maxSnapshot(ctx context.Context, generation string) (*SnapshotInfo, error) { + itr, err := r.Client.Snapshots(ctx, generation) + if err != nil { + return nil, err + } + defer itr.Close() + + var max *SnapshotInfo + for itr.Next() { + if info := itr.Snapshot(); max == nil || info.Index > max.Index { + max = &info + } + } + return max, itr.Close() +} + +// maxWALSegment returns the highest WAL segment in a generation. +func (r *Replica) maxWALSegment(ctx context.Context, generation string) (*WALSegmentInfo, error) { + itr, err := r.Client.WALSegments(ctx, generation) + if err != nil { + return nil, err + } + defer itr.Close() + + var max *WALSegmentInfo + for itr.Next() { + if info := itr.WALSegment(); max == nil || info.Index > max.Index || (info.Index == max.Index && info.Offset > max.Offset) { + max = &info + } + } + return max, itr.Close() +} + +// Pos returns the current replicated position. +// Returns a zero value if the current position cannot be determined. +func (r *Replica) Pos() Pos { + r.mu.RLock() + defer r.mu.RUnlock() + return r.pos +} + +// Snapshots returns a list of all snapshots across all generations. +func (r *Replica) Snapshots(ctx context.Context) ([]SnapshotInfo, error) { + generations, err := r.Client.Generations(ctx) + if err != nil { + return nil, fmt.Errorf("cannot fetch generations: %w", err) + } + + var a []SnapshotInfo + for _, generation := range generations { + if err := func() error { + itr, err := r.Client.Snapshots(ctx, generation) + if err != nil { + return err + } + defer itr.Close() + + other, err := SliceSnapshotIterator(itr) + if err != nil { + return err + } + a = append(a, other...) + + return itr.Close() + }(); err != nil { + return a, err + } + } + + sort.Sort(SnapshotInfoSlice(a)) + + return a, nil +} + +// Snapshot copies the entire database to the replica path. +func (r *Replica) Snapshot(ctx context.Context) (info SnapshotInfo, err error) { + if r.db == nil || r.db.db == nil { + return info, fmt.Errorf("no database available") + } + + r.muf.Lock() + defer r.muf.Unlock() + + // Issue a passive checkpoint to flush any pages to disk before snapshotting. + if _, err := r.db.db.ExecContext(ctx, `PRAGMA wal_checkpoint(PASSIVE);`); err != nil { + return info, fmt.Errorf("pre-snapshot checkpoint: %w", err) + } + + // Acquire a read lock on the database during snapshot to prevent checkpoints. + tx, err := r.db.db.Begin() + if err != nil { + return info, err + } else if _, err := tx.ExecContext(ctx, `SELECT COUNT(1) FROM _litestream_seq;`); err != nil { + _ = tx.Rollback() + return info, err + } + defer func() { _ = tx.Rollback() }() + + // Obtain current position. + pos, err := r.db.Pos() + if err != nil { + return info, fmt.Errorf("cannot determine db position: %w", err) + } else if pos.IsZero() { + return info, ErrNoGeneration + } + + // TODO: Check if snapshot already exists & skip. + + // startTime := time.Now() + + // Open db file descriptor, if not already open, & position at beginning. + if r.f == nil { + if r.f, err = os.Open(r.db.Path()); err != nil { + return info, err + } + } + if _, err := r.f.Seek(0, io.SeekStart); err != nil { + return info, err + } + + // Use a pipe to convert the LZ4 writer to a reader. + pr, pw := io.Pipe() + + // Copy the database file to the LZ4 writer in a separate goroutine. 
+ var g errgroup.Group + g.Go(func() error { + zr := lz4.NewWriter(pw) + defer zr.Close() + + if _, err := io.Copy(zr, r.f); err != nil { + pw.CloseWithError(err) + return err + } else if err := zr.Close(); err != nil { + pw.CloseWithError(err) + return err + } + return pw.Close() + }) + + // Delegate write to client & wait for writer goroutine to finish. + if info, err = r.Client.WriteSnapshot(ctx, pos.Generation, pos.Index, pr); err != nil { + return info, err + } + + return info, g.Wait() +} + +// EnforceRetention forces a new snapshot once the retention interval has passed. +// Older snapshots and WAL files are then removed. +func (r *Replica) EnforceRetention(ctx context.Context) (err error) { + // Obtain list of snapshots that are within the retention period. + snapshots, err := r.Snapshots(ctx) + if err != nil { + return fmt.Errorf("snapshots: %w", err) + } + retained := FilterSnapshotsAfter(snapshots, time.Now().Add(-r.Retention)) + + // If no retained snapshots exist, create a new snapshot. + if len(retained) == 0 { + snapshot, err := r.Snapshot(ctx) + if err != nil { + return fmt.Errorf("snapshot: %w", err) + } + retained = append(retained, snapshot) + } + + // Loop over generations and delete unretained snapshots & WAL files. + generations, err := r.Client.Generations(ctx) + if err != nil { + return fmt.Errorf("generations: %w", err) + } + for _, generation := range generations { + // Find earliest retained snapshot for this generation. + snapshot := FindMinSnapshotByGeneration(retained, generation) + + // Delete entire generation if no snapshots are being retained. + if snapshot == nil { + if err := r.Client.DeleteGeneration(ctx, generation); err != nil { + return fmt.Errorf("delete generation: %w", err) + } + continue + } + + // Otherwise remove all earlier snapshots & WAL segments. 
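Snapshots are written LZ4-compressed, so the read path must decompress. The following is a sketch of reading a snapshot back through any client, mirroring `restoreSnapshot` later in this patch; the output path is a placeholder.

```go
package main

import (
	"context"
	"io"
	"os"

	"github.com/benbjohnson/litestream"
	"github.com/pierrec/lz4/v4"
)

// copySnapshot downloads and decompresses one snapshot to a local file.
func copySnapshot(ctx context.Context, c litestream.ReplicaClient, generation string, index int) error {
	rd, err := c.SnapshotReader(ctx, generation, index)
	if err != nil {
		return err
	}
	defer rd.Close()

	f, err := os.Create("/tmp/restored.snapshot") // placeholder path
	if err != nil {
		return err
	}
	defer f.Close()

	// Snapshot bytes are stored compressed; decompress while copying.
	if _, err := io.Copy(f, lz4.NewReader(rd)); err != nil {
		return err
	}
	return f.Close()
}
```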
+ if err := r.deleteSnapshotsBeforeIndex(ctx, generation, snapshot.Index); err != nil { + return fmt.Errorf("delete snapshots before index: %w", err) + } else if err := r.deleteWALSegmentsBeforeIndex(ctx, generation, snapshot.Index); err != nil { + return fmt.Errorf("delete wal segments before index: %w", err) + } + } + + return nil +} + +func (r *Replica) deleteSnapshotsBeforeIndex(ctx context.Context, generation string, index int) error { + itr, err := r.Client.Snapshots(ctx, generation) + if err != nil { + return fmt.Errorf("fetch snapshots: %w", err) + } + defer itr.Close() + + for itr.Next() { + info := itr.Snapshot() + if info.Index >= index { + continue + } + + if err := r.Client.DeleteSnapshot(ctx, info.Generation, info.Index); err != nil { + return fmt.Errorf("delete snapshot %s/%08x: %w", info.Generation, info.Index, err) + } + } + + // log.Printf("%s(%s): retainer: deleting snapshots before %s/%08x; n=%d", r.db.Path(), r.Name(), generation, index, n) + return itr.Close() +} + +func (r *Replica) deleteWALSegmentsBeforeIndex(ctx context.Context, generation string, index int) error { + itr, err := r.Client.WALSegments(ctx, generation) + if err != nil { + return fmt.Errorf("fetch wal segments: %w", err) + } + defer itr.Close() + + var a []Pos + for itr.Next() { + info := itr.WALSegment() + if info.Index >= index { + continue + } + a = append(a, info.Pos()) + } + if err := itr.Close(); err != nil { + return err + } + + if err := r.Client.DeleteWALSegments(ctx, a); err != nil { + return fmt.Errorf("delete wal segments: %w", err) + } + + // log.Printf("%s(%s): retainer: deleting wal segment %s/%08x:%d", r.db.Path(), r.Name(), generation, index, offset) + return nil +} + +// monitor runs in a separate goroutine and continuously replicates the DB. +func (r *Replica) monitor(ctx context.Context) { + ticker := time.NewTicker(r.SyncInterval) + defer ticker.Stop() // Continuously check for new data to replicate. ch := make(chan struct{}) close(ch) var notify <-chan struct{} = ch - for { + for initial := true; ; initial = false { + // Enforce a minimum time between synchronization. + if !initial { + select { + case <-ctx.Done(): + return + case <-ticker.C: + } + } + + // Wait for changes to the database. select { case <-ctx.Done(): return @@ -471,7 +631,7 @@ func (r *FileReplica) monitor(ctx context.Context) { } // retainer runs in a separate goroutine and handles retention. -func (r *FileReplica) retainer(ctx context.Context) { +func (r *Replica) retainer(ctx context.Context) { // Disable retention enforcement if retention period is non-positive. if r.Retention <= 0 { return @@ -500,7 +660,7 @@ func (r *FileReplica) retainer(ctx context.Context) { } // snapshotter runs in a separate goroutine and handles snapshotting. -func (r *FileReplica) snapshotter(ctx context.Context) { +func (r *Replica) snapshotter(ctx context.Context) { if r.SnapshotInterval <= 0 { return } @@ -513,7 +673,7 @@ func (r *FileReplica) snapshotter(ctx context.Context) { case <-ctx.Done(): return case <-ticker.C: - if err := r.Snapshot(ctx); err != nil && err != ErrNoGeneration { + if _, err := r.Snapshot(ctx); err != nil && err != ErrNoGeneration { log.Printf("%s(%s): snapshotter error: %s", r.db.Path(), r.Name(), err) continue } @@ -522,10 +682,10 @@ func (r *FileReplica) snapshotter(ctx context.Context) { } // validator runs in a separate goroutine and handles periodic validation. 
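The rewritten monitor loop rate-limits syncs rather than replicating on every notification. Distilled to its pacing logic, as a sketch with the notify channel and sync function injected:

```go
package main

import (
	"context"
	"time"
)

// runMonitor syncs once immediately, then requires both a tick (the
// minimum interval) and a change notification before each later sync.
func runMonitor(ctx context.Context, interval time.Duration, notify <-chan struct{}, sync func()) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for initial := true; ; initial = false {
		// Enforce a minimum time between syncs after the first pass.
		if !initial {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
			}
		}

		// Wait for a change to the database.
		select {
		case <-ctx.Done():
			return
		case <-notify:
		}

		sync()
	}
}
```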
-func (r *FileReplica) validator(ctx context.Context) { +func (r *Replica) validator(ctx context.Context) { // Initialize counters since validation occurs infrequently. for _, status := range []string{"ok", "error"} { - internal.ReplicaValidationTotalCounterVec.WithLabelValues(r.db.Path(), r.Name(), status).Add(0) + replicaValidationTotalCounterVec.WithLabelValues(r.db.Path(), r.Name(), status).Add(0) } // Exit validation if interval is not set. @@ -541,7 +701,7 @@ func (r *FileReplica) validator(ctx context.Context) { case <-ctx.Done(): return case <-ticker.C: - if err := ValidateReplica(ctx, r); err != nil { + if err := r.Validate(ctx); err != nil { log.Printf("%s(%s): validation error: %s", r.db.Path(), r.Name(), err) continue } @@ -549,649 +709,51 @@ func (r *FileReplica) validator(ctx context.Context) { } } -// CalcPos returns the position for the replica for the current generation. -// Returns a zero value if there is no active generation. -func (r *FileReplica) CalcPos(ctx context.Context, generation string) (pos Pos, err error) { - pos.Generation = generation +// ReplicaClient represents client to connect to a Replica. +type ReplicaClient interface { + // Returns the type of client. + Type() string - // Find maximum snapshot index. - if pos.Index, err = r.MaxSnapshotIndex(generation); err != nil { - return Pos{}, err - } + // Returns a list of available generations. + Generations(ctx context.Context) ([]string, error) - // Find the max WAL file within WAL. - fis, err := ioutil.ReadDir(r.WALDir(generation)) - if os.IsNotExist(err) { - return pos, nil // no replicated wal, start at snapshot index. - } else if err != nil { - return Pos{}, err - } + // Deletes all snapshots & WAL segments within a generation. + DeleteGeneration(ctx context.Context, generation string) error - index := -1 - for _, fi := range fis { - if idx, _, _, err := ParseWALPath(fi.Name()); err != nil { - continue // invalid wal filename - } else if index == -1 || idx > index { - index = idx - } - } - if index == -1 { - return pos, nil // wal directory exists but no wal files, return snapshot position - } - pos.Index = index + // Returns an iterator of all snapshots within a generation on the replica. + Snapshots(ctx context.Context, generation string) (SnapshotIterator, error) - // Determine current offset. - fi, err := os.Stat(r.WALPath(pos.Generation, pos.Index)) - if err != nil { - return Pos{}, err - } - pos.Offset = fi.Size() + // Writes LZ4 compressed snapshot data to the replica at a given index + // within a generation. Returns metadata for the snapshot. + WriteSnapshot(ctx context.Context, generation string, index int, r io.Reader) (SnapshotInfo, error) - return pos, nil + // Deletes a snapshot with the given generation & index. + DeleteSnapshot(ctx context.Context, generation string, index int) error + + // Returns a reader that contains LZ4 compressed snapshot data for a + // given index within a generation. Returns an os.ErrNotFound error if + // the snapshot does not exist. + SnapshotReader(ctx context.Context, generation string, index int) (io.ReadCloser, error) + + // Returns an iterator of all WAL segments within a generation on the replica. + WALSegments(ctx context.Context, generation string) (WALSegmentIterator, error) + + // Writes an LZ4 compressed WAL segment at a given position. + // Returns metadata for the written segment. + WriteWALSegment(ctx context.Context, pos Pos, r io.Reader) (WALSegmentInfo, error) + + // Deletes one or more WAL segments at the given positions. 
+ DeleteWALSegments(ctx context.Context, a []Pos) error + + // Returns a reader that contains an LZ4 compressed WAL segment at a given + // index/offset within a generation. Returns an os.ErrNotFound error if the + // WAL segment does not exist. + WALSegmentReader(ctx context.Context, pos Pos) (io.ReadCloser, error) } -// Snapshot copies the entire database to the replica path. -func (r *FileReplica) Snapshot(ctx context.Context) error { - // Find current position of database. - pos, err := r.db.Pos() - if err != nil { - return fmt.Errorf("cannot determine current db generation: %w", err) - } else if pos.IsZero() { - return ErrNoGeneration - } - return r.snapshot(ctx, pos.Generation, pos.Index) -} - -// snapshot copies the entire database to the replica path. -func (r *FileReplica) snapshot(ctx context.Context, generation string, index int) error { - r.muf.Lock() - defer r.muf.Unlock() - - // Issue a passive checkpoint to flush any pages to disk before snapshotting. - if _, err := r.db.db.ExecContext(ctx, `PRAGMA wal_checkpoint(PASSIVE);`); err != nil { - return fmt.Errorf("pre-snapshot checkpoint: %w", err) - } - - // Acquire a read lock on the database during snapshot to prevent checkpoints. - tx, err := r.db.db.Begin() - if err != nil { - return err - } else if _, err := tx.ExecContext(ctx, `SELECT COUNT(1) FROM _litestream_seq;`); err != nil { - _ = tx.Rollback() - return err - } - defer func() { _ = tx.Rollback() }() - - // Ignore if we already have a snapshot for the given WAL index. - snapshotPath := r.SnapshotPath(generation, index) - if _, err := os.Stat(snapshotPath); err == nil { - return nil - } - - startTime := time.Now() - - if err := mkdirAll(filepath.Dir(snapshotPath), r.db.dirmode, r.db.diruid, r.db.dirgid); err != nil { - return err - } - - // Open db file descriptor, if not already open. - if r.f == nil { - if r.f, err = os.Open(r.db.Path()); err != nil { - return err - } - } - - if _, err := r.f.Seek(0, io.SeekStart); err != nil { - return err - } - - fi, err := r.f.Stat() - if err != nil { - return err - } - - w, err := createFile(snapshotPath+".tmp", fi.Mode(), r.db.uid, r.db.gid) - if err != nil { - return err - } - defer w.Close() - - zr := lz4.NewWriter(w) - defer zr.Close() - - // Copy & compress file contents to temporary file. - if _, err := io.Copy(zr, r.f); err != nil { - return err - } else if err := zr.Close(); err != nil { - return err - } else if err := w.Sync(); err != nil { - return err - } else if err := w.Close(); err != nil { - return err - } - - // Move compressed file to final location. - if err := os.Rename(snapshotPath+".tmp", snapshotPath); err != nil { - return err - } - - log.Printf("%s(%s): snapshot: creating %s/%08x t=%s", r.db.Path(), r.Name(), generation, index, time.Since(startTime).Truncate(time.Millisecond)) - return nil -} - -// snapshotN returns the number of snapshots for a generation. -func (r *FileReplica) snapshotN(generation string) (int, error) { - fis, err := ioutil.ReadDir(r.SnapshotDir(generation)) - if os.IsNotExist(err) { - return 0, nil - } else if err != nil { - return 0, err - } - - var n int - for _, fi := range fis { - if _, _, err := ParseSnapshotPath(fi.Name()); err == nil { - n++ - } - } - return n, nil -} - -// Sync replays data from the shadow WAL into the file replica. -func (r *FileReplica) Sync(ctx context.Context) (err error) { - // Clear last position if if an error occurs during sync. 
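The iterators returned by `Snapshots` and `WALSegments` follow a Next/Close contract in which `Close` must also be checked, since iterators may report errors they hit while advancing. The helper below shows the consumption pattern; it mirrors `snapshotN` above and is a sketch, not part of the patch.

```go
package main

import (
	"context"

	"github.com/benbjohnson/litestream"
)

// countWALSegments counts segments in a generation via any ReplicaClient.
func countWALSegments(ctx context.Context, c litestream.ReplicaClient, generation string) (int, error) {
	itr, err := c.WALSegments(ctx, generation)
	if err != nil {
		return 0, err
	}
	defer itr.Close()

	var n int
	for itr.Next() {
		n++
	}
	// Close reports any error the iterator hit while advancing.
	return n, itr.Close()
}
```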
- defer func() { - if err != nil { - r.mu.Lock() - r.pos = Pos{} - r.mu.Unlock() - } - }() - - // Find current position of database. - dpos, err := r.db.Pos() - if err != nil { - return fmt.Errorf("cannot determine current generation: %w", err) - } else if dpos.IsZero() { - return fmt.Errorf("no generation, waiting for data") - } - generation := dpos.Generation - - Tracef("%s(%s): replica sync: db.pos=%s", r.db.Path(), r.Name(), dpos) - - // Create snapshot if no snapshots exist for generation. - if n, err := r.snapshotN(generation); err != nil { - return err - } else if n == 0 { - if err := r.snapshot(ctx, generation, dpos.Index); err != nil { - return err - } - r.snapshotTotalGauge.Set(1.0) - } else { - r.snapshotTotalGauge.Set(float64(n)) - } - - // Determine position, if necessary. - if r.LastPos().Generation != generation { - pos, err := r.CalcPos(ctx, generation) - if err != nil { - return fmt.Errorf("cannot determine replica position: %s", err) - } - - Tracef("%s(%s): replica sync: calc new pos: %s", r.db.Path(), r.Name(), pos) - r.mu.Lock() - r.pos = pos - r.mu.Unlock() - } - - // Read all WAL files since the last position. - for { - if err = r.syncWAL(ctx); err == io.EOF { - break - } else if err != nil { - return err - } - } - - // Compress any old WAL files. - if generation != "" { - if err := r.compress(ctx, generation); err != nil { - return fmt.Errorf("cannot compress: %s", err) - } - } - - return nil -} - -func (r *FileReplica) syncWAL(ctx context.Context) (err error) { - rd, err := r.db.ShadowWALReader(r.LastPos()) - if err == io.EOF { - return err - } else if err != nil { - return fmt.Errorf("wal reader: %w", err) - } - defer rd.Close() - - // Ensure parent directory exists for WAL file. - filename := r.WALPath(rd.Pos().Generation, rd.Pos().Index) - if err := mkdirAll(filepath.Dir(filename), r.db.dirmode, r.db.diruid, r.db.dirgid); err != nil { - return err - } - - w, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, r.db.mode) - if err != nil { - return err - } - defer w.Close() - - _ = os.Chown(filename, r.db.uid, r.db.gid) - - // Seek, copy & sync WAL contents. - if _, err := w.Seek(rd.Pos().Offset, io.SeekStart); err != nil { - return err - } - - // Copy header if at offset zero. - var psalt uint64 // previous salt value - if pos := rd.Pos(); pos.Offset == 0 { - buf := make([]byte, WALHeaderSize) - if _, err := io.ReadFull(rd, buf); err != nil { - return err - } - - psalt = binary.BigEndian.Uint64(buf[16:24]) - - n, err := w.Write(buf) - if err != nil { - return err - } - r.walBytesCounter.Add(float64(n)) - } - - // Copy frames. - for { - pos := rd.Pos() - assert(pos.Offset == frameAlign(pos.Offset, r.db.pageSize), "shadow wal reader not frame aligned") - - buf := make([]byte, WALFrameHeaderSize+r.db.pageSize) - if _, err := io.ReadFull(rd, buf); err == io.EOF { - break - } else if err != nil { - return err - } - - // Verify salt matches the previous frame/header read. - salt := binary.BigEndian.Uint64(buf[8:16]) - if psalt != 0 && psalt != salt { - return fmt.Errorf("replica salt mismatch: %s", filepath.Base(filename)) - } - psalt = salt - - n, err := w.Write(buf) - if err != nil { - return err - } - r.walBytesCounter.Add(float64(n)) - } - - if err := w.Sync(); err != nil { - return err - } else if err := w.Close(); err != nil { - return err - } - - // Save last replicated position. 
- r.mu.Lock() - r.pos = rd.Pos() - r.mu.Unlock() - - // Track current position - r.walIndexGauge.Set(float64(rd.Pos().Index)) - r.walOffsetGauge.Set(float64(rd.Pos().Offset)) - - return nil -} - -// compress compresses all WAL files before the current one. -func (r *FileReplica) compress(ctx context.Context, generation string) error { - filenames, err := filepath.Glob(filepath.Join(r.WALDir(generation), "*.wal")) - if err != nil { - return err - } else if len(filenames) <= 1 { - return nil // no uncompressed wal files or only one active file - } - - // Ensure filenames are sorted & remove the last (active) WAL. - sort.Strings(filenames) - filenames = filenames[:len(filenames)-1] - - // Compress each file from oldest to newest. - for _, filename := range filenames { - select { - case <-ctx.Done(): - return err - default: - } - - dst := filename + ".lz4" - if err := compressWALFile(filename, dst, r.db.uid, r.db.gid); err != nil { - return err - } else if err := os.Remove(filename); err != nil { - return err - } - } - - return nil -} - -// SnapshotReader returns a reader for snapshot data at the given generation/index. -// Returns os.ErrNotExist if no matching index is found. -func (r *FileReplica) SnapshotReader(ctx context.Context, generation string, index int) (io.ReadCloser, error) { - dir := r.SnapshotDir(generation) - fis, err := ioutil.ReadDir(dir) - if err != nil { - return nil, err - } - - for _, fi := range fis { - // Parse index from snapshot filename. Skip if no match. - idx, ext, err := ParseSnapshotPath(fi.Name()) - if err != nil || index != idx { - continue - } - - // Open & return the file handle if uncompressed. - f, err := os.Open(filepath.Join(dir, fi.Name())) - if err != nil { - return nil, err - } else if ext == ".snapshot" { - return f, nil // not compressed, return as-is. - } - assert(ext == ".snapshot.lz4", "invalid snapshot extension") - - // If compressed, wrap in an lz4 reader and return with wrapper to - // ensure that the underlying file is closed. - return internal.NewReadCloser(lz4.NewReader(f), f), nil - } - return nil, os.ErrNotExist -} - -// WALReader returns a reader for WAL data at the given index. -// Returns os.ErrNotExist if no matching index is found. -func (r *FileReplica) WALReader(ctx context.Context, generation string, index int) (io.ReadCloser, error) { - filename := r.WALPath(generation, index) - - // Attempt to read uncompressed file first. - f, err := os.Open(filename) - if err == nil { - return f, nil // file exist, return - } else if err != nil && !os.IsNotExist(err) { - return nil, err - } - - // Otherwise read the compressed file. Return error if file doesn't exist. - f, err = os.Open(filename + ".lz4") - if err != nil { - return nil, err - } - - // If compressed, wrap in an lz4 reader and return with wrapper to - // ensure that the underlying file is closed. - return internal.NewReadCloser(lz4.NewReader(f), f), nil -} - -// EnforceRetention forces a new snapshot once the retention interval has passed. -// Older snapshots and WAL files are then removed. -func (r *FileReplica) EnforceRetention(ctx context.Context) (err error) { - // Find current position of database. - pos, err := r.db.Pos() - if err != nil { - return fmt.Errorf("cannot determine current generation: %w", err) - } else if pos.IsZero() { - return fmt.Errorf("no generation, waiting for data") - } - - // Obtain list of snapshots that are within the retention period. 
- snapshots, err := r.Snapshots(ctx) - if err != nil { - return fmt.Errorf("cannot obtain snapshot list: %w", err) - } - snapshots = FilterSnapshotsAfter(snapshots, time.Now().Add(-r.Retention)) - - // If no retained snapshots exist, create a new snapshot. - if len(snapshots) == 0 { - if err := r.snapshot(ctx, pos.Generation, pos.Index); err != nil { - return fmt.Errorf("cannot snapshot: %w", err) - } - snapshots = append(snapshots, &SnapshotInfo{Generation: pos.Generation, Index: pos.Index}) - } - - // Loop over generations and delete unretained snapshots & WAL files. - generations, err := r.Generations(ctx) - if err != nil { - return fmt.Errorf("cannot obtain generations: %w", err) - } - for _, generation := range generations { - // Find earliest retained snapshot for this generation. - snapshot := FindMinSnapshotByGeneration(snapshots, generation) - - // Delete generations if it has no snapshots being retained. - if snapshot == nil { - log.Printf("%s(%s): retainer: deleting generation %q has no retained snapshots, deleting", r.db.Path(), r.Name(), generation) - if err := os.RemoveAll(r.GenerationDir(generation)); err != nil { - return fmt.Errorf("cannot delete generation %q dir: %w", generation, err) - } - continue - } - - // Otherwise delete all snapshots & WAL files before a lowest retained index. - if err := r.deleteGenerationSnapshotsBefore(ctx, generation, snapshot.Index); err != nil { - return fmt.Errorf("cannot delete generation %q snapshots before index %d: %w", generation, snapshot.Index, err) - } else if err := r.deleteGenerationWALBefore(ctx, generation, snapshot.Index); err != nil { - return fmt.Errorf("cannot delete generation %q wal before index %d: %w", generation, snapshot.Index, err) - } - } - - return nil -} - -// deleteGenerationSnapshotsBefore deletes snapshot before a given index. -func (r *FileReplica) deleteGenerationSnapshotsBefore(ctx context.Context, generation string, index int) (err error) { - dir := r.SnapshotDir(generation) - - fis, err := ioutil.ReadDir(dir) - if os.IsNotExist(err) { - return nil - } else if err != nil { - return err - } - - var n int - for _, fi := range fis { - idx, _, err := ParseSnapshotPath(fi.Name()) - if err != nil { - continue - } else if idx >= index { - continue - } - - if err := os.Remove(filepath.Join(dir, fi.Name())); err != nil { - return err - } - n++ - } - if n > 0 { - log.Printf("%s(%s): retainer: deleting snapshots before %s/%08x; n=%d", r.db.Path(), r.Name(), generation, index, n) - } - - return nil -} - -// deleteGenerationWALBefore deletes WAL files before a given index. -func (r *FileReplica) deleteGenerationWALBefore(ctx context.Context, generation string, index int) (err error) { - dir := r.WALDir(generation) - - fis, err := ioutil.ReadDir(dir) - if os.IsNotExist(err) { - return nil - } else if err != nil { - return err - } - - var n int - for _, fi := range fis { - idx, _, _, err := ParseWALPath(fi.Name()) - if err != nil { - continue - } else if idx >= index { - continue - } - - if err := os.Remove(filepath.Join(dir, fi.Name())); err != nil { - return err - } - n++ - } - if n > 0 { - log.Printf("%s(%s): retainer: deleting wal files before %s/%08x n=%d", r.db.Path(), r.Name(), generation, index, n) - } - - return nil -} - -// SnapshotIndexAt returns the highest index for a snapshot within a generation -// that occurs before timestamp. If timestamp is zero, returns the latest snapshot. 
-func SnapshotIndexAt(ctx context.Context, r Replica, generation string, timestamp time.Time) (int, error) { - snapshots, err := r.Snapshots(ctx) - if err != nil { - return 0, err - } else if len(snapshots) == 0 { - return 0, ErrNoSnapshots - } - - snapshotIndex := -1 - var max time.Time - for _, snapshot := range snapshots { - if snapshot.Generation != generation { - continue // generation mismatch, skip - } else if !timestamp.IsZero() && snapshot.CreatedAt.After(timestamp) { - continue // after timestamp, skip - } - - // Use snapshot if it newer. - if max.IsZero() || snapshot.CreatedAt.After(max) { - snapshotIndex, max = snapshot.Index, snapshot.CreatedAt - } - } - - if snapshotIndex == -1 { - return 0, ErrNoSnapshots - } - return snapshotIndex, nil -} - -// SnapshotIndexbyIndex returns the highest index for a snapshot within a generation -// that occurs before a given index. If index is MaxInt32, returns the latest snapshot. -func SnapshotIndexByIndex(ctx context.Context, r Replica, generation string, index int) (int, error) { - snapshots, err := r.Snapshots(ctx) - if err != nil { - return 0, err - } else if len(snapshots) == 0 { - return 0, ErrNoSnapshots - } - - snapshotIndex := -1 - for _, snapshot := range snapshots { - if index < math.MaxInt32 && snapshot.Index > index { - continue // after index, skip - } - - // Use snapshot if it newer. - if snapshotIndex == -1 || snapshotIndex >= snapshotIndex { - snapshotIndex = snapshot.Index - } - } - - if snapshotIndex == -1 { - return 0, ErrNoSnapshots - } - return snapshotIndex, nil -} - -// WALIndexAt returns the highest index for a WAL file that occurs before -// maxIndex & timestamp. If timestamp is zero, returns the highest WAL index. -// Returns -1 if no WAL found and MaxInt32 specified. -func WALIndexAt(ctx context.Context, r Replica, generation string, maxIndex int, timestamp time.Time) (int, error) { - wals, err := r.WALs(ctx) - if err != nil { - return 0, err - } - - index := -1 - for _, wal := range wals { - if wal.Generation != generation { - continue - } - - if !timestamp.IsZero() && wal.CreatedAt.After(timestamp) { - continue // after timestamp, skip - } else if wal.Index > maxIndex { - continue // after max index, skip - } else if wal.Index < index { - continue // earlier index, skip - } - - index = wal.Index - } - - // If max index is specified but not found, return an error. - if maxIndex != math.MaxInt32 && index != maxIndex { - return index, fmt.Errorf("unable to locate index %d in generation %q, highest index was %d", maxIndex, generation, index) - } - return index, nil -} - -// compressWALFile compresses a file and replaces it with a new file with a .lz4 extension. -// Do not use this on database files because of issues with non-OFD locks. -func compressWALFile(src, dst string, uid, gid int) error { - r, err := os.Open(src) - if err != nil { - return err - } - defer r.Close() - - fi, err := r.Stat() - if err != nil { - return err - } - - w, err := createFile(dst+".tmp", fi.Mode(), uid, gid) - if err != nil { - return err - } - defer w.Close() - - zr := lz4.NewWriter(w) - defer zr.Close() - - // Copy & compress file contents to temporary file. - if _, err := io.Copy(zr, r); err != nil { - return err - } else if err := zr.Close(); err != nil { - return err - } else if err := w.Sync(); err != nil { - return err - } else if err := w.Close(); err != nil { - return err - } - - // Move compressed file to final location. 
- return os.Rename(dst+".tmp", dst) -} - -// ValidateReplica restores the most recent data from a replica and validates +// Validate restores the most recent data from a replica and validates // that the resulting database matches the current database. -func ValidateReplica(ctx context.Context, r Replica) error { +func (r *Replica) Validate(ctx context.Context) error { db := r.DB() // Restore replica to a temporary directory. @@ -1209,12 +771,12 @@ func ValidateReplica(ctx context.Context, r Replica) error { } // Wait until replica catches up to position. - if err := waitForReplica(ctx, r, pos); err != nil { + if err := r.waitForReplica(ctx, pos); err != nil { return fmt.Errorf("cannot wait for replica: %w", err) } restorePath := filepath.Join(tmpdir, "replica") - if err := RestoreReplica(ctx, r, RestoreOptions{ + if err := r.Restore(ctx, RestoreOptions{ OutputPath: restorePath, ReplicaName: r.Name(), Generation: pos.Generation, @@ -1248,11 +810,11 @@ func ValidateReplica(ctx context.Context, r Replica) error { // Validate checksums match. if mismatch { - internal.ReplicaValidationTotalCounterVec.WithLabelValues(db.Path(), r.Name(), "error").Inc() + replicaValidationTotalCounterVec.WithLabelValues(r.db.Path(), r.Name(), "error").Inc() return ErrChecksumMismatch } - internal.ReplicaValidationTotalCounterVec.WithLabelValues(db.Path(), r.Name(), "ok").Inc() + replicaValidationTotalCounterVec.WithLabelValues(r.db.Path(), r.Name(), "ok").Inc() if err := os.RemoveAll(tmpdir); err != nil { return fmt.Errorf("cannot remove temporary validation directory: %w", err) @@ -1261,7 +823,7 @@ func ValidateReplica(ctx context.Context, r Replica) error { } // waitForReplica blocks until replica reaches at least the given position. -func waitForReplica(ctx context.Context, r Replica, pos Pos) error { +func (r *Replica) waitForReplica(ctx context.Context, pos Pos) error { db := r.DB() ticker := time.NewTicker(500 * time.Millisecond) @@ -1284,9 +846,9 @@ func waitForReplica(ctx context.Context, r Replica, pos Pos) error { } // Obtain current position of replica, check if past target position. - curr, err := r.CalcPos(ctx, pos.Generation) - if err != nil { - log.Printf("%s(%s): validator: cannot obtain replica position: %s", db.Path(), r.Name(), err) + curr := r.Pos() + if curr.IsZero() { + log.Printf("%s(%s): validator: no replica position available", db.Path(), r.Name()) continue } @@ -1312,3 +874,509 @@ func waitForReplica(ctx context.Context, r Replica, pos Pos) error { return nil } } + +// GenerationCreatedAt returns the earliest creation time of any snapshot. +// Returns zero time if no snapshots exist. +func (r *Replica) GenerationCreatedAt(ctx context.Context, generation string) (time.Time, error) { + var min time.Time + + itr, err := r.Client.Snapshots(ctx, generation) + if err != nil { + return min, err + } + defer itr.Close() + + for itr.Next() { + if info := itr.Snapshot(); min.IsZero() || info.CreatedAt.Before(min) { + min = info.CreatedAt + } + } + return min, itr.Close() +} + +// GenerationTimeBounds returns the creation time & last updated time of a generation. +// Returns zero time if no snapshots or WAL segments exist. +func (r *Replica) GenerationTimeBounds(ctx context.Context, generation string) (createdAt, updatedAt time.Time, err error) { + // Iterate over snapshots. 
+	sitr, err := r.Client.Snapshots(ctx, generation)
+	if err != nil {
+		return createdAt, updatedAt, err
+	}
+	defer sitr.Close()
+
+	for sitr.Next() {
+		info := sitr.Snapshot()
+		if createdAt.IsZero() || info.CreatedAt.Before(createdAt) {
+			createdAt = info.CreatedAt
+		}
+		if updatedAt.IsZero() || info.CreatedAt.After(updatedAt) {
+			updatedAt = info.CreatedAt
+		}
+	}
+	if err := sitr.Close(); err != nil {
+		return createdAt, updatedAt, err
+	}
+
+	// Iterate over WAL segments.
+	witr, err := r.Client.WALSegments(ctx, generation)
+	if err != nil {
+		return createdAt, updatedAt, err
+	}
+	defer witr.Close()
+
+	for witr.Next() {
+		info := witr.WALSegment()
+		if createdAt.IsZero() || info.CreatedAt.Before(createdAt) {
+			createdAt = info.CreatedAt
+		}
+		if updatedAt.IsZero() || info.CreatedAt.After(updatedAt) {
+			updatedAt = info.CreatedAt
+		}
+	}
+	if err := witr.Close(); err != nil {
+		return createdAt, updatedAt, err
+	}
+
+	return createdAt, updatedAt, nil
+}
+
+// CalcRestoreTarget returns a generation to restore from.
+func (r *Replica) CalcRestoreTarget(ctx context.Context, opt RestoreOptions) (generation string, updatedAt time.Time, err error) {
+	var target struct {
+		generation string
+		updatedAt  time.Time
+	}
+
+	generations, err := r.Client.Generations(ctx)
+	if err != nil {
+		return "", time.Time{}, fmt.Errorf("cannot fetch generations: %w", err)
+	}
+
+	// Search generations for one that contains the requested timestamp.
+	for _, generation := range generations {
+		// Skip generation if it does not match filter.
+		if opt.Generation != "" && generation != opt.Generation {
+			continue
+		}
+
+		// Determine the time bounds for the generation.
+		createdAt, updatedAt, err := r.GenerationTimeBounds(ctx, generation)
+		if err != nil {
+			return "", time.Time{}, fmt.Errorf("generation time bounds: %w", err)
+		}
+
+		// Skip if it does not contain timestamp.
+		if !opt.Timestamp.IsZero() {
+			if opt.Timestamp.Before(createdAt) || opt.Timestamp.After(updatedAt) {
+				continue
+			}
+		}
+
+		// Use the most recently updated generation if we have multiple candidates.
+		if !updatedAt.After(target.updatedAt) {
+			continue
+		}
+
+		target.generation = generation
+		target.updatedAt = updatedAt
+	}
+
+	return target.generation, target.updatedAt, nil
+}
+
+// Restore restores the database from the replica based on the options given.
+// This method restores into opt.OutputPath, which must be specified. It can
+// optionally restore from a specific generation or it will automatically
+// choose the best one. Finally, a timestamp can be specified to restore the
+// database to a specific point-in-time.
+func (r *Replica) Restore(ctx context.Context, opt RestoreOptions) (err error) {
+	// Validate options.
+	if opt.OutputPath == "" {
+		return fmt.Errorf("output path required")
+	} else if opt.Generation == "" && opt.Index != math.MaxInt32 {
+		return fmt.Errorf("must specify generation when restoring to index")
+	} else if opt.Index != math.MaxInt32 && !opt.Timestamp.IsZero() {
+		return fmt.Errorf("cannot specify index & timestamp to restore")
+	}
+
+	// Ensure logger exists.
+	logger := opt.Logger
+	if logger == nil {
+		logger = log.New(ioutil.Discard, "", 0)
+	}
+
+	logPrefix := r.Name()
+	if db := r.DB(); db != nil {
+		logPrefix = fmt.Sprintf("%s(%s)", db.Path(), r.Name())
+	}
+
+	// Ensure output path does not already exist.
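A sketch of a point-in-time restore built on `CalcRestoreTarget` and `Restore`, using only option fields read by the surrounding code. The output path is a placeholder, and `RestoreOptions` is constructed literally here rather than through any defaults helper.

```go
package main

import (
	"context"
	"fmt"
	"math"
	"time"

	"github.com/benbjohnson/litestream"
)

// restoreAt restores the replica to its state at a given wall-clock time.
func restoreAt(ctx context.Context, r *litestream.Replica, ts time.Time) error {
	opt := litestream.RestoreOptions{
		OutputPath: "/tmp/restored.db", // placeholder
		Index:      math.MaxInt32,      // no index filter
		Timestamp:  ts,
	}

	// Pick the generation whose time bounds contain the timestamp.
	generation, _, err := r.CalcRestoreTarget(ctx, opt)
	if err != nil {
		return err
	} else if generation == "" {
		return fmt.Errorf("no generation contains %s", ts.Format(time.RFC3339))
	}

	opt.Generation = generation
	return r.Restore(ctx, opt)
}
```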
+	if _, err := os.Stat(opt.OutputPath); err == nil {
+		return fmt.Errorf("cannot restore, output path already exists: %s", opt.OutputPath)
+	} else if err != nil && !os.IsNotExist(err) {
+		return err
+	}
+
+	// Find latest snapshot that occurs before timestamp or index.
+	var minWALIndex int
+	if opt.Index < math.MaxInt32 {
+		if minWALIndex, err = r.SnapshotIndexByIndex(ctx, opt.Generation, opt.Index); err != nil {
+			return fmt.Errorf("cannot find snapshot index: %w", err)
+		}
+	} else {
+		if minWALIndex, err = r.SnapshotIndexAt(ctx, opt.Generation, opt.Timestamp); err != nil {
+			return fmt.Errorf("cannot find snapshot index by timestamp: %w", err)
+		}
+	}
+
+	// Compute list of offsets for each WAL index.
+	walSegmentMap, err := r.walSegmentMap(ctx, opt.Generation, opt.Index, opt.Timestamp)
+	if err != nil {
+		return fmt.Errorf("cannot find max wal index for restore: %w", err)
+	}
+
+	// Find the maximum WAL index that occurs before timestamp.
+	maxWALIndex := -1
+	for index := range walSegmentMap {
+		if index > maxWALIndex {
+			maxWALIndex = index
+		}
+	}
+
+	// Ensure that we found the specific index, if one was specified.
+	if opt.Index != math.MaxInt32 && maxWALIndex != opt.Index {
+		return fmt.Errorf("unable to locate index %d in generation %q, highest index was %d", opt.Index, opt.Generation, maxWALIndex)
+	}
+
+	// If no WAL files were found, mark this as a snapshot-only restore.
+	snapshotOnly := maxWALIndex == -1
+
+	// Initialize starting position.
+	pos := Pos{Generation: opt.Generation, Index: minWALIndex}
+	tmpPath := opt.OutputPath + ".tmp"
+
+	// Copy snapshot to output path.
+	logger.Printf("%s: restoring snapshot %s/%08x to %s", logPrefix, opt.Generation, minWALIndex, tmpPath)
+	if err := r.restoreSnapshot(ctx, pos.Generation, pos.Index, tmpPath); err != nil {
+		return fmt.Errorf("cannot restore snapshot: %w", err)
+	}
+
+	// If no WAL files available, move snapshot to final path & exit early.
+	if snapshotOnly {
+		logger.Printf("%s: snapshot only, finalizing database", logPrefix)
+		return os.Rename(tmpPath, opt.OutputPath)
+	}
+
+	// Begin processing WAL files.
+	logger.Printf("%s: restoring wal files: generation=%s index=[%08x,%08x]", logPrefix, opt.Generation, minWALIndex, maxWALIndex)
+
+	// Fill input channel with all WAL indexes to be loaded in order.
+	// Verify every index has at least one offset.
+	ch := make(chan int, maxWALIndex-minWALIndex+1)
+	for index := minWALIndex; index <= maxWALIndex; index++ {
+		if len(walSegmentMap[index]) == 0 {
+			return fmt.Errorf("missing WAL index: %s/%08x", opt.Generation, index)
+		}
+		ch <- index
+	}
+	close(ch)
+
+	// Track load state for each WAL.
+	var mu sync.Mutex
+	cond := sync.NewCond(&mu)
+	walStates := make([]walRestoreState, maxWALIndex-minWALIndex+1)
+
+	parallelism := opt.Parallelism
+	if parallelism < 1 {
+		parallelism = 1
+	}
+
+	// Download WAL files to disk in parallel.
+	g, ctx := errgroup.WithContext(ctx)
+	for i := 0; i < parallelism; i++ {
+		g.Go(func() error {
+			for {
+				select {
+				case <-ctx.Done():
+					cond.Broadcast()
+					return ctx.Err()
+				case index, ok := <-ch:
+					if !ok {
+						cond.Broadcast()
+						return nil
+					}

+					startTime := time.Now()
+
+					err := r.downloadWAL(ctx, opt.Generation, index, walSegmentMap[index], tmpPath)
+					if err != nil {
+						err = fmt.Errorf("cannot download wal %s/%08x: %w", opt.Generation, index, err)
+					}
+
+					// Mark index as ready-to-apply and notify applying code.
+					mu.Lock()
+					walStates[index-minWALIndex] = walRestoreState{ready: true, err: err}
+					mu.Unlock()
+					cond.Broadcast()
+
+					// Returning the error here will cancel the other goroutines.
+					if err != nil {
+						return err
+					}
+
+					logger.Printf("%s: downloaded wal %s/%08x elapsed=%s",
+						logPrefix, opt.Generation, index,
+						time.Since(startTime).String(),
+					)
+				}
+			}
+		})
+	}
+
+	// Apply WAL files in order as they are ready.
+	for index := minWALIndex; index <= maxWALIndex; index++ {
+		// Wait until next WAL file is ready to apply.
+		mu.Lock()
+		for !walStates[index-minWALIndex].ready {
+			if err := ctx.Err(); err != nil {
+				mu.Unlock()
+				return err
+			}
+			cond.Wait()
+		}
+		if err := walStates[index-minWALIndex].err; err != nil {
+			mu.Unlock()
+			return err
+		}
+		mu.Unlock()
+
+		// Apply WAL to database file.
+		startTime := time.Now()
+		if err = applyWAL(ctx, index, tmpPath); err != nil {
+			return fmt.Errorf("cannot apply wal: %w", err)
+		}
+		logger.Printf("%s: applied wal %s/%08x elapsed=%s",
+			logPrefix, opt.Generation, index,
+			time.Since(startTime).String(),
+		)
+	}
+
+	// Ensure all goroutines finish. All errors should have been handled during
+	// the processing of WAL files but this ensures that all processing is done.
+	if err := g.Wait(); err != nil {
+		return err
+	}
+
+	// Copy file to final location.
+	logger.Printf("%s: renaming database from temporary location", logPrefix)
+	if err := os.Rename(tmpPath, opt.OutputPath); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+type walRestoreState struct {
+	ready bool
+	err   error
+}
+
+// SnapshotIndexAt returns the highest index for a snapshot within a generation
+// that occurs before timestamp. If timestamp is zero, returns the latest snapshot.
+func (r *Replica) SnapshotIndexAt(ctx context.Context, generation string, timestamp time.Time) (int, error) {
+	itr, err := r.Client.Snapshots(ctx, generation)
+	if err != nil {
+		return 0, err
+	}
+	defer itr.Close()
+
+	snapshotIndex := -1
+	var max time.Time
+	for itr.Next() {
+		snapshot := itr.Snapshot()
+		if !timestamp.IsZero() && snapshot.CreatedAt.After(timestamp) {
+			continue // after timestamp, skip
+		}
+
+		// Use snapshot if it is newer.
+		if max.IsZero() || snapshot.CreatedAt.After(max) {
+			snapshotIndex, max = snapshot.Index, snapshot.CreatedAt
+		}
+	}
+	if err := itr.Close(); err != nil {
+		return 0, err
+	} else if snapshotIndex == -1 {
+		return 0, ErrNoSnapshots
+	}
+	return snapshotIndex, nil
+}
+
+// SnapshotIndexByIndex returns the highest index for a snapshot within a generation
+// that occurs before a given index. If index is MaxInt32, returns the latest snapshot.
+func (r *Replica) SnapshotIndexByIndex(ctx context.Context, generation string, index int) (int, error) {
+	itr, err := r.Client.Snapshots(ctx, generation)
+	if err != nil {
+		return 0, err
+	}
+	defer itr.Close()
+
+	snapshotIndex := -1
+	for itr.Next() {
+		snapshot := itr.Snapshot()
+
+		if index < math.MaxInt32 && snapshot.Index > index {
+			continue // after index, skip
+		}
+
+		// Use snapshot if it is newer.
+		if snapshotIndex == -1 || snapshot.Index > snapshotIndex {
+			snapshotIndex = snapshot.Index
+		}
+	}
+	if err := itr.Close(); err != nil {
+		return 0, err
+	} else if snapshotIndex == -1 {
+		return 0, ErrNoSnapshots
+	}
+	return snapshotIndex, nil
+}
+
+// walSegmentMap returns a map of WAL indices to their segments.
+// Filters by a max timestamp or a max index.
+func (r *Replica) walSegmentMap(ctx context.Context, generation string, maxIndex int, maxTimestamp time.Time) (map[int][]int64, error) { + itr, err := r.Client.WALSegments(ctx, generation) + if err != nil { + return nil, err + } + defer itr.Close() + + m := make(map[int][]int64) + for itr.Next() { + info := itr.WALSegment() + + // Exit if we go past the max timestamp or index. + if !maxTimestamp.IsZero() && info.CreatedAt.After(maxTimestamp) { + break // after max timestamp, skip + } else if info.Index > maxIndex { + break // after max index, skip + } + + // Verify offsets are added in order. + offsets := m[info.Index] + if len(offsets) == 0 && info.Offset != 0 { + return nil, fmt.Errorf("missing initial wal segment: generation=%s index=%08x offset=%d", generation, info.Index, info.Offset) + } else if len(offsets) > 0 && offsets[len(offsets)-1] >= info.Offset { + return nil, fmt.Errorf("wal segments out of order: generation=%s index=%08x offsets=(%d,%d)", generation, info.Index, offsets[len(offsets)-1], info.Offset) + } + + // Append to the end of the WAL file. + m[info.Index] = append(offsets, info.Offset) + } + return m, itr.Close() +} + +// restoreSnapshot copies a snapshot from the replica to a file. +func (r *Replica) restoreSnapshot(ctx context.Context, generation string, index int, filename string) error { + // Determine the user/group & mode based on the DB, if available. + var fileInfo, dirInfo os.FileInfo + if db := r.DB(); db != nil { + fileInfo, dirInfo = db.fileInfo, db.dirInfo + } + + if err := internal.MkdirAll(filepath.Dir(filename), dirInfo); err != nil { + return err + } + + f, err := internal.CreateFile(filename, fileInfo) + if err != nil { + return err + } + defer f.Close() + + rd, err := r.Client.SnapshotReader(ctx, generation, index) + if err != nil { + return err + } + defer rd.Close() + + if _, err := io.Copy(f, lz4.NewReader(rd)); err != nil { + return err + } else if err := f.Sync(); err != nil { + return err + } + return f.Close() +} + +// downloadWAL copies a WAL file from the replica to a local copy next to the DB. +// The WAL is later applied by applyWAL(). This function can be run in parallel +// to download multiple WAL files simultaneously. +func (r *Replica) downloadWAL(ctx context.Context, generation string, index int, offsets []int64, dbPath string) (err error) { + // Determine the user/group & mode based on the DB, if available. + var fileInfo os.FileInfo + if db := r.DB(); db != nil { + fileInfo = db.fileInfo + } + + // Open readers for every segment in the WAL file, in order. + var readers []io.Reader + for _, offset := range offsets { + rd, err := r.Client.WALSegmentReader(ctx, Pos{Generation: generation, Index: index, Offset: offset}) + if err != nil { + return err + } + defer rd.Close() + readers = append(readers, lz4.NewReader(rd)) + } + + // Open handle to destination WAL path. + f, err := internal.CreateFile(fmt.Sprintf("%s-%08x-wal", dbPath, index), fileInfo) + if err != nil { + return err + } + defer f.Close() + + // Combine segments together and copy WAL to target path. + if _, err := io.Copy(f, io.MultiReader(readers...)); err != nil { + return err + } else if err := f.Close(); err != nil { + return err + } + return nil +} + +// Replica metrics. 
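Since the collectors declared below are created with promauto, they register themselves with the Prometheus default registry at package init; exposing them only requires mounting the standard handler. A sketch, with a placeholder listen address:

```go
package main

import (
	"net/http"

	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// serveMetrics exposes the default registry, including the replica
// collectors declared below, over HTTP.
func serveMetrics() error {
	http.Handle("/metrics", promhttp.Handler())
	return http.ListenAndServe(":9090", nil) // placeholder address
}
```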
+var ( + replicaSnapshotTotalGaugeVec = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "litestream", + Subsystem: "replica", + Name: "snapshot_total", + Help: "The current number of snapshots", + }, []string{"db", "name"}) + + replicaWALBytesCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{ + Namespace: "litestream", + Subsystem: "replica", + Name: "wal_bytes", + Help: "The number wal bytes written", + }, []string{"db", "name"}) + + replicaWALIndexGaugeVec = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "litestream", + Subsystem: "replica", + Name: "wal_index", + Help: "The current WAL index", + }, []string{"db", "name"}) + + replicaWALOffsetGaugeVec = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "litestream", + Subsystem: "replica", + Name: "wal_offset", + Help: "The current WAL offset", + }, []string{"db", "name"}) + + replicaValidationTotalCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{ + Namespace: "litestream", + Subsystem: "replica", + Name: "validation_total", + Help: "The number of validations performed", + }, []string{"db", "name", "status"}) +) diff --git a/replica_test.go b/replica_test.go index a70459f..7f42c08 100644 --- a/replica_test.go +++ b/replica_test.go @@ -1,90 +1,144 @@ package litestream_test import ( + "bytes" "context" + "io" + "os" "testing" "github.com/benbjohnson/litestream" + "github.com/benbjohnson/litestream/file" + "github.com/benbjohnson/litestream/mock" + "github.com/pierrec/lz4/v4" ) -func TestFileReplica_Sync(t *testing.T) { - // Ensure replica can successfully sync after DB has sync'd. - t.Run("InitialSync", func(t *testing.T) { - db, sqldb := MustOpenDBs(t) - defer MustCloseDBs(t, db, sqldb) - r := NewTestFileReplica(t, db) - - // Sync database & then sync replica. - if err := db.Sync(context.Background()); err != nil { - t.Fatal(err) - } else if err := r.Sync(context.Background()); err != nil { - t.Fatal(err) - } - - // Ensure posistions match. - if pos, err := db.Pos(); err != nil { - t.Fatal(err) - } else if got, want := r.LastPos(), pos; got != want { - t.Fatalf("LastPos()=%v, want %v", got, want) +func TestReplica_Name(t *testing.T) { + t.Run("WithName", func(t *testing.T) { + if got, want := litestream.NewReplica(nil, "NAME").Name(), "NAME"; got != want { + t.Fatalf("Name()=%v, want %v", got, want) } }) - - // Ensure replica can successfully sync multiple times. - t.Run("MultiSync", func(t *testing.T) { - db, sqldb := MustOpenDBs(t) - defer MustCloseDBs(t, db, sqldb) - r := NewTestFileReplica(t, db) - - if _, err := sqldb.Exec(`CREATE TABLE foo (bar TEXT);`); err != nil { - t.Fatal(err) - } - - // Write to the database multiple times and sync after each write. - for i, n := 0, db.MinCheckpointPageN*2; i < n; i++ { - if _, err := sqldb.Exec(`INSERT INTO foo (bar) VALUES ('baz')`); err != nil { - t.Fatal(err) - } - - // Sync periodically. - if i%100 == 0 || i == n-1 { - if err := db.Sync(context.Background()); err != nil { - t.Fatal(err) - } else if err := r.Sync(context.Background()); err != nil { - t.Fatal(err) - } - } - } - - // Ensure posistions match. 
-        if pos, err := db.Pos(); err != nil {
-            t.Fatal(err)
-        } else if got, want := pos.Index, 2; got != want {
-            t.Fatalf("Index=%v, want %v", got, want)
-        } else if calcPos, err := r.CalcPos(context.Background(), pos.Generation); err != nil {
-            t.Fatal(err)
-        } else if got, want := calcPos, pos; got != want {
-            t.Fatalf("CalcPos()=%v, want %v", got, want)
-        } else if got, want := r.LastPos(), pos; got != want {
-            t.Fatalf("LastPos()=%v, want %v", got, want)
-        }
-    })
-
-    // Ensure replica returns an error if there is no generation available from the DB.
-    t.Run("ErrNoGeneration", func(t *testing.T) {
-        db, sqldb := MustOpenDBs(t)
-        defer MustCloseDBs(t, db, sqldb)
-        r := NewTestFileReplica(t, db)
-
-        if err := r.Sync(context.Background()); err == nil || err.Error() != `no generation, waiting for data` {
-            t.Fatal(err)
+    t.Run("WithoutName", func(t *testing.T) {
+        r := litestream.NewReplica(nil, "")
+        r.Client = &mock.ReplicaClient{}
+        if got, want := r.Name(), "mock"; got != want {
+            t.Fatalf("Name()=%v, want %v", got, want)
         }
     })
 }
 
-// NewTestFileReplica returns a new replica using a temp directory & with monitoring disabled.
-func NewTestFileReplica(tb testing.TB, db *litestream.DB) *litestream.FileReplica {
-    r := litestream.NewFileReplica(db, "", tb.TempDir())
-    r.MonitorEnabled = false
-    db.Replicas = []litestream.Replica{r}
-    return r
+func TestReplica_Sync(t *testing.T) {
+    db, sqldb := MustOpenDBs(t)
+    defer MustCloseDBs(t, db, sqldb)
+
+    // Execute a query to force a write to the WAL.
+    if _, err := sqldb.Exec(`CREATE TABLE foo (bar TEXT);`); err != nil {
+        t.Fatal(err)
+    }
+
+    // Issue initial database sync to set up the generation.
+    if err := db.Sync(context.Background()); err != nil {
+        t.Fatal(err)
+    }
+
+    // Fetch current database position.
+    dpos, err := db.Pos()
+    if err != nil {
+        t.Fatal(err)
+    }
+
+    c := file.NewReplicaClient(t.TempDir())
+    r := litestream.NewReplica(db, "")
+    c.Replica, r.Client = r, c
+
+    if err := r.Sync(context.Background()); err != nil {
+        t.Fatal(err)
+    }
+
+    // Verify client generation matches database.
+    generations, err := c.Generations(context.Background())
+    if err != nil {
+        t.Fatal(err)
+    } else if got, want := len(generations), 1; got != want {
+        t.Fatalf("len(generations)=%v, want %v", got, want)
+    } else if got, want := generations[0], dpos.Generation; got != want {
+        t.Fatalf("generations[0]=%v, want %v", got, want)
+    }
+
+    // Verify the local WAL matches the replicated WAL segment.
+    if b0, err := os.ReadFile(db.Path() + "-wal"); err != nil {
+        t.Fatal(err)
+    } else if r, err := c.WALSegmentReader(context.Background(), litestream.Pos{Generation: generations[0], Index: 0, Offset: 0}); err != nil {
+        t.Fatal(err)
+    } else if b1, err := io.ReadAll(lz4.NewReader(r)); err != nil {
+        t.Fatal(err)
+    } else if err := r.Close(); err != nil {
+        t.Fatal(err)
+    } else if !bytes.Equal(b0, b1) {
+        t.Fatalf("wal mismatch: len(%d), len(%d)", len(b0), len(b1))
+    }
+}
+
+func TestReplica_Snapshot(t *testing.T) {
+    db, sqldb := MustOpenDBs(t)
+    defer MustCloseDBs(t, db, sqldb)
+
+    c := file.NewReplicaClient(t.TempDir())
+    r := litestream.NewReplica(db, "")
+    r.Client = c
+
+    // Execute a query to force a write to the WAL.
+    if _, err := sqldb.Exec(`CREATE TABLE foo (bar TEXT);`); err != nil {
+        t.Fatal(err)
+    } else if err := db.Sync(context.Background()); err != nil {
+        t.Fatal(err)
+    } else if err := r.Sync(context.Background()); err != nil {
+        t.Fatal(err)
+    }
+
+    // Fetch current database position & snapshot.
+    pos0, err := db.Pos()
+    if err != nil {
+        t.Fatal(err)
+    } else if info, err := r.Snapshot(context.Background()); err != nil {
+        t.Fatal(err)
+    } else if got, want := info.Pos(), pos0.Truncate(); got != want {
+        t.Fatalf("pos=%s, want %s", got, want)
+    }
+
+    // Sync database and then replica.
+    if err := db.Sync(context.Background()); err != nil {
+        t.Fatal(err)
+    } else if err := r.Sync(context.Background()); err != nil {
+        t.Fatal(err)
+    }
+
+    // Execute a query to force a write to the WAL & truncate to start new index.
+    if _, err := sqldb.Exec(`INSERT INTO foo (bar) VALUES ('baz');`); err != nil {
+        t.Fatal(err)
+    } else if err := db.Checkpoint(context.Background(), litestream.CheckpointModeTruncate); err != nil {
+        t.Fatal(err)
+    }
+
+    // Fetch current database position & snapshot.
+    pos1, err := db.Pos()
+    if err != nil {
+        t.Fatal(err)
+    } else if info, err := r.Snapshot(context.Background()); err != nil {
+        t.Fatal(err)
+    } else if got, want := info.Pos(), pos1.Truncate(); got != want {
+        t.Fatalf("pos=%v, want %v", got, want)
+    }
+
+    // Verify two snapshots exist.
+    if infos, err := r.Snapshots(context.Background()); err != nil {
+        t.Fatal(err)
+    } else if got, want := len(infos), 2; got != want {
+        t.Fatalf("len=%v, want %v", got, want)
+    } else if got, want := infos[0].Pos(), pos0.Truncate(); got != want {
+        t.Fatalf("info[0]=%s, want %s", got, want)
+    } else if got, want := infos[1].Pos(), pos1.Truncate(); got != want {
+        t.Fatalf("info[1]=%s, want %s", got, want)
+    }
+}
diff --git a/s3/replica_client.go b/s3/replica_client.go
new file mode 100644
index 0000000..1d8f486
--- /dev/null
+++ b/s3/replica_client.go
@@ -0,0 +1,746 @@
+package s3
+
+import (
+    "context"
+    "crypto/tls"
+    "fmt"
+    "io"
+    "net/http"
+    "os"
+    "path"
+    "sync"
+    "time"
+
+    "github.com/aws/aws-sdk-go/aws"
+    "github.com/aws/aws-sdk-go/aws/awserr"
+    "github.com/aws/aws-sdk-go/aws/credentials"
+    "github.com/aws/aws-sdk-go/aws/defaults"
+    "github.com/aws/aws-sdk-go/aws/session"
+    "github.com/aws/aws-sdk-go/service/s3"
+    "github.com/aws/aws-sdk-go/service/s3/s3manager"
+    "github.com/benbjohnson/litestream"
+    "github.com/benbjohnson/litestream/internal"
+    "github.com/prometheus/client_golang/prometheus"
+    "github.com/prometheus/client_golang/prometheus/promauto"
+    "golang.org/x/sync/errgroup"
+)
+
+// MaxKeys is the number of keys S3 can operate on per batch.
+const MaxKeys = 1000
+
+// DefaultRegion is the region used if one is not specified.
+const DefaultRegion = "us-east-1"
+
+var _ litestream.ReplicaClient = (*ReplicaClient)(nil)
+
+// ReplicaClient is a client for writing snapshots & WAL segments to an S3 bucket.
+type ReplicaClient struct {
+    mu       sync.Mutex
+    s3       *s3.S3 // s3 service
+    uploader *s3manager.Uploader
+
+    // AWS authentication keys.
+    AccessKeyID     string
+    SecretAccessKey string
+
+    // S3 bucket information
+    Region         string
+    Bucket         string
+    Path           string
+    Endpoint       string
+    ForcePathStyle bool
+    SkipVerify     bool
+}
+
+// NewReplicaClient returns a new instance of ReplicaClient.
+func NewReplicaClient() *ReplicaClient {
+    return &ReplicaClient{}
+}
+
+// Type returns "s3" as the client type.
+func (c *ReplicaClient) Type() string {
+    return "s3"
+}
+
+// GenerationsDir returns the path to the root directory that contains all generations.
+func (c *ReplicaClient) GenerationsDir() string {
+    return path.Join(c.Path, "generations")
+}
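For context, a replica is wired to this client the same way the tests in this patch do it; a minimal sketch, where the bucket name and key prefix are placeholder values:

    c := s3.NewReplicaClient()
    c.Bucket = "example-bucket" // placeholder bucket name
    c.Path = "db"               // placeholder key prefix within the bucket

    r := litestream.NewReplica(db, "s3")
    r.Client = c
    db.Replicas = append(db.Replicas, r)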
+// GenerationDir returns the path to a generation's root directory.
+func (c *ReplicaClient) GenerationDir(generation string) (string, error) {
+    if generation == "" {
+        return "", fmt.Errorf("generation required")
+    }
+    return path.Join(c.GenerationsDir(), generation), nil
+}
+
+// SnapshotsDir returns the path to a generation's snapshot directory.
+func (c *ReplicaClient) SnapshotsDir(generation string) (string, error) {
+    dir, err := c.GenerationDir(generation)
+    if err != nil {
+        return "", err
+    }
+    return path.Join(dir, "snapshots"), nil
+}
+
+// SnapshotPath returns the path to a compressed snapshot file.
+func (c *ReplicaClient) SnapshotPath(generation string, index int) (string, error) {
+    dir, err := c.SnapshotsDir(generation)
+    if err != nil {
+        return "", err
+    }
+    return path.Join(dir, litestream.FormatSnapshotPath(index)), nil
+}
+
+// WALDir returns the path to a generation's WAL directory.
+func (c *ReplicaClient) WALDir(generation string) (string, error) {
+    dir, err := c.GenerationDir(generation)
+    if err != nil {
+        return "", err
+    }
+    return path.Join(dir, "wal"), nil
+}
+
+// WALSegmentPath returns the path to a WAL segment file.
+func (c *ReplicaClient) WALSegmentPath(generation string, index int, offset int64) (string, error) {
+    dir, err := c.WALDir(generation)
+    if err != nil {
+        return "", err
+    }
+    return path.Join(dir, litestream.FormatWALSegmentPath(index, offset)), nil
+}
+
+// Init initializes the connection to S3. No-op if already initialized.
+func (c *ReplicaClient) Init(ctx context.Context) (err error) {
+    c.mu.Lock()
+    defer c.mu.Unlock()
+
+    if c.s3 != nil {
+        return nil
+    }
+
+    // Look up region if not specified and no endpoint is used.
+    // Endpoints are typically used for non-S3 object stores and do not
+    // necessarily require a region.
+    region := c.Region
+    if region == "" {
+        if c.Endpoint == "" {
+            if region, err = c.findBucketRegion(ctx, c.Bucket); err != nil {
+                return fmt.Errorf("cannot lookup bucket region: %w", err)
+            }
+        } else {
+            region = DefaultRegion // default for non-S3 object stores
+        }
+    }
+
+    // Create new AWS session.
+    config := c.config()
+    if region != "" {
+        config.Region = aws.String(region)
+    }
+    sess, err := session.NewSession(config)
+    if err != nil {
+        return fmt.Errorf("cannot create aws session: %w", err)
+    }
+    c.s3 = s3.New(sess)
+    c.uploader = s3manager.NewUploader(sess)
+    return nil
+}
+
+// config returns the AWS configuration. Uses the default credential chain
+// unless a key/secret are explicitly set.
+func (c *ReplicaClient) config() *aws.Config {
+    config := defaults.Get().Config
+    if c.AccessKeyID != "" || c.SecretAccessKey != "" {
+        config.Credentials = credentials.NewStaticCredentials(c.AccessKeyID, c.SecretAccessKey, "")
+    }
+    if c.Endpoint != "" {
+        config.Endpoint = aws.String(c.Endpoint)
+    }
+    if c.ForcePathStyle {
+        config.S3ForcePathStyle = aws.Bool(c.ForcePathStyle)
+    }
+    if c.SkipVerify {
+        config.HTTPClient = &http.Client{Transport: &http.Transport{
+            TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
+        }}
+    }
+
+    return config
+}
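Because config() only overrides settings that are explicitly set, pointing the client at an S3-compatible store outside AWS is just a matter of filling in the endpoint fields; a sketch with placeholder values:

    c := s3.NewReplicaClient()
    c.Bucket = "litestream-example"          // placeholder bucket
    c.Endpoint = "https://minio.example.com" // placeholder S3-compatible endpoint
    c.ForcePathStyle = true                  // many non-AWS stores require path-style addressing
    c.SkipVerify = true                      // only for self-signed certificates in testing

Note that when Endpoint is set, Init() skips the GetBucketLocation lookup and falls back to DefaultRegion.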
+func (c *ReplicaClient) findBucketRegion(ctx context.Context, bucket string) (string, error) {
+    // Connect to US standard region to fetch info.
+    config := c.config()
+    config.Region = aws.String(DefaultRegion)
+    sess, err := session.NewSession(config)
+    if err != nil {
+        return "", err
+    }
+
+    // Fetch bucket location, if possible. Must be bucket owner.
+    // This call can return a nil location which means it's in us-east-1.
+    if out, err := s3.New(sess).GetBucketLocation(&s3.GetBucketLocationInput{
+        Bucket: aws.String(bucket),
+    }); err != nil {
+        return "", err
+    } else if out.LocationConstraint != nil {
+        return *out.LocationConstraint, nil
+    }
+    return DefaultRegion, nil
+}
+
+// Generations returns a list of available generation names.
+func (c *ReplicaClient) Generations(ctx context.Context) ([]string, error) {
+    if err := c.Init(ctx); err != nil {
+        return nil, err
+    }
+
+    var generations []string
+    if err := c.s3.ListObjectsPagesWithContext(ctx, &s3.ListObjectsInput{
+        Bucket:    aws.String(c.Bucket),
+        Prefix:    aws.String(c.GenerationsDir() + "/"),
+        Delimiter: aws.String("/"),
+    }, func(page *s3.ListObjectsOutput, lastPage bool) bool {
+        operationTotalCounterVec.WithLabelValues("LIST").Inc()
+
+        for _, prefix := range page.CommonPrefixes {
+            name := path.Base(*prefix.Prefix)
+            if !litestream.IsGenerationName(name) {
+                continue
+            }
+            generations = append(generations, name)
+        }
+        return true
+    }); err != nil {
+        return nil, err
+    }
+
+    return generations, nil
+}
+
+// DeleteGeneration deletes all snapshots & WAL segments within a generation.
+func (c *ReplicaClient) DeleteGeneration(ctx context.Context, generation string) error {
+    if err := c.Init(ctx); err != nil {
+        return err
+    }
+
+    dir, err := c.GenerationDir(generation)
+    if err != nil {
+        return fmt.Errorf("cannot determine generation directory path: %w", err)
+    }
+
+    // Collect all files for the generation.
+    var objIDs []*s3.ObjectIdentifier
+    if err := c.s3.ListObjectsPagesWithContext(ctx, &s3.ListObjectsInput{
+        Bucket: aws.String(c.Bucket),
+        Prefix: aws.String(dir),
+    }, func(page *s3.ListObjectsOutput, lastPage bool) bool {
+        operationTotalCounterVec.WithLabelValues("LIST").Inc()
+
+        for _, obj := range page.Contents {
+            objIDs = append(objIDs, &s3.ObjectIdentifier{Key: obj.Key})
+        }
+        return true
+    }); err != nil {
+        return err
+    }
+
+    // Delete all files in batches.
+    for len(objIDs) > 0 {
+        n := MaxKeys
+        if len(objIDs) < n {
+            n = len(objIDs)
+        }
+
+        if _, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
+            Bucket: aws.String(c.Bucket),
+            Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)},
+        }); err != nil {
+            return err
+        }
+        operationTotalCounterVec.WithLabelValues("DELETE").Inc()
+
+        objIDs = objIDs[n:]
+    }
+
+    return nil
+}
+
+// Snapshots returns an iterator over all available snapshots for a generation.
+func (c *ReplicaClient) Snapshots(ctx context.Context, generation string) (litestream.SnapshotIterator, error) {
+    if err := c.Init(ctx); err != nil {
+        return nil, err
+    }
+    return newSnapshotIterator(ctx, c, generation), nil
+}
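Snapshots() pages lazily over keys under <path>/generations/<generation>/snapshots/ and streams them through the iterator defined later in this file. Consuming it follows the usual Next/Close pattern; a sketch assuming ctx, c, and generation are in scope:

    itr, err := c.Snapshots(ctx, generation)
    if err != nil {
        return err
    }
    defer itr.Close()

    for itr.Next() {
        info := itr.Snapshot()
        fmt.Printf("index=%08x size=%d created=%s\n", info.Index, info.Size, info.CreatedAt)
    }
    // Close() also surfaces any error from the background listing goroutine.
    if err := itr.Close(); err != nil {
        return err
    }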
+// WriteSnapshot writes LZ4 compressed data from rd into a snapshot object in the S3 bucket.
+func (c *ReplicaClient) WriteSnapshot(ctx context.Context, generation string, index int, rd io.Reader) (info litestream.SnapshotInfo, err error) {
+    if err := c.Init(ctx); err != nil {
+        return info, err
+    }
+
+    key, err := c.SnapshotPath(generation, index)
+    if err != nil {
+        return info, fmt.Errorf("cannot determine snapshot path: %w", err)
+    }
+    startTime := time.Now()
+
+    rc := internal.NewReadCounter(rd)
+    if _, err := c.uploader.UploadWithContext(ctx, &s3manager.UploadInput{
+        Bucket: aws.String(c.Bucket),
+        Key:    aws.String(key),
+        Body:   rc,
+    }); err != nil {
+        return info, err
+    }
+
+    operationTotalCounterVec.WithLabelValues("PUT").Inc()
+    operationBytesCounterVec.WithLabelValues("PUT").Add(float64(rc.N()))
+
+    return litestream.SnapshotInfo{
+        Generation: generation,
+        Index:      index,
+        Size:       rc.N(),
+        CreatedAt:  startTime.UTC(),
+    }, nil
+}
+
+// SnapshotReader returns a reader for snapshot data at the given generation/index.
+func (c *ReplicaClient) SnapshotReader(ctx context.Context, generation string, index int) (io.ReadCloser, error) {
+    if err := c.Init(ctx); err != nil {
+        return nil, err
+    }
+
+    key, err := c.SnapshotPath(generation, index)
+    if err != nil {
+        return nil, fmt.Errorf("cannot determine snapshot path: %w", err)
+    }
+
+    out, err := c.s3.GetObjectWithContext(ctx, &s3.GetObjectInput{
+        Bucket: aws.String(c.Bucket),
+        Key:    aws.String(key),
+    })
+    if isNotExists(err) {
+        return nil, os.ErrNotExist
+    } else if err != nil {
+        return nil, err
+    }
+    operationTotalCounterVec.WithLabelValues("GET").Inc()
+    operationBytesCounterVec.WithLabelValues("GET").Add(float64(*out.ContentLength))
+
+    return out.Body, nil
+}
+
+// DeleteSnapshot deletes a snapshot with the given generation & index.
+func (c *ReplicaClient) DeleteSnapshot(ctx context.Context, generation string, index int) error {
+    if err := c.Init(ctx); err != nil {
+        return err
+    }
+
+    key, err := c.SnapshotPath(generation, index)
+    if err != nil {
+        return fmt.Errorf("cannot determine snapshot path: %w", err)
+    }
+
+    if _, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
+        Bucket: aws.String(c.Bucket),
+        Delete: &s3.Delete{Objects: []*s3.ObjectIdentifier{{Key: &key}}, Quiet: aws.Bool(true)},
+    }); err != nil {
+        return err
+    }
+
+    operationTotalCounterVec.WithLabelValues("DELETE").Inc()
+    return nil
+}
+
+// WALSegments returns an iterator over all available WAL files for a generation.
+func (c *ReplicaClient) WALSegments(ctx context.Context, generation string) (litestream.WALSegmentIterator, error) {
+    if err := c.Init(ctx); err != nil {
+        return nil, err
+    }
+    return newWALSegmentIterator(ctx, c, generation), nil
+}
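The reader methods return the stored LZ4 stream as-is, so callers decompress on the way out, exactly as restoreSnapshot() does with github.com/pierrec/lz4/v4. A minimal read-back sketch, assuming dst is some io.Writer:

    rd, err := c.SnapshotReader(ctx, generation, index)
    if err != nil {
        return err
    }
    defer rd.Close()

    // Stream-decompress rather than buffering the whole snapshot in memory.
    if _, err := io.Copy(dst, lz4.NewReader(rd)); err != nil {
        return err
    }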
+// WriteWALSegment writes LZ4 compressed data from rd into a WAL segment object in the S3 bucket.
+func (c *ReplicaClient) WriteWALSegment(ctx context.Context, pos litestream.Pos, rd io.Reader) (info litestream.WALSegmentInfo, err error) {
+    if err := c.Init(ctx); err != nil {
+        return info, err
+    }
+
+    key, err := c.WALSegmentPath(pos.Generation, pos.Index, pos.Offset)
+    if err != nil {
+        return info, fmt.Errorf("cannot determine wal segment path: %w", err)
+    }
+    startTime := time.Now()
+
+    rc := internal.NewReadCounter(rd)
+    if _, err := c.uploader.UploadWithContext(ctx, &s3manager.UploadInput{
+        Bucket: aws.String(c.Bucket),
+        Key:    aws.String(key),
+        Body:   rc,
+    }); err != nil {
+        return info, err
+    }
+
+    operationTotalCounterVec.WithLabelValues("PUT").Inc()
+    operationBytesCounterVec.WithLabelValues("PUT").Add(float64(rc.N()))
+
+    return litestream.WALSegmentInfo{
+        Generation: pos.Generation,
+        Index:      pos.Index,
+        Offset:     pos.Offset,
+        Size:       rc.N(),
+        CreatedAt:  startTime.UTC(),
+    }, nil
+}
+
+// WALSegmentReader returns a reader for a section of WAL data at the given index.
+// Returns os.ErrNotExist if no matching index/offset is found.
+func (c *ReplicaClient) WALSegmentReader(ctx context.Context, pos litestream.Pos) (io.ReadCloser, error) {
+    if err := c.Init(ctx); err != nil {
+        return nil, err
+    }
+
+    key, err := c.WALSegmentPath(pos.Generation, pos.Index, pos.Offset)
+    if err != nil {
+        return nil, fmt.Errorf("cannot determine wal segment path: %w", err)
+    }
+
+    out, err := c.s3.GetObjectWithContext(ctx, &s3.GetObjectInput{
+        Bucket: aws.String(c.Bucket),
+        Key:    aws.String(key),
+    })
+    if isNotExists(err) {
+        return nil, os.ErrNotExist
+    } else if err != nil {
+        return nil, err
+    }
+    operationTotalCounterVec.WithLabelValues("GET").Inc()
+    operationBytesCounterVec.WithLabelValues("GET").Add(float64(*out.ContentLength))
+
+    return out.Body, nil
+}
+
+// DeleteWALSegments deletes WAL segments at the given positions.
+func (c *ReplicaClient) DeleteWALSegments(ctx context.Context, a []litestream.Pos) error {
+    if err := c.Init(ctx); err != nil {
+        return err
+    }
+
+    objIDs := make([]*s3.ObjectIdentifier, MaxKeys)
+    for len(a) > 0 {
+        n := MaxKeys
+        if len(a) < n {
+            n = len(a)
+        }
+
+        // Generate a batch of object IDs for deleting the WAL segments.
+        for i, pos := range a[:n] {
+            key, err := c.WALSegmentPath(pos.Generation, pos.Index, pos.Offset)
+            if err != nil {
+                return fmt.Errorf("cannot determine wal segment path: %w", err)
+            }
+            objIDs[i] = &s3.ObjectIdentifier{Key: &key}
+        }
+
+        // Delete S3 objects in bulk.
+        if _, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
+            Bucket: aws.String(c.Bucket),
+            Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)},
+        }); err != nil {
+            return err
+        }
+
+        operationTotalCounterVec.WithLabelValues("DELETE").Inc()
+
+        a = a[n:]
+    }
+
+    return nil
+}
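DeleteWALSegments() accepts an arbitrarily long position list and chunks it into MaxKeys-sized DeleteObjects batches itself, so retention code can pass everything in a single call; a small sketch with placeholder positions:

    err := c.DeleteWALSegments(ctx, []litestream.Pos{
        {Generation: generation, Index: 2, Offset: 0},
        {Generation: generation, Index: 2, Offset: 4096}, // offset of the next segment within the same index
    })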
+// DeleteAll deletes everything on the remote path. Mainly used for testing.
+func (c *ReplicaClient) DeleteAll(ctx context.Context) error {
+    if err := c.Init(ctx); err != nil {
+        return err
+    }
+
+    prefix := c.Path
+    if prefix != "" {
+        prefix += "/"
+    }
+
+    // Collect all objects under the path prefix.
+    var objIDs []*s3.ObjectIdentifier
+    if err := c.s3.ListObjectsPagesWithContext(ctx, &s3.ListObjectsInput{
+        Bucket: aws.String(c.Bucket),
+        Prefix: aws.String(prefix),
+    }, func(page *s3.ListObjectsOutput, lastPage bool) bool {
+        operationTotalCounterVec.WithLabelValues("LIST").Inc()
+
+        for _, obj := range page.Contents {
+            objIDs = append(objIDs, &s3.ObjectIdentifier{Key: obj.Key})
+        }
+        return true
+    }); err != nil {
+        return err
+    }
+
+    // Delete all files in batches.
+    for len(objIDs) > 0 {
+        n := MaxKeys
+        if len(objIDs) < n {
+            n = len(objIDs)
+        }
+
+        if _, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
+            Bucket: aws.String(c.Bucket),
+            Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)},
+        }); err != nil {
+            return err
+        }
+        operationTotalCounterVec.WithLabelValues("DELETE").Inc()
+
+        objIDs = objIDs[n:]
+    }
+
+    return nil
+}
+
+type snapshotIterator struct {
+    client     *ReplicaClient
+    generation string
+
+    ch     chan litestream.SnapshotInfo
+    g      errgroup.Group
+    ctx    context.Context
+    cancel func()
+
+    info litestream.SnapshotInfo
+    err  error
+}
+
+func newSnapshotIterator(ctx context.Context, client *ReplicaClient, generation string) *snapshotIterator {
+    itr := &snapshotIterator{
+        client:     client,
+        generation: generation,
+        ch:         make(chan litestream.SnapshotInfo),
+    }
+
+    itr.ctx, itr.cancel = context.WithCancel(ctx)
+    itr.g.Go(itr.fetch)
+
+    return itr
+}
+
+// fetch runs in a separate goroutine to fetch pages of objects and stream them to a channel.
+func (itr *snapshotIterator) fetch() error {
+    defer close(itr.ch)
+
+    dir, err := itr.client.SnapshotsDir(itr.generation)
+    if err != nil {
+        return fmt.Errorf("cannot determine snapshot directory path: %w", err)
+    }
+
+    return itr.client.s3.ListObjectsPagesWithContext(itr.ctx, &s3.ListObjectsInput{
+        Bucket:    aws.String(itr.client.Bucket),
+        Prefix:    aws.String(dir + "/"),
+        Delimiter: aws.String("/"),
+    }, func(page *s3.ListObjectsOutput, lastPage bool) bool {
+        operationTotalCounterVec.WithLabelValues("LIST").Inc()
+
+        for _, obj := range page.Contents {
+            key := path.Base(*obj.Key)
+            index, err := litestream.ParseSnapshotPath(key)
+            if err != nil {
+                continue
+            }
+
+            info := litestream.SnapshotInfo{
+                Generation: itr.generation,
+                Index:      index,
+                Size:       *obj.Size,
+                CreatedAt:  obj.LastModified.UTC(),
+            }
+
+            select {
+            case <-itr.ctx.Done():
+                return false // stop paging once the iterator is closed
+            case itr.ch <- info:
+            }
+        }
+        return true
+    })
+}
+
+func (itr *snapshotIterator) Close() (err error) {
+    err = itr.err
+
+    // Cancel context and wait for error group to finish.
+    itr.cancel()
+    if e := itr.g.Wait(); e != nil && err == nil {
+        err = e
+    }
+
+    return err
+}
+
+func (itr *snapshotIterator) Next() bool {
+    // Exit if an error has already occurred.
+    if itr.err != nil {
+        return false
+    }
+
+    // Return false if context was canceled or if there are no more snapshots.
+    // Otherwise fetch the next snapshot and store it on the iterator.
+ select { + case <-itr.ctx.Done(): + return false + case info, ok := <-itr.ch: + if !ok { + return false + } + itr.info = info + return true + } +} + +func (itr *snapshotIterator) Err() error { return itr.err } + +func (itr *snapshotIterator) Snapshot() litestream.SnapshotInfo { + return itr.info +} + +type walSegmentIterator struct { + client *ReplicaClient + generation string + + ch chan litestream.WALSegmentInfo + g errgroup.Group + ctx context.Context + cancel func() + + info litestream.WALSegmentInfo + err error +} + +func newWALSegmentIterator(ctx context.Context, client *ReplicaClient, generation string) *walSegmentIterator { + itr := &walSegmentIterator{ + client: client, + generation: generation, + ch: make(chan litestream.WALSegmentInfo), + } + + itr.ctx, itr.cancel = context.WithCancel(ctx) + itr.g.Go(itr.fetch) + + return itr +} + +// fetch runs in a separate goroutine to fetch pages of objects and stream them to a channel. +func (itr *walSegmentIterator) fetch() error { + defer close(itr.ch) + + dir, err := itr.client.WALDir(itr.generation) + if err != nil { + return fmt.Errorf("cannot determine wal directory path: %w", err) + } + + return itr.client.s3.ListObjectsPagesWithContext(itr.ctx, &s3.ListObjectsInput{ + Bucket: aws.String(itr.client.Bucket), + Prefix: aws.String(dir + "/"), + Delimiter: aws.String("/"), + }, func(page *s3.ListObjectsOutput, lastPage bool) bool { + operationTotalCounterVec.WithLabelValues("LIST").Inc() + + for _, obj := range page.Contents { + key := path.Base(*obj.Key) + index, offset, err := litestream.ParseWALSegmentPath(key) + if err != nil { + continue + } + + info := litestream.WALSegmentInfo{ + Generation: itr.generation, + Index: index, + Offset: offset, + Size: *obj.Size, + CreatedAt: obj.LastModified.UTC(), + } + + select { + case <-itr.ctx.Done(): + return false + case itr.ch <- info: + } + } + return true + }) +} + +func (itr *walSegmentIterator) Close() (err error) { + err = itr.err + + // Cancel context and wait for error group to finish. + itr.cancel() + if e := itr.g.Wait(); e != nil && err == nil { + err = e + } + + return err +} + +func (itr *walSegmentIterator) Next() bool { + // Exit if an error has already occurred. + if itr.err != nil { + return false + } + + // Return false if context was canceled or if there are no more segments. + // Otherwise fetch the next segment and store it on the iterator. + select { + case <-itr.ctx.Done(): + return false + case info, ok := <-itr.ch: + if !ok { + return false + } + itr.info = info + return true + } +} + +func (itr *walSegmentIterator) Err() error { return itr.err } + +func (itr *walSegmentIterator) WALSegment() litestream.WALSegmentInfo { + return itr.info +} + +func isNotExists(err error) bool { + switch err := err.(type) { + case awserr.Error: + return err.Code() == `NoSuchKey` + default: + return false + } +} + +// S3 metrics. 
+var (
+    operationTotalCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{
+        Name: "litestream_s3_operation_total",
+        Help: "The number of S3 operations performed",
+    }, []string{"type"})
+
+    operationBytesCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{
+        Name: "litestream_s3_operation_bytes",
+        Help: "The number of bytes used by S3 operations",
+    }, []string{"type"})
+)
diff --git a/s3/replica_client_test.go b/s3/replica_client_test.go
new file mode 100644
index 0000000..f80947c
--- /dev/null
+++ b/s3/replica_client_test.go
@@ -0,0 +1,605 @@
+package s3_test
+
+import (
+    "context"
+    "flag"
+    "fmt"
+    "io/ioutil"
+    "math/rand"
+    "os"
+    "path"
+    "reflect"
+    "sort"
+    "strings"
+    "testing"
+
+    "github.com/benbjohnson/litestream"
+    "github.com/benbjohnson/litestream/s3"
+)
+
+var (
+    // Enables integration tests.
+    integration = flag.Bool("integration", false, "")
+
+    // Replica client settings
+    accessKeyID     = flag.String("access-key-id", os.Getenv("LITESTREAM_S3_ACCESS_KEY_ID"), "")
+    secretAccessKey = flag.String("secret-access-key", os.Getenv("LITESTREAM_S3_SECRET_ACCESS_KEY"), "")
+    region          = flag.String("region", os.Getenv("LITESTREAM_S3_REGION"), "")
+    bucket          = flag.String("bucket", os.Getenv("LITESTREAM_S3_BUCKET"), "")
+    pathFlag        = flag.String("path", os.Getenv("LITESTREAM_S3_PATH"), "")
+    endpoint        = flag.String("endpoint", os.Getenv("LITESTREAM_S3_ENDPOINT"), "")
+    forcePathStyle  = flag.Bool("force-path-style", os.Getenv("LITESTREAM_S3_FORCE_PATH_STYLE") == "true", "")
+    skipVerify      = flag.Bool("skip-verify", os.Getenv("LITESTREAM_S3_SKIP_VERIFY") == "true", "")
+)
+
+func TestReplicaClient_Type(t *testing.T) {
+    if got, want := s3.NewReplicaClient().Type(), "s3"; got != want {
+        t.Fatalf("Type()=%v, want %v", got, want)
+    }
+}
+
+func TestReplicaClient_GenerationsDir(t *testing.T) {
+    t.Run("OK", func(t *testing.T) {
+        c := s3.NewReplicaClient()
+        c.Path = "foo"
+        if got, want := c.GenerationsDir(), "foo/generations"; got != want {
+            t.Fatalf("GenerationsDir()=%v, want %v", got, want)
+        }
+    })
+    t.Run("NoPath", func(t *testing.T) {
+        if got, want := s3.NewReplicaClient().GenerationsDir(), "generations"; got != want {
+            t.Fatalf("GenerationsDir()=%v, want %v", got, want)
+        }
+    })
+}
+
+func TestReplicaClient_GenerationDir(t *testing.T) {
+    t.Run("OK", func(t *testing.T) {
+        c := s3.NewReplicaClient()
+        c.Path = "foo"
+        if got, err := c.GenerationDir("0123456701234567"); err != nil {
+            t.Fatal(err)
+        } else if want := "foo/generations/0123456701234567"; got != want {
+            t.Fatalf("GenerationDir()=%v, want %v", got, want)
+        }
+    })
+    t.Run("ErrNoGeneration", func(t *testing.T) {
+        if _, err := s3.NewReplicaClient().GenerationDir(""); err == nil || err.Error() != `generation required` {
+            t.Fatalf("unexpected error: %v", err)
+        }
+    })
+}
+
+func TestReplicaClient_SnapshotsDir(t *testing.T) {
+    t.Run("OK", func(t *testing.T) {
+        c := s3.NewReplicaClient()
+        c.Path = "foo"
+        if got, err := c.SnapshotsDir("0123456701234567"); err != nil {
+            t.Fatal(err)
+        } else if want := "foo/generations/0123456701234567/snapshots"; got != want {
+            t.Fatalf("SnapshotsDir()=%v, want %v", got, want)
+        }
+    })
+    t.Run("ErrNoGeneration", func(t *testing.T) {
+        if _, err := s3.NewReplicaClient().SnapshotsDir(""); err == nil || err.Error() != `generation required` {
+            t.Fatalf("unexpected error: %v", err)
+        }
+    })
+}
+
+func TestReplicaClient_SnapshotPath(t *testing.T) {
+    t.Run("OK", func(t *testing.T) {
+        c := s3.NewReplicaClient()
+        c.Path = "foo"
+        if got, err := c.SnapshotPath("0123456701234567", 1000); err != nil {
+            t.Fatal(err)
+        } else if want := "foo/generations/0123456701234567/snapshots/000003e8.snapshot.lz4"; got != want {
+            t.Fatalf("SnapshotPath()=%v, want %v", got, want)
+        }
+    })
+    t.Run("ErrNoGeneration", func(t *testing.T) {
+        if _, err := s3.NewReplicaClient().SnapshotPath("", 1000); err == nil || err.Error() != `generation required` {
+            t.Fatalf("unexpected error: %v", err)
+        }
+    })
+}
+
+func TestReplicaClient_WALDir(t *testing.T) {
+    t.Run("OK", func(t *testing.T) {
+        c := s3.NewReplicaClient()
+        c.Path = "foo"
+        if got, err := c.WALDir("0123456701234567"); err != nil {
+            t.Fatal(err)
+        } else if want := "foo/generations/0123456701234567/wal"; got != want {
+            t.Fatalf("WALDir()=%v, want %v", got, want)
+        }
+    })
+    t.Run("ErrNoGeneration", func(t *testing.T) {
+        if _, err := s3.NewReplicaClient().WALDir(""); err == nil || err.Error() != `generation required` {
+            t.Fatalf("unexpected error: %v", err)
+        }
+    })
+}
+
+func TestReplicaClient_WALSegmentPath(t *testing.T) {
+    t.Run("OK", func(t *testing.T) {
+        c := s3.NewReplicaClient()
+        c.Path = "foo"
+        if got, err := c.WALSegmentPath("0123456701234567", 1000, 1001); err != nil {
+            t.Fatal(err)
+        } else if want := "foo/generations/0123456701234567/wal/000003e8_000003e9.wal.lz4"; got != want {
+            t.Fatalf("WALSegmentPath()=%v, want %v", got, want)
+        }
+    })
+    t.Run("ErrNoGeneration", func(t *testing.T) {
+        if _, err := s3.NewReplicaClient().WALSegmentPath("", 1000, 0); err == nil || err.Error() != `generation required` {
+            t.Fatalf("unexpected error: %v", err)
+        }
+    })
+}
+
+func TestReplicaClient_Generations(t *testing.T) {
+    t.Run("OK", func(t *testing.T) {
+        t.Parallel()
+
+        c := NewIntegrationReplicaClient(t)
+        defer MustDeleteAll(t, c)
+
+        // Write snapshots.
+        if _, err := c.WriteSnapshot(context.Background(), "5efbd8d042012dca", 0, strings.NewReader(`foo`)); err != nil {
+            t.Fatal(err)
+        } else if _, err := c.WriteSnapshot(context.Background(), "b16ddcf5c697540f", 0, strings.NewReader(`bar`)); err != nil {
+            t.Fatal(err)
+        } else if _, err := c.WriteSnapshot(context.Background(), "155fe292f8333c72", 0, strings.NewReader(`baz`)); err != nil {
+            t.Fatal(err)
+        }
+
+        // Verify returned generations.
+        if got, err := c.Generations(context.Background()); err != nil {
+            t.Fatal(err)
+        } else if want := []string{"155fe292f8333c72", "5efbd8d042012dca", "b16ddcf5c697540f"}; !reflect.DeepEqual(got, want) {
+            t.Fatalf("Generations()=%v, want %v", got, want)
+        }
+    })
+
+    t.Run("NoGenerationsDir", func(t *testing.T) {
+        t.Parallel()
+
+        c := NewIntegrationReplicaClient(t)
+        defer MustDeleteAll(t, c)
+        if generations, err := c.Generations(context.Background()); err != nil {
+            t.Fatal(err)
+        } else if got, want := len(generations), 0; got != want {
+            t.Fatalf("len(Generations())=%v, want %v", got, want)
+        }
+    })
+}
+
+func TestReplicaClient_Snapshots(t *testing.T) {
+    t.Run("OK", func(t *testing.T) {
+        t.Parallel()
+
+        c := NewIntegrationReplicaClient(t)
+        defer MustDeleteAll(t, c)
+
+        // Write snapshots.
+        if _, err := c.WriteSnapshot(context.Background(), "5efbd8d042012dca", 1, strings.NewReader(``)); err != nil {
+            t.Fatal(err)
+        } else if _, err := c.WriteSnapshot(context.Background(), "b16ddcf5c697540f", 5, strings.NewReader(`x`)); err != nil {
+            t.Fatal(err)
+        } else if _, err := c.WriteSnapshot(context.Background(), "b16ddcf5c697540f", 10, strings.NewReader(`xyz`)); err != nil {
+            t.Fatal(err)
+        }
+
+        // Fetch all snapshots by generation.
+        itr, err := c.Snapshots(context.Background(), "b16ddcf5c697540f")
+        if err != nil {
+            t.Fatal(err)
+        }
+        defer itr.Close()
+
+        // Read all snapshots into a slice so they can be sorted.
+        a, err := litestream.SliceSnapshotIterator(itr)
+        if err != nil {
+            t.Fatal(err)
+        } else if got, want := len(a), 2; got != want {
+            t.Fatalf("len=%v, want %v", got, want)
+        }
+        sort.Sort(litestream.SnapshotInfoSlice(a))
+
+        // Verify first snapshot metadata.
+        if got, want := a[0].Generation, "b16ddcf5c697540f"; got != want {
+            t.Fatalf("Generation=%v, want %v", got, want)
+        } else if got, want := a[0].Index, 5; got != want {
+            t.Fatalf("Index=%v, want %v", got, want)
+        } else if got, want := a[0].Size, int64(1); got != want {
+            t.Fatalf("Size=%v, want %v", got, want)
+        } else if a[0].CreatedAt.IsZero() {
+            t.Fatalf("expected CreatedAt")
+        }
+
+        // Verify second snapshot metadata.
+        if got, want := a[1].Generation, "b16ddcf5c697540f"; got != want {
+            t.Fatalf("Generation=%v, want %v", got, want)
+        } else if got, want := a[1].Index, 0xA; got != want {
+            t.Fatalf("Index=%v, want %v", got, want)
+        } else if got, want := a[1].Size, int64(3); got != want {
+            t.Fatalf("Size=%v, want %v", got, want)
+        } else if a[1].CreatedAt.IsZero() {
+            t.Fatalf("expected CreatedAt")
+        }
+
+        // Ensure close is clean.
+        if err := itr.Close(); err != nil {
+            t.Fatal(err)
+        }
+    })
+
+    t.Run("NoGenerationDir", func(t *testing.T) {
+        t.Parallel()
+
+        c := NewIntegrationReplicaClient(t)
+        defer MustDeleteAll(t, c)
+
+        itr, err := c.Snapshots(context.Background(), "5efbd8d042012dca")
+        if err != nil {
+            t.Fatal(err)
+        }
+        defer itr.Close()
+
+        if itr.Next() {
+            t.Fatal("expected no snapshots")
+        }
+    })
+
+    t.Run("ErrNoGeneration", func(t *testing.T) {
+        t.Parallel()
+
+        c := NewIntegrationReplicaClient(t)
+        defer MustDeleteAll(t, c)
+
+        if itr, err := c.Snapshots(context.Background(), ""); err != nil {
+            t.Fatal(err)
+        } else if err := itr.Close(); err == nil || err.Error() != `cannot determine snapshot directory path: generation required` {
+            t.Fatalf("unexpected error: %v", err)
+        }
+    })
+}
+
+func TestReplicaClient_WriteSnapshot(t *testing.T) {
+    t.Run("OK", func(t *testing.T) {
+        t.Parallel()
+
+        c := NewIntegrationReplicaClient(t)
+        defer MustDeleteAll(t, c)
+
+        if _, err := c.WriteSnapshot(context.Background(), "b16ddcf5c697540f", 1000, strings.NewReader(`foobar`)); err != nil {
+            t.Fatal(err)
+        }
+
+        if r, err := c.SnapshotReader(context.Background(), "b16ddcf5c697540f", 1000); err != nil {
+            t.Fatal(err)
+        } else if buf, err := ioutil.ReadAll(r); err != nil {
+            t.Fatal(err)
+        } else if err := r.Close(); err != nil {
+            t.Fatal(err)
+        } else if got, want := string(buf), `foobar`; got != want {
+            t.Fatalf("data=%q, want %q", got, want)
+        }
+    })
+
+    t.Run("ErrNoGeneration", func(t *testing.T) {
+        t.Parallel()
+        if _, err := NewIntegrationReplicaClient(t).WriteSnapshot(context.Background(), "", 0, nil); err == nil || err.Error() != `cannot determine snapshot path: generation required` {
+            t.Fatalf("unexpected error: %v", err)
+        }
+    })
+}
+
+func TestReplicaClient_SnapshotReader(t *testing.T) {
+    t.Run("OK", func(t *testing.T) {
+        t.Parallel()
+
+        c := NewIntegrationReplicaClient(t)
+        defer MustDeleteAll(t, c)
+
+        if _, err := c.WriteSnapshot(context.Background(), "5efbd8d042012dca", 10, strings.NewReader(`foo`)); err != nil {
+            t.Fatal(err)
+        }
+
+        r, err := c.SnapshotReader(context.Background(), "5efbd8d042012dca", 10)
+        if err != nil {
+            t.Fatal(err)
+        }
+        defer r.Close()
+
+        if buf, err := ioutil.ReadAll(r); err != nil {
+            t.Fatal(err)
+        } else if got, want := string(buf), "foo"; got != want {
+            t.Fatalf("ReadAll=%v, want %v", got, want)
+        }
+    })
+
+    t.Run("ErrNotFound", func(t *testing.T) {
+        t.Parallel()
+
+        c := NewIntegrationReplicaClient(t)
+        defer MustDeleteAll(t, c)
+
+        if _, err := c.SnapshotReader(context.Background(), "5efbd8d042012dca", 1); !os.IsNotExist(err) {
+            t.Fatalf("expected not exist, got %#v", err)
+        }
+    })
+
+    t.Run("ErrGeneration", func(t *testing.T) {
+        t.Parallel()
+
+        c := NewIntegrationReplicaClient(t)
+        defer MustDeleteAll(t, c)
+
+        if _, err := c.SnapshotReader(context.Background(), "", 1); err == nil || err.Error() != `cannot determine snapshot path: generation required` {
+            t.Fatalf("unexpected error: %v", err)
+        }
+    })
+}
+
+func TestReplicaClient_WALs(t *testing.T) {
+    t.Run("OK", func(t *testing.T) {
+        t.Parallel()
+
+        c := NewIntegrationReplicaClient(t)
+        defer MustDeleteAll(t, c)
+
+        if _, err := c.WriteWALSegment(context.Background(), litestream.Pos{Generation: "5efbd8d042012dca", Index: 1, Offset: 0}, strings.NewReader(``)); err != nil {
+            t.Fatal(err)
+        }
+        if _, err := c.WriteWALSegment(context.Background(), litestream.Pos{Generation: "b16ddcf5c697540f", Index: 2, Offset: 0}, strings.NewReader(`12345`)); err != nil {
+            t.Fatal(err)
+        } else if _, err := c.WriteWALSegment(context.Background(), litestream.Pos{Generation: "b16ddcf5c697540f", Index: 2, Offset: 5}, strings.NewReader(`67`)); err != nil {
+            t.Fatal(err)
+        } else if _, err := c.WriteWALSegment(context.Background(), litestream.Pos{Generation: "b16ddcf5c697540f", Index: 3, Offset: 0}, strings.NewReader(`xyz`)); err != nil {
+            t.Fatal(err)
+        }
+
+        itr, err := c.WALSegments(context.Background(), "b16ddcf5c697540f")
+        if err != nil {
+            t.Fatal(err)
+        }
+        defer itr.Close()
+
+        // Read all WAL segment files into a slice so they can be sorted.
+        a, err := litestream.SliceWALSegmentIterator(itr)
+        if err != nil {
+            t.Fatal(err)
+        } else if got, want := len(a), 3; got != want {
+            t.Fatalf("len=%v, want %v", got, want)
+        }
+        sort.Sort(litestream.WALSegmentInfoSlice(a))
+
+        // Verify first WAL segment metadata.
+        if got, want := a[0].Generation, "b16ddcf5c697540f"; got != want {
+            t.Fatalf("Generation=%v, want %v", got, want)
+        } else if got, want := a[0].Index, 2; got != want {
+            t.Fatalf("Index=%v, want %v", got, want)
+        } else if got, want := a[0].Offset, int64(0); got != want {
+            t.Fatalf("Offset=%v, want %v", got, want)
+        } else if got, want := a[0].Size, int64(5); got != want {
+            t.Fatalf("Size=%v, want %v", got, want)
+        } else if a[0].CreatedAt.IsZero() {
+            t.Fatalf("expected CreatedAt")
+        }
+
+        // Verify second WAL segment metadata.
+        if got, want := a[1].Generation, "b16ddcf5c697540f"; got != want {
+            t.Fatalf("Generation=%v, want %v", got, want)
+        } else if got, want := a[1].Index, 2; got != want {
+            t.Fatalf("Index=%v, want %v", got, want)
+        } else if got, want := a[1].Offset, int64(5); got != want {
+            t.Fatalf("Offset=%v, want %v", got, want)
+        } else if got, want := a[1].Size, int64(2); got != want {
+            t.Fatalf("Size=%v, want %v", got, want)
+        } else if a[1].CreatedAt.IsZero() {
+            t.Fatalf("expected CreatedAt")
+        }
+
+        // Verify third WAL segment metadata.
+        if got, want := a[2].Generation, "b16ddcf5c697540f"; got != want {
+            t.Fatalf("Generation=%v, want %v", got, want)
+        } else if got, want := a[2].Index, 3; got != want {
+            t.Fatalf("Index=%v, want %v", got, want)
+        } else if got, want := a[2].Offset, int64(0); got != want {
+            t.Fatalf("Offset=%v, want %v", got, want)
+        } else if got, want := a[2].Size, int64(3); got != want {
+            t.Fatalf("Size=%v, want %v", got, want)
+        } else if a[2].CreatedAt.IsZero() {
+            t.Fatalf("expected CreatedAt")
+        }
+
+        // Ensure close is clean.
+        if err := itr.Close(); err != nil {
+            t.Fatal(err)
+        }
+    })
+
+    t.Run("NoGenerationDir", func(t *testing.T) {
+        t.Parallel()
+
+        c := NewIntegrationReplicaClient(t)
+        defer MustDeleteAll(t, c)
+
+        itr, err := c.WALSegments(context.Background(), "5efbd8d042012dca")
+        if err != nil {
+            t.Fatal(err)
+        }
+        defer itr.Close()
+
+        if itr.Next() {
+            t.Fatal("expected no wal files")
+        }
+    })
+
+    t.Run("NoWALs", func(t *testing.T) {
+        t.Parallel()
+
+        c := NewIntegrationReplicaClient(t)
+        defer MustDeleteAll(t, c)
+
+        if _, err := c.WriteSnapshot(context.Background(), "5efbd8d042012dca", 0, strings.NewReader(`foo`)); err != nil {
+            t.Fatal(err)
+        }
+
+        itr, err := c.WALSegments(context.Background(), "5efbd8d042012dca")
+        if err != nil {
+            t.Fatal(err)
+        }
+        defer itr.Close()
+
+        if itr.Next() {
+            t.Fatal("expected no wal files")
+        }
+    })
+
+    t.Run("ErrNoGeneration", func(t *testing.T) {
+        t.Parallel()
+
+        c := NewIntegrationReplicaClient(t)
+        defer MustDeleteAll(t, c)
+
+        if itr, err := c.WALSegments(context.Background(), ""); err != nil {
+            t.Fatal(err)
+        } else if err := itr.Close(); err == nil || err.Error() != `cannot determine wal directory path: generation required` {
+            t.Fatalf("unexpected error: %v", err)
+        }
+    })
+}
+
+func TestReplicaClient_WriteWALSegment(t *testing.T) {
+    t.Run("OK", func(t *testing.T) {
+        t.Parallel()
+
+        c := NewIntegrationReplicaClient(t)
+        defer MustDeleteAll(t, c)
+
+        if _, err := c.WriteWALSegment(context.Background(), litestream.Pos{Generation: "b16ddcf5c697540f", Index: 1000, Offset: 2000}, strings.NewReader(`foobar`)); err != nil {
+            t.Fatal(err)
+        }
+
+        if r, err := c.WALSegmentReader(context.Background(), litestream.Pos{Generation: "b16ddcf5c697540f", Index: 1000, Offset: 2000}); err != nil {
+            t.Fatal(err)
+        } else if buf, err := ioutil.ReadAll(r); err != nil {
+            t.Fatal(err)
+        } else if err := r.Close(); err != nil {
+            t.Fatal(err)
+        } else if got, want := string(buf), `foobar`; got != want {
+            t.Fatalf("data=%q, want %q", got, want)
+        }
+    })
+
+    t.Run("ErrNoGeneration", func(t *testing.T) {
+        t.Parallel()
+        if _, err := NewIntegrationReplicaClient(t).WriteWALSegment(context.Background(), litestream.Pos{Generation: "", Index: 0, Offset: 0}, nil); err == nil || err.Error() != `cannot determine wal segment path: generation required` {
+            t.Fatalf("unexpected error: %v", err)
+        }
+    })
+}
+
+func TestReplicaClient_WALReader(t *testing.T) {
+    t.Parallel()
+
+    c := NewIntegrationReplicaClient(t)
+    defer MustDeleteAll(t, c)
+
+    if _, err := c.WriteWALSegment(context.Background(), litestream.Pos{Generation: "5efbd8d042012dca", Index: 10, Offset: 5}, strings.NewReader(`foobar`)); err != nil {
+        t.Fatal(err)
+    }
+
+    t.Run("OK", func(t *testing.T) {
+        r, err := c.WALSegmentReader(context.Background(), litestream.Pos{Generation: "5efbd8d042012dca", Index: 10, Offset: 5})
+        if err != nil {
+            t.Fatal(err)
+        }
+        defer r.Close()
+
+        if buf, err := ioutil.ReadAll(r); err != nil {
+            t.Fatal(err)
+        } else if got, want := string(buf), "foobar"; got != want {
t.Fatalf("ReadAll=%v, want %v", got, want) + } + }) + + t.Run("ErrNotFound", func(t *testing.T) { + t.Parallel() + + c := NewIntegrationReplicaClient(t) + defer MustDeleteAll(t, c) + + if _, err := c.WALSegmentReader(context.Background(), litestream.Pos{Generation: "5efbd8d042012dca", Index: 1, Offset: 0}); !os.IsNotExist(err) { + t.Fatalf("expected not exist, got %#v", err) + } + }) +} + +func TestReplicaClient_DeleteWALSegments(t *testing.T) { + t.Run("OK", func(t *testing.T) { + t.Parallel() + + c := NewIntegrationReplicaClient(t) + defer MustDeleteAll(t, c) + + if _, err := c.WriteWALSegment(context.Background(), litestream.Pos{Generation: "b16ddcf5c697540f", Index: 1, Offset: 2}, strings.NewReader(`foo`)); err != nil { + t.Fatal(err) + } else if _, err := c.WriteWALSegment(context.Background(), litestream.Pos{Generation: "5efbd8d042012dca", Index: 3, Offset: 4}, strings.NewReader(`bar`)); err != nil { + t.Fatal(err) + } + + if err := c.DeleteWALSegments(context.Background(), []litestream.Pos{ + {Generation: "b16ddcf5c697540f", Index: 1, Offset: 2}, + {Generation: "5efbd8d042012dca", Index: 3, Offset: 4}, + }); err != nil { + t.Fatal(err) + } + + if _, err := c.WALSegmentReader(context.Background(), litestream.Pos{Generation: "b16ddcf5c697540f", Index: 1, Offset: 2}); !os.IsNotExist(err) { + t.Fatalf("expected not exist, got %#v", err) + } else if _, err := c.WALSegmentReader(context.Background(), litestream.Pos{Generation: "5efbd8d042012dca", Index: 3, Offset: 4}); !os.IsNotExist(err) { + t.Fatalf("expected not exist, got %#v", err) + } + }) + + t.Run("ErrNoGeneration", func(t *testing.T) { + t.Parallel() + if err := NewIntegrationReplicaClient(t).DeleteWALSegments(context.Background(), []litestream.Pos{{}}); err == nil || err.Error() != `cannot determine wal segment path: generation required` { + t.Fatalf("unexpected error: %v", err) + } + }) +} + +// NewIntegrationReplicaClient returns a new client for integration testing. +// If integration flag is not set then test/benchmark is skipped. +func NewIntegrationReplicaClient(tb testing.TB) *s3.ReplicaClient { + tb.Helper() + + if !*integration { + tb.Skip("integration tests disabled") + } + + c := s3.NewReplicaClient() + c.AccessKeyID = *accessKeyID + c.SecretAccessKey = *secretAccessKey + c.Region = *region + c.Bucket = *bucket + c.Path = path.Join(*pathFlag, fmt.Sprintf("%016x", rand.Uint64())) + c.Endpoint = *endpoint + c.ForcePathStyle = *forcePathStyle + c.SkipVerify = *skipVerify + + return c +} + +// MustDeleteAll deletes all objects under the client's path. +func MustDeleteAll(tb testing.TB, c *s3.ReplicaClient) { + tb.Helper() + if err := c.DeleteAll(context.Background()); err != nil { + tb.Fatal(err) + } +} diff --git a/s3/s3.go b/s3/s3.go index cdcab6f..33d1fbb 100644 --- a/s3/s3.go +++ b/s3/s3.go @@ -1,1142 +1,11 @@ package s3 import ( - "bytes" - "context" - "crypto/tls" "fmt" - "io" - "io/ioutil" - "log" "net" - "net/http" - "os" - "path" "regexp" - "sync" - "time" - - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/aws/credentials" - "github.com/aws/aws-sdk-go/aws/defaults" - "github.com/aws/aws-sdk-go/aws/session" - "github.com/aws/aws-sdk-go/service/s3" - "github.com/aws/aws-sdk-go/service/s3/s3manager" - "github.com/benbjohnson/litestream" - "github.com/benbjohnson/litestream/internal" - "github.com/pierrec/lz4/v4" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" ) -// S3 replica default settings. 
-const ( - DefaultSyncInterval = 10 * time.Second - - DefaultRetention = 24 * time.Hour - - DefaultRetentionCheckInterval = 1 * time.Hour -) - -// MaxKeys is the number of keys S3 can operate on per batch. -const MaxKeys = 1000 - -var _ litestream.Replica = (*Replica)(nil) - -// Replica is a replica that replicates a DB to an S3 bucket. -type Replica struct { - db *litestream.DB // source database - name string // replica name, optional - s3 *s3.S3 // s3 service - uploader *s3manager.Uploader - - mu sync.RWMutex - snapshotMu sync.Mutex - pos litestream.Pos // last position - - muf sync.Mutex - f *os.File // long-lived read-only db file descriptor - - wg sync.WaitGroup - cancel func() - - snapshotTotalGauge prometheus.Gauge - walBytesCounter prometheus.Counter - walIndexGauge prometheus.Gauge - walOffsetGauge prometheus.Gauge - putOperationTotalCounter prometheus.Counter - putOperationBytesCounter prometheus.Counter - getOperationTotalCounter prometheus.Counter - getOperationBytesCounter prometheus.Counter - listOperationTotalCounter prometheus.Counter - deleteOperationTotalCounter prometheus.Counter - - // AWS authentication keys. - AccessKeyID string - SecretAccessKey string - - // S3 bucket information - Region string - Bucket string - Path string - Endpoint string - ForcePathStyle bool - SkipVerify bool - - // Time between syncs with the shadow WAL. - SyncInterval time.Duration - - // Frequency to create new snapshots. - SnapshotInterval time.Duration - - // Time to keep snapshots and related WAL files. - // Database is snapshotted after interval and older WAL files are discarded. - Retention time.Duration - - // Time between retention checks. - RetentionCheckInterval time.Duration - - // Time between validation checks. - ValidationInterval time.Duration - - // If true, replica monitors database for changes automatically. - // Set to false if replica is being used synchronously (such as in tests). - MonitorEnabled bool -} - -// NewReplica returns a new instance of Replica. -func NewReplica(db *litestream.DB, name string) *Replica { - r := &Replica{ - db: db, - name: name, - cancel: func() {}, - - SyncInterval: DefaultSyncInterval, - Retention: DefaultRetention, - RetentionCheckInterval: DefaultRetentionCheckInterval, - - MonitorEnabled: true, - } - - var dbPath string - if db != nil { - dbPath = db.Path() - } - r.snapshotTotalGauge = internal.ReplicaSnapshotTotalGaugeVec.WithLabelValues(dbPath, r.Name()) - r.walBytesCounter = internal.ReplicaWALBytesCounterVec.WithLabelValues(dbPath, r.Name()) - r.walIndexGauge = internal.ReplicaWALIndexGaugeVec.WithLabelValues(dbPath, r.Name()) - r.walOffsetGauge = internal.ReplicaWALOffsetGaugeVec.WithLabelValues(dbPath, r.Name()) - r.putOperationTotalCounter = operationTotalCounterVec.WithLabelValues(dbPath, r.Name(), "PUT") - r.putOperationBytesCounter = operationBytesCounterVec.WithLabelValues(dbPath, r.Name(), "PUT") - r.getOperationTotalCounter = operationTotalCounterVec.WithLabelValues(dbPath, r.Name(), "GET") - r.getOperationBytesCounter = operationBytesCounterVec.WithLabelValues(dbPath, r.Name(), "GET") - r.listOperationTotalCounter = operationTotalCounterVec.WithLabelValues(dbPath, r.Name(), "LIST") - r.deleteOperationTotalCounter = operationTotalCounterVec.WithLabelValues(dbPath, r.Name(), "DELETE") - - return r -} - -// Name returns the name of the replica. Returns the type if no name set. -func (r *Replica) Name() string { - if r.name != "" { - return r.name - } - return r.Type() -} - -// Type returns the type of replica. 
-func (r *Replica) Type() string { - return "s3" -} - -// DB returns the parent database reference. -func (r *Replica) DB() *litestream.DB { - return r.db -} - -// LastPos returns the last successfully replicated position. -func (r *Replica) LastPos() litestream.Pos { - r.mu.RLock() - defer r.mu.RUnlock() - return r.pos -} - -// GenerationDir returns the path to a generation's root directory. -func (r *Replica) GenerationDir(generation string) string { - return path.Join(r.Path, "generations", generation) -} - -// SnapshotDir returns the path to a generation's snapshot directory. -func (r *Replica) SnapshotDir(generation string) string { - return path.Join(r.GenerationDir(generation), "snapshots") -} - -// SnapshotPath returns the path to a snapshot file. -func (r *Replica) SnapshotPath(generation string, index int) string { - return path.Join(r.SnapshotDir(generation), fmt.Sprintf("%08x.snapshot.lz4", index)) -} - -// MaxSnapshotIndex returns the highest index for the snapshots. -func (r *Replica) MaxSnapshotIndex(generation string) (int, error) { - snapshots, err := r.Snapshots(context.Background()) - if err != nil { - return 0, err - } - - index := -1 - for _, snapshot := range snapshots { - if snapshot.Generation != generation { - continue - } else if index == -1 || snapshot.Index > index { - index = snapshot.Index - } - } - if index == -1 { - return 0, fmt.Errorf("no snapshots found") - } - return index, nil -} - -// WALDir returns the path to a generation's WAL directory -func (r *Replica) WALDir(generation string) string { - return path.Join(r.GenerationDir(generation), "wal") -} - -// Generations returns a list of available generation names. -func (r *Replica) Generations(ctx context.Context) ([]string, error) { - if err := r.Init(ctx); err != nil { - return nil, err - } - - var generations []string - if err := r.s3.ListObjectsPagesWithContext(ctx, &s3.ListObjectsInput{ - Bucket: aws.String(r.Bucket), - Prefix: aws.String(path.Join(r.Path, "generations") + "/"), - Delimiter: aws.String("/"), - }, func(page *s3.ListObjectsOutput, lastPage bool) bool { - r.listOperationTotalCounter.Inc() - - for _, prefix := range page.CommonPrefixes { - name := path.Base(*prefix.Prefix) - if !litestream.IsGenerationName(name) { - continue - } - generations = append(generations, name) - } - return true - }); err != nil { - return nil, err - } - - return generations, nil -} - -// GenerationStats returns stats for a generation. -func (r *Replica) GenerationStats(ctx context.Context, generation string) (stats litestream.GenerationStats, err error) { - if err := r.Init(ctx); err != nil { - return stats, err - } - - // Determine stats for all snapshots. - n, min, max, err := r.snapshotStats(ctx, generation) - if err != nil { - return stats, err - } - stats.SnapshotN = n - stats.CreatedAt, stats.UpdatedAt = min, max - - // Update stats if we have WAL files. 
- n, min, max, err = r.walStats(ctx, generation) - if err != nil { - return stats, err - } else if n == 0 { - return stats, nil - } - - stats.WALN = n - if stats.CreatedAt.IsZero() || min.Before(stats.CreatedAt) { - stats.CreatedAt = min - } - if stats.UpdatedAt.IsZero() || max.After(stats.UpdatedAt) { - stats.UpdatedAt = max - } - return stats, nil -} - -func (r *Replica) snapshotStats(ctx context.Context, generation string) (n int, min, max time.Time, err error) { - if err := r.s3.ListObjectsPagesWithContext(ctx, &s3.ListObjectsInput{ - Bucket: aws.String(r.Bucket), - Prefix: aws.String(r.SnapshotDir(generation) + "/"), - }, func(page *s3.ListObjectsOutput, lastPage bool) bool { - r.listOperationTotalCounter.Inc() - - for _, obj := range page.Contents { - if !litestream.IsSnapshotPath(path.Base(*obj.Key)) { - continue - } - modTime := obj.LastModified.UTC() - - n++ - if min.IsZero() || modTime.Before(min) { - min = modTime - } - if max.IsZero() || modTime.After(max) { - max = modTime - } - } - return true - }); err != nil { - return n, min, max, err - } - return n, min, max, nil -} - -func (r *Replica) walStats(ctx context.Context, generation string) (n int, min, max time.Time, err error) { - if err := r.s3.ListObjectsPagesWithContext(ctx, &s3.ListObjectsInput{ - Bucket: aws.String(r.Bucket), - Prefix: aws.String(r.WALDir(generation) + "/"), - }, func(page *s3.ListObjectsOutput, lastPage bool) bool { - r.listOperationTotalCounter.Inc() - - for _, obj := range page.Contents { - if !litestream.IsWALPath(path.Base(*obj.Key)) { - continue - } - modTime := obj.LastModified.UTC() - - n++ - if min.IsZero() || modTime.Before(min) { - min = modTime - } - if max.IsZero() || modTime.After(max) { - max = modTime - } - } - return true - }); err != nil { - return n, min, max, err - } - return n, min, max, nil -} - -// Snapshots returns a list of available snapshots in the replica. -func (r *Replica) Snapshots(ctx context.Context) ([]*litestream.SnapshotInfo, error) { - if err := r.Init(ctx); err != nil { - return nil, err - } - - generations, err := r.Generations(ctx) - if err != nil { - return nil, err - } - - var infos []*litestream.SnapshotInfo - for _, generation := range generations { - if err := r.s3.ListObjectsPagesWithContext(ctx, &s3.ListObjectsInput{ - Bucket: aws.String(r.Bucket), - Prefix: aws.String(r.SnapshotDir(generation) + "/"), - Delimiter: aws.String("/"), - }, func(page *s3.ListObjectsOutput, lastPage bool) bool { - r.listOperationTotalCounter.Inc() - - for _, obj := range page.Contents { - key := path.Base(*obj.Key) - index, _, err := litestream.ParseSnapshotPath(key) - if err != nil { - continue - } - - infos = append(infos, &litestream.SnapshotInfo{ - Name: key, - Replica: r.Name(), - Generation: generation, - Index: index, - Size: *obj.Size, - CreatedAt: obj.LastModified.UTC(), - }) - } - return true - }); err != nil { - return nil, err - } - } - - return infos, nil -} - -// WALs returns a list of available WAL files in the replica. 
-func (r *Replica) WALs(ctx context.Context) ([]*litestream.WALInfo, error) { - if err := r.Init(ctx); err != nil { - return nil, err - } - - generations, err := r.Generations(ctx) - if err != nil { - return nil, err - } - - var infos []*litestream.WALInfo - for _, generation := range generations { - var prev *litestream.WALInfo - if err := r.s3.ListObjectsPagesWithContext(ctx, &s3.ListObjectsInput{ - Bucket: aws.String(r.Bucket), - Prefix: aws.String(r.WALDir(generation) + "/"), - Delimiter: aws.String("/"), - }, func(page *s3.ListObjectsOutput, lastPage bool) bool { - r.listOperationTotalCounter.Inc() - - for _, obj := range page.Contents { - key := path.Base(*obj.Key) - - index, offset, _, err := litestream.ParseWALPath(key) - if err != nil { - continue - } - - // Update previous record if generation & index match. - if prev != nil && prev.Index == index { - prev.Size += *obj.Size - prev.CreatedAt = obj.LastModified.UTC() - continue - } - - // Append new WAL record and keep reference to append additional - // size for segmented WAL files. - prev = &litestream.WALInfo{ - Name: key, - Replica: r.Name(), - Generation: generation, - Index: index, - Offset: offset, - Size: *obj.Size, - CreatedAt: obj.LastModified.UTC(), - } - infos = append(infos, prev) - } - return true - }); err != nil { - return nil, err - } - } - - return infos, nil -} - -// Start starts replication for a given generation. -func (r *Replica) Start(ctx context.Context) (err error) { - // Ignore if replica is being used sychronously. - if !r.MonitorEnabled { - return nil - } - - // Stop previous replication. - r.Stop(false) - - // Wrap context with cancelation. - ctx, r.cancel = context.WithCancel(ctx) - - // Start goroutines to manage replica data. - r.wg.Add(4) - go func() { defer r.wg.Done(); r.monitor(ctx) }() - go func() { defer r.wg.Done(); r.retainer(ctx) }() - go func() { defer r.wg.Done(); r.snapshotter(ctx) }() - go func() { defer r.wg.Done(); r.validator(ctx) }() - - return nil -} - -// Stop cancels any outstanding replication and blocks until finished. -// -// Performing a hard stop will close the DB file descriptor which could release -// locks on per-process locks. Hard stops should only be performed when -// stopping the entire process. -func (r *Replica) Stop(hard bool) (err error) { - r.cancel() - r.wg.Wait() - - r.muf.Lock() - defer r.muf.Unlock() - - if hard && r.f != nil { - if e := r.f.Close(); e != nil && err == nil { - err = e - } - } - - return err -} - -// monitor runs in a separate goroutine and continuously replicates the DB. -func (r *Replica) monitor(ctx context.Context) { - ticker := time.NewTicker(r.SyncInterval) - defer ticker.Stop() - - // Continuously check for new data to replicate. - ch := make(chan struct{}) - close(ch) - var notify <-chan struct{} = ch - - for initial := true; ; initial = false { - // Enforce a minimum time between synchronization. - if !initial { - select { - case <-ctx.Done(): - return - case <-ticker.C: - } - } - - // Wait for changes to the database. - select { - case <-ctx.Done(): - return - case <-notify: - } - - // Fetch new notify channel before replicating data. - notify = r.db.Notify() - - // Synchronize the shadow wal into the replication directory. - if err := r.Sync(ctx); err != nil { - log.Printf("%s(%s): monitor error: %s", r.db.Path(), r.Name(), err) - continue - } - } -} - -// retainer runs in a separate goroutine and handles retention. 
-func (r *Replica) retainer(ctx context.Context) {
-	// Disable retention enforcement if retention period is non-positive.
-	if r.Retention <= 0 {
-		return
-	}
-
-	// Ensure check interval is not longer than retention period.
-	checkInterval := r.RetentionCheckInterval
-	if checkInterval > r.Retention {
-		checkInterval = r.Retention
-	}
-
-	ticker := time.NewTicker(checkInterval)
-	defer ticker.Stop()
-
-	for {
-		select {
-		case <-ctx.Done():
-			return
-		case <-ticker.C:
-			if err := r.EnforceRetention(ctx); err != nil {
-				log.Printf("%s(%s): retain error: %s", r.db.Path(), r.Name(), err)
-				continue
-			}
-		}
-	}
-}
-
-// snapshotter runs in a separate goroutine and handles snapshotting.
-func (r *Replica) snapshotter(ctx context.Context) {
-	if r.SnapshotInterval <= 0 {
-		return
-	}
-
-	ticker := time.NewTicker(r.SnapshotInterval)
-	defer ticker.Stop()
-
-	for {
-		select {
-		case <-ctx.Done():
-			return
-		case <-ticker.C:
-			if err := r.Snapshot(ctx); err != nil && err != litestream.ErrNoGeneration {
-				log.Printf("%s(%s): snapshotter error: %s", r.db.Path(), r.Name(), err)
-				continue
-			}
-		}
-	}
-}
-
-// validator runs in a separate goroutine and handles periodic validation.
-func (r *Replica) validator(ctx context.Context) {
-	// Initialize counters since validation occurs infrequently.
-	for _, status := range []string{"ok", "error"} {
-		internal.ReplicaValidationTotalCounterVec.WithLabelValues(r.db.Path(), r.Name(), status).Add(0)
-	}
-
-	if r.ValidationInterval <= 0 {
-		return
-	}
-
-	ticker := time.NewTicker(r.ValidationInterval)
-	defer ticker.Stop()
-
-	for {
-		select {
-		case <-ctx.Done():
-			return
-		case <-ticker.C:
-			if err := litestream.ValidateReplica(ctx, r); err != nil {
-				log.Printf("%s(%s): validation error: %s", r.db.Path(), r.Name(), err)
-				continue
-			}
-		}
-	}
-}
-
-// CalcPos returns the position of the replica for the given generation.
-// Returns a zero value if there is no active generation.
-func (r *Replica) CalcPos(ctx context.Context, generation string) (pos litestream.Pos, err error) {
-	if err := r.Init(ctx); err != nil {
-		return pos, err
-	}
-
-	pos.Generation = generation
-
-	// Find maximum snapshot index.
-	if pos.Index, err = r.MaxSnapshotIndex(generation); err != nil {
-		return litestream.Pos{}, err
-	}
-
-	index := -1
-	var offset int64
-	if err := r.s3.ListObjectsPagesWithContext(ctx, &s3.ListObjectsInput{
-		Bucket:    aws.String(r.Bucket),
-		Prefix:    aws.String(r.WALDir(generation) + "/"),
-		Delimiter: aws.String("/"),
-	}, func(page *s3.ListObjectsOutput, lastPage bool) bool {
-		r.listOperationTotalCounter.Inc()
-
-		for _, obj := range page.Contents {
-			key := path.Base(*obj.Key)
-
-			idx, off, _, err := litestream.ParseWALPath(key)
-			if err != nil {
-				continue // invalid wal filename
-			}
-
-			if index == -1 || idx > index {
-				index, offset = idx, 0 // start tracking new wal
-			} else if idx == index && off > offset {
-				offset = off // update offset
-			}
-		}
-		return true
-	}); err != nil {
-		return litestream.Pos{}, err
-	}
-	if index == -1 {
-		return pos, nil // no wal files
-	}
-	pos.Index = index
-	pos.Offset = offset
-
-	return pos, nil
-}
-
-// Snapshot copies the entire database to the replica path.
-func (r *Replica) Snapshot(ctx context.Context) error {
-	// Find current position of database.
- pos, err := r.db.Pos() - if err != nil { - return fmt.Errorf("cannot determine current db generation: %w", err) - } else if pos.IsZero() { - return litestream.ErrNoGeneration - } - return r.snapshot(ctx, pos.Generation, pos.Index) -} - -// snapshot copies the entire database to the replica path. -func (r *Replica) snapshot(ctx context.Context, generation string, index int) error { - r.muf.Lock() - defer r.muf.Unlock() - - // Issue a passive checkpoint to flush any pages to disk before snapshotting. - if _, err := r.db.SQLDB().ExecContext(ctx, `PRAGMA wal_checkpoint(PASSIVE);`); err != nil { - return fmt.Errorf("pre-snapshot checkpoint: %w", err) - } - - // Acquire a read lock on the database during snapshot to prevent checkpoints. - tx, err := r.db.SQLDB().Begin() - if err != nil { - return err - } else if _, err := tx.ExecContext(ctx, `SELECT COUNT(1) FROM _litestream_seq;`); err != nil { - _ = tx.Rollback() - return err - } - defer func() { _ = tx.Rollback() }() - - // Open long-lived file descriptor on database. - if r.f == nil { - if r.f, err = os.Open(r.db.Path()); err != nil { - return err - } - } - - // Move the file descriptor to the beginning. We only use one long lived - // file descriptor because some operating systems will remove the database - // lock when closing a separate file descriptor on the DB. - if _, err := r.f.Seek(0, io.SeekStart); err != nil { - return err - } - - fi, err := r.f.Stat() - if err != nil { - return err - } - - pr, pw := io.Pipe() - zw := lz4.NewWriter(pw) - go func() { - if _, err := io.Copy(zw, r.f); err != nil { - _ = pw.CloseWithError(err) - return - } - _ = pw.CloseWithError(zw.Close()) - }() - - snapshotPath := r.SnapshotPath(generation, index) - startTime := time.Now() - - if _, err := r.uploader.UploadWithContext(ctx, &s3manager.UploadInput{ - Bucket: aws.String(r.Bucket), - Key: aws.String(snapshotPath), - Body: pr, - }); err != nil { - return err - } - - r.putOperationTotalCounter.Inc() - r.putOperationBytesCounter.Add(float64(fi.Size())) - - log.Printf("%s(%s): snapshot: creating %s/%08x t=%s", r.db.Path(), r.Name(), generation, index, time.Since(startTime).Truncate(time.Millisecond)) - return nil -} - -// snapshotN returns the number of snapshots for a generation. -func (r *Replica) snapshotN(generation string) (int, error) { - snapshots, err := r.Snapshots(context.Background()) - if err != nil { - return 0, err - } - - var n int - for _, snapshot := range snapshots { - if snapshot.Generation == generation { - n++ - } - } - return n, nil -} - -// Init initializes the connection to S3. No-op if already initialized. -func (r *Replica) Init(ctx context.Context) (err error) { - r.mu.Lock() - defer r.mu.Unlock() - - if r.s3 != nil { - return nil - } - - // Look up region if not specified and no endpoint is used. - // Endpoints are typically used for non-S3 object stores and do not - // necessarily require a region. - region := r.Region - if region == "" { - if r.Endpoint == "" { - if region, err = r.findBucketRegion(ctx, r.Bucket); err != nil { - return fmt.Errorf("cannot lookup bucket region: %w", err) - } - } else { - region = "us-east-1" // default for non-S3 object stores - } - } - - // Create new AWS session. - config := r.config() - if region != "" { - config.Region = aws.String(region) - } - sess, err := session.NewSession(config) - if err != nil { - return fmt.Errorf("cannot create aws session: %w", err) - } - r.s3 = s3.New(sess) - r.uploader = s3manager.NewUploader(sess) - return nil -} - -// config returns the AWS configuration. 
Uses the default credential chain
-// unless a key/secret is explicitly set.
-func (r *Replica) config() *aws.Config {
-	config := defaults.Get().Config
-	if r.AccessKeyID != "" || r.SecretAccessKey != "" {
-		config.Credentials = credentials.NewStaticCredentials(r.AccessKeyID, r.SecretAccessKey, "")
-	}
-	if r.Endpoint != "" {
-		config.Endpoint = aws.String(r.Endpoint)
-	}
-	if r.ForcePathStyle {
-		config.S3ForcePathStyle = aws.Bool(r.ForcePathStyle)
-	}
-	if r.SkipVerify {
-		config.HTTPClient = &http.Client{Transport: &http.Transport{
-			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
-		}}
-	}
-
-	return config
-}
-
-func (r *Replica) findBucketRegion(ctx context.Context, bucket string) (string, error) {
-	// Connect to US standard region to fetch info.
-	config := r.config()
-	config.Region = aws.String("us-east-1")
-	sess, err := session.NewSession(config)
-	if err != nil {
-		return "", err
-	}
-
-	// Fetch bucket location, if possible. Must be bucket owner.
-	// This call can return a nil location which means it's in us-east-1.
-	if out, err := s3.New(sess).GetBucketLocation(&s3.GetBucketLocationInput{
-		Bucket: aws.String(bucket),
-	}); err != nil {
-		return "", err
-	} else if out.LocationConstraint != nil {
-		return *out.LocationConstraint, nil
-	}
-	return "us-east-1", nil
-}
-
-// Sync replays data from the shadow WAL and uploads it to S3.
-func (r *Replica) Sync(ctx context.Context) (err error) {
-	// Clear last position if an error occurs during sync.
-	defer func() {
-		if err != nil {
-			r.mu.Lock()
-			r.pos = litestream.Pos{}
-			r.mu.Unlock()
-		}
-	}()
-
-	// Connect to S3, if necessary.
-	if err := r.Init(ctx); err != nil {
-		return err
-	}
-
-	// Find current position of database.
-	dpos, err := r.db.Pos()
-	if err != nil {
-		return fmt.Errorf("cannot determine current generation: %w", err)
-	} else if dpos.IsZero() {
-		return fmt.Errorf("no generation, waiting for data")
-	}
-	generation := dpos.Generation
-
-	// Calculate position if we don't have a previous position or if the generation changes.
-	// Ensure sync & retainer do not snapshot at the same time.
-	if lastPos := r.LastPos(); lastPos.IsZero() || lastPos.Generation != generation {
-		if err := func() error {
-			r.snapshotMu.Lock()
-			defer r.snapshotMu.Unlock()
-
-			// Create snapshot if no snapshots exist for generation.
-			if n, err := r.snapshotN(generation); err != nil {
-				return err
-			} else if n == 0 {
-				if err := r.snapshot(ctx, generation, dpos.Index); err != nil {
-					return err
-				}
-				r.snapshotTotalGauge.Set(1.0)
-			} else {
-				r.snapshotTotalGauge.Set(float64(n))
-			}
-
-			// Determine position, if necessary.
-			pos, err := r.CalcPos(ctx, generation)
-			if err != nil {
-				return fmt.Errorf("cannot determine replica position: %s", err)
-			}
-
-			r.mu.Lock()
-			defer r.mu.Unlock()
-			r.pos = pos
-
-			return nil
-		}(); err != nil {
-			return err
-		}
-	}
-
-	// Read all WAL files since the last position.
-	for {
-		if err = r.syncWAL(ctx); err == io.EOF {
-			break
-		} else if err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-
-func (r *Replica) syncWAL(ctx context.Context) (err error) {
-	rd, err := r.db.ShadowWALReader(r.LastPos())
-	if err == io.EOF {
-		return err
-	} else if err != nil {
-		return fmt.Errorf("wal reader: %w", err)
-	}
-	defer rd.Close()
-
-	// Read to intermediate buffer to determine size.
-	pos := rd.Pos()
-	b, err := ioutil.ReadAll(rd)
-	if err != nil {
-		return err
-	}
-
-	var buf bytes.Buffer
-	zw := lz4.NewWriter(&buf)
-	n, err := zw.Write(b)
-	if err != nil {
-		return err
-	} else if err := zw.Close(); err != nil {
-		return err
-	}
-
-	// Build a WAL path with the index and offset so we can ensure
-	// that files are contiguous without having to decompress.
-	walPath := path.Join(
-		r.WALDir(rd.Pos().Generation),
-		litestream.FormatWALPathWithOffset(pos.Index, pos.Offset)+".lz4",
-	)
-
-	if _, err := r.uploader.UploadWithContext(ctx, &s3manager.UploadInput{
-		Bucket: aws.String(r.Bucket),
-		Key:    aws.String(walPath),
-		Body:   &buf,
-	}); err != nil {
-		return err
-	}
-	r.putOperationTotalCounter.Inc()
-	r.putOperationBytesCounter.Add(float64(n)) // compressed bytes
-
-	// Save last replicated position.
-	r.mu.Lock()
-	r.pos = rd.Pos()
-	r.mu.Unlock()
-
-	// Track raw bytes processed & current position.
-	r.walBytesCounter.Add(float64(len(b))) // raw bytes
-	r.walIndexGauge.Set(float64(rd.Pos().Index))
-	r.walOffsetGauge.Set(float64(rd.Pos().Offset))
-
-	return nil
-}
-
-// SnapshotReader returns a reader for snapshot data at the given generation/index.
-func (r *Replica) SnapshotReader(ctx context.Context, generation string, index int) (io.ReadCloser, error) {
-	if err := r.Init(ctx); err != nil {
-		return nil, err
-	}
-
-	// Pipe download to return an io.Reader.
-	out, err := r.s3.GetObjectWithContext(ctx, &s3.GetObjectInput{
-		Bucket: aws.String(r.Bucket),
-		Key:    aws.String(r.SnapshotPath(generation, index)),
-	})
-	if err != nil {
-		return nil, err
-	}
-	r.getOperationTotalCounter.Inc()
-	r.getOperationBytesCounter.Add(float64(*out.ContentLength))
-
-	// Decompress the snapshot file.
-	return internal.NewReadCloser(lz4.NewReader(out.Body), out.Body), nil
-}
-
-// WALReader returns a reader for WAL data at the given index.
-// Returns os.ErrNotExist if no matching index is found.
-func (r *Replica) WALReader(ctx context.Context, generation string, index int) (io.ReadCloser, error) {
-	if err := r.Init(ctx); err != nil {
-		return nil, err
-	}
-
-	// Collect all files for the index.
-	var keys []string
-	if err := r.s3.ListObjectsPagesWithContext(ctx, &s3.ListObjectsInput{
-		Bucket: aws.String(r.Bucket),
-		Prefix: aws.String(path.Join(r.WALDir(generation), fmt.Sprintf("%08x_", index))),
-	}, func(page *s3.ListObjectsOutput, lastPage bool) bool {
-		r.listOperationTotalCounter.Inc()
-
-		for _, obj := range page.Contents {
-			_, _, _, err := litestream.ParseWALPath(path.Base(*obj.Key))
-			if err != nil {
-				continue
-			}
-			keys = append(keys, *obj.Key)
-		}
-		return true
-	}); err != nil {
-		return nil, err
-	} else if len(keys) == 0 {
-		return nil, os.ErrNotExist
-	}
-
-	// Open each file and concatenate into a single buffer.
-	var buf bytes.Buffer
-	var offset int64
-	for _, key := range keys {
-		// Ensure offset is correct as we copy segments into buffer.
-		_, off, _, _ := litestream.ParseWALPath(path.Base(key))
-		if off != offset {
-			return nil, fmt.Errorf("out of sequence wal segments: %s/%08x at remote offset %d, expected offset %d", generation, index, off, offset)
-		}
-
-		// Pipe download to return an io.Reader.
-		out, err := r.s3.GetObjectWithContext(ctx, &s3.GetObjectInput{
-			Bucket: aws.String(r.Bucket),
-			Key:    aws.String(key),
-		})
-		if err != nil {
-			return nil, err
-		}
-		defer out.Body.Close()
-
-		r.getOperationTotalCounter.Inc()
-		r.getOperationBytesCounter.Add(float64(*out.ContentLength))
-
-		zr := lz4.NewReader(out.Body)
-
-		n, err := io.Copy(&buf, zr)
-		if err != nil {
-			return nil, err
-		}
-		offset += int64(n)
-	}
-
-	return ioutil.NopCloser(&buf), nil
-}
-
-// EnforceRetention forces a new snapshot once the retention interval has passed.
-// Older snapshots and WAL files are then removed.
-func (r *Replica) EnforceRetention(ctx context.Context) (err error) {
-	if err := r.Init(ctx); err != nil {
-		return err
-	}
-
-	// Ensure sync & retainer do not snapshot at the same time.
-	var snapshots []*litestream.SnapshotInfo
-	if err := func() error {
-		r.snapshotMu.Lock()
-		defer r.snapshotMu.Unlock()
-
-		// Find current position of database.
-		pos, err := r.db.Pos()
-		if err != nil {
-			return fmt.Errorf("cannot determine current generation: %w", err)
-		} else if pos.IsZero() {
-			return fmt.Errorf("no generation, waiting for data")
-		}
-
-		// Obtain list of snapshots that are within the retention period.
-		if snapshots, err = r.Snapshots(ctx); err != nil {
-			return fmt.Errorf("cannot obtain snapshot list: %w", err)
-		}
-		snapshots = litestream.FilterSnapshotsAfter(snapshots, time.Now().Add(-r.Retention))
-
-		// If no retained snapshots exist, create a new snapshot.
-		if len(snapshots) == 0 {
-			if err := r.snapshot(ctx, pos.Generation, pos.Index); err != nil {
-				return fmt.Errorf("cannot snapshot: %w", err)
-			}
-			snapshots = append(snapshots, &litestream.SnapshotInfo{Generation: pos.Generation, Index: pos.Index})
-		}
-
-		return nil
-	}(); err != nil {
-		return err
-	}
-
-	// Loop over generations and delete unretained snapshots & WAL files.
-	generations, err := r.Generations(ctx)
-	if err != nil {
-		return fmt.Errorf("cannot obtain generations: %w", err)
-	}
-	for _, generation := range generations {
-		// Find earliest retained snapshot for this generation.
-		snapshot := litestream.FindMinSnapshotByGeneration(snapshots, generation)
-
-		// Delete the generation if it has no retained snapshots.
-		if snapshot == nil {
-			if err := r.deleteGenerationBefore(ctx, generation, -1); err != nil {
-				return fmt.Errorf("cannot delete generation %q dir: %w", generation, err)
-			}
-			continue
-		}
-
-		// Otherwise delete all snapshots & WAL files before the lowest retained index.
-		if err := r.deleteGenerationBefore(ctx, generation, snapshot.Index); err != nil {
-			return fmt.Errorf("cannot delete generation %q files before index %d: %w", generation, snapshot.Index, err)
-		}
-	}
-
-	return nil
-}
-
-func (r *Replica) deleteGenerationBefore(ctx context.Context, generation string, index int) (err error) {
-	// Collect all files for the generation.
-	var objIDs []*s3.ObjectIdentifier
-	if err := r.s3.ListObjectsPagesWithContext(ctx, &s3.ListObjectsInput{
-		Bucket: aws.String(r.Bucket),
-		Prefix: aws.String(r.GenerationDir(generation)),
-	}, func(page *s3.ListObjectsOutput, lastPage bool) bool {
-		r.listOperationTotalCounter.Inc()
-
-		for _, obj := range page.Contents {
-			// Skip snapshots or WALs at or after the search index unless the index is -1.
- if index != -1 { - if idx, _, err := litestream.ParseSnapshotPath(path.Base(*obj.Key)); err == nil && idx >= index { - continue - } else if idx, _, _, err := litestream.ParseWALPath(path.Base(*obj.Key)); err == nil && idx >= index { - continue - } - } - - objIDs = append(objIDs, &s3.ObjectIdentifier{Key: obj.Key}) - } - return true - }); err != nil { - return err - } - - // Delete all files in batches. - var n int - for i := 0; i < len(objIDs); i += MaxKeys { - j := i + MaxKeys - if j > len(objIDs) { - j = len(objIDs) - } - - if _, err := r.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{ - Bucket: aws.String(r.Bucket), - Delete: &s3.Delete{ - Objects: objIDs[i:j], - Quiet: aws.Bool(true), - }, - }); err != nil { - return err - } - n += len(objIDs[i:j]) - r.deleteOperationTotalCounter.Inc() - } - - log.Printf("%s(%s): retainer: deleting wal files before %s/%08x n=%d", r.db.Path(), r.Name(), generation, index, n) - - return nil -} - // ParseHost extracts data from a hostname depending on the service provider. func ParseHost(s string) (bucket, region, endpoint string, forcePathStyle bool) { // Extract port if one is specified. @@ -1190,20 +59,3 @@ var ( backblazeRegex = regexp.MustCompile(`^(?:(.+)\.)?s3.([^.]+)\.backblazeb2.com$`) gcsRegex = regexp.MustCompile(`^(?:(.+)\.)?storage.googleapis.com$`) ) - -// S3 metrics. -var ( - operationTotalCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "litestream", - Subsystem: "s3", - Name: "operation_total", - Help: "The number of S3 operations performed", - }, []string{"db", "name", "type"}) - - operationBytesCounterVec = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "litestream", - Subsystem: "s3", - Name: "operation_bytes", - Help: "The number of bytes used by S3 operations", - }, []string{"db", "name", "type"}) -)
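The removed monitor() loop is worth a note for anyone porting it: it seeds its first pass with an already-closed channel so the initial sync runs immediately, and it re-fetches db.Notify() before each sync so a change arriving mid-sync still triggers the next pass. A minimal sketch of the pattern in isolation (watch, next, and action are illustrative names, not Litestream API):

package main

import (
	"context"
	"time"
)

// watch runs action immediately, then once per notification, with the
// ticker enforcing a minimum interval between passes.
func watch(ctx context.Context, interval time.Duration, next func() <-chan struct{}, action func()) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	ch := make(chan struct{})
	close(ch) // a closed channel receives immediately
	var notify <-chan struct{} = ch

	for initial := true; ; initial = false {
		if !initial {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C: // rate-limit every pass after the first
			}
		}

		select {
		case <-ctx.Done():
			return
		case <-notify: // fires instantly on the first pass
		}

		// Re-arm before acting so a change during action() is not missed.
		notify = next()
		action()
	}
}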
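snapshot() never buffers the compressed database: it wires an lz4 writer to the write side of an io.Pipe in a goroutine and hands the read side to the S3 uploader. The same streaming shape in a self-contained sketch, assuming github.com/pierrec/lz4 (compress is an illustrative name):

package main

import (
	"io"

	"github.com/pierrec/lz4"
)

// compress returns a reader that yields src compressed with lz4.
// Copy errors are propagated through the pipe so the consumer's
// Read call reports them.
func compress(src io.Reader) io.Reader {
	pr, pw := io.Pipe()
	zw := lz4.NewWriter(pw)
	go func() {
		if _, err := io.Copy(zw, src); err != nil {
			pw.CloseWithError(err)
			return
		}
		pw.CloseWithError(zw.Close()) // flush the lz4 frame, then close the pipe
	}()
	return pr
}

The returned reader can then be passed as the Body of an s3manager.UploadInput, exactly as the deleted code does with the database file.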
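Init() resolves the AWS region lazily, and the precedence matters for S3-compatible stores: an explicit region always wins; a custom endpoint falls back to us-east-1, since non-AWS object stores rarely need a real region; otherwise GetBucketLocation is queried, where a nil LocationConstraint also means us-east-1. Distilled into one function (resolveRegion and lookup are hypothetical names):

package main

// resolveRegion mirrors the lookup order in the removed Init:
// explicit region, then custom-endpoint default, then a bucket
// location query against us-east-1.
func resolveRegion(region, endpoint string, lookup func() (string, error)) (string, error) {
	if region != "" {
		return region, nil // explicitly configured
	}
	if endpoint != "" {
		return "us-east-1", nil // default for non-S3 object stores
	}
	return lookup() // e.g. GetBucketLocation; a nil constraint means us-east-1
}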
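WALReader reassembles segmented WAL files and refuses gaps: each segment's filename encodes its starting offset, so contiguity can be verified as the segments are concatenated. The core invariant, with a hypothetical segment type standing in for the parsed S3 keys:

package main

import (
	"bytes"
	"fmt"
)

type segment struct {
	Offset int64  // starting offset parsed from the file name
	Data   []byte // decompressed segment contents
}

// concat joins WAL segments, verifying each one starts exactly
// where the previous one ended.
func concat(segments []segment) ([]byte, error) {
	var buf bytes.Buffer
	var offset int64
	for _, seg := range segments {
		if seg.Offset != offset {
			return nil, fmt.Errorf("out of sequence wal segments: offset %d, expected %d", seg.Offset, offset)
		}
		buf.Write(seg.Data)
		offset += int64(len(seg.Data))
	}
	return buf.Bytes(), nil
}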
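deleteGenerationBefore issues deletes in slices of MaxKeys per request because the S3 DeleteObjects call accepts at most 1,000 keys at a time. The batching loop generalizes to any slice; a sketch with a hypothetical helper:

package main

// batches splits keys into chunks of at most size elements; each
// chunk then becomes one DeleteObjects request.
func batches(keys []string, size int) [][]string {
	var out [][]string
	for i := 0; i < len(keys); i += size {
		j := i + size
		if j > len(keys) {
			j = len(keys)
		}
		out = append(out, keys[i:j])
	}
	return out
}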