Compare commits

..

12 Commits

Author SHA1 Message Date
c2946ef0d0 moved metapath 2024-10-12 11:21:45 +01:00
Ben Johnson
977d4a5ee4 Fix replica-without-db logger (#512) 2023-10-24 09:51:17 -05:00
Ben Johnson
c81010e7ab Fix darwin make target 2023-10-23 15:01:27 -06:00
guangwu
c1ae968188 fix: typo (#509) 2023-10-19 18:29:04 -06:00
Toni Spets
9f0e50ddf7 Add missing slog calls to commands (#508) 2023-10-19 18:28:49 -06:00
Toni Spets
fe9ab5c517 Force truncation checkpoint if WAL becomes runaway (#473) 2023-10-19 18:27:51 -06:00
Toni Spets
d02ba97453 Sync replica snapshots to previous (#480) 2023-10-19 18:27:15 -06:00
Toni Spets
b1abd6bd99 Use structured logging with slog (#475) 2023-10-16 15:05:22 -06:00
Ben Johnson
fd892eef6d Re-add arm/v7 build to Docker (#502) 2023-08-15 07:37:32 -06:00
Markus Schanz
1bfcaa4a17 Recognize Scaleway S3 replica URLs (#499) 2023-08-13 18:07:19 -06:00
Alex Garcia
a369b05ee4 Build darwin-arm64 build during make dist-macos (#500) 2023-08-09 13:00:50 -06:00
Toni Spets
e0493f979a Copy WAL frames through temp file to shadow (#474) 2023-08-08 11:40:43 -06:00
21 changed files with 303 additions and 169 deletions

View File

@@ -8,7 +8,7 @@ jobs:
- uses: actions/checkout@v2
- uses: actions/setup-go@v2
with:
go-version: '1.20'
go-version: '1.21'
- uses: actions/cache@v2
with:
path: ~/go/pkg/mod

View File

@@ -15,7 +15,7 @@ jobs:
docker:
runs-on: ubuntu-latest
env:
PLATFORMS: "linux/amd64,linux/arm64"
PLATFORMS: "linux/amd64,linux/arm64,linux/arm/v7"
VERSION: "${{ github.event_name == 'release' && github.event.release.name || github.sha }}"
steps:

View File

@@ -31,7 +31,7 @@ jobs:
- uses: actions/checkout@v2
- uses: actions/setup-go@v2
with:
go-version: '1.20'
go-version: '1.21'
- id: release
uses: bruceadams/get-release@v1.2.2

View File

@@ -1,4 +1,4 @@
FROM golang:1.20.1 as builder
FROM golang:1.21.3 as builder
WORKDIR /src/litestream
COPY . .

View File

@@ -20,11 +20,17 @@ ifndef LITESTREAM_VERSION
$(error LITESTREAM_VERSION is undefined)
endif
mkdir -p dist
go build -v -ldflags "-s -w -X 'main.Version=${LITESTREAM_VERSION}'" -o dist/litestream ./cmd/litestream
GOOS=darwin GOARCH=amd64 CC="gcc -target amd64-apple-macos11" CGO_ENABLED=1 go build -v -ldflags "-s -w -X 'main.Version=${LITESTREAM_VERSION}'" -o dist/litestream ./cmd/litestream
gon etc/gon.hcl
mv dist/litestream.zip dist/litestream-${LITESTREAM_VERSION}-darwin-amd64.zip
openssl dgst -sha256 dist/litestream-${LITESTREAM_VERSION}-darwin-amd64.zip
GOOS=darwin GOARCH=arm64 CC="gcc -target arm64-apple-macos11" CGO_ENABLED=1 go build -v -ldflags "-s -w -X 'main.Version=${LITESTREAM_VERSION}'" -o dist/litestream ./cmd/litestream
gon etc/gon.hcl
mv dist/litestream.zip dist/litestream-${LITESTREAM_VERSION}-darwin-arm64.zip
openssl dgst -sha256 dist/litestream-${LITESTREAM_VERSION}-darwin-arm64.zip
clean:
rm -rf dist

View File

@@ -4,7 +4,6 @@ import (
"context"
"flag"
"fmt"
"log"
"os"
"text/tabwriter"
"time"
@@ -87,7 +86,7 @@ func (c *GenerationsCommand) Run(ctx context.Context, args []string) (err error)
for _, r := range replicas {
generations, err := r.Client.Generations(ctx)
if err != nil {
log.Printf("%s: cannot list generations: %s", r.Name(), err)
r.Logger().Error("cannot list generations", "error", err)
continue
}
@@ -95,7 +94,7 @@ func (c *GenerationsCommand) Run(ctx context.Context, args []string) (err error)
for _, generation := range generations {
createdAt, updatedAt, err := r.GenerationTimeBounds(ctx, generation)
if err != nil {
log.Printf("%s: cannot determine generation time bounds: %s", r.Name(), err)
r.Logger().Error("cannot determine generation time bounds", "error", err)
continue
}

View File

@@ -6,7 +6,7 @@ import (
"flag"
"fmt"
"io/ioutil"
"log"
"log/slog"
"net/url"
"os"
"os/user"
@@ -37,13 +37,11 @@ var (
var errStop = errors.New("stop")
func main() {
log.SetFlags(0)
m := NewMain()
if err := m.Run(context.Background(), os.Args[1:]); err == flag.ErrHelp || err == errStop {
os.Exit(1)
} else if err != nil {
log.Println(err)
slog.Error("failed to run", "error", err)
os.Exit(1)
}
}
@@ -95,17 +93,17 @@ func (m *Main) Run(ctx context.Context, args []string) (err error) {
// Wait for signal to stop program.
select {
case err = <-c.execCh:
fmt.Println("subprocess exited, litestream shutting down")
slog.Info("subprocess exited, litestream shutting down")
case sig := <-signalCh:
fmt.Println("signal received, litestream shutting down")
slog.Info("signal received, litestream shutting down")
if c.cmd != nil {
fmt.Println("sending signal to exec process")
slog.Info("sending signal to exec process")
if err := c.cmd.Process.Signal(sig); err != nil {
return fmt.Errorf("cannot signal exec process: %w", err)
}
fmt.Println("waiting for exec process to close")
slog.Info("waiting for exec process to close")
if err := <-c.execCh; err != nil && !strings.HasPrefix(err.Error(), "signal:") {
return fmt.Errorf("cannot wait for exec process: %w", err)
}
@@ -116,7 +114,7 @@ func (m *Main) Run(ctx context.Context, args []string) (err error) {
if e := c.Close(); e != nil && err == nil {
err = e
}
fmt.Println("litestream shut down")
slog.Info("litestream shut down")
return err
case "restore":
@@ -172,6 +170,16 @@ type Config struct {
// Global S3 settings
AccessKeyID string `yaml:"access-key-id"`
SecretAccessKey string `yaml:"secret-access-key"`
// Logging
Logging LoggingConfig `yaml:"logging"`
}
// LoggingConfig configures logging.
type LoggingConfig struct {
Level string `yaml:"level"`
Type string `yaml:"type"`
Stderr bool `yaml:"stderr"`
}
// propagateGlobalSettings copies global S3 settings to replica configs.
@@ -241,6 +249,36 @@ func ReadConfigFile(filename string, expandEnv bool) (_ Config, err error) {
// Propage settings from global config to replica configs.
config.propagateGlobalSettings()
// Configure logging.
logOutput := os.Stdout
if config.Logging.Stderr {
logOutput = os.Stderr
}
logOptions := slog.HandlerOptions{
Level: slog.LevelInfo,
}
switch strings.ToUpper(config.Logging.Level) {
case "DEBUG":
logOptions.Level = slog.LevelDebug
case "WARN", "WARNING":
logOptions.Level = slog.LevelWarn
case "ERROR":
logOptions.Level = slog.LevelError
}
var logHandler slog.Handler
switch config.Logging.Type {
case "json":
logHandler = slog.NewJSONHandler(logOutput, &logOptions)
case "text", "":
logHandler = slog.NewTextHandler(logOutput, &logOptions)
}
// Set global default logger.
slog.SetDefault(slog.New(logHandler))
return config, nil
}

View File

@@ -1,4 +1,4 @@
// +build !windows
//go:build !windows
package main

View File

@@ -1,11 +1,10 @@
// +build windows
//go:build windows
package main
import (
"context"
"io"
"log"
"os"
"os/signal"
@@ -63,13 +62,13 @@ func (s *windowsService) Execute(args []string, r <-chan svc.ChangeRequest, stat
// Instantiate replication command and load configuration.
c := NewReplicateCommand()
if c.Config, err = ReadConfigFile(DefaultConfigPath(), true); err != nil {
log.Printf("cannot load configuration: %s", err)
slog.Error("cannot load configuration", "error", err)
return true, 1
}
// Execute replication command.
if err := c.Run(s.ctx); err != nil {
log.Printf("cannot replicate: %s", err)
slog.Error("cannot replicate", "error", err)
statusCh <- svc.Status{State: svc.StopPending}
return true, 2
}
@@ -88,7 +87,7 @@ func (s *windowsService) Execute(args []string, r <-chan svc.ChangeRequest, stat
case svc.Interrogate:
statusCh <- req.CurrentStatus
default:
log.Printf("Litestream service received unexpected change request cmd: %d", req.Cmd)
slog.Error("Litestream service received unexpected change request", "cmd", req.Cmd)
}
}
}

View File

@@ -4,7 +4,7 @@ import (
"context"
"flag"
"fmt"
"log"
"log/slog"
"net"
"net/http"
_ "net/http/pprof"
@@ -42,7 +42,6 @@ func NewReplicateCommand() *ReplicateCommand {
func (c *ReplicateCommand) ParseFlags(ctx context.Context, args []string) (err error) {
fs := flag.NewFlagSet("litestream-replicate", flag.ContinueOnError)
execFlag := fs.String("exec", "", "execute subcommand")
tracePath := fs.String("trace", "", "trace path")
configPath, noExpandEnv := registerConfigFlag(fs)
fs.Usage = c.Usage
if err := fs.Parse(args); err != nil {
@@ -80,27 +79,17 @@ func (c *ReplicateCommand) ParseFlags(ctx context.Context, args []string) (err e
c.Config.Exec = *execFlag
}
// Enable trace logging.
if *tracePath != "" {
f, err := os.Create(*tracePath)
if err != nil {
return err
}
defer f.Close()
litestream.Tracef = log.New(f, "", log.LstdFlags|log.Lmicroseconds|log.LUTC|log.Lshortfile).Printf
}
return nil
}
// Run loads all databases specified in the configuration.
func (c *ReplicateCommand) Run() (err error) {
// Display version information.
log.Printf("litestream %s", Version)
slog.Info("litestream", "version", Version)
// Setup databases.
if len(c.Config.DBs) == 0 {
log.Println("no databases specified in configuration")
slog.Error("no databases specified in configuration")
}
for _, dbConfig := range c.Config.DBs {
@@ -118,21 +107,22 @@ func (c *ReplicateCommand) Run() (err error) {
// Notify user that initialization is done.
for _, db := range c.DBs {
log.Printf("initialized db: %s", db.Path())
slog.Info("initialized db", "path", db.Path())
for _, r := range db.Replicas {
slog := slog.With("name", r.Name(), "type", r.Client.Type(), "sync-interval", r.SyncInterval)
switch client := r.Client.(type) {
case *file.ReplicaClient:
log.Printf("replicating to: name=%q type=%q path=%q", r.Name(), client.Type(), client.Path())
slog.Info("replicating to", "path", client.Path())
case *s3.ReplicaClient:
log.Printf("replicating to: name=%q type=%q bucket=%q path=%q region=%q endpoint=%q sync-interval=%s", r.Name(), client.Type(), client.Bucket, client.Path, client.Region, client.Endpoint, r.SyncInterval)
slog.Info("replicating to", "bucket", client.Bucket, "path", client.Path, "region", client.Region, "endpoint", client.Endpoint)
case *gcs.ReplicaClient:
log.Printf("replicating to: name=%q type=%q bucket=%q path=%q sync-interval=%s", r.Name(), client.Type(), client.Bucket, client.Path, r.SyncInterval)
slog.Info("replicating to", "bucket", client.Bucket, "path", client.Path)
case *abs.ReplicaClient:
log.Printf("replicating to: name=%q type=%q bucket=%q path=%q endpoint=%q sync-interval=%s", r.Name(), client.Type(), client.Bucket, client.Path, client.Endpoint, r.SyncInterval)
slog.Info("replicating to", "bucket", client.Bucket, "path", client.Path, "endpoint", client.Endpoint)
case *sftp.ReplicaClient:
log.Printf("replicating to: name=%q type=%q host=%q user=%q path=%q sync-interval=%s", r.Name(), client.Type(), client.Host, client.User, client.Path, r.SyncInterval)
slog.Info("replicating to", "host", client.Host, "user", client.User, "path", client.Path)
default:
log.Printf("replicating to: name=%q type=%q", r.Name(), client.Type())
slog.Info("replicating to")
}
}
}
@@ -146,11 +136,11 @@ func (c *ReplicateCommand) Run() (err error) {
hostport = net.JoinHostPort("localhost", port)
}
log.Printf("serving metrics on http://%s/metrics", hostport)
slog.Info("serving metrics on", "url", fmt.Sprintf("http://%s/metrics", hostport))
go func() {
http.Handle("/metrics", promhttp.Handler())
if err := http.ListenAndServe(c.Config.Addr, nil); err != nil {
log.Printf("cannot start metrics server: %s", err)
slog.Error("cannot start metrics server", "error", err)
}
}()
}
@@ -179,7 +169,7 @@ func (c *ReplicateCommand) Run() (err error) {
func (c *ReplicateCommand) Close() (err error) {
for _, db := range c.DBs {
if e := db.Close(); e != nil {
log.Printf("error closing db: path=%s err=%s", db.Path(), e)
db.Logger.Error("error closing db", "error", e)
if err == nil {
err = e
}
@@ -215,8 +205,5 @@ Arguments:
-no-expand-env
Disables environment variable expansion in configuration file.
-trace PATH
Write verbose trace logging to PATH.
`[1:], DefaultConfigPath())
}

View File

@@ -5,7 +5,7 @@ import (
"errors"
"flag"
"fmt"
"log"
"log/slog"
"os"
"strconv"
"time"
@@ -19,7 +19,6 @@ type RestoreCommand struct{}
// Run executes the command.
func (c *RestoreCommand) Run(ctx context.Context, args []string) (err error) {
opt := litestream.NewRestoreOptions()
opt.Verbose = true
fs := flag.NewFlagSet("litestream-restore", flag.ContinueOnError)
configPath, noExpandEnv := registerConfigFlag(fs)
@@ -31,7 +30,6 @@ func (c *RestoreCommand) Run(ctx context.Context, args []string) (err error) {
ifDBNotExists := fs.Bool("if-db-not-exists", false, "")
ifReplicaExists := fs.Bool("if-replica-exists", false, "")
timestampStr := fs.String("timestamp", "", "timestamp")
verbose := fs.Bool("v", false, "verbose output")
fs.Usage = c.Usage
if err := fs.Parse(args); err != nil {
return err
@@ -48,11 +46,6 @@ func (c *RestoreCommand) Run(ctx context.Context, args []string) (err error) {
}
}
// Instantiate logger if verbose output is enabled.
if *verbose {
opt.Logger = log.New(os.Stderr, "", log.LstdFlags|log.Lmicroseconds)
}
// Determine replica & generation to restore from.
var r *litestream.Replica
if isURL(fs.Arg(0)) {
@@ -60,7 +53,7 @@ func (c *RestoreCommand) Run(ctx context.Context, args []string) (err error) {
return fmt.Errorf("cannot specify a replica URL and the -config flag")
}
if r, err = c.loadFromURL(ctx, fs.Arg(0), *ifDBNotExists, &opt); err == errSkipDBExists {
fmt.Println("database already exists, skipping")
slog.Info("database already exists, skipping")
return nil
} else if err != nil {
return err
@@ -70,7 +63,7 @@ func (c *RestoreCommand) Run(ctx context.Context, args []string) (err error) {
*configPath = DefaultConfigPath()
}
if r, err = c.loadFromConfig(ctx, fs.Arg(0), *configPath, !*noExpandEnv, *ifDBNotExists, &opt); err == errSkipDBExists {
fmt.Println("database already exists, skipping")
slog.Info("database already exists, skipping")
return nil
} else if err != nil {
return err
@@ -81,7 +74,7 @@ func (c *RestoreCommand) Run(ctx context.Context, args []string) (err error) {
// If optional flag set, return success. Useful for automated recovery.
if opt.Generation == "" {
if *ifReplicaExists {
fmt.Println("no matching backups found")
slog.Info("no matching backups found")
return nil
}
return fmt.Errorf("no matching backups found")

View File

@@ -4,7 +4,7 @@ import (
"context"
"flag"
"fmt"
"log"
"log/slog"
"os"
"text/tabwriter"
"time"
@@ -82,7 +82,7 @@ func (c *SnapshotsCommand) Run(ctx context.Context, args []string) (err error) {
for _, r := range replicas {
infos, err := r.Snapshots(ctx)
if err != nil {
log.Printf("cannot determine snapshots: %s", err)
slog.Error("cannot determine snapshots", "error", err)
continue
}
for _, info := range infos {

View File

@@ -4,7 +4,6 @@ import (
"context"
"flag"
"fmt"
"log"
"os"
"text/tabwriter"
"time"
@@ -86,7 +85,7 @@ func (c *WALCommand) Run(ctx context.Context, args []string) (err error) {
generations = []string{*generation}
} else {
if generations, err = r.Client.Generations(ctx); err != nil {
log.Printf("%s: cannot determine generations: %s", r.Name(), err)
r.Logger().Error("cannot determine generations", "error", err)
continue
}
}
@@ -113,7 +112,7 @@ func (c *WALCommand) Run(ctx context.Context, args []string) (err error) {
}
return itr.Close()
}(); err != nil {
log.Printf("%s: cannot fetch wal segments: %s", r.Name(), err)
r.Logger().Error("cannot fetch wal segments", "error", err)
continue
}
}

193
db.go
View File

@@ -11,7 +11,7 @@ import (
"hash/crc64"
"io"
"io/ioutil"
"log"
"log/slog"
"math"
"math/rand"
"os"
@@ -31,6 +31,7 @@ const (
DefaultCheckpointInterval = 1 * time.Minute
DefaultMinCheckpointPageN = 1000
DefaultMaxCheckpointPageN = 10000
DefaultTruncatePageN = 500000
)
// MaxIndex is the maximum possible WAL index.
@@ -85,6 +86,16 @@ type DB struct {
// unbounded if there are always read transactions occurring.
MaxCheckpointPageN int
// Threshold of WAL size, in pages, before a forced truncation checkpoint.
// A forced truncation checkpoint will block new transactions and wait for
// existing transactions to finish before issuing a checkpoint and
// truncating the WAL.
//
// If zero, no truncates are forced. This can cause the WAL to grow
// unbounded if there's a sudden spike of changes between other
// checkpoints.
TruncatePageN int
// Time between automatic checkpoints in the WAL. This is done to allow
// more fine-grained WAL files so that restores can be performed with
// better precision.
@@ -97,8 +108,8 @@ type DB struct {
// Must be set before calling Open().
Replicas []*Replica
// Where to send log messages, defaults to log.Default()
Logger *log.Logger
// Where to send log messages, defaults to global slog with databas epath.
Logger *slog.Logger
}
// NewDB returns a new instance of DB for a given path.
@@ -112,10 +123,44 @@ func NewDB(path string) *DB {
MinCheckpointPageN: DefaultMinCheckpointPageN,
MaxCheckpointPageN: DefaultMaxCheckpointPageN,
TruncatePageN: DefaultTruncatePageN,
CheckpointInterval: DefaultCheckpointInterval,
MonitorInterval: DefaultMonitorInterval,
Logger: slog.With("db", path),
}
Logger: log.Default(),
db.dbSizeGauge = dbSizeGaugeVec.WithLabelValues(db.path)
db.walSizeGauge = walSizeGaugeVec.WithLabelValues(db.path)
db.totalWALBytesCounter = totalWALBytesCounterVec.WithLabelValues(db.path)
db.shadowWALIndexGauge = shadowWALIndexGaugeVec.WithLabelValues(db.path)
db.shadowWALSizeGauge = shadowWALSizeGaugeVec.WithLabelValues(db.path)
db.syncNCounter = syncNCounterVec.WithLabelValues(db.path)
db.syncErrorNCounter = syncErrorNCounterVec.WithLabelValues(db.path)
db.syncSecondsCounter = syncSecondsCounterVec.WithLabelValues(db.path)
db.checkpointNCounterVec = checkpointNCounterVec.MustCurryWith(prometheus.Labels{"db": db.path})
db.checkpointErrorNCounterVec = checkpointErrorNCounterVec.MustCurryWith(prometheus.Labels{"db": db.path})
db.checkpointSecondsCounterVec = checkpointSecondsCounterVec.MustCurryWith(prometheus.Labels{"db": db.path})
db.ctx, db.cancel = context.WithCancel(context.Background())
return db
}
// NewDB returns a new instance of DB for a given path.
func NewLSDB(path string, metapath string) *DB {
_, file := filepath.Split(path)
db := &DB{
path: path,
metaPath: filepath.Join(metapath, "."+file+MetaDirSuffix),
notify: make(chan struct{}),
MinCheckpointPageN: DefaultMinCheckpointPageN,
MaxCheckpointPageN: DefaultMaxCheckpointPageN,
TruncatePageN: DefaultTruncatePageN,
CheckpointInterval: DefaultCheckpointInterval,
MonitorInterval: DefaultMonitorInterval,
Logger: slog.With("db", path),
}
db.dbSizeGauge = dbSizeGaugeVec.WithLabelValues(db.path)
@@ -461,7 +506,7 @@ func (db *DB) init() (err error) {
// If we have an existing shadow WAL, ensure the headers match.
if err := db.verifyHeadersMatch(); err != nil {
db.Logger.Printf("%s: init: cannot determine last wal position, clearing generation; %s", db.path, err)
db.Logger.Warn("init: cannot determine last wal position, clearing generation", "error", err)
if err := os.Remove(db.GenerationNamePath()); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("remove generation name: %w", err)
}
@@ -703,7 +748,7 @@ func (db *DB) Sync(ctx context.Context) (err error) {
if err := db.init(); err != nil {
return err
} else if db.db == nil {
Tracef("%s: sync: no database found", db.path)
db.Logger.Debug("sync: no database found")
return nil
}
@@ -729,7 +774,7 @@ func (db *DB) Sync(ctx context.Context) (err error) {
if err != nil {
return fmt.Errorf("cannot verify wal state: %w", err)
}
Tracef("%s: sync: info=%#v", db.path, info)
db.Logger.Debug("sync", "info", &info)
// Track if anything in the shadow WAL changes and then notify at the end.
changed := info.walSize != info.shadowWALSize || info.restart || info.reason != ""
@@ -740,7 +785,7 @@ func (db *DB) Sync(ctx context.Context) (err error) {
if info.generation, err = db.createGeneration(); err != nil {
return fmt.Errorf("create generation: %w", err)
}
db.Logger.Printf("%s: sync: new generation %q, %s", db.path, info.generation, info.reason)
db.Logger.Info("sync: new generation", "generation", info.generation, "reason", info.reason)
// Clear shadow wal info.
info.shadowWALPath = db.ShadowWALPath(info.generation, 0)
@@ -751,7 +796,7 @@ func (db *DB) Sync(ctx context.Context) (err error) {
}
// Synchronize real WAL with current shadow WAL.
newWALSize, err := db.syncWAL(info)
origWALSize, newWALSize, err := db.syncWAL(info)
if err != nil {
return fmt.Errorf("sync wal: %w", err)
}
@@ -760,7 +805,9 @@ func (db *DB) Sync(ctx context.Context) (err error) {
// If WAL size is greater than min threshold, attempt checkpoint.
var checkpoint bool
checkpointMode := CheckpointModePassive
if db.MaxCheckpointPageN > 0 && newWALSize >= calcWALSize(db.pageSize, db.MaxCheckpointPageN) {
if db.TruncatePageN > 0 && origWALSize >= calcWALSize(db.pageSize, db.TruncatePageN) {
checkpoint, checkpointMode = true, CheckpointModeTruncate
} else if db.MaxCheckpointPageN > 0 && newWALSize >= calcWALSize(db.pageSize, db.MaxCheckpointPageN) {
checkpoint, checkpointMode = true, CheckpointModeRestart
} else if newWALSize >= calcWALSize(db.pageSize, db.MinCheckpointPageN) {
checkpoint = true
@@ -794,7 +841,7 @@ func (db *DB) Sync(ctx context.Context) (err error) {
db.notify = make(chan struct{})
}
Tracef("%s: sync: ok", db.path)
db.Logger.Debug("sync: ok")
return nil
}
@@ -919,29 +966,29 @@ type syncInfo struct {
}
// syncWAL copies pending bytes from the real WAL to the shadow WAL.
func (db *DB) syncWAL(info syncInfo) (newSize int64, err error) {
func (db *DB) syncWAL(info syncInfo) (origSize int64, newSize int64, err error) {
// Copy WAL starting from end of shadow WAL. Exit if no new shadow WAL needed.
newSize, err = db.copyToShadowWAL(info.shadowWALPath)
origSize, newSize, err = db.copyToShadowWAL(info.shadowWALPath)
if err != nil {
return newSize, fmt.Errorf("cannot copy to shadow wal: %w", err)
return origSize, newSize, fmt.Errorf("cannot copy to shadow wal: %w", err)
} else if !info.restart {
return newSize, nil // If no restart required, exit.
return origSize, newSize, nil // If no restart required, exit.
}
// Parse index of current shadow WAL file.
dir, base := filepath.Split(info.shadowWALPath)
index, err := ParseWALPath(base)
if err != nil {
return 0, fmt.Errorf("cannot parse shadow wal filename: %s", base)
return 0, 0, fmt.Errorf("cannot parse shadow wal filename: %s", base)
}
// Start a new shadow WAL file with next index.
newShadowWALPath := filepath.Join(dir, FormatWALPath(index+1))
newSize, err = db.initShadowWALFile(newShadowWALPath)
if err != nil {
return 0, fmt.Errorf("cannot init shadow wal file: name=%s err=%w", newShadowWALPath, err)
return 0, 0, fmt.Errorf("cannot init shadow wal file: name=%s err=%w", newShadowWALPath, err)
}
return newSize, nil
return origSize, newSize, nil
}
func (db *DB) initShadowWALFile(filename string) (int64, error) {
@@ -977,80 +1024,96 @@ func (db *DB) initShadowWALFile(filename string) (int64, error) {
_ = os.Chown(filename, uid, gid)
// Copy as much shadow WAL as available.
newSize, err := db.copyToShadowWAL(filename)
_, newSize, err := db.copyToShadowWAL(filename)
if err != nil {
return 0, fmt.Errorf("cannot copy to new shadow wal: %w", err)
}
return newSize, nil
}
func (db *DB) copyToShadowWAL(filename string) (newSize int64, err error) {
Tracef("%s: copy-shadow: %s", db.path, filename)
func (db *DB) copyToShadowWAL(filename string) (origWalSize int64, newSize int64, err error) {
logger := db.Logger.With("filename", filename)
logger.Debug("copy-shadow")
r, err := os.Open(db.WALPath())
if err != nil {
return 0, err
return 0, 0, err
}
defer r.Close()
fi, err := r.Stat()
if err != nil {
return 0, 0, err
}
origWalSize = frameAlign(fi.Size(), db.pageSize)
w, err := os.OpenFile(filename, os.O_RDWR, 0666)
if err != nil {
return 0, err
return 0, 0, err
}
defer w.Close()
fi, err := w.Stat()
fi, err = w.Stat()
if err != nil {
return 0, err
return 0, 0, err
}
origSize := frameAlign(fi.Size(), db.pageSize)
// Read shadow WAL header to determine byte order for checksum & salt.
hdr := make([]byte, WALHeaderSize)
if _, err := io.ReadFull(w, hdr); err != nil {
return 0, fmt.Errorf("read header: %w", err)
return 0, 0, fmt.Errorf("read header: %w", err)
}
hsalt0 := binary.BigEndian.Uint32(hdr[16:])
hsalt1 := binary.BigEndian.Uint32(hdr[20:])
bo, err := headerByteOrder(hdr)
if err != nil {
return 0, err
return 0, 0, err
}
// Read previous checksum.
chksum0, chksum1, err := readLastChecksumFrom(w, db.pageSize)
if err != nil {
return 0, fmt.Errorf("last checksum: %w", err)
return 0, 0, fmt.Errorf("last checksum: %w", err)
}
// Write to a temporary shadow file.
tempFilename := filename + ".tmp"
defer os.Remove(tempFilename)
f, err := internal.CreateFile(tempFilename, db.fileInfo)
if err != nil {
return 0, 0, fmt.Errorf("create temp file: %w", err)
}
defer f.Close()
// Seek to correct position on real wal.
if _, err := r.Seek(origSize, io.SeekStart); err != nil {
return 0, fmt.Errorf("real wal seek: %w", err)
return 0, 0, fmt.Errorf("real wal seek: %w", err)
} else if _, err := w.Seek(origSize, io.SeekStart); err != nil {
return 0, fmt.Errorf("shadow wal seek: %w", err)
return 0, 0, fmt.Errorf("shadow wal seek: %w", err)
}
// Read through WAL from last position to find the page of the last
// committed transaction.
frame := make([]byte, db.pageSize+WALFrameHeaderSize)
var buf bytes.Buffer
offset := origSize
lastCommitSize := origSize
for {
// Read next page from WAL file.
if _, err := io.ReadFull(r, frame); err == io.EOF || err == io.ErrUnexpectedEOF {
Tracef("%s: copy-shadow: break %s @ %d; err=%s", db.path, filename, offset, err)
logger.Debug("copy-shadow: break", "offset", offset, "error", err)
break // end of file or partial page
} else if err != nil {
return 0, fmt.Errorf("read wal: %w", err)
return 0, 0, fmt.Errorf("read wal: %w", err)
}
// Read frame salt & compare to header salt. Stop reading on mismatch.
salt0 := binary.BigEndian.Uint32(frame[8:])
salt1 := binary.BigEndian.Uint32(frame[12:])
if salt0 != hsalt0 || salt1 != hsalt1 {
Tracef("%s: copy-shadow: break: salt mismatch", db.path)
logger.Debug("copy-shadow: break: salt mismatch")
break
}
@@ -1060,38 +1123,60 @@ func (db *DB) copyToShadowWAL(filename string) (newSize int64, err error) {
chksum0, chksum1 = Checksum(bo, chksum0, chksum1, frame[:8]) // frame header
chksum0, chksum1 = Checksum(bo, chksum0, chksum1, frame[24:]) // frame data
if chksum0 != fchksum0 || chksum1 != fchksum1 {
Tracef("%s: copy shadow: checksum mismatch, skipping: offset=%d (%x,%x) != (%x,%x)", db.path, offset, chksum0, chksum1, fchksum0, fchksum1)
logger.Debug("copy shadow: checksum mismatch, skipping", "offset", offset, "check", fmt.Sprintf("(%x,%x) != (%x,%x)", chksum0, chksum1, fchksum0, fchksum1))
break
}
// Add page to the new size of the shadow WAL.
buf.Write(frame)
// Write page to temporary WAL file.
if _, err := f.Write(frame); err != nil {
return 0, 0, fmt.Errorf("write temp shadow wal: %w", err)
}
Tracef("%s: copy-shadow: ok %s offset=%d salt=%x %x", db.path, filename, offset, salt0, salt1)
logger.Debug("copy-shadow: ok", "offset", offset, "salt", fmt.Sprintf("%x %x", salt0, salt1))
offset += int64(len(frame))
// Flush to shadow WAL if commit record.
// Update new size if written frame was a commit record.
newDBSize := binary.BigEndian.Uint32(frame[4:])
if newDBSize != 0 {
if _, err := buf.WriteTo(w); err != nil {
return 0, fmt.Errorf("write shadow wal: %w", err)
}
buf.Reset()
lastCommitSize = offset
}
}
// Sync & close.
// If no WAL writes found, exit.
if origSize == lastCommitSize {
return origSize, lastCommitSize, nil
}
walByteN := lastCommitSize - origSize
// Move to beginning of temporary file.
if _, err := f.Seek(0, io.SeekStart); err != nil {
return 0, 0, fmt.Errorf("temp file seek: %w", err)
}
// Copy from temporary file to shadow WAL.
if _, err := io.Copy(w, &io.LimitedReader{R: f, N: walByteN}); err != nil {
return 0, 0, fmt.Errorf("write shadow file: %w", err)
}
// Close & remove temporary file.
if err := f.Close(); err != nil {
return 0, 0, err
} else if err := os.Remove(tempFilename); err != nil {
return 0, 0, err
}
// Sync & close shadow WAL.
if err := w.Sync(); err != nil {
return 0, err
return 0, 0, err
} else if err := w.Close(); err != nil {
return 0, err
return 0, 0, err
}
// Track total number of bytes written to WAL.
db.totalWALBytesCounter.Add(float64(lastCommitSize - origSize))
db.totalWALBytesCounter.Add(float64(walByteN))
return lastCommitSize, nil
return origWalSize, lastCommitSize, nil
}
// ShadowWALReader opens a reader for a shadow WAL file at a given position.
@@ -1266,7 +1351,7 @@ func (db *DB) checkpoint(ctx context.Context, generation, mode string) error {
}
// Copy shadow WAL before checkpoint to copy as much as possible.
if _, err := db.copyToShadowWAL(shadowWALPath); err != nil {
if _, _, err := db.copyToShadowWAL(shadowWALPath); err != nil {
return fmt.Errorf("cannot copy to end of shadow wal before checkpoint: %w", err)
}
@@ -1301,7 +1386,7 @@ func (db *DB) checkpoint(ctx context.Context, generation, mode string) error {
}
// Copy the end of the previous WAL before starting a new shadow WAL.
if _, err := db.copyToShadowWAL(shadowWALPath); err != nil {
if _, _, err := db.copyToShadowWAL(shadowWALPath); err != nil {
return fmt.Errorf("cannot copy to end of shadow wal: %w", err)
}
@@ -1360,7 +1445,7 @@ func (db *DB) execCheckpoint(mode string) (err error) {
if err := db.db.QueryRow(rawsql).Scan(&row[0], &row[1], &row[2]); err != nil {
return err
}
Tracef("%s: checkpoint: mode=%v (%d,%d,%d)", db.path, mode, row[0], row[1], row[2])
db.Logger.Debug("checkpoint", "mode", mode, "result", fmt.Sprintf("%d,%d,%d", row[0], row[1], row[2]))
// Reacquire the read lock immediately after the checkpoint.
if err := db.acquireReadLock(); err != nil {
@@ -1385,7 +1470,7 @@ func (db *DB) monitor() {
// Sync the database to the shadow WAL.
if err := db.Sync(db.ctx); err != nil && !errors.Is(err, context.Canceled) {
db.Logger.Printf("%s: sync error: %s", db.path, err)
db.Logger.Error("sync error", "error", err)
}
}
}
@@ -1529,10 +1614,6 @@ type RestoreOptions struct {
// Specifies how many WAL files are downloaded in parallel during restore.
Parallelism int
// Logging settings.
Logger *log.Logger
Verbose bool
}
// NewRestoreOptions returns a new instance of RestoreOptions with defaults.

2
go.mod
View File

@@ -1,6 +1,6 @@
module github.com/benbjohnson/litestream
go 1.19
go 1.21
require (
cloud.google.com/go/storage v1.31.0

4
go.sum
View File

@@ -87,6 +87,7 @@ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/martian/v3 v3.3.2 h1:IqNFLAmvJOgVlpdEBiQbDc2EwKW77amAycfTuWKdfvw=
github.com/google/martian/v3 v3.3.2/go.mod h1:oBOf6HBosgwRXnUGWUB05QECsc6uvmMiJ3+6W4l/CUk=
github.com/google/s2a-go v0.1.4 h1:1kZ/sQM3srePvKs3tXAvQzo66XfcReoqFpIpIccE7Oc=
github.com/google/s2a-go v0.1.4/go.mod h1:Ej+mSEMGRnqRzjc7VtF+jdBwYG5fuJfiZ8ELkjEwM0A=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
@@ -106,6 +107,7 @@ github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8=
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
@@ -137,6 +139,7 @@ github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwa
github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
@@ -212,6 +215,7 @@ golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9sn
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0=
golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=

View File

@@ -1,3 +1,4 @@
//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
package internal

View File

@@ -1,3 +1,4 @@
//go:build windows
// +build windows
package internal

View File

@@ -540,9 +540,6 @@ func isHexChar(ch rune) bool {
return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f')
}
// Tracef is used for low-level tracing.
var Tracef = func(format string, a ...interface{}) {}
func assert(condition bool, message string) {
if !condition {
panic("assertion failed: " + message)

View File

@@ -7,7 +7,7 @@ import (
"hash/crc64"
"io"
"io/ioutil"
"log"
"log/slog"
"math"
"os"
"path/filepath"
@@ -72,9 +72,6 @@ type Replica struct {
// Encryption identities and recipients
AgeIdentities []age.Identity
AgeRecipients []age.Recipient
// The logger to send logging messages to. Defaults to log.Default()
Logger *log.Logger
}
func NewReplica(db *DB, name string) *Replica {
@@ -87,7 +84,6 @@ func NewReplica(db *DB, name string) *Replica {
Retention: DefaultRetention,
RetentionCheckInterval: DefaultRetentionCheckInterval,
MonitorEnabled: true,
Logger: log.Default(),
}
return r
@@ -101,6 +97,15 @@ func (r *Replica) Name() string {
return r.name
}
// Logger returns the DB sub-logger for this replica.
func (r *Replica) Logger() *slog.Logger {
logger := slog.Default()
if r.db != nil {
logger = r.db.Logger
}
return logger.With("replica", r.Name())
}
// DB returns a reference to the database the replica is attached to, if any.
func (r *Replica) DB() *DB { return r.db }
@@ -166,7 +171,7 @@ func (r *Replica) Sync(ctx context.Context) (err error) {
}
generation := dpos.Generation
Tracef("%s(%s): replica sync: db.pos=%s", r.db.Path(), r.Name(), dpos)
r.Logger().Debug("replica sync", "position", dpos.String())
// Create a new snapshot and update the current replica position if
// the generation on the database has changed.
@@ -188,7 +193,7 @@ func (r *Replica) Sync(ctx context.Context) (err error) {
return fmt.Errorf("cannot determine replica position: %s", err)
}
Tracef("%s(%s): replica sync: calc new pos: %s", r.db.Path(), r.Name(), pos)
r.Logger().Debug("replica sync: calc new pos", "position", pos.String())
r.mu.Lock()
r.pos = pos
r.mu.Unlock()
@@ -222,6 +227,12 @@ func (r *Replica) syncWAL(ctx context.Context) (err error) {
// Obtain initial position from shadow reader.
// It may have moved to the next index if previous position was at the end.
pos := rd.Pos()
initialPos := pos
startTime := time.Now()
var bytesWritten int
logger := r.Logger()
logger.Info("write wal segment", "position", initialPos.String())
// Copy through pipe into client from the starting position.
var g errgroup.Group
@@ -263,6 +274,7 @@ func (r *Replica) syncWAL(ctx context.Context) (err error) {
return err
}
walBytesCounter.Add(float64(n))
bytesWritten += n
}
// Copy frames.
@@ -289,6 +301,7 @@ func (r *Replica) syncWAL(ctx context.Context) (err error) {
return err
}
walBytesCounter.Add(float64(n))
bytesWritten += n
}
// Flush LZ4 writer, encryption writer and close pipe.
@@ -314,6 +327,7 @@ func (r *Replica) syncWAL(ctx context.Context) (err error) {
replicaWALIndexGaugeVec.WithLabelValues(r.db.Path(), r.Name()).Set(float64(rd.Pos().Index))
replicaWALOffsetGaugeVec.WithLabelValues(r.db.Path(), r.Name()).Set(float64(rd.Pos().Offset))
logger.Info("wal segment written", "position", initialPos.String(), "elapsed", time.Since(startTime).String(), "sz", bytesWritten)
return nil
}
@@ -535,6 +549,10 @@ func (r *Replica) Snapshot(ctx context.Context) (info SnapshotInfo, err error) {
return wc.Close()
})
logger := r.Logger()
logger.Info("write snapshot", "position", pos.String())
startTime := time.Now()
// Delegate write to client & wait for writer goroutine to finish.
if info, err = r.Client.WriteSnapshot(ctx, pos.Generation, pos.Index, pr); err != nil {
return info, err
@@ -542,8 +560,7 @@ func (r *Replica) Snapshot(ctx context.Context) (info SnapshotInfo, err error) {
return info, err
}
r.Logger.Printf("%s(%s): snapshot written %s/%08x", r.db.Path(), r.Name(), pos.Generation, pos.Index)
logger.Info("snapshot written", "position", pos.String(), "elapsed", time.Since(startTime).String(), "sz", info.Size)
return info, nil
}
@@ -610,7 +627,7 @@ func (r *Replica) deleteSnapshotsBeforeIndex(ctx context.Context, generation str
if err := r.Client.DeleteSnapshot(ctx, info.Generation, info.Index); err != nil {
return fmt.Errorf("delete snapshot %s/%08x: %w", info.Generation, info.Index, err)
}
r.Logger.Printf("%s(%s): snapshot deleted %s/%08x", r.db.Path(), r.Name(), generation, index)
r.Logger().Info("snapshot deleted", "generation", generation, "index", index)
}
return itr.Close()
@@ -642,8 +659,8 @@ func (r *Replica) deleteWALSegmentsBeforeIndex(ctx context.Context, generation s
if err := r.Client.DeleteWALSegments(ctx, a); err != nil {
return fmt.Errorf("delete wal segments: %w", err)
}
r.Logger.Printf("%s(%s): wal segmented deleted before %s/%08x: n=%d", r.db.Path(), r.Name(), generation, index, len(a))
r.Logger().Info("wal segmented deleted before", "generation", generation, "index", index, "n", len(a))
return nil
}
@@ -679,7 +696,7 @@ func (r *Replica) monitor(ctx context.Context) {
// Synchronize the shadow wal into the replication directory.
if err := r.Sync(ctx); err != nil {
r.Logger.Printf("%s(%s): monitor error: %s", r.db.Path(), r.Name(), err)
r.Logger().Error("monitor error", "error", err)
continue
}
}
@@ -707,7 +724,7 @@ func (r *Replica) retainer(ctx context.Context) {
return
case <-ticker.C:
if err := r.EnforceRetention(ctx); err != nil {
r.Logger.Printf("%s(%s): retainer error: %s", r.db.Path(), r.Name(), err)
r.Logger().Error("retainer error", "error", err)
continue
}
}
@@ -720,6 +737,31 @@ func (r *Replica) snapshotter(ctx context.Context) {
return
}
logger := r.Logger()
if pos, err := r.db.Pos(); err != nil {
logger.Error("snapshotter cannot determine generation", "error", err)
} else if !pos.IsZero() {
if snapshot, err := r.maxSnapshot(ctx, pos.Generation); err != nil {
logger.Error("snapshotter cannot determine latest snapshot", "error", err)
} else if snapshot != nil {
nextSnapshot := r.SnapshotInterval - time.Since(snapshot.CreatedAt)
if nextSnapshot < 0 {
nextSnapshot = 0
}
logger.Info("snapshot interval adjusted", "previous", snapshot.CreatedAt.Format(time.RFC3339), "next", nextSnapshot.String())
select {
case <-ctx.Done():
return
case <-time.After(nextSnapshot):
if _, err := r.Snapshot(ctx); err != nil && err != ErrNoGeneration {
logger.Error("snapshotter error", "error", err)
}
}
}
}
ticker := time.NewTicker(r.SnapshotInterval)
defer ticker.Stop()
@@ -729,7 +771,7 @@ func (r *Replica) snapshotter(ctx context.Context) {
return
case <-ticker.C:
if _, err := r.Snapshot(ctx); err != nil && err != ErrNoGeneration {
r.Logger.Printf("%s(%s): snapshotter error: %s", r.db.Path(), r.Name(), err)
r.Logger().Error("snapshotter error", "error", err)
continue
}
}
@@ -757,7 +799,7 @@ func (r *Replica) validator(ctx context.Context) {
return
case <-ticker.C:
if err := r.Validate(ctx); err != nil {
r.Logger.Printf("%s(%s): validation error: %s", r.db.Path(), r.Name(), err)
r.Logger().Error("validation error", "error", err)
continue
}
}
@@ -794,7 +836,6 @@ func (r *Replica) Validate(ctx context.Context) error {
ReplicaName: r.Name(),
Generation: pos.Generation,
Index: pos.Index - 1,
Logger: r.Logger,
}); err != nil {
return fmt.Errorf("cannot restore: %w", err)
}
@@ -819,7 +860,7 @@ func (r *Replica) Validate(ctx context.Context) error {
if mismatch {
status = "mismatch"
}
r.Logger.Printf("%s(%s): validator: status=%s db=%016x replica=%016x pos=%s", db.Path(), r.Name(), status, chksum0, chksum1, pos)
r.Logger().Info("validator", "status", status, "db", fmt.Sprintf("%016x", chksum0), "replica", fmt.Sprintf("%016x", chksum1), "position", pos.String())
// Validate checksums match.
if mismatch {
@@ -837,8 +878,6 @@ func (r *Replica) Validate(ctx context.Context) error {
// waitForReplica blocks until replica reaches at least the given position.
func (r *Replica) waitForReplica(ctx context.Context, pos Pos) error {
db := r.DB()
ticker := time.NewTicker(500 * time.Millisecond)
defer ticker.Stop()
@@ -861,7 +900,7 @@ func (r *Replica) waitForReplica(ctx context.Context, pos Pos) error {
// Obtain current position of replica, check if past target position.
curr := r.Pos()
if curr.IsZero() {
r.Logger.Printf("%s(%s): validator: no replica position available", db.Path(), r.Name())
r.Logger().Info("validator: no replica position available")
continue
}
@@ -1013,17 +1052,6 @@ func (r *Replica) Restore(ctx context.Context, opt RestoreOptions) (err error) {
return fmt.Errorf("cannot specify index & timestamp to restore")
}
// Ensure logger exists.
logger := opt.Logger
if logger == nil {
logger = r.Logger
}
logPrefix := r.Name()
if db := r.DB(); db != nil {
logPrefix = fmt.Sprintf("%s(%s)", db.Path(), r.Name())
}
// Ensure output path does not already exist.
if _, err := os.Stat(opt.OutputPath); err == nil {
return fmt.Errorf("cannot restore, output path already exists: %s", opt.OutputPath)
@@ -1070,19 +1098,19 @@ func (r *Replica) Restore(ctx context.Context, opt RestoreOptions) (err error) {
tmpPath := opt.OutputPath + ".tmp"
// Copy snapshot to output path.
logger.Printf("%s: restoring snapshot %s/%08x to %s", logPrefix, opt.Generation, minWALIndex, tmpPath)
r.Logger().Info("restoring snapshot", "generation", opt.Generation, "index", minWALIndex, "path", tmpPath)
if err := r.restoreSnapshot(ctx, pos.Generation, pos.Index, tmpPath); err != nil {
return fmt.Errorf("cannot restore snapshot: %w", err)
}
// If no WAL files available, move snapshot to final path & exit early.
if snapshotOnly {
logger.Printf("%s: snapshot only, finalizing database", logPrefix)
r.Logger().Info("snapshot only, finalizing database")
return os.Rename(tmpPath, opt.OutputPath)
}
// Begin processing WAL files.
logger.Printf("%s: restoring wal files: generation=%s index=[%08x,%08x]", logPrefix, opt.Generation, minWALIndex, maxWALIndex)
r.Logger().Info("restoring wal files", "generation", opt.Generation, "index_min", minWALIndex, "index_max", maxWALIndex)
// Fill input channel with all WAL indexes to be loaded in order.
// Verify every index has at least one offset.
@@ -1138,9 +1166,9 @@ func (r *Replica) Restore(ctx context.Context, opt RestoreOptions) (err error) {
return err
}
logger.Printf("%s: downloaded wal %s/%08x elapsed=%s",
logPrefix, opt.Generation, index,
time.Since(startTime).String(),
r.Logger().Info("downloaded wal",
"generation", opt.Generation, "index", index,
"elapsed", time.Since(startTime).String(),
)
}
}
@@ -1167,10 +1195,7 @@ func (r *Replica) Restore(ctx context.Context, opt RestoreOptions) (err error) {
if err = applyWAL(ctx, index, tmpPath); err != nil {
return fmt.Errorf("cannot apply wal: %w", err)
}
logger.Printf("%s: applied wal %s/%08x elapsed=%s",
logPrefix, opt.Generation, index,
time.Since(startTime).String(),
)
r.Logger().Info("applied wal", "generation", opt.Generation, "index", index, "elapsed", time.Since(startTime).String())
}
// Ensure all goroutines finish. All errors should have been handled during
@@ -1180,7 +1205,7 @@ func (r *Replica) Restore(ctx context.Context, opt RestoreOptions) (err error) {
}
// Copy file to final location.
logger.Printf("%s: renaming database from temporary location", logPrefix)
r.Logger().Info("renaming database from temporary location")
if err := os.Rename(tmpPath, opt.OutputPath); err != nil {
return err
}

View File

@@ -716,6 +716,9 @@ func ParseHost(s string) (bucket, region, endpoint string, forcePathStyle bool)
} else if a := digitalOceanRegex.FindStringSubmatch(host); a != nil {
bucket, region = a[1], a[2]
endpoint = fmt.Sprintf("%s.digitaloceanspaces.com", region)
} else if a := scalewayRegex.FindStringSubmatch(host); a != nil {
bucket, region = a[1], a[2]
endpoint = fmt.Sprintf("s3.%s.scw.cloud", region)
} else if a := linodeRegex.FindStringSubmatch(host); a != nil {
bucket, region = a[1], a[2]
endpoint = fmt.Sprintf("%s.linodeobjects.com", region)
@@ -742,6 +745,7 @@ var (
backblazeRegex = regexp.MustCompile(`^(?:(.+)\.)?s3.([^.]+)\.backblazeb2.com$`)
filebaseRegex = regexp.MustCompile(`^(?:(.+)\.)?s3.filebase.com$`)
digitalOceanRegex = regexp.MustCompile(`^(?:(.+)\.)?([^.]+)\.digitaloceanspaces.com$`)
scalewayRegex = regexp.MustCompile(`^(?:(.+)\.)?s3.([^.]+)\.scw\.cloud$`)
linodeRegex = regexp.MustCompile(`^(?:(.+)\.)?([^.]+)\.linodeobjects.com$`)
)
@@ -761,7 +765,7 @@ func deleteOutputError(out *s3.DeleteObjectsOutput) error {
case 1:
return fmt.Errorf("deleting object %s: %s - %s", *out.Errors[0].Key, *out.Errors[0].Code, *out.Errors[0].Message)
default:
return fmt.Errorf("%d errors occured deleting objects, %s: %s - (%s (and %d others)",
return fmt.Errorf("%d errors occurred deleting objects, %s: %s - (%s (and %d others)",
len(out.Errors), *out.Errors[0].Key, *out.Errors[0].Code, *out.Errors[0].Message, len(out.Errors)-1)
}
}