Files
litestream/litestream.go
Ben Johnson fc897b481f Group replica wal segments by index
This commit changes the replica path format to group segments within
a single index in the same directory. This is to eventually add the
ability to seek to a record on file-based systems without having
to iterate over the records. The DB shadow WAL will also be changed
to this same format to support live replicas.
2021-06-14 15:24:05 -06:00

426 lines
11 KiB
Go

package litestream
import (
"database/sql"
"encoding/binary"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"time"
)
// Naming constants.
const (
MetaDirSuffix = "-litestream"
WALDirName = "wal"
WALExt = ".wal"
WALSegmentExt = ".wal.lz4"
SnapshotExt = ".snapshot.lz4"
GenerationNameLen = 16
)
// SQLite checkpoint modes.
const (
CheckpointModePassive = "PASSIVE"
CheckpointModeFull = "FULL"
CheckpointModeRestart = "RESTART"
CheckpointModeTruncate = "TRUNCATE"
)
// Litestream errors.
var (
ErrNoGeneration = errors.New("no generation available")
ErrNoSnapshots = errors.New("no snapshots available")
ErrChecksumMismatch = errors.New("invalid replica, checksum mismatch")
)
// SnapshotIterator represents an iterator over a collection of snapshot metadata.
type SnapshotIterator interface {
io.Closer
// Prepares the the next snapshot for reading with the Snapshot() method.
// Returns true if another snapshot is available. Returns false if no more
// snapshots are available or if an error occured.
Next() bool
// Returns an error that occurred during iteration.
Err() error
// Returns metadata for the currently positioned snapshot.
Snapshot() SnapshotInfo
}
// SliceSnapshotIterator returns all snapshots from an iterator as a slice.
func SliceSnapshotIterator(itr SnapshotIterator) ([]SnapshotInfo, error) {
var a []SnapshotInfo
for itr.Next() {
a = append(a, itr.Snapshot())
}
return a, itr.Close()
}
var _ SnapshotIterator = (*SnapshotInfoSliceIterator)(nil)
// SnapshotInfoSliceIterator represents an iterator for iterating over a slice of snapshots.
type SnapshotInfoSliceIterator struct {
init bool
a []SnapshotInfo
}
// NewSnapshotInfoSliceIterator returns a new instance of SnapshotInfoSliceIterator.
func NewSnapshotInfoSliceIterator(a []SnapshotInfo) *SnapshotInfoSliceIterator {
return &SnapshotInfoSliceIterator{a: a}
}
// Close always returns nil.
func (itr *SnapshotInfoSliceIterator) Close() error { return nil }
// Next moves to the next snapshot. Returns true if another snapshot is available.
func (itr *SnapshotInfoSliceIterator) Next() bool {
if !itr.init {
itr.init = true
return len(itr.a) > 0
}
itr.a = itr.a[1:]
return len(itr.a) > 0
}
// Err always returns nil.
func (itr *SnapshotInfoSliceIterator) Err() error { return nil }
// Snapshot returns the metadata from the currently positioned snapshot.
func (itr *SnapshotInfoSliceIterator) Snapshot() SnapshotInfo {
if len(itr.a) == 0 {
return SnapshotInfo{}
}
return itr.a[0]
}
// WALSegmentIterator represents an iterator over a collection of WAL segments.
type WALSegmentIterator interface {
io.Closer
// Prepares the the next WAL for reading with the WAL() method.
// Returns true if another WAL is available. Returns false if no more
// WAL files are available or if an error occured.
Next() bool
// Returns an error that occurred during iteration.
Err() error
// Returns metadata for the currently positioned WAL segment file.
WALSegment() WALSegmentInfo
}
// SliceWALSegmentIterator returns all WAL segment files from an iterator as a slice.
func SliceWALSegmentIterator(itr WALSegmentIterator) ([]WALSegmentInfo, error) {
var a []WALSegmentInfo
for itr.Next() {
a = append(a, itr.WALSegment())
}
return a, itr.Close()
}
var _ WALSegmentIterator = (*WALSegmentInfoSliceIterator)(nil)
// WALSegmentInfoSliceIterator represents an iterator for iterating over a slice of wal segments.
type WALSegmentInfoSliceIterator struct {
init bool
a []WALSegmentInfo
}
// NewWALSegmentInfoSliceIterator returns a new instance of WALSegmentInfoSliceIterator.
func NewWALSegmentInfoSliceIterator(a []WALSegmentInfo) *WALSegmentInfoSliceIterator {
return &WALSegmentInfoSliceIterator{a: a}
}
// Close always returns nil.
func (itr *WALSegmentInfoSliceIterator) Close() error { return nil }
// Next moves to the next wal segment. Returns true if another segment is available.
func (itr *WALSegmentInfoSliceIterator) Next() bool {
if !itr.init {
itr.init = true
return len(itr.a) > 0
}
itr.a = itr.a[1:]
return len(itr.a) > 0
}
// Err always returns nil.
func (itr *WALSegmentInfoSliceIterator) Err() error { return nil }
// WALSegment returns the metadata from the currently positioned wal segment.
func (itr *WALSegmentInfoSliceIterator) WALSegment() WALSegmentInfo {
if len(itr.a) == 0 {
return WALSegmentInfo{}
}
return itr.a[0]
}
// SnapshotInfo represents file information about a snapshot.
type SnapshotInfo struct {
Generation string
Index int
Size int64
CreatedAt time.Time
}
// Pos returns the WAL position when the snapshot was made.
func (info *SnapshotInfo) Pos() Pos {
return Pos{Generation: info.Generation, Index: info.Index}
}
// SnapshotInfoSlice represents a slice of snapshot metadata.
type SnapshotInfoSlice []SnapshotInfo
func (a SnapshotInfoSlice) Len() int { return len(a) }
func (a SnapshotInfoSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a SnapshotInfoSlice) Less(i, j int) bool {
if a[i].Generation != a[j].Generation {
return a[i].Generation < a[j].Generation
}
return a[i].Index < a[j].Index
}
// FilterSnapshotsAfter returns all snapshots that were created on or after t.
func FilterSnapshotsAfter(a []SnapshotInfo, t time.Time) []SnapshotInfo {
other := make([]SnapshotInfo, 0, len(a))
for _, snapshot := range a {
if !snapshot.CreatedAt.Before(t) {
other = append(other, snapshot)
}
}
return other
}
// FindMinSnapshotByGeneration finds the snapshot with the lowest index in a generation.
func FindMinSnapshotByGeneration(a []SnapshotInfo, generation string) *SnapshotInfo {
var min *SnapshotInfo
for _, snapshot := range a {
if snapshot.Generation != generation {
continue
} else if min == nil || snapshot.Index < min.Index {
min = &snapshot
}
}
return min
}
// WALInfo represents file information about a WAL file.
type WALInfo struct {
Generation string
Index int
CreatedAt time.Time
}
// WALInfoSlice represents a slice of WAL metadata.
type WALInfoSlice []WALInfo
func (a WALInfoSlice) Len() int { return len(a) }
func (a WALInfoSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a WALInfoSlice) Less(i, j int) bool {
if a[i].Generation != a[j].Generation {
return a[i].Generation < a[j].Generation
}
return a[i].Index < a[j].Index
}
// WALSegmentInfo represents file information about a WAL segment file.
type WALSegmentInfo struct {
Generation string
Index int
Offset int64
Size int64
CreatedAt time.Time
}
// Pos returns the WAL position when the segment was made.
func (info *WALSegmentInfo) Pos() Pos {
return Pos{Generation: info.Generation, Index: info.Index, Offset: info.Offset}
}
// WALSegmentInfoSlice represents a slice of WAL segment metadata.
type WALSegmentInfoSlice []WALSegmentInfo
func (a WALSegmentInfoSlice) Len() int { return len(a) }
func (a WALSegmentInfoSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a WALSegmentInfoSlice) Less(i, j int) bool {
if a[i].Generation != a[j].Generation {
return a[i].Generation < a[j].Generation
} else if a[i].Index != a[j].Index {
return a[i].Index < a[j].Index
}
return a[i].Offset < a[j].Offset
}
// Pos is a position in the WAL for a generation.
type Pos struct {
Generation string // generation name
Index int // wal file index
Offset int64 // offset within wal file
}
// String returns a string representation.
func (p Pos) String() string {
if p.IsZero() {
return ""
}
return fmt.Sprintf("%s/%08x:%d", p.Generation, p.Index, p.Offset)
}
// IsZero returns true if p is the zero value.
func (p Pos) IsZero() bool {
return p == (Pos{})
}
// Truncate returns p with the offset truncated to zero.
func (p Pos) Truncate() Pos {
return Pos{Generation: p.Generation, Index: p.Index}
}
// Checksum computes a running SQLite checksum over a byte slice.
func Checksum(bo binary.ByteOrder, s0, s1 uint32, b []byte) (uint32, uint32) {
assert(len(b)%8 == 0, "misaligned checksum byte slice")
// Iterate over 8-byte units and compute checksum.
for i := 0; i < len(b); i += 8 {
s0 += bo.Uint32(b[i:]) + s1
s1 += bo.Uint32(b[i+4:]) + s0
}
return s0, s1
}
const (
// WALHeaderSize is the size of the WAL header, in bytes.
WALHeaderSize = 32
// WALFrameHeaderSize is the size of the WAL frame header, in bytes.
WALFrameHeaderSize = 24
)
// calcWALSize returns the size of the WAL, in bytes, for a given number of pages.
func calcWALSize(pageSize int, n int) int64 {
return int64(WALHeaderSize + ((WALFrameHeaderSize + pageSize) * n))
}
// rollback rolls back tx. Ignores already-rolled-back errors.
func rollback(tx *sql.Tx) error {
if err := tx.Rollback(); err != nil && !strings.Contains(err.Error(), `transaction has already been committed or rolled back`) {
return err
}
return nil
}
// readWALHeader returns the header read from a WAL file.
func readWALHeader(filename string) ([]byte, error) {
f, err := os.Open(filename)
if err != nil {
return nil, err
}
defer f.Close()
buf := make([]byte, WALHeaderSize)
n, err := io.ReadFull(f, buf)
return buf[:n], err
}
// readWALFileAt reads a slice from a file. Do not use this with database files
// as it causes problems with non-OFD locks.
func readWALFileAt(filename string, offset, n int64) ([]byte, error) {
f, err := os.Open(filename)
if err != nil {
return nil, err
}
defer f.Close()
buf := make([]byte, n)
if n, err := f.ReadAt(buf, offset); err != nil {
return buf[:n], err
} else if n < len(buf) {
return buf[:n], io.ErrUnexpectedEOF
}
return buf, nil
}
// removeTmpFiles recursively finds and removes .tmp files.
func removeTmpFiles(root string) error {
return filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
if err != nil {
return nil // skip errored files
} else if info.IsDir() {
return nil // skip directories
} else if !strings.HasSuffix(path, ".tmp") {
return nil // skip non-temp files
}
return os.Remove(path)
})
}
// IsGenerationName returns true if s is the correct length and is only lowercase hex characters.
func IsGenerationName(s string) bool {
if len(s) != GenerationNameLen {
return false
}
for _, ch := range s {
if !isHexChar(ch) {
return false
}
}
return true
}
// FormatIndex formats an index as an 8-character hex value.
func FormatIndex(index int) string {
return fmt.Sprintf("%08x", index)
}
// ParseIndex parses a hex-formatted index into an integer.
func ParseIndex(s string) (int, error) {
v, err := strconv.ParseUint(s, 16, 32)
if err != nil {
return -1, fmt.Errorf("cannot parse index: %q", s)
}
return int(v), nil
}
// FormatOffset formats an offset as an 8-character hex value.
func FormatOffset(offset int64) string {
return fmt.Sprintf("%08x", offset)
}
// ParseOffset parses a hex-formatted offset into an integer.
func ParseOffset(s string) (int64, error) {
v, err := strconv.ParseInt(s, 16, 32)
if err != nil {
return -1, fmt.Errorf("cannot parse index: %q", s)
}
return v, nil
}
// isHexChar returns true if ch is a lowercase hex character.
func isHexChar(ch rune) bool {
return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f')
}
// Tracef is used for low-level tracing.
var Tracef = func(format string, a ...interface{}) {}
func assert(condition bool, message string) {
if !condition {
panic("assertion failed: " + message)
}
}