Group replica wal segments by index

This commit changes the replica path format to group segments within
a single index in the same directory. This is to eventually add the
ability to seek to a record on file-based systems without having
to iterate over the records. The DB shadow WAL will also be changed
to this same format to support live replicas.
This commit is contained in:
Ben Johnson
2021-06-14 15:19:27 -06:00
parent 55c17b9d8e
commit fc897b481f
12 changed files with 504 additions and 426 deletions

View File

@@ -8,6 +8,7 @@ import (
"os"
"path/filepath"
"sort"
"strings"
"github.com/benbjohnson/litestream"
"github.com/benbjohnson/litestream/internal"
@@ -84,7 +85,7 @@ func (c *ReplicaClient) SnapshotPath(generation string, index int) (string, erro
if err != nil {
return "", err
}
return filepath.Join(dir, litestream.FormatSnapshotPath(index)), nil
return filepath.Join(dir, litestream.FormatIndex(index)+".snapshot.lz4"), nil
}
// WALDir returns the path to a generation's WAL directory
@@ -102,7 +103,7 @@ func (c *ReplicaClient) WALSegmentPath(generation string, index int, offset int6
if err != nil {
return "", err
}
return filepath.Join(dir, litestream.FormatWALSegmentPath(index, offset)), nil
return filepath.Join(dir, litestream.FormatIndex(index), fmt.Sprintf("%08x.wal.lz4", offset)), nil
}
// Generations returns a list of available generation names.
@@ -148,7 +149,7 @@ func (c *ReplicaClient) DeleteGeneration(ctx context.Context, generation string)
func (c *ReplicaClient) Snapshots(ctx context.Context, generation string) (litestream.SnapshotIterator, error) {
dir, err := c.SnapshotsDir(generation)
if err != nil {
return nil, fmt.Errorf("cannot determine snapshots path: %w", err)
return nil, err
}
f, err := os.Open(dir)
@@ -168,7 +169,7 @@ func (c *ReplicaClient) Snapshots(ctx context.Context, generation string) (lites
infos := make([]litestream.SnapshotInfo, 0, len(fis))
for _, fi := range fis {
// Parse index from filename.
index, err := litestream.ParseSnapshotPath(fi.Name())
index, err := internal.ParseSnapshotPath(filepath.Base(fi.Name()))
if err != nil {
continue
}
@@ -190,7 +191,7 @@ func (c *ReplicaClient) Snapshots(ctx context.Context, generation string) (lites
func (c *ReplicaClient) WriteSnapshot(ctx context.Context, generation string, index int, rd io.Reader) (info litestream.SnapshotInfo, err error) {
filename, err := c.SnapshotPath(generation, index)
if err != nil {
return info, fmt.Errorf("cannot determine snapshot path: %w", err)
return info, err
}
var fileInfo, dirInfo os.FileInfo
@@ -243,7 +244,7 @@ func (c *ReplicaClient) WriteSnapshot(ctx context.Context, generation string, in
func (c *ReplicaClient) SnapshotReader(ctx context.Context, generation string, index int) (io.ReadCloser, error) {
filename, err := c.SnapshotPath(generation, index)
if err != nil {
return nil, fmt.Errorf("cannot determine snapshot path: %w", err)
return nil, err
}
return os.Open(filename)
}
@@ -264,7 +265,7 @@ func (c *ReplicaClient) DeleteSnapshot(ctx context.Context, generation string, i
func (c *ReplicaClient) WALSegments(ctx context.Context, generation string) (litestream.WALSegmentIterator, error) {
dir, err := c.WALDir(generation)
if err != nil {
return nil, fmt.Errorf("cannot determine wal path: %w", err)
return nil, err
}
f, err := os.Open(dir)
@@ -281,33 +282,25 @@ func (c *ReplicaClient) WALSegments(ctx context.Context, generation string) (lit
}
// Iterate over every file and convert to metadata.
infos := make([]litestream.WALSegmentInfo, 0, len(fis))
indexes := make([]int, 0, len(fis))
for _, fi := range fis {
// Parse index from filename.
index, offset, err := litestream.ParseWALSegmentPath(fi.Name())
if err != nil {
index, err := litestream.ParseIndex(fi.Name())
if err != nil || !fi.IsDir() {
continue
}
infos = append(infos, litestream.WALSegmentInfo{
Generation: generation,
Index: index,
Offset: offset,
Size: fi.Size(),
CreatedAt: fi.ModTime().UTC(),
})
indexes = append(indexes, index)
}
sort.Sort(litestream.WALSegmentInfoSlice(infos))
sort.Ints(indexes)
return litestream.NewWALSegmentInfoSliceIterator(infos), nil
return newWALSegmentIterator(dir, generation, indexes), nil
}
// WriteWALSegment writes LZ4 compressed data from rd into a file on disk.
func (c *ReplicaClient) WriteWALSegment(ctx context.Context, pos litestream.Pos, rd io.Reader) (info litestream.WALSegmentInfo, err error) {
filename, err := c.WALSegmentPath(pos.Generation, pos.Index, pos.Offset)
if err != nil {
return info, fmt.Errorf("cannot determine wal segment path: %w", err)
return info, err
}
var fileInfo, dirInfo os.FileInfo
@@ -361,7 +354,7 @@ func (c *ReplicaClient) WriteWALSegment(ctx context.Context, pos litestream.Pos,
func (c *ReplicaClient) WALSegmentReader(ctx context.Context, pos litestream.Pos) (io.ReadCloser, error) {
filename, err := c.WALSegmentPath(pos.Generation, pos.Index, pos.Offset)
if err != nil {
return nil, fmt.Errorf("cannot determine wal segment path: %w", err)
return nil, err
}
return os.Open(filename)
}
@@ -371,7 +364,7 @@ func (c *ReplicaClient) DeleteWALSegments(ctx context.Context, a []litestream.Po
for _, pos := range a {
filename, err := c.WALSegmentPath(pos.Generation, pos.Index, pos.Offset)
if err != nil {
return fmt.Errorf("cannot determine wal segment path: %w", err)
return err
}
if err := os.Remove(filename); err != nil && !os.IsNotExist(err) {
return err
@@ -379,3 +372,97 @@ func (c *ReplicaClient) DeleteWALSegments(ctx context.Context, a []litestream.Po
}
return nil
}
type walSegmentIterator struct {
dir string
generation string
indexes []int
infos []litestream.WALSegmentInfo
err error
}
func newWALSegmentIterator(dir, generation string, indexes []int) *walSegmentIterator {
return &walSegmentIterator{
dir: dir,
generation: generation,
indexes: indexes,
}
}
func (itr *walSegmentIterator) Close() (err error) {
return itr.err
}
func (itr *walSegmentIterator) Next() bool {
// Exit if an error has already occurred.
if itr.err != nil {
return false
}
for {
// Move to the next segment in cache, if available.
if len(itr.infos) > 1 {
itr.infos = itr.infos[1:]
return true
}
itr.infos = itr.infos[:0] // otherwise clear infos
// Move to the next index unless this is the first time initializing.
if itr.infos != nil && len(itr.indexes) > 0 {
itr.indexes = itr.indexes[1:]
}
// If no indexes remain, stop iteration.
if len(itr.indexes) == 0 {
return false
}
// Read segments into a cache for the current index.
index := itr.indexes[0]
f, err := os.Open(filepath.Join(itr.dir, litestream.FormatIndex(index)))
if err != nil {
itr.err = err
return false
}
defer f.Close()
fis, err := f.Readdir(-1)
if err != nil {
itr.err = err
return false
}
for _, fi := range fis {
filename := filepath.Base(fi.Name())
if fi.IsDir() {
continue
}
offset, err := litestream.ParseOffset(strings.TrimSuffix(filename, ".wal.lz4"))
if err != nil {
continue
}
itr.infos = append(itr.infos, litestream.WALSegmentInfo{
Generation: itr.generation,
Index: index,
Offset: offset,
Size: fi.Size(),
CreatedAt: fi.ModTime().UTC(),
})
}
if len(itr.infos) > 0 {
return true
}
}
}
func (itr *walSegmentIterator) Err() error { return itr.err }
func (itr *walSegmentIterator) WALSegment() litestream.WALSegmentInfo {
if len(itr.infos) == 0 {
return litestream.WALSegmentInfo{}
}
return itr.infos[0]
}

View File

@@ -118,7 +118,7 @@ func TestReplicaClient_WALSegmentPath(t *testing.T) {
t.Run("OK", func(t *testing.T) {
if got, err := file.NewReplicaClient("/foo").WALSegmentPath("0123456701234567", 1000, 1001); err != nil {
t.Fatal(err)
} else if want := "/foo/generations/0123456701234567/wal/000003e8_000003e9.wal.lz4"; got != want {
} else if want := "/foo/generations/0123456701234567/wal/000003e8/000003e9.wal.lz4"; got != want {
t.Fatalf("WALPath()=%v, want %v", got, want)
}
})