Merge pull request #37 from benbjohnson/fix-shadow-write
Fix shadow wal corruption on stalled validation
This commit is contained in:
69
db.go
69
db.go
@@ -1025,66 +1025,65 @@ func (db *DB) copyToShadowWAL(filename string) (newSize int64, err error) {
|
|||||||
return 0, fmt.Errorf("last checksum: %w", err)
|
return 0, fmt.Errorf("last checksum: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Seek to correct position on both files.
|
// Seek to correct position on real wal.
|
||||||
if _, err := r.Seek(origSize, io.SeekStart); err != nil {
|
if _, err := r.Seek(origSize, io.SeekStart); err != nil {
|
||||||
return 0, fmt.Errorf("wal seek: %w", err)
|
return 0, fmt.Errorf("real wal seek: %w", err)
|
||||||
|
} else if _, err := w.Seek(origSize, io.SeekStart); err != nil {
|
||||||
|
return 0, fmt.Errorf("shadow wal seek: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read through WAL from last position to find the page of the last
|
// Read through WAL from last position to find the page of the last
|
||||||
// committed transaction.
|
// committed transaction.
|
||||||
tmpSz := origSize
|
frame := make([]byte, db.pageSize+WALFrameHeaderSize)
|
||||||
|
var buf bytes.Buffer
|
||||||
|
offset := origSize
|
||||||
lastCommitSize := origSize
|
lastCommitSize := origSize
|
||||||
buf := make([]byte, db.pageSize+WALFrameHeaderSize)
|
|
||||||
for {
|
for {
|
||||||
Tracef("%s: copy-shadow: %s @ %d", db.path, filename, tmpSz)
|
|
||||||
|
|
||||||
// Read next page from WAL file.
|
// Read next page from WAL file.
|
||||||
if _, err := io.ReadFull(r, buf); err == io.EOF || err == io.ErrUnexpectedEOF {
|
if _, err := io.ReadFull(r, frame); err == io.EOF || err == io.ErrUnexpectedEOF {
|
||||||
Tracef("%s: copy-shadow: break %s", db.path, err)
|
Tracef("%s: copy-shadow: break %s @ %d; err=%s", db.path, filename, offset, err)
|
||||||
break // end of file or partial page
|
break // end of file or partial page
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
return 0, fmt.Errorf("read wal: %w", err)
|
return 0, fmt.Errorf("read wal: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read frame salt & compare to header salt. Stop reading on mismatch.
|
// Read frame salt & compare to header salt. Stop reading on mismatch.
|
||||||
salt0 := binary.BigEndian.Uint32(buf[8:])
|
salt0 := binary.BigEndian.Uint32(frame[8:])
|
||||||
salt1 := binary.BigEndian.Uint32(buf[12:])
|
salt1 := binary.BigEndian.Uint32(frame[12:])
|
||||||
if salt0 != hsalt0 || salt1 != hsalt1 {
|
if salt0 != hsalt0 || salt1 != hsalt1 {
|
||||||
Tracef("%s: copy-shadow: break: salt mismatch", db.path)
|
Tracef("%s: copy-shadow: break: salt mismatch", db.path)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify checksum of page is valid.
|
// Verify checksum of page is valid.
|
||||||
fchksum0 := binary.BigEndian.Uint32(buf[16:])
|
fchksum0 := binary.BigEndian.Uint32(frame[16:])
|
||||||
fchksum1 := binary.BigEndian.Uint32(buf[20:])
|
fchksum1 := binary.BigEndian.Uint32(frame[20:])
|
||||||
chksum0, chksum1 = Checksum(bo, chksum0, chksum1, buf[:8]) // frame header
|
chksum0, chksum1 = Checksum(bo, chksum0, chksum1, frame[:8]) // frame header
|
||||||
chksum0, chksum1 = Checksum(bo, chksum0, chksum1, buf[24:]) // frame data
|
chksum0, chksum1 = Checksum(bo, chksum0, chksum1, frame[24:]) // frame data
|
||||||
if chksum0 != fchksum0 || chksum1 != fchksum1 {
|
if chksum0 != fchksum0 || chksum1 != fchksum1 {
|
||||||
log.Printf("copy shadow: checksum mismatch, skipping: offset=%d (%x,%x) != (%x,%x)", tmpSz, chksum0, chksum1, fchksum0, fchksum1)
|
log.Printf("copy shadow: checksum mismatch, skipping: offset=%d (%x,%x) != (%x,%x)", offset, chksum0, chksum1, fchksum0, fchksum1)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add page to the new size of the shadow WAL.
|
// Add page to the new size of the shadow WAL.
|
||||||
tmpSz += int64(len(buf))
|
buf.Write(frame)
|
||||||
|
|
||||||
// Mark commit record.
|
Tracef("%s: copy-shadow: ok %s offset=%d salt=%x %x", db.path, filename, offset, salt0, salt1)
|
||||||
newDBSize := binary.BigEndian.Uint32(buf[4:])
|
offset += int64(len(frame))
|
||||||
|
|
||||||
|
// Flush to shadow WAL if commit record.
|
||||||
|
newDBSize := binary.BigEndian.Uint32(frame[4:])
|
||||||
if newDBSize != 0 {
|
if newDBSize != 0 {
|
||||||
lastCommitSize = tmpSz
|
if _, err := buf.WriteTo(w); err != nil {
|
||||||
|
return 0, fmt.Errorf("write shadow wal: %w", err)
|
||||||
|
}
|
||||||
|
buf.Reset()
|
||||||
|
lastCommitSize = offset
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Seek to correct position on both files.
|
// Sync & close.
|
||||||
if _, err := r.Seek(origSize, io.SeekStart); err != nil {
|
if err := w.Sync(); err != nil {
|
||||||
return 0, fmt.Errorf("wal seek: %w", err)
|
|
||||||
} else if _, err := w.Seek(origSize, io.SeekStart); err != nil {
|
|
||||||
return 0, fmt.Errorf("shadow wal seek: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy bytes, sync & close.
|
|
||||||
if _, err := io.CopyN(w, r, lastCommitSize-origSize); err != nil {
|
|
||||||
return 0, err
|
|
||||||
} else if err := w.Sync(); err != nil {
|
|
||||||
return 0, err
|
return 0, err
|
||||||
} else if err := w.Close(); err != nil {
|
} else if err := w.Close(); err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
@@ -1107,6 +1106,8 @@ func (db *DB) ShadowWALReader(pos Pos) (r *ShadowWALReader, err error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
} else if r.N() > 0 {
|
} else if r.N() > 0 {
|
||||||
return r, nil
|
return r, nil
|
||||||
|
} else if err := r.Close(); err != nil { // no data, close, try next
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise attempt to read the start of the next WAL file.
|
// Otherwise attempt to read the start of the next WAL file.
|
||||||
@@ -1180,6 +1181,9 @@ type ShadowWALReader struct {
|
|||||||
pos Pos
|
pos Pos
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Name returns the filename of the underlying file.
|
||||||
|
func (r *ShadowWALReader) Name() string { return r.f.Name() }
|
||||||
|
|
||||||
// Close closes the underlying WAL file handle.
|
// Close closes the underlying WAL file handle.
|
||||||
func (r *ShadowWALReader) Close() error { return r.f.Close() }
|
func (r *ShadowWALReader) Close() error { return r.f.Close() }
|
||||||
|
|
||||||
@@ -1297,6 +1301,11 @@ func (db *DB) checkpointAndInit(generation, mode string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copy shadow WAL before checkpoint to copy as much as possible.
|
||||||
|
if _, err := db.copyToShadowWAL(shadowWALPath); err != nil {
|
||||||
|
return fmt.Errorf("cannot copy to end of shadow wal before checkpoint: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
// Execute checkpoint and immediately issue a write to the WAL to ensure
|
// Execute checkpoint and immediately issue a write to the WAL to ensure
|
||||||
// a new page is written.
|
// a new page is written.
|
||||||
if err := db.checkpoint(mode); err != nil {
|
if err := db.checkpoint(mode); err != nil {
|
||||||
|
|||||||
@@ -95,9 +95,9 @@ type Pos struct {
|
|||||||
// String returns a string representation.
|
// String returns a string representation.
|
||||||
func (p Pos) String() string {
|
func (p Pos) String() string {
|
||||||
if p.IsZero() {
|
if p.IsZero() {
|
||||||
return "<>"
|
return ""
|
||||||
}
|
}
|
||||||
return fmt.Sprintf("<%s,%08x,%d>", p.Generation, p.Index, p.Offset)
|
return fmt.Sprintf("%s/%08x:%d", p.Generation, p.Index, p.Offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsZero returns true if p is the zero value.
|
// IsZero returns true if p is the zero value.
|
||||||
|
|||||||
45
replica.go
45
replica.go
@@ -2,6 +2,7 @@ package litestream
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
@@ -598,6 +599,8 @@ func (r *FileReplica) Sync(ctx context.Context) (err error) {
|
|||||||
}
|
}
|
||||||
generation := dpos.Generation
|
generation := dpos.Generation
|
||||||
|
|
||||||
|
Tracef("%s(%s): replica sync: db.pos=%s", r.db.Path(), r.Name(), dpos)
|
||||||
|
|
||||||
// Create snapshot if no snapshots exist for generation.
|
// Create snapshot if no snapshots exist for generation.
|
||||||
if n, err := r.snapshotN(generation); err != nil {
|
if n, err := r.snapshotN(generation); err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -617,6 +620,7 @@ func (r *FileReplica) Sync(ctx context.Context) (err error) {
|
|||||||
return fmt.Errorf("cannot determine replica position: %s", err)
|
return fmt.Errorf("cannot determine replica position: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Tracef("%s(%s): replica sync: calc new pos: %s", r.db.Path(), r.Name(), pos)
|
||||||
r.mu.Lock()
|
r.mu.Lock()
|
||||||
r.pos = pos
|
r.pos = pos
|
||||||
r.mu.Unlock()
|
r.mu.Unlock()
|
||||||
@@ -669,11 +673,48 @@ func (r *FileReplica) syncWAL(ctx context.Context) (err error) {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
n, err := io.Copy(w, rd)
|
// Copy header if at offset zero.
|
||||||
r.walBytesCounter.Add(float64(n))
|
var psalt uint64 // previous salt value
|
||||||
|
if pos := rd.Pos(); pos.Offset == 0 {
|
||||||
|
buf := make([]byte, WALHeaderSize)
|
||||||
|
if _, err := io.ReadFull(rd, buf); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
psalt = binary.BigEndian.Uint64(buf[16:24])
|
||||||
|
|
||||||
|
n, err := w.Write(buf)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
r.walBytesCounter.Add(float64(n))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy frames.
|
||||||
|
for {
|
||||||
|
pos := rd.Pos()
|
||||||
|
assert(pos.Offset == frameAlign(pos.Offset, r.db.pageSize), "shadow wal reader not frame aligned")
|
||||||
|
|
||||||
|
buf := make([]byte, WALFrameHeaderSize+r.db.pageSize)
|
||||||
|
if _, err := io.ReadFull(rd, buf); err == io.EOF {
|
||||||
|
break
|
||||||
|
} else if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify salt matches the previous frame/header read.
|
||||||
|
salt := binary.BigEndian.Uint64(buf[8:16])
|
||||||
|
if psalt != 0 && psalt != salt {
|
||||||
|
return fmt.Errorf("replica salt mismatch: %s", filepath.Base(filename))
|
||||||
|
}
|
||||||
|
psalt = salt
|
||||||
|
|
||||||
|
n, err := w.Write(buf)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
r.walBytesCounter.Add(float64(n))
|
||||||
|
}
|
||||||
|
|
||||||
if err := w.Sync(); err != nil {
|
if err := w.Sync(); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|||||||
Reference in New Issue
Block a user