Add real+shadow recovery

This commit is contained in:
Ben Johnson
2020-11-11 16:35:10 -07:00
parent b9e9bd93e8
commit 231b41b29f
3 changed files with 257 additions and 72 deletions

290
db.go
View File

@@ -1,6 +1,7 @@
package litestream package litestream
import ( import (
"bytes"
"context" "context"
"fmt" "fmt"
"io" "io"
@@ -36,6 +37,7 @@ type DB struct {
walHeader *sqlite.WALHeader walHeader *sqlite.WALHeader
processedWALByteN int64 // bytes copied to shadow WAL processedWALByteN int64 // bytes copied to shadow WAL
pendingWALByteN int64 // bytes pending copy to shadow WAL pendingWALByteN int64 // bytes pending copy to shadow WAL
checksum uint64 // running checksum on real WAL
ctx context.Context ctx context.Context
cancel func() cancel func()
@@ -84,6 +86,48 @@ func (db *DB) ActiveShadowWALPath() string {
return filepath.Join(db.ShadowWALDir(), ActiveWALName) return filepath.Join(db.ShadowWALDir(), ActiveWALName)
} }
// LastShadowWALPath returns the active shadow WAL or the last snapshotted shadow WAL path.
// Returns an empty string if no shadow WAL files exist.
func (db *DB) LastShadowWALPath() (string, error) {
// Return active shadow WAL if it exists.
if _, err := os.Stat(db.ActiveShadowWALPath()); err == nil {
return db.ActiveShadowWALPath(), nil
} else if !os.IsNotExist(err) {
return "", nil
}
// Otherwise search for the largest shadow WAL file.
f, err := os.Open(db.ShadowWALDir())
if os.IsNotExist(err) {
return "", nil
} else if err != nil {
return "", err
}
defer f.Close()
var filename string
for {
fis, err := f.Readdir(512)
if err != nil {
return "", err
}
for _, fi := range fis {
if !strings.HasSuffix(fi.Name(), WALExt) {
continue
} else if filename == "" || fi.Name() > filename {
filename = fi.Name()
}
}
}
// Return an error if there is no shadow WAL files.
if filename == "" {
return "", os.ErrNotExist
}
return filepath.Join(db.ShadowWALDir(), filename), nil
}
// LogPath returns the path to the internal log directory. // LogPath returns the path to the internal log directory.
func (db *DB) LogPath() string { func (db *DB) LogPath() string {
return filepath.Join(db.MetaPath(), LogFilename) return filepath.Join(db.MetaPath(), LogFilename)
@@ -163,49 +207,7 @@ func (db *DB) recover() error {
// recoverRealWALOnly copies the real WAL to the active shadow WAL. // recoverRealWALOnly copies the real WAL to the active shadow WAL.
func (db *DB) recoverRealWALOnly() error { func (db *DB) recoverRealWALOnly() error {
db.logger.Printf("recovering: real WAL only") db.logger.Printf("recovering: real WAL only")
return db.sync()
// Open real WAL to read from.
r, err := os.Open(db.WALPath())
if err != nil {
return fmt.Errorf("cannot open wal: %w", err)
}
defer r.Close()
// Read header from real WAL.
var hdr sqlite.WALHeader
if _, err := hdr.ReadFrom(r); os.IsNotExist(err) {
return nil
} else if err != nil {
return err
}
// Create a new shadow WAL file.
w, err := db.createActiveShadowWAL(hdr)
if err != nil {
return fmt.Errorf("cannot create active shadow wal: %w", err)
}
defer w.Close()
// Read from real WAL and copy to shadow WAL.
buf := make([]byte, hdr.PageSize)
for {
// Read frame header & data from real WAL.
var fhdr sqlite.WALFrameHeader
if _, err := fhdr.ReadFrom(r); err != nil {
return fmt.Errorf("cannot read frame header: %w", err)
} else if _, err := io.ReadFull(r, buf); err != nil {
return fmt.Errorf("cannot read frame: %w", err)
}
// Copy to the shadow WAL.
if _, err := fhdr.WriteTo(w); err != nil {
return fmt.Errorf("cannot write frame to shadow: %w", err)
} else if _, err := w.Write(buf); err != nil {
return fmt.Errorf("cannot write frame to shadow: %w", err)
}
}
return nil
} }
// walExists returns true if the real WAL exists. // walExists returns true if the real WAL exists.
@@ -238,32 +240,6 @@ func (db *DB) shadowWALExists() (bool, error) {
} }
} }
// createActiveShadowWAL creates a new shadow WAL file with the given header.
func (db *DB) createActiveShadowWAL(hdr sqlite.WALHeader) (f *os.File, err error) {
if f, err = os.OpenFile(db.ActiveShadowWALPath(), os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600); err != nil {
return nil, err
}
// Attempt to clean up if shadow WAL creation fails.
defer func() {
if err != nil {
f.Close()
os.Remove(f.Name())
}
}()
// Clear some fields from header that we won't use for the shadow WAL.
hdr = ClearShadowWALHeader(hdr)
// Write header & save it to the DB to ensure new WAL header writes match.
if _, err := hdr.WriteTo(f); err != nil {
return nil, err
}
db.walHeader = &hdr
return f, nil
}
// recoverShadowWALOnly verifies the last page in the shadow WAL matches the // recoverShadowWALOnly verifies the last page in the shadow WAL matches the
// contents of the database page. // contents of the database page.
func (db *DB) recoverShadowWALOnly() error { func (db *DB) recoverShadowWALOnly() error {
@@ -274,7 +250,131 @@ func (db *DB) recoverShadowWALOnly() error {
// recoverRealAndShadowWALs verifies the last page of the real & shadow WALs match. // recoverRealAndShadowWALs verifies the last page of the real & shadow WALs match.
func (db *DB) recoverRealAndShadowWALs() error { func (db *DB) recoverRealAndShadowWALs() error {
db.logger.Printf("recovering: real & shadow WAL") db.logger.Printf("recovering: real & shadow WAL")
panic("TODO")
// Read WAL header from shadow WAL.
lastShadowWALPath, err := db.LastShadowWALPath()
if err != nil {
return fmt.Errorf("cannot find last shadow wal path: %w", err)
}
hdr, err := readFileWALHeader(lastShadowWALPath)
if err != nil {
return fmt.Errorf("cannot read last shadow wal header: %w", err)
}
db.walHeader = &hdr
// Read last pages from shadow WAL & real WAL and ensure they match.
if fhdr0, data0, err := readLastWALPage(lastShadowWALPath); err != nil {
return fmt.Errorf("cannot read last shadow wal page: %w", err)
} else if fhdr1, data1, err := readLastWALPage(db.WALPath()); err != nil {
return fmt.Errorf("cannot read last shadow wal page: %w", err)
} else if fhdr0 != fhdr1 {
return fmt.Errorf("last frame header mismatch: %#v != %#v", fhdr0, fhdr1)
} else if !bytes.Equal(data0, data1) {
return fmt.Errorf("last frame data mismatch")
} else {
db.checksum = fhdr1.Checksum
}
// Update position within real WAL.
fi, err := os.Stat(db.WALPath())
if err != nil {
return fmt.Errorf("cannot stat wal: %w", err)
}
db.processedWALByteN = fi.Size()
db.pendingWALByteN = 0
return nil
}
// Sync synchronizes the real WAL to the shadow WAL.
func (db *DB) Sync() error {
db.mu.Lock()
defer db.mu.Unlock()
return db.sync()
}
func (db *DB) sync() (err error) {
db.logger.Printf("sync: begin")
defer func() {
db.logger.Printf("sync: end: err=%v", err)
}()
// Open real WAL to read from.
r, err := os.Open(db.WALPath())
if err != nil {
return fmt.Errorf("cannot open wal: %w", err)
} else if _, err := r.Seek(db.processedWALByteN, io.SeekStart); err != nil {
return fmt.Errorf("cannot seek wal: %w", err)
}
defer r.Close()
// TODO: Verify all frames are valid & committed before copy.
// Read header if we are at the beginning of the WAL.
if db.processedWALByteN == 0 {
var hdr sqlite.WALHeader
if _, err := hdr.ReadFrom(r); err != nil {
return fmt.Errorf("cannot read wal header: %w", err)
}
// Save checksum to verify later pages in WAL.
db.checksum = hdr.Checksum
// Clear out salt & checksum from header for shadow WAL.
hdr = ClearShadowWALHeader(hdr)
db.walHeader = &hdr
}
// Open shadow WAL to copy to.
w, err := os.OpenFile(db.ActiveShadowWALPath(), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0600)
if err != nil {
return fmt.Errorf("cannot open active shadow wal: %w", err)
}
defer w.Close()
// If we are at the start of a new shadow WAL, write a header.
if n, err := w.Seek(0, io.SeekCurrent); err != nil {
return fmt.Errorf("cannot seek shadow wal: %w", err)
} else if n == 0 {
db.logger.Printf("sync: new shadow wal, writing header: magic=%x page-size=%d", db.walHeader.Magic, db.walHeader.PageSize)
if _, err := db.walHeader.WriteTo(w); err != nil {
return fmt.Errorf("cannot write shadow wal header: %w", err)
}
}
// Read from real WAL and copy to shadow WAL.
buf := make([]byte, db.walHeader.PageSize)
for db.pendingWALByteN != 0 {
// Read frame header & data from real WAL.
var fhdr sqlite.WALFrameHeader
if _, err := fhdr.ReadFrom(r); err == io.EOF {
break
} else if err != nil {
return fmt.Errorf("cannot read frame header: %w", err)
} else if _, err := io.ReadFull(r, buf); err != nil {
return fmt.Errorf("cannot read frame: %w", err)
}
db.logger.Printf("sync: copy frame: pgno=%d pageN=%d salt=%x checksum=%x",
fhdr.Pgno,
fhdr.PageN,
fhdr.Salt,
fhdr.Checksum,
)
// Copy to the shadow WAL.
if _, err := fhdr.WriteTo(w); err != nil {
return fmt.Errorf("cannot write frame to shadow: %w", err)
} else if _, err := w.Write(buf); err != nil {
return fmt.Errorf("cannot write frame to shadow: %w", err)
}
byteN := int64(sqlite.WALFrameHeaderSize + len(buf))
db.processedWALByteN += byteN
db.pendingWALByteN -= byteN
}
return nil
} }
// Close stops management of the database. // Close stops management of the database.
@@ -340,6 +440,7 @@ func (db *DB) setHeader(page []byte) {
func (db *DB) AddPendingWALByteN(n int64) { func (db *DB) AddPendingWALByteN(n int64) {
db.mu.Lock() db.mu.Lock()
defer db.mu.Unlock() defer db.mu.Unlock()
db.logger.Printf("write: n=%d pending=%d", n, db.pendingWALByteN)
db.pendingWALByteN += n db.pendingWALByteN += n
} }
@@ -366,3 +467,52 @@ func ClearShadowWALHeader(hdr sqlite.WALHeader) sqlite.WALHeader {
hdr.Checksum = 0 hdr.Checksum = 0
return hdr return hdr
} }
// readLastWALPage reads the last frame header & data from a WAL file.
func readLastWALPage(path string) (fhdr sqlite.WALFrameHeader, data []byte, err error) {
f, err := os.Open(path)
if err != nil {
return fhdr, data, err
}
defer f.Close()
// Determine WAL file size.
fi, err := f.Stat()
if err != nil {
return fhdr, data, err
}
// Read WAL header to determine page size.
var hdr sqlite.WALHeader
if _, err := hdr.ReadFrom(f); err != nil {
return fhdr, data, fmt.Errorf("cannot read wal header: %w", err)
}
// WAL file size must be divisible by frame size (minus the header).
if (fi.Size()-sqlite.WALHeaderSize)%(sqlite.WALFrameHeaderSize+int64(hdr.PageSize)) != 0 {
return fhdr, data, fmt.Errorf("partial wal record: path=%s sz=%d", path, fi.Size())
}
// Seek to last frame and read header & data.
data = make([]byte, hdr.PageSize)
if _, err := f.Seek(sqlite.WALFrameHeaderSize+int64(hdr.PageSize), io.SeekStart); err != nil {
return fhdr, data, fmt.Errorf("cannot seek: %w", err)
} else if _, err := fhdr.ReadFrom(f); err != nil {
return fhdr, data, fmt.Errorf("cannot read frame header: %w", err)
} else if _, err := io.ReadFull(f, data); err != nil {
return fhdr, data, fmt.Errorf("cannot read frame data: %w", err)
}
return fhdr, data, nil
}
// readFileWALHeader reads the WAL header from file.
func readFileWALHeader(path string) (hdr sqlite.WALHeader, err error) {
f, err := os.Open(path)
if err != nil {
return hdr, err
}
defer f.Close()
_, err = hdr.ReadFrom(f)
return hdr, err
}

View File

@@ -37,6 +37,9 @@ func NewHandle(n *Node, f *os.File) *Handle {
// Release closes the underlying file descriptor. // Release closes the underlying file descriptor.
func (h *Handle) Release(ctx context.Context, req *fuse.ReleaseRequest) (err error) { func (h *Handle) Release(ctx context.Context, req *fuse.ReleaseRequest) (err error) {
if err := h.node.Sync(); err != nil {
return err
}
return h.f.Close() return h.f.Close()
} }
@@ -90,6 +93,9 @@ func (h *Handle) Write(ctx context.Context, req *fuse.WriteRequest, resp *fuse.W
// Flush is called when a file handle is synced to disk. Implements fs.HandleFlusher. // Flush is called when a file handle is synced to disk. Implements fs.HandleFlusher.
func (h *Handle) Flush(ctx context.Context, req *fuse.FlushRequest) (err error) { func (h *Handle) Flush(ctx context.Context, req *fuse.FlushRequest) (err error) {
if err := h.node.Sync(); err != nil {
return err
}
return h.f.Sync() return h.f.Sync()
} }

29
node.go
View File

@@ -56,6 +56,11 @@ func (n *Node) srcpath() string {
return filepath.Join(n.fs.TargetPath, n.path) return filepath.Join(n.fs.TargetPath, n.path)
} }
// IsWAL returns true if node path has a "-wal" suffix.
func (n *Node) IsWAL() bool {
return strings.HasSuffix(n.path, sqlite.WALSuffix)
}
// DB returns the DB object associated with the node, if any. // DB returns the DB object associated with the node, if any.
// If node points to a "-wal" file then the associated DB is returned. // If node points to a "-wal" file then the associated DB is returned.
func (n *Node) DB() *DB { func (n *Node) DB() *DB {
@@ -65,6 +70,25 @@ func (n *Node) DB() *DB {
return n.fs.DB(n.path) return n.fs.DB(n.path)
} }
// Sync synchronizes the data to the shadow WAL if this node is the WAL.
func (n *Node) Sync() (err error) {
println("dbg/node.sync")
// Ignore if this is not the WAL.
if !n.IsWAL() {
println("dbg/node.sync.notwal", n.path)
return nil
}
// Ignore if the node is not a managed db.
db := n.DB()
if db == nil {
println("dbg/node.sync.notmanaged", n.path)
return nil
}
return db.Sync()
}
func (n *Node) Attr(ctx context.Context, a *fuse.Attr) (err error) { func (n *Node) Attr(ctx context.Context, a *fuse.Attr) (err error) {
fi, err := os.Stat(n.srcpath()) fi, err := os.Stat(n.srcpath())
if err != nil { if err != nil {
@@ -312,6 +336,11 @@ func (n *Node) Mknod(ctx context.Context, req *fuse.MknodRequest) (_ fs.Node, er
} }
func (n *Node) Fsync(ctx context.Context, req *fuse.FsyncRequest) (err error) { func (n *Node) Fsync(ctx context.Context, req *fuse.FsyncRequest) (err error) {
// Synchronize to shadow WAL.
if err := n.Sync(); err != nil {
return err
}
f, err := os.Open(n.srcpath()) f, err := os.Open(n.srcpath())
if err != nil { if err != nil {
return err return err