Basic file replication working with WAL

This commit is contained in:
Ben Johnson
2020-12-24 13:23:52 -07:00
parent 8d7f5b28a9
commit 341eac268c
3 changed files with 417 additions and 27 deletions

187
db.go
View File

@@ -33,6 +33,8 @@ type DB struct {
rtx *sql.Tx // long running read transaction
pageSize int // page size, in bytes
notify chan struct{} // closes on WAL change
ctx context.Context
cancel func()
wg sync.WaitGroup
@@ -61,7 +63,9 @@ type DB struct {
// NewDB returns a new instance of DB for a given path.
func NewDB(path string) *DB {
db := &DB{
path: path,
path: path,
notify: make(chan struct{}),
MinCheckpointPageN: DefaultMinCheckpointPageN,
MonitorInterval: DefaultMonitorInterval,
}
@@ -128,7 +132,23 @@ func (db *DB) CurrentShadowWALPath(generation string) (string, error) {
return filepath.Join(dir, max), nil
}
// Notify returns a channel that closes when the shadow WAL changes.
func (db *DB) Notify() <-chan struct{} {
db.mu.RLock()
defer db.mu.RUnlock()
return db.notify
}
// PageSize returns the page size of the underlying database.
// Only valid after database exists & Init() has successfully run.
func (db *DB) PageSize() int {
db.mu.RLock()
defer db.mu.RUnlock()
return db.pageSize
}
func (db *DB) Open() (err error) {
// Start monitoring SQLite database in a separate goroutine.
db.wg.Add(1)
go func() { defer db.wg.Done(); db.monitor() }()
@@ -215,6 +235,11 @@ func (db *DB) Init() (err error) {
return fmt.Errorf("clean: %w", err)
}
// Start replication.
for _, r := range db.Replicators {
r.Start(db.ctx)
}
return nil
}
@@ -253,6 +278,11 @@ func (db *DB) SoftClose() (err error) {
db.cancel()
db.wg.Wait()
// Ensure replicators all stop replicating.
for _, r := range db.Replicators {
r.Stop()
}
if db.rtx != nil {
if e := db.releaseReadLock(); e != nil && err == nil {
err = e
@@ -344,13 +374,6 @@ func (db *DB) createGeneration() (string, error) {
return "", fmt.Errorf("rename generation file: %w", err)
}
// Issue snapshot by each replicator.
for _, r := range db.Replicators {
if err := r.BeginSnapshot(db.ctx); err != nil {
return "", fmt.Errorf("cannot snapshot %q replicator: %s", r.Name(), err)
}
}
// Remove old generations.
if err := db.clean(); err != nil {
return "", err
@@ -361,8 +384,8 @@ func (db *DB) createGeneration() (string, error) {
// Sync copies pending data from the WAL to the shadow WAL.
func (db *DB) Sync() (err error) {
db.mu.RLock()
defer db.mu.RUnlock()
db.mu.Lock()
defer db.mu.Unlock()
// No database exists, exit.
if db.db == nil {
@@ -400,12 +423,16 @@ func (db *DB) Sync() (err error) {
// Verify our last sync matches the current state of the WAL.
// This ensures that we have an existing generation & that the last sync
// position of the real WAL hasn't been overwritten by another process.
//
// If we are unable to verify the WAL state then we start a new generation.
info, err := db.verify()
if err != nil {
return fmt.Errorf("cannot verify wal state: %w", err)
} else if info.reason != "" {
}
// Track if anything in the shadow WAL changes and then notify at the end.
changed := info.walSize != info.shadowWALSize || info.restart || info.reason != ""
// If we are unable to verify the WAL state then we start a new generation.
if info.reason != "" {
// Start new generation & notify user via log message.
if info.generation, err = db.createGeneration(); err != nil {
return fmt.Errorf("create generation: %w", err)
@@ -417,6 +444,7 @@ func (db *DB) Sync() (err error) {
info.shadowWALSize = WALHeaderSize
info.restart = false
info.reason = ""
}
// Synchronize real WAL with current shadow WAL.
@@ -441,11 +469,19 @@ func (db *DB) Sync() (err error) {
// Issue the checkpoint.
if checkpoint {
changed = true
if err := db.checkpoint(info, forceCheckpoint); err != nil {
return fmt.Errorf("checkpoint: force=%v err=%w", err)
}
}
// Notify replicators of WAL changes.
if changed {
close(db.notify)
db.notify = make(chan struct{})
}
return nil
}
@@ -496,15 +532,12 @@ func (db *DB) verify() (info syncInfo, err error) {
if err != nil {
return info, err
}
info.shadowWALSize = fi.Size()
info.shadowWALSize = frameAlign(fi.Size(), db.pageSize)
// Truncate shadow WAL if there is a partial page.
// Exit if shadow WAL does not contain a full header.
frameSize := int64(db.pageSize) + WALFrameHeaderSize
if info.shadowWALSize < WALHeaderSize {
return info, fmt.Errorf("short shadow wal: %s", info.shadowWALPath)
} else if (info.shadowWALSize-WALHeaderSize)%frameSize != 0 {
info.shadowWALSize = ((info.shadowWALSize - WALHeaderSize) / frameSize) + WALHeaderSize
}
// If shadow WAL is larger than real WAL then the WAL has been truncated
@@ -523,7 +556,12 @@ func (db *DB) verify() (info syncInfo, err error) {
info.restart = !bytes.Equal(hdr0, hdr1)
}
// TODO: Handle checkpoint sequence number rollover.
// If we only have a header then ensure header matches.
// Otherwise we need to start a new generation.
if info.shadowWALSize == WALHeaderSize && info.restart {
info.reason = "wal header only, mismatched"
return info, nil
}
// Verify last page synced still matches.
if info.shadowWALSize > WALHeaderSize {
@@ -699,6 +737,114 @@ func (db *DB) copyToShadowWAL(filename string) (newSize int64, err error) {
return newSize, nil
}
// WALReader opens a reader for a shadow WAL file at a given position.
// If the reader is at the end of the file, it attempts to return the next file.
//
// The caller should check Pos() & Size() on the returned reader to check offset.
func (db *DB) WALReader(pos Pos) (r *WALReader, err error) {
// Fetch reader for the requested position. Return if it has data.
r, err = db.walReader(pos)
if err != nil {
return nil, err
} else if r.N() > 0 {
return r, nil
}
// Otherwise attempt to read the start of the next WAL file.
pos.Index, pos.Offset = pos.Index+1, 0
r, err = db.walReader(pos)
if os.IsNotExist(err) {
return nil, io.EOF
}
return r, err
}
// walReader opens a file reader for a shadow WAL file at a given position.
func (db *DB) walReader(pos Pos) (r *WALReader, err error) {
filename := db.ShadowWALPath(pos.Generation, pos.Index)
f, err := os.Open(filename)
if err != nil {
return nil, err
}
// Ensure file is closed if any error occurs.
defer func() {
if err != nil {
r.Close()
}
}()
// Fetch frame-aligned file size and ensure requested offset is not past EOF.
fi, err := f.Stat()
if err != nil {
return nil, err
}
fileSize := frameAlign(fi.Size(), db.pageSize)
if pos.Offset > fileSize {
return nil, fmt.Errorf("wal reader offset too high: %d > %d", pos.Offset, fi.Size())
}
// Move file handle to offset position.
if _, err := f.Seek(pos.Offset, io.SeekStart); err != nil {
return nil, err
}
return &WALReader{
f: f,
n: fileSize - pos.Offset,
pos: pos,
}, nil
}
// frameAlign returns a frame-aligned offset.
// Returns zero if offset is less than the WAL header size.
func frameAlign(offset int64, pageSize int) int64 {
assert(offset >= 0, "frameAlign(): offset must be non-negative")
assert(pageSize >= 0, "frameAlign(): page size must be non-negative")
if offset < WALHeaderSize {
return 0
}
frameSize := WALFrameHeaderSize + int64(pageSize)
frameN := (offset - WALHeaderSize) / frameSize
return (frameN * frameSize) + WALHeaderSize
}
// WALReader represents a reader for a WAL file that tracks WAL position.
type WALReader struct {
f *os.File
n int64
pos Pos
}
// Close closes the underlying WAL file handle.
func (r *WALReader) Close() error { return r.f.Close() }
// N returns the remaining bytes in the reader.
func (r *WALReader) N() int64 { return r.n }
// Pos returns the current WAL position.
func (r *WALReader) Pos() Pos { return r.pos }
// Read reads bytes into p, updates the position, and returns the bytes read.
// Returns io.EOF at the end of the available section of the WAL.
func (r *WALReader) Read(p []byte) (n int, err error) {
if r.n <= 0 {
return 0, io.EOF
}
if int64(len(p)) > r.n {
p = p[0:r.n]
}
n, err = r.f.Read(p)
r.n -= int64(n)
r.pos.Offset += int64(n)
return n, err
}
const WALHeaderChecksumOffset = 24
const WALFrameHeaderChecksumOffset = 16
@@ -770,6 +916,11 @@ func (db *DB) checkpoint(info syncInfo, force bool) error {
return nil
}
// Copy the end of the previous WAL before starting a new shadow WAL.
if _, err := db.copyToShadowWAL(info.shadowWALPath); err != nil {
return fmt.Errorf("cannot copy to end of shadow wal: %w", err)
}
// Parse index of current shadow WAL file.
dir, base := filepath.Split(info.shadowWALPath)
index, err := ParseWALFilename(base)