all: Reorder sequences for better rename detection (#6574)

Audrius Butkevicius 2020-05-11 19:15:11 +01:00 committed by GitHub
parent aac3750298
commit decb967969
9 changed files with 409 additions and 42 deletions

lib/db/keyer.go

@@ -62,6 +62,9 @@ const (
// KeyTypeBlockList <block list hash> = BlockList
KeyTypeBlockList = 13
// KeyTypeBlockListMap <int32 folder ID> <block list hash> <file name> = <nothing>
KeyTypeBlockListMap = 14
)
type keyer interface {
@@ -79,6 +82,8 @@ type keyer interface {
// block map key stuff (former BlockMap)
GenerateBlockMapKey(key, folder, hash, name []byte) (blockMapKey, error)
NameFromBlockMapKey(key []byte) []byte
GenerateBlockListMapKey(key, folder, hash, name []byte) (blockListMapKey, error)
NameFromBlockListMapKey(key []byte) []byte
// file need index
GenerateNeedFileKey(key, folder, name []byte) (needFileKey, error)
@@ -203,6 +208,29 @@ func (k blockMapKey) WithoutHashAndName() []byte {
return k[:keyPrefixLen+keyFolderLen]
}
type blockListMapKey []byte
func (k defaultKeyer) GenerateBlockListMapKey(key, folder, hash, name []byte) (blockListMapKey, error) {
folderID, err := k.folderIdx.ID(folder)
if err != nil {
return nil, err
}
key = resize(key, keyPrefixLen+keyFolderLen+keyHashLen+len(name))
key[0] = KeyTypeBlockListMap
binary.BigEndian.PutUint32(key[keyPrefixLen:], folderID)
copy(key[keyPrefixLen+keyFolderLen:], hash)
copy(key[keyPrefixLen+keyFolderLen+keyHashLen:], name)
return key, nil
}
func (k defaultKeyer) NameFromBlockListMapKey(key []byte) []byte {
return key[keyPrefixLen+keyFolderLen+keyHashLen:]
}
func (k blockListMapKey) WithoutHashAndName() []byte {
return k[:keyPrefixLen+keyFolderLen]
}
type needFileKey []byte
func (k needFileKey) WithoutName() []byte {
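
The new KeyTypeBlockListMap rows are pure index entries: the value is empty and everything lives in the key, laid out as one type byte, a 4-byte big-endian folder index, the 32-byte block list hash, and finally the variable-length file name. A standalone sketch of that layout — the constants and the literal 14 mirror the definitions above, everything else is illustrative:

package main

import (
	"encoding/binary"
	"fmt"
)

const (
	keyPrefixLen = 1  // single type byte, KeyTypeBlockListMap = 14
	keyFolderLen = 4  // uint32 folder index
	keyHashLen   = 32 // hash of the block list
)

// makeBlockListMapKey mirrors GenerateBlockListMapKey minus the folder
// index lookup: type byte, folder ID, hash, then the name.
func makeBlockListMapKey(folderID uint32, hash []byte, name string) []byte {
	key := make([]byte, keyPrefixLen+keyFolderLen+keyHashLen+len(name))
	key[0] = 14 // KeyTypeBlockListMap
	binary.BigEndian.PutUint32(key[keyPrefixLen:], folderID)
	copy(key[keyPrefixLen+keyFolderLen:], hash)
	copy(key[keyPrefixLen+keyFolderLen+keyHashLen:], name)
	return key
}

func main() {
	key := makeBlockListMapKey(1, make([]byte, keyHashLen), "some/file")
	// Slicing off the fixed-size prefix recovers the name, which is all
	// NameFromBlockListMapKey does.
	fmt.Printf("%q\n", string(key[keyPrefixLen+keyFolderLen+keyHashLen:]))
}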

lib/db/lowlevel.go

@@ -195,6 +195,7 @@ func (db *Lowlevel) updateLocalFiles(folder []byte, fs []protocol.FileInfo, meta
if ok && unchanged(f, ef) {
continue
}
blocksHashSame := ok && bytes.Equal(ef.BlocksHash, f.BlocksHash)
if ok {
if !ef.IsDirectory() && !ef.IsDeleted() && !ef.IsInvalid() {
@@ -207,6 +208,15 @@ func (db *Lowlevel) updateLocalFiles(folder []byte, fs []protocol.FileInfo, meta
return err
}
}
if !blocksHashSame {
keyBuf, err := db.keyer.GenerateBlockListMapKey(keyBuf, folder, ef.BlocksHash, name)
if err != nil {
return err
}
if err = t.Delete(keyBuf); err != nil {
return err
}
}
}
keyBuf, err = db.keyer.GenerateSequenceKey(keyBuf, folder, ef.SequenceNo())
@@ -260,6 +270,15 @@ func (db *Lowlevel) updateLocalFiles(folder []byte, fs []protocol.FileInfo, meta
return err
}
}
if !blocksHashSame {
keyBuf, err := db.keyer.GenerateBlockListMapKey(keyBuf, folder, f.BlocksHash, name)
if err != nil {
return err
}
if err = t.Put(keyBuf, nil); err != nil {
return err
}
}
}
if err := t.Checkpoint(func() error {
@@ -296,7 +315,7 @@ func (db *Lowlevel) dropFolder(folder []byte) error {
}
// Remove all sequences related to the folder
k1, err := db.keyer.GenerateSequenceKey(nil, folder, 0)
k1, err := db.keyer.GenerateSequenceKey(k0, folder, 0)
if err != nil {
return err
}
@@ -305,7 +324,7 @@ func (db *Lowlevel) dropFolder(folder []byte) error {
}
// Remove all items related to the given folder from the global bucket
k2, err := db.keyer.GenerateGlobalVersionKey(nil, folder, nil)
k2, err := db.keyer.GenerateGlobalVersionKey(k1, folder, nil)
if err != nil {
return err
}
@@ -314,7 +333,7 @@ func (db *Lowlevel) dropFolder(folder []byte) error {
}
// Remove all needs related to the folder
k3, err := db.keyer.GenerateNeedFileKey(nil, folder, nil)
k3, err := db.keyer.GenerateNeedFileKey(k2, folder, nil)
if err != nil {
return err
}
@@ -323,7 +342,7 @@ func (db *Lowlevel) dropFolder(folder []byte) error {
}
// Remove the blockmap of the folder
k4, err := db.keyer.GenerateBlockMapKey(nil, folder, nil, nil)
k4, err := db.keyer.GenerateBlockMapKey(k3, folder, nil, nil)
if err != nil {
return err
}
@@ -331,6 +350,14 @@ func (db *Lowlevel) dropFolder(folder []byte) error {
return err
}
k5, err := db.keyer.GenerateBlockListMapKey(k4, folder, nil, nil)
if err != nil {
return err
}
if err := t.deleteKeyPrefix(k5.WithoutHashAndName()); err != nil {
return err
}
return t.Commit()
}
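
The nil → k0…k5 changes in dropFolder are an allocation tweak rather than a behaviour change: each key generator now receives the previous key as scratch space, so one backing array is reused across all the prefix deletes. A plausible shape for the resize helper these generators rely on (the real implementation lives elsewhere in this package):

// resize returns a slice of length n, reusing b's backing array when its
// capacity allows and allocating afresh otherwise. Threading the previous
// key through successive GenerateXxxKey calls keeps allocations down.
func resize(b []byte, n int) []byte {
	if cap(b) < n {
		return make([]byte, n)
	}
	return b[:n]
}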
@@ -383,6 +410,13 @@ func (db *Lowlevel) dropDeviceFolder(device, folder []byte, meta *metadataTracke
if err := t.deleteKeyPrefix(key.WithoutHashAndName()); err != nil {
return err
}
key2, err := db.keyer.GenerateBlockListMapKey(key, folder, nil, nil)
if err != nil {
return err
}
if err := t.deleteKeyPrefix(key2.WithoutHashAndName()); err != nil {
return err
}
}
return t.Commit()
}
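
Taken together, the updateLocalFiles hunks above maintain the block list map as a secondary index: when a file's BlocksHash changes, the entry under the old hash is deleted and one under the new hash is written; when it is unchanged, the index is left alone. A minimal sketch of that invariant, with kv standing in for the real transaction type:

// kv is a stand-in for the write transaction used above.
type kv interface {
	Put(key, val []byte) error
	Delete(key []byte) error
}

// syncIndexEntry drops the stale mapping and writes the new one, keeping
// the hash -> names index consistent with the file's current BlocksHash.
func syncIndexEntry(t kv, oldKey, newKey []byte, hashChanged bool) error {
	if !hashChanged {
		return nil // existing index entry is still valid
	}
	if oldKey != nil {
		if err := t.Delete(oldKey); err != nil {
			return err
		}
	}
	return t.Put(newKey, nil) // empty value: the key is the data
}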

lib/db/schemaupdater.go

@@ -22,9 +22,9 @@ import (
// 6: v0.14.50
// 7: v0.14.53
// 8-9: v1.4.0
// 10: v1.6.0
// 10-11: v1.6.0
const (
dbVersion = 10
dbVersion = 11
dbMinSyncthingVersion = "v1.6.0"
)
@@ -85,8 +85,9 @@ func (db *schemaUpdater) updateSchema() error {
{5, db.updateSchemaTo5},
{6, db.updateSchema5to6},
{7, db.updateSchema6to7},
{9, db.updateSchemato9},
{10, db.updateSchemato10},
{9, db.updateSchemaTo9},
{10, db.updateSchemaTo10},
{11, db.updateSchemaTo11},
}
for _, m := range migrations {
@@ -450,7 +451,7 @@ func (db *schemaUpdater) updateSchema6to7(_ int) error {
return t.Commit()
}
func (db *schemaUpdater) updateSchemato9(prev int) error {
func (db *schemaUpdater) updateSchemaTo9(prev int) error {
// Loads and rewrites all files with blocks, to deduplicate block lists.
// Checks for missing or incorrect sequence entries and rewrites those.
@@ -499,7 +500,7 @@ func (db *schemaUpdater) updateSchemato9(prev int) error {
return t.Commit()
}
func (db *schemaUpdater) updateSchemato10(_ int) error {
func (db *schemaUpdater) updateSchemaTo10(_ int) error {
t, err := db.newReadWriteTransaction()
if err != nil {
return err
@@ -568,3 +569,44 @@ func (db *schemaUpdater) updateSchemato10(_ int) error {
return t.Commit()
}
func (db *schemaUpdater) updateSchemaTo11(_ int) error {
// Populates block list map for every folder.
t, err := db.newReadWriteTransaction()
if err != nil {
return err
}
defer t.close()
var dk []byte
for _, folderStr := range db.ListFolders() {
folder := []byte(folderStr)
var putErr error
err := t.withHave(folder, protocol.LocalDeviceID[:], nil, true, func(fi FileIntf) bool {
f := fi.(FileInfoTruncated)
if f.IsDirectory() || f.IsDeleted() || f.IsInvalid() || f.BlocksHash == nil {
return true
}
name := []byte(f.FileName())
dk, putErr = db.keyer.GenerateBlockListMapKey(dk, folder, f.BlocksHash, name)
if putErr != nil {
return false
}
if putErr = t.Put(dk, nil); putErr != nil {
return false
}
putErr = t.Checkpoint()
return putErr == nil
})
if putErr != nil {
return putErr
}
if err != nil {
return err
}
}
return t.Commit()
}
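
updateSchemaTo11 follows the established migration shape: bump dbVersion, then backfill the new index in a single pass over existing data, checkpointing periodically so a crash mid-migration does not lose all progress. The dispatch in updateSchema plausibly reduces to a loop like this sketch (names illustrative; the real loop also persists the new version and the minimum Syncthing version):

type migration struct {
	targetVersion int
	apply         func(prev int) error
}

// runMigrations applies, in order, every migration the current schema
// version has not reached yet, and returns the resulting version.
func runMigrations(current int, migrations []migration) (int, error) {
	for _, m := range migrations {
		if current >= m.targetVersion {
			continue // already at or past this schema version
		}
		if err := m.apply(current); err != nil {
			return current, err
		}
		current = m.targetVersion
	}
	return current, nil
}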

lib/db/set.go

@@ -365,6 +365,13 @@ func (s *Snapshot) RemoteNeedFolderFiles(device protocol.DeviceID, page, perpage
return files
}
func (s *Snapshot) WithBlocksHash(hash []byte, fn Iterator) {
l.Debugf(`%s WithBlocksHash("%x")`, s.folder, hash)
if err := s.t.withBlocksHash([]byte(s.folder), hash, nativeFileIterator(fn)); err != nil && !backend.IsClosed(err) {
panic(err)
}
}
func (s *FileSet) Sequence(device protocol.DeviceID) int64 {
return s.meta.Sequence(device)
}
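
WithBlocksHash exposes the new index to callers: it iterates all local files whose block list hashes to the given value, which is precisely the lookup rename detection needs. A hypothetical helper built on it, assuming it sits alongside the db package's own types:

// namesWithSameBlocks collects the names of all local files in the
// snapshot sharing the given block list hash.
func namesWithSameBlocks(snap *Snapshot, hash []byte) []string {
	var names []string
	snap.WithBlocksHash(hash, func(fi FileIntf) bool {
		names = append(names, fi.FileName())
		return true // keep iterating
	})
	return names
}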

lib/db/transactions.go

@@ -9,6 +9,7 @@ package db
import (
"bytes"
"errors"
"github.com/syncthing/syncthing/lib/osutil"
"github.com/syncthing/syncthing/lib/db/backend"
"github.com/syncthing/syncthing/lib/protocol"
@@ -304,6 +305,47 @@ func (t *readOnlyTransaction) withGlobal(folder, prefix []byte, truncate bool, f
return dbi.Error()
}
func (t *readOnlyTransaction) withBlocksHash(folder, hash []byte, iterator Iterator) error {
key, err := t.keyer.GenerateBlockListMapKey(nil, folder, hash, nil)
if err != nil {
return err
}
iter, err := t.NewPrefixIterator(key)
if err != nil {
return err
}
defer iter.Release()
for iter.Next() {
file := string(t.keyer.NameFromBlockListMapKey(iter.Key()))
f, ok, err := t.getFile(folder, protocol.LocalDeviceID[:], []byte(osutil.NormalizedFilename(file)))
if err != nil {
return err
}
if !ok {
continue
}
f.Name = osutil.NativeFilename(f.Name)
if !bytes.Equal(f.BlocksHash, hash) {
l.Warnf("Mismatching block map list hashes: got %x expected %x", f.BlocksHash, hash)
continue
}
if f.IsDeleted() || f.IsInvalid() || f.IsDirectory() || f.IsSymlink() {
l.Warnf("Found something of unexpected type in block list map: %s", f)
continue
}
if !iterator(f) {
break
}
}
return iter.Error()
}
func (t *readOnlyTransaction) availability(folder, file []byte) ([]protocol.DeviceID, error) {
vl, err := t.getGlobalVersions(nil, folder, file)
if backend.IsNotFound(err) {
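
Because the key is generated with a nil name, it ends right after the hash, and the prefix iterator therefore visits exactly the entries recorded for that hash. Each hit is then re-read and validated against the live file record, so a stale index entry degrades into a warning rather than bad data. The scan reduces to this generic shape (the interfaces here are assumed stand-ins, not the backend's real API):

// prefixIterator is a stand-in for the backend iterator used above.
type prefixIterator interface {
	Next() bool
	Key() []byte
	Release()
	Error() error
}

// forEachIndexedName walks a prefix iterator, extracts the trailing name
// from each key, and hands it to fn until fn returns false.
func forEachIndexedName(iter prefixIterator, nameFrom func([]byte) []byte, fn func(name string) bool) error {
	defer iter.Release()
	for iter.Next() {
		if !fn(string(nameFrom(iter.Key()))) {
			break
		}
	}
	return iter.Error()
}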

lib/model/folder.go

@@ -464,6 +464,13 @@ func (f *folder) scanSubdirs(subDirs []string) error {
batch.append(res.File)
changes++
if f.localFlags&protocol.FlagLocalReceiveOnly == 0 {
if nf, ok := f.findRename(snap, mtimefs, res.File); ok {
batch.append(nf)
changes++
}
}
}
if err := batch.flush(); err != nil {
@@ -615,6 +622,48 @@ func (f *folder) scanSubdirs(subDirs []string) error {
return nil
}
func (f *folder) findRename(snap *db.Snapshot, mtimefs fs.Filesystem, file protocol.FileInfo) (protocol.FileInfo, bool) {
found := false
nf := protocol.FileInfo{}
snap.WithBlocksHash(file.BlocksHash, func(ifi db.FileIntf) bool {
fi := ifi.(protocol.FileInfo)
select {
case <-f.ctx.Done():
return false
default:
}
if fi.ShouldConflict() {
return true
}
if f.ignores.Match(fi.Name).IsIgnored() {
return true
}
// Only check the size.
// No point checking block equality: BlocksEqual just compares BlocksHash when it is set
// (which it will be here), and WithBlocksHash has already matched on that hash.
if file.Size != fi.Size {
return true
}
if !osutil.IsDeleted(mtimefs, fi.Name) {
return true
}
nf = fi
nf.SetDeleted(f.shortID)
nf.LocalFlags = f.localFlags
found = true
return false
})
return nf, found
}
func (f *folder) scanTimerFired() {
err := f.scanSubdirs(nil)
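
findRename turns a detected addition plus a same-content file that has vanished from disk into a synthetic delete record, so the add and the delete travel together. Its candidate filter can be restated as a pure predicate — a clarifying sketch, not a helper that exists in the code; the ignored and deletedOnDisk callbacks stand in for f.ignores.Match and osutil.IsDeleted:

// isRenameSource reports whether a database entry qualifies as the rename
// source: not a conflict, not ignored, same size, and actually gone from disk.
func isRenameSource(fi protocol.FileInfo, size int64, ignored, deletedOnDisk func(string) bool) bool {
	if fi.ShouldConflict() || ignored(fi.Name) {
		return false
	}
	if fi.Size != size {
		return false // different content length, cannot be the same file
	}
	return deletedOnDisk(fi.Name)
}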

lib/model/folder_sendrecv.go

@@ -355,7 +355,7 @@ func (f *sendReceiveFolder) processNeeded(snap *db.Snapshot, dbUpdateChan chan<-
if ok && !df.IsDeleted() && !df.IsSymlink() && !df.IsDirectory() && !df.IsInvalid() {
fileDeletions[file.Name] = file
// Put files into buckets per first hash
key := string(df.Blocks[0].Hash)
key := string(df.BlocksHash)
buckets[key] = append(buckets[key], df)
} else {
f.deleteFileWithCurrent(file, df, ok, dbUpdateChan, scanChan)
@@ -458,30 +458,28 @@ nextFile:
// Check our list of files to be removed for a match, in which case
// we can just do a rename instead.
key := string(fi.Blocks[0].Hash)
key := string(fi.BlocksHash)
for i, candidate := range buckets[key] {
if candidate.BlocksEqual(fi) {
// Remove the candidate from the bucket
lidx := len(buckets[key]) - 1
buckets[key][i] = buckets[key][lidx]
buckets[key] = buckets[key][:lidx]
// Remove the candidate from the bucket
lidx := len(buckets[key]) - 1
buckets[key][i] = buckets[key][lidx]
buckets[key] = buckets[key][:lidx]
// candidate is our current state of the file, whereas the
// desired state with the delete bit set is in the deletion
// map.
desired := fileDeletions[candidate.Name]
if err := f.renameFile(candidate, desired, fi, snap, dbUpdateChan, scanChan); err != nil {
// Failed to rename, try to handle files as separate
// deletions and updates.
break
}
// Remove the pending deletion (as we performed it by renaming)
delete(fileDeletions, candidate.Name)
f.queue.Done(fileName)
continue nextFile
// candidate is our current state of the file, whereas the
// desired state with the delete bit set is in the deletion
// map.
desired := fileDeletions[candidate.Name]
if err := f.renameFile(candidate, desired, fi, snap, dbUpdateChan, scanChan); err != nil {
l.Debugln("rename shortcut for %s failed: %S", fi.Name, err.Error())
// Failed to rename, try next one.
continue
}
// Remove the pending deletion (as we performed it by renaming)
delete(fileDeletions, candidate.Name)
f.queue.Done(fileName)
continue nextFile
}
devices := snap.Availability(fileName)
@ -1181,6 +1179,16 @@ func (f *sendReceiveFolder) copierRoutine(in <-chan copyBlocksState, pullChan ch
protocol.BufferPool.Put(buf)
}()
folderFilesystems := make(map[string]fs.Filesystem)
// The block source is most likely in the same folder, so check that one first.
folders := []string{f.folderID}
for folder, cfg := range f.model.cfg.Folders() {
folderFilesystems[folder] = cfg.Filesystem()
if folder != f.folderID {
folders = append(folders, folder)
}
}
for state := range in {
if err := f.CheckAvailableSpace(state.file.Size); err != nil {
state.fail(err)
@ -1198,13 +1206,6 @@ func (f *sendReceiveFolder) copierRoutine(in <-chan copyBlocksState, pullChan ch
f.model.progressEmitter.Register(state.sharedPullerState)
folderFilesystems := make(map[string]fs.Filesystem)
var folders []string
for folder, cfg := range f.model.cfg.Folders() {
folderFilesystems[folder] = cfg.Filesystem()
folders = append(folders, folder)
}
var file fs.File
var weakHashFinder *weakhash.Finder
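
Two changes land here: deletion candidates are bucketed by the full BlocksHash instead of the first block's hash, and a failed rename now tries the next candidate rather than abandoning the shortcut. Removing the chosen candidate from its bucket uses the classic swap-remove idiom, which is O(1) because order within a bucket is irrelevant:

// swapRemove drops element i by overwriting it with the last element and
// truncating; cheap, but it does not preserve slice order.
func swapRemove(s []protocol.FileInfo, i int) []protocol.FileInfo {
	last := len(s) - 1
	s[i] = s[last]
	return s[:last]
}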

lib/model/model.go

@@ -1926,9 +1926,15 @@ func (s *indexSender) sendIndexTo(ctx context.Context) error {
var f protocol.FileInfo
snap := s.fset.Snapshot()
defer snap.Release()
previousWasDelete := false
snap.WithHaveSequence(s.prevSequence+1, func(fi db.FileIntf) bool {
if err = batch.flushIfFull(); err != nil {
return false
// This is to make sure that renames (which are an add followed by a delete) land in the same batch.
// Even if the batch is full, we allow a last delete to slip in: we only flush a full batch when the
// incoming item is a non-delete, or when the previous item was already a delete.
if batch.full() && (!fi.IsDeleted() || previousWasDelete) {
if err = batch.flush(); err != nil {
return false
}
}
if shouldDebug() {
@@ -1964,6 +1970,8 @@ func (s *indexSender) sendIndexTo(ctx context.Context) error {
}
f.LocalFlags = 0 // never sent externally
previousWasDelete = f.IsDeleted()
batch.append(f)
return true
})
@@ -2607,8 +2615,12 @@ func (b *fileInfoBatch) append(f protocol.FileInfo) {
b.size += f.ProtoSize()
}
func (b *fileInfoBatch) full() bool {
return len(b.infos) >= maxBatchSizeFiles || b.size >= maxBatchSizeBytes
}
func (b *fileInfoBatch) flushIfFull() error {
if len(b.infos) >= maxBatchSizeFiles || b.size >= maxBatchSizeBytes {
if b.full() {
return b.flush()
}
return nil
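
The flush condition above reads as a single boolean: flush a full batch only when the incoming item is not a delete, or when the previous item already was one. A rename, which the scanner now emits as an add immediately followed by a delete, therefore never straddles a batch boundary, at the cost of a batch occasionally exceeding its nominal size by one delete entry. As a sketch:

// shouldFlushBefore mirrors the condition guarding batch.flush() in
// sendIndexTo; the names are illustrative.
func shouldFlushBefore(batchFull, nextIsDelete, previousWasDelete bool) bool {
	return batchFull && (!nextIsDelete || previousWasDelete)
}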

lib/model/model_test.go

@@ -18,6 +18,7 @@ import (
"path/filepath"
"runtime"
"runtime/pprof"
"sort"
"strconv"
"strings"
"sync"
@@ -3537,3 +3538,154 @@ func TestFolderAPIErrors(t *testing.T) {
}
}
}
func TestRenameSequenceOrder(t *testing.T) {
wcfg, fcfg := tmpDefaultWrapper()
m := setupModel(wcfg)
defer cleanupModel(m)
numFiles := 20
ffs := fcfg.Filesystem()
for i := 0; i < numFiles; i++ {
v := fmt.Sprintf("%d", i)
must(t, writeFile(ffs, v, []byte(v), 0644))
}
m.ScanFolders()
count := 0
snap := dbSnapshot(t, m, "default")
snap.WithHave(protocol.LocalDeviceID, func(i db.FileIntf) bool {
count++
return true
})
snap.Release()
if count != numFiles {
t.Errorf("Unexpected count: %d != %d", count, numFiles)
}
// Modify all the files, other than the ones we expect to rename
for i := 0; i < numFiles; i++ {
if i == 3 || i == 17 || i == 16 || i == 4 {
continue
}
v := fmt.Sprintf("%d", i)
must(t, writeFile(ffs, v, []byte(v+"-new"), 0644))
}
// Rename
must(t, ffs.Rename("3", "17"))
must(t, ffs.Rename("16", "4"))
// Scan
m.ScanFolders()
// Verify that, in sequence order, each rename target appearing is immediately followed by its source disappearing.
snap = dbSnapshot(t, m, "default")
defer snap.Release()
var firstExpectedSequence int64
var secondExpectedSequence int64
failed := false
snap.WithHaveSequence(0, func(i db.FileIntf) bool {
t.Log(i)
if i.FileName() == "17" {
firstExpectedSequence = i.SequenceNo() + 1
}
if i.FileName() == "4" {
secondExpectedSequence = i.SequenceNo() + 1
}
if i.FileName() == "3" {
failed = i.SequenceNo() != firstExpectedSequence || failed
}
if i.FileName() == "16" {
failed = i.SequenceNo() != secondExpectedSequence || failed
}
return true
})
if failed {
t.Fail()
}
}
func TestBlockListMap(t *testing.T) {
wcfg, fcfg := tmpDefaultWrapper()
m := setupModel(wcfg)
defer cleanupModel(m)
ffs := fcfg.Filesystem()
must(t, writeFile(ffs, "one", []byte("content"), 0644))
must(t, writeFile(ffs, "two", []byte("content"), 0644))
must(t, writeFile(ffs, "three", []byte("content"), 0644))
must(t, writeFile(ffs, "four", []byte("content"), 0644))
must(t, writeFile(ffs, "five", []byte("content"), 0644))
m.ScanFolders()
snap := dbSnapshot(t, m, "default")
defer snap.Release()
fi, ok := snap.Get(protocol.LocalDeviceID, "one")
if !ok {
t.Error("failed to find existing file")
}
var paths []string
snap.WithBlocksHash(fi.BlocksHash, func(fi db.FileIntf) bool {
paths = append(paths, fi.FileName())
return true
})
snap.Release()
expected := []string{"one", "two", "three", "four", "five"}
if !equalStringsInAnyOrder(paths, expected) {
t.Errorf("expected %q got %q", expected, paths)
}
// Fudge the files around
// Remove
must(t, ffs.Remove("one"))
// Modify
must(t, ffs.Remove("two"))
must(t, writeFile(ffs, "two", []byte("mew-content"), 0644))
// Rename
must(t, ffs.Rename("three", "new-three"))
// Change type
must(t, ffs.Remove("four"))
must(t, ffs.Mkdir("four", 0644))
m.ScanFolders()
// Check we're left with 2 of the 5
snap = dbSnapshot(t, m, "default")
defer snap.Release()
paths = paths[:0]
snap.WithBlocksHash(fi.BlocksHash, func(fi db.FileIntf) bool {
paths = append(paths, fi.FileName())
return true
})
snap.Release()
expected = []string{"new-three", "five"}
if !equalStringsInAnyOrder(paths, expected) {
t.Errorf("expected %q got %q", expected, paths)
}
}
func equalStringsInAnyOrder(a, b []string) bool {
if len(a) != len(b) {
return false
}
sort.Strings(a)
sort.Strings(b)
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
}
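
One subtlety in the helper above: sort.Strings sorts in place, so equalStringsInAnyOrder reorders both of the caller's slices. That is harmless in these tests, but a non-mutating variant would copy first, along these lines (a hypothetical alternative, not part of the change):

// equalStringsInAnyOrderCopy behaves like the helper above but leaves the
// callers' slices untouched.
func equalStringsInAnyOrderCopy(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	ac := append([]string(nil), a...) // copy before sorting
	bc := append([]string(nil), b...)
	sort.Strings(ac)
	sort.Strings(bc)
	for i := range ac {
		if ac[i] != bc[i] {
			return false
		}
	}
	return true
}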