More graceful handling on folder errors (fixes #762)

Checks health before accepting every scanner batch, also
recovers from errors without having to restart.
This commit is contained in:
Audrius Butkevicius 2015-03-28 14:25:42 +00:00 committed by Jakob Borg
parent 34ba5678c3
commit 7406176fad
8 changed files with 370 additions and 103 deletions

View File

@ -515,8 +515,6 @@ func syncthingMain() {
m := model.NewModel(cfg, myID, myName, "syncthing", Version, ldb)
sanityCheckFolders(cfg, m)
// GUI
setupGUI(cfg, m)
@ -525,9 +523,7 @@ func syncthingMain() {
// start needing a bunch of files which are nowhere to be found. This
// needs to be changed when we correctly do persistent indexes.
for _, folderCfg := range cfg.Folders() {
if folderCfg.Invalid != "" {
continue
}
m.AddFolder(folderCfg)
for _, device := range folderCfg.DeviceIDs() {
if device == myID {
continue
@ -556,10 +552,6 @@ func syncthingMain() {
go listenConnect(myID, m, tlsCfg)
for _, folder := range cfg.Folders() {
if folder.Invalid != "" {
continue
}
// Routine to pull blocks from other devices to synchronize the local
// folder. Does not run when we are in read only (publish only) mode.
if folder.ReadOnly {
@ -676,55 +668,6 @@ func setupGUI(cfg *config.Wrapper, m *model.Model) {
}
}
func sanityCheckFolders(cfg *config.Wrapper, m *model.Model) {
nextFolder:
for id, folder := range cfg.Folders() {
if folder.Invalid != "" {
continue
}
m.AddFolder(folder)
fi, err := os.Stat(folder.Path)
if m.CurrentLocalVersion(id) > 0 {
// Safety check. If the cached index contains files but the
// folder doesn't exist, we have a problem. We would assume
// that all files have been deleted which might not be the case,
// so mark it as invalid instead.
if err != nil || !fi.IsDir() {
l.Warnf("Stopping folder %q - path does not exist, but has files in index", folder.ID)
cfg.InvalidateFolder(id, "folder path missing")
continue nextFolder
} else if !folder.HasMarker() {
l.Warnf("Stopping folder %q - path exists, but folder marker missing, check for mount issues", folder.ID)
cfg.InvalidateFolder(id, "folder marker missing")
continue nextFolder
}
} else if os.IsNotExist(err) {
// If we don't have any files in the index, and the directory
// doesn't exist, try creating it.
err = os.MkdirAll(folder.Path, 0700)
if err != nil {
l.Warnf("Stopping folder %q - %v", folder.ID, err)
cfg.InvalidateFolder(id, err.Error())
continue nextFolder
}
err = folder.CreateMarker()
} else if !folder.HasMarker() {
// If we don't have any files in the index, and the path does exist
// but the marker is not there, create it.
err = folder.CreateMarker()
}
if err != nil {
// If there was another error or we could not create the
// path, the folder is invalid.
l.Warnf("Stopping folder %q - %v", folder.ID, err)
cfg.InvalidateFolder(id, err.Error())
continue nextFolder
}
}
}
func defaultConfig(myName string) config.Configuration {
defaultFolder, err := osutil.ExpandTilde("~/Sync")
if err != nil {

View File

@ -19,7 +19,7 @@ import (
"github.com/syndtr/goleveldb/leveldb/storage"
)
func TestSanityCheck(t *testing.T) {
func TestFolderErrors(t *testing.T) {
fcfg := config.FolderConfiguration{
ID: "folder",
Path: "testdata/testfolder",
@ -28,7 +28,8 @@ func TestSanityCheck(t *testing.T) {
Folders: []config.FolderConfiguration{fcfg},
})
for _, file := range []string{".stfolder", "testfolder", "testfolder/.stfolder"} {
for _, file := range []string{".stfolder", "testfolder/.stfolder", "testfolder"} {
os.Remove("testdata/" + file)
_, err := os.Stat("testdata/" + file)
if err == nil {
t.Error("Found unexpected file")
@ -40,9 +41,9 @@ func TestSanityCheck(t *testing.T) {
// Case 1 - new folder, directory and marker created
m := model.NewModel(cfg, protocol.LocalDeviceID, "device", "syncthing", "dev", ldb)
sanityCheckFolders(cfg, m)
m.AddFolder(fcfg)
if cfg.Folders()["folder"].Invalid != "" {
if err := m.CheckFolderHealth("folder"); err != nil {
t.Error("Unexpected error", cfg.Folders()["folder"].Invalid)
}
@ -67,9 +68,9 @@ func TestSanityCheck(t *testing.T) {
})
m = model.NewModel(cfg, protocol.LocalDeviceID, "device", "syncthing", "dev", ldb)
sanityCheckFolders(cfg, m)
m.AddFolder(fcfg)
if cfg.Folders()["folder"].Invalid != "" {
if err := m.CheckFolderHealth("folder"); err != nil {
t.Error("Unexpected error", cfg.Folders()["folder"].Invalid)
}
@ -80,7 +81,7 @@ func TestSanityCheck(t *testing.T) {
os.Remove("testdata/.stfolder")
// Case 3 - marker missing
// Case 3 - Folder marker missing
set := db.NewFileSet("folder", ldb)
set.Update(protocol.LocalDeviceID, []protocol.FileInfo{
@ -88,23 +89,54 @@ func TestSanityCheck(t *testing.T) {
})
m = model.NewModel(cfg, protocol.LocalDeviceID, "device", "syncthing", "dev", ldb)
sanityCheckFolders(cfg, m)
m.AddFolder(fcfg)
if cfg.Folders()["folder"].Invalid != "folder marker missing" {
t.Error("Incorrect error")
if err := m.CheckFolderHealth("folder"); err == nil || err.Error() != "Folder marker missing" {
t.Error("Incorrect error: Folder marker missing !=", m.CheckFolderHealth("folder"))
}
// Case 4 - path missing
// Case 3.1 - recover after folder marker missing
if err = fcfg.CreateMarker(); err != nil {
t.Error(err)
}
if err := m.CheckFolderHealth("folder"); err != nil {
t.Error("Unexpected error", cfg.Folders()["folder"].Invalid)
}
// Case 4 - Folder path missing
os.Remove("testdata/testfolder/.stfolder")
os.Remove("testdata/testfolder/")
fcfg.Path = "testdata/testfolder"
cfg = config.Wrap("/tmp/test", config.Configuration{
cfg = config.Wrap("testdata/subfolder", config.Configuration{
Folders: []config.FolderConfiguration{fcfg},
})
m = model.NewModel(cfg, protocol.LocalDeviceID, "device", "syncthing", "dev", ldb)
sanityCheckFolders(cfg, m)
m.AddFolder(fcfg)
if cfg.Folders()["folder"].Invalid != "folder path missing" {
t.Error("Incorrect error")
if err := m.CheckFolderHealth("folder"); err == nil || err.Error() != "Folder path missing" {
t.Error("Incorrect error: Folder path missing !=", m.CheckFolderHealth("folder"))
}
// Case 4.1 - recover after folder path missing
os.Mkdir("testdata/testfolder", 0700)
if err := m.CheckFolderHealth("folder"); err == nil || err.Error() != "Folder marker missing" {
t.Error("Incorrect error: Folder marker missing !=", m.CheckFolderHealth("folder"))
}
// Case 4.2 - recover after missing marker
if err = fcfg.CreateMarker(); err != nil {
t.Error(err)
}
if err := m.CheckFolderHealth("folder"); err != nil {
t.Error("Unexpected error", cfg.Folders()["folder"].Invalid)
}
}

View File

@ -220,8 +220,8 @@ func (w *Wrapper) SetGUI(gui GUIConfiguration) {
w.replaces <- w.cfg
}
// InvalidateFolder sets the invalid marker on the given folder.
func (w *Wrapper) InvalidateFolder(id string, err string) {
// Sets the folder error state. Emits ConfigSaved to cause a GUI refresh.
func (w *Wrapper) SetFolderError(id string, err error) {
w.mut.Lock()
defer w.mut.Unlock()
@ -229,8 +229,15 @@ func (w *Wrapper) InvalidateFolder(id string, err string) {
for i := range w.cfg.Folders {
if w.cfg.Folders[i].ID == id {
w.cfg.Folders[i].Invalid = err
w.replaces <- w.cfg
errstr := ""
if err != nil {
errstr = err.Error()
}
if errstr != w.cfg.Folders[i].Invalid {
w.cfg.Folders[i].Invalid = errstr
events.Default.Log(events.ConfigSaved, w.cfg)
w.replaces <- w.cfg
}
return
}
}

View File

@ -1104,7 +1104,11 @@ func (m *Model) ScanFolders() map[string]error {
errorsMut.Lock()
errors[folder] = err
errorsMut.Unlock()
m.cfg.InvalidateFolder(folder, err.Error())
// Potentially sets the error twice, once in the scanner just
// by doing a check, and once here, if the error returned is
// the same one as returned by CheckFolderHealth, though
// duplicate set is handled by SetFolderError
m.cfg.SetFolderError(folder, err)
}
wg.Done()
}()
@ -1180,9 +1184,11 @@ nextSub:
}
runner.setState(FolderScanning)
defer runner.setState(FolderIdle)
fchan, err := w.Walk()
if err != nil {
m.cfg.SetFolderError(folder, err)
return err
}
batchSize := 100
@ -1196,12 +1202,20 @@ nextSub:
"size": f.Size(),
})
if len(batch) == batchSize {
if err := m.CheckFolderHealth(folder); err != nil {
l.Infoln("Stopping folder %s mid-scan due to folder error: %s", folder, err)
return err
}
fs.Update(protocol.LocalDeviceID, batch)
batch = batch[:0]
}
batch = append(batch, f)
}
if len(batch) > 0 {
if err := m.CheckFolderHealth(folder); err != nil {
l.Infoln("Stopping folder %s mid-scan due to folder error: %s", folder, err)
return err
} else if len(batch) > 0 {
fs.Update(protocol.LocalDeviceID, batch)
}
@ -1286,7 +1300,6 @@ nextSub:
fs.Update(protocol.LocalDeviceID, batch)
}
runner.setState(FolderIdle)
return nil
}
@ -1510,6 +1523,73 @@ func (m *Model) BringToFront(folder, file string) {
}
}
// Returns current folder error, or nil if the folder is healthy.
// Updates the Invalid field on the folder configuration struct, and emits a
// ConfigSaved event which causes a GUI refresh.
func (m *Model) CheckFolderHealth(id string) error {
folder, ok := m.cfg.Folders()[id]
if !ok {
return errors.New("Folder does not exist")
}
fi, err := os.Stat(folder.Path)
if m.CurrentLocalVersion(id) > 0 {
// Safety check. If the cached index contains files but the
// folder doesn't exist, we have a problem. We would assume
// that all files have been deleted which might not be the case,
// so mark it as invalid instead.
if err != nil || !fi.IsDir() {
err = errors.New("Folder path missing")
} else if !folder.HasMarker() {
err = errors.New("Folder marker missing")
}
} else if os.IsNotExist(err) {
// If we don't have any files in the index, and the directory
// doesn't exist, try creating it.
err = os.MkdirAll(folder.Path, 0700)
if err == nil {
err = folder.CreateMarker()
}
} else if !folder.HasMarker() {
// If we don't have any files in the index, and the path does exist
// but the marker is not there, create it.
err = folder.CreateMarker()
}
if err == nil {
if folder.Invalid != "" {
l.Infof("Starting folder %q after error %q", folder.ID, folder.Invalid)
m.cfg.SetFolderError(id, nil)
}
if folder, ok := m.cfg.Folders()[id]; !ok || folder.Invalid != "" {
panic("Unable to unset folder \"" + id + "\" error.")
}
return nil
}
if folder.Invalid == err.Error() {
return err
}
// folder is a copy of the original struct, hence Invalid value is
// preserved after the set.
m.cfg.SetFolderError(id, err)
if folder.Invalid == "" {
l.Warnf("Stopping folder %q - %v", folder.ID, err)
} else {
l.Infof("Folder %q error changed: %q -> %q", folder.ID, folder.Invalid, err)
}
if folder, ok := m.cfg.Folders()[id]; !ok || folder.Invalid != err.Error() {
panic("Unable to set folder \"" + id + "\" error.")
}
return err
}
func (m *Model) String() string {
return fmt.Sprintf("model@%p", m)
}

View File

@ -20,6 +20,7 @@ import (
"github.com/syncthing/protocol"
"github.com/syncthing/syncthing/internal/config"
"github.com/syncthing/syncthing/internal/db"
"github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/storage"
)
@ -578,6 +579,166 @@ func TestRefuseUnknownBits(t *testing.T) {
}
}
func TestROScanRecovery(t *testing.T) {
ldb, _ := leveldb.Open(storage.NewMemStorage(), nil)
set := db.NewFileSet("default", ldb)
set.Update(protocol.LocalDeviceID, []protocol.FileInfo{
{Name: "dummyfile"},
})
fcfg := config.FolderConfiguration{
ID: "default",
Path: "testdata/rotestfolder",
RescanIntervalS: 1,
}
cfg := config.Wrap("/tmp/test", config.Configuration{
Folders: []config.FolderConfiguration{fcfg},
Devices: []config.DeviceConfiguration{
{
DeviceID: device1,
},
},
})
os.RemoveAll(fcfg.Path)
m := NewModel(cfg, protocol.LocalDeviceID, "device", "syncthing", "dev", ldb)
m.AddFolder(fcfg)
m.StartFolderRO("default")
waitFor := func(status string) error {
timeout := time.Now().Add(2 * time.Second)
for {
if time.Now().After(timeout) {
return fmt.Errorf("Timed out waiting for status: %s, current status: %s", status, m.cfg.Folders()["default"].Invalid)
}
if m.cfg.Folders()["default"].Invalid == status {
return nil
}
time.Sleep(10 * time.Millisecond)
}
}
if err := waitFor("Folder path missing"); err != nil {
t.Error(err)
return
}
os.Mkdir(fcfg.Path, 0700)
if err := waitFor("Folder marker missing"); err != nil {
t.Error(err)
return
}
fd, err := os.Create(filepath.Join(fcfg.Path, ".stfolder"))
if err != nil {
t.Error(err)
return
}
fd.Close()
if err := waitFor(""); err != nil {
t.Error(err)
return
}
os.Remove(filepath.Join(fcfg.Path, ".stfolder"))
if err := waitFor("Folder marker missing"); err != nil {
t.Error(err)
return
}
os.Remove(fcfg.Path)
if err := waitFor("Folder path missing"); err != nil {
t.Error(err)
return
}
}
func TestRWScanRecovery(t *testing.T) {
ldb, _ := leveldb.Open(storage.NewMemStorage(), nil)
set := db.NewFileSet("default", ldb)
set.Update(protocol.LocalDeviceID, []protocol.FileInfo{
{Name: "dummyfile"},
})
fcfg := config.FolderConfiguration{
ID: "default",
Path: "testdata/rwtestfolder",
RescanIntervalS: 1,
}
cfg := config.Wrap("/tmp/test", config.Configuration{
Folders: []config.FolderConfiguration{fcfg},
Devices: []config.DeviceConfiguration{
{
DeviceID: device1,
},
},
})
os.RemoveAll(fcfg.Path)
m := NewModel(cfg, protocol.LocalDeviceID, "device", "syncthing", "dev", ldb)
m.AddFolder(fcfg)
m.StartFolderRW("default")
waitFor := func(status string) error {
timeout := time.Now().Add(2 * time.Second)
for {
if time.Now().After(timeout) {
return fmt.Errorf("Timed out waiting for status: %s, current status: %s", status, m.cfg.Folders()["default"].Invalid)
}
if m.cfg.Folders()["default"].Invalid == status {
return nil
}
time.Sleep(10 * time.Millisecond)
}
}
if err := waitFor("Folder path missing"); err != nil {
t.Error(err)
return
}
os.Mkdir(fcfg.Path, 0700)
if err := waitFor("Folder marker missing"); err != nil {
t.Error(err)
return
}
fd, err := os.Create(filepath.Join(fcfg.Path, ".stfolder"))
if err != nil {
t.Error(err)
return
}
fd.Close()
if err := waitFor(""); err != nil {
t.Error(err)
return
}
os.Remove(filepath.Join(fcfg.Path, ".stfolder"))
if err := waitFor("Folder marker missing"); err != nil {
t.Error(err)
return
}
os.Remove(fcfg.Path)
if err := waitFor("Folder path missing"); err != nil {
t.Error(err)
return
}
}
func TestGlobalDirectoryTree(t *testing.T) {
db, _ := leveldb.Open(storage.NewMemStorage(), nil)
m := NewModel(defaultConfig, protocol.LocalDeviceID, "device", "syncthing", "dev", db)

View File

@ -40,6 +40,12 @@ func (s *roFolder) Serve() {
timer := time.NewTimer(time.Millisecond)
defer timer.Stop()
reschedule := func() {
// Sleep a random time between 3/4 and 5/4 of the configured interval.
sleepNanos := (s.intv.Nanoseconds()*3 + rand.Int63n(2*s.intv.Nanoseconds())) / 4
timer.Reset(time.Duration(sleepNanos) * time.Nanosecond)
}
initialScanCompleted := false
for {
select {
@ -47,16 +53,25 @@ func (s *roFolder) Serve() {
return
case <-timer.C:
if err := s.model.CheckFolderHealth(s.folder); err != nil {
l.Infoln("Skipping folder", s.folder, "scan due to folder error:", err)
reschedule()
continue
}
if debug {
l.Debugln(s, "rescan")
}
s.setState(FolderScanning)
if err := s.model.ScanFolder(s.folder); err != nil {
s.model.cfg.InvalidateFolder(s.folder, err.Error())
return
// Potentially sets the error twice, once in the scanner just
// by doing a check, and once here, if the error returned is
// the same one as returned by CheckFolderHealth, though
// duplicate set is handled by SetFolderError
s.model.cfg.SetFolderError(s.folder, err)
reschedule()
continue
}
s.setState(FolderIdle)
if !initialScanCompleted {
l.Infoln("Completed initial scan (ro) of folder", s.folder)
@ -67,9 +82,7 @@ func (s *roFolder) Serve() {
return
}
// Sleep a random time between 3/4 and 5/4 of the configured interval.
sleepNanos := (s.intv.Nanoseconds()*3 + rand.Int63n(2*s.intv.Nanoseconds())) / 4
timer.Reset(time.Duration(sleepNanos) * time.Nanosecond)
reschedule()
}
}
}

View File

@ -114,10 +114,20 @@ func (p *rwFolder) Serve() {
var prevVer int64
var prevIgnoreHash string
rescheduleScan := func() {
// Sleep a random time between 3/4 and 5/4 of the configured interval.
sleepNanos := (p.scanIntv.Nanoseconds()*3 + rand.Int63n(2*p.scanIntv.Nanoseconds())) / 4
intv := time.Duration(sleepNanos) * time.Nanosecond
if debug {
l.Debugln(p, "next rescan in", intv)
}
scanTimer.Reset(intv)
}
// We don't start pulling files until a scan has been completed.
initialScanCompleted := false
loop:
for {
select {
case <-p.stop:
@ -130,7 +140,6 @@ loop:
// device A to device B, so we have something to work against.
case <-pullTimer.C:
if !initialScanCompleted {
// How did we even get here?
if debug {
l.Debugln(p, "skip (initial)")
}
@ -219,24 +228,28 @@ loop:
// this is the easiest way to make sure we are not doing both at the
// same time.
case <-scanTimer.C:
if err := p.model.CheckFolderHealth(p.folder); err != nil {
l.Infoln("Skipping folder", p.folder, "scan due to folder error:", err)
rescheduleScan()
continue
}
if debug {
l.Debugln(p, "rescan")
}
p.setState(FolderScanning)
if err := p.model.ScanFolder(p.folder); err != nil {
p.model.cfg.InvalidateFolder(p.folder, err.Error())
break loop
}
p.setState(FolderIdle)
if p.scanIntv > 0 {
// Sleep a random time between 3/4 and 5/4 of the configured interval.
sleepNanos := (p.scanIntv.Nanoseconds()*3 + rand.Int63n(2*p.scanIntv.Nanoseconds())) / 4
intv := time.Duration(sleepNanos) * time.Nanosecond
if debug {
l.Debugln(p, "next rescan in", intv)
}
scanTimer.Reset(intv)
if err := p.model.ScanFolder(p.folder); err != nil {
// Potentially sets the error twice, once in the scanner just
// by doing a check, and once here, if the error returned is
// the same one as returned by CheckFolderHealth, though
// duplicate set is handled by SetFolderError
p.model.cfg.SetFolderError(p.folder, err)
rescheduleScan()
continue
}
if p.scanIntv > 0 {
rescheduleScan()
}
if !initialScanCompleted {
l.Infoln("Completed initial scan (rw) of folder", p.folder)

View File

@ -37,6 +37,24 @@ func TestIgnores(t *testing.T) {
if err != nil {
t.Fatal(err)
}
// Wait for one scan to succeed, or up to 20 seconds... This is to let
// startup, UPnP etc complete and make sure that we've performed folder
// error checking which creates the folder path if it's missing.
for i := 0; i < 20; i++ {
resp, err := p.post("/rest/scan?folder=default", nil)
if err != nil {
time.Sleep(time.Second)
continue
}
if resp.StatusCode != 200 {
resp.Body.Close()
time.Sleep(time.Second)
continue
}
break
}
defer p.stop()
// Create eight empty files and directories