lib/rc: Fix hangups when Syncthing process ends unexpectedly (#5383)

This commit is contained in:
Simon Frei 2018-12-21 11:49:04 +01:00 committed by Jakob Borg
parent ae0dfcd7ca
commit 2626143fc5
1 changed files with 51 additions and 45 deletions

View File

@ -22,7 +22,6 @@ import (
"os/exec"
"path/filepath"
"strconv"
stdsync "sync"
"time"
"github.com/syncthing/syncthing/lib/config"
@ -40,14 +39,14 @@ type Process struct {
addr string
// Set by eventLoop()
eventMut sync.Mutex
id protocol.DeviceID
folders []string
startComplete bool
startCompleteCond *stdsync.Cond
stop bool
sequence map[string]map[string]int64 // Folder ID => Device ID => Sequence
done map[string]bool // Folder ID => 100%
eventMut sync.Mutex
id protocol.DeviceID
folders []string
startComplete chan struct{}
stopped chan struct{}
stopErr error
sequence map[string]map[string]int64 // Folder ID => Device ID => Sequence
done map[string]bool // Folder ID => 100%
cmd *exec.Cmd
logfd *os.File
@ -57,12 +56,13 @@ type Process struct {
// Example: NewProcess("127.0.0.1:8082")
func NewProcess(addr string) *Process {
p := &Process{
addr: addr,
sequence: make(map[string]map[string]int64),
done: make(map[string]bool),
eventMut: sync.NewMutex(),
addr: addr,
sequence: make(map[string]map[string]int64),
done: make(map[string]bool),
eventMut: sync.NewMutex(),
startComplete: make(chan struct{}),
stopped: make(chan struct{}),
}
p.startCompleteCond = stdsync.NewCond(p.eventMut)
return p
}
@ -108,19 +108,30 @@ func (p *Process) Start(bin string, args ...string) error {
p.cmd = cmd
go p.eventLoop()
go p.wait()
return nil
}
func (p *Process) wait() {
p.cmd.Wait()
if p.logfd != nil {
p.stopErr = p.checkForProblems(p.logfd)
}
close(p.stopped)
}
// AwaitStartup waits for the Syncthing process to start and perform initial
// scans of all folders.
func (p *Process) AwaitStartup() {
p.eventMut.Lock()
for !p.startComplete {
p.startCompleteCond.Wait()
fmt.Println("awaiting startup")
select {
case <-p.startComplete:
case <-p.stopped:
}
p.eventMut.Unlock()
return
fmt.Println("awaited startup")
}
// Stop stops the running Syncthing process. If the process was logging to a
@ -128,27 +139,21 @@ func (p *Process) AwaitStartup() {
// panics and data races. The presence of either will be signalled in the form
// of a returned error.
func (p *Process) Stop() (*os.ProcessState, error) {
p.eventMut.Lock()
if p.stop {
p.eventMut.Unlock()
return p.cmd.ProcessState, nil
select {
case <-p.stopped:
return p.cmd.ProcessState, p.stopErr
default:
}
p.stop = true
p.eventMut.Unlock()
if _, err := p.Post("/rest/system/shutdown", nil); err != nil && err != io.ErrUnexpectedEOF {
// Unexpected EOF is somewhat expected here, as we may exit before
// returning something sensible.
return nil, err
}
p.cmd.Wait()
var err error
if p.logfd != nil {
err = p.checkForProblems(p.logfd)
}
<-p.stopped
return p.cmd.ProcessState, err
return p.cmd.ProcessState, p.stopErr
}
// Get performs an HTTP GET and returns the bytes and/or an error. Any non-200
@ -403,7 +408,11 @@ func (p *Process) checkForProblems(logfd *os.File) error {
raceConditionStart := []byte("WARNING: DATA RACE")
raceConditionSep := []byte("==================")
panicConditionStart := []byte("panic:")
panicConditionSep := []byte(p.id.String()[:5])
p.eventMut.Lock()
panicConditionSep := []byte("[") // fallback if we don't already know our ID
if p.id.String() != "" {
panicConditionSep = []byte(p.id.String()[:5])
}
sc := bufio.NewScanner(fd)
race := false
_panic := false
@ -442,12 +451,11 @@ func (p *Process) eventLoop() {
notScanned := make(map[string]struct{})
start := time.Now()
for {
p.eventMut.Lock()
if p.stop {
p.eventMut.Unlock()
select {
case <-p.stopped:
return
default:
}
p.eventMut.Unlock()
events, err := p.Events(since)
if err != nil {
@ -457,12 +465,11 @@ func (p *Process) eventLoop() {
}
// If we're stopping, no need to print the error.
p.eventMut.Lock()
if p.stop {
p.eventMut.Unlock()
select {
case <-p.stopped:
return
default:
}
p.eventMut.Unlock()
log.Println("eventLoop: events:", err)
continue
@ -511,17 +518,16 @@ func (p *Process) eventLoop() {
panic("race, or lost startup event")
}
if !p.startComplete {
select {
case <-p.startComplete:
default:
data := ev.Data.(map[string]interface{})
to := data["to"].(string)
if to == "idle" {
folder := data["folder"].(string)
delete(notScanned, folder)
if len(notScanned) == 0 {
p.eventMut.Lock()
p.startComplete = true
p.startCompleteCond.Broadcast()
p.eventMut.Unlock()
close(p.startComplete)
}
}
}