lib/ignore: Optimise ignoring directories for filesystem watcher (fixes #9339) (#9340)

This improves the ignore handling so that directories can be fully
ignored (skipped in the watcher) in more cases. Specifically, where the
previous rule was that any complex `!`-pattern would disable skipping
directories, the new rule is that only matches on patterns *after* such
a `!`-pattern disable skipping. That is, the following now does the
intuitive thing:

```
/foo
/bar
!whatever
*
```

- `/foo/**` and `/bar/**` are completely skipped, since there is no
chance anything underneath them could ever be not-ignored
- `!whatever` toggles the "can't skip directories any more" flag
- Anything that matches `*` can't skip directories, because `whatever`
could still match something deeper inside them.

To enable this, some refactoring was necessary:

- The "can skip dirs" flag is now a property of the match result, not of
the pattern set as a whole.
- That meant returning a boolean is not good enough; we need to actually
return the entire `Result` (or two booleans, but that seemed uglier and
more annoying to use).
- `ShouldIgnore(string) bool` went away, with
`Match(string).IsIgnored()` being the obvious replacement (API
simplification!)
- The watcher then needed to import the `ignore` package (for the
`Result` type), but `fs` imports the watcher and `ignore` imports `fs`.
That's a cycle, so I broke out `Result` into a package of its own so
that it can be safely imported everywhere in things like `type Matcher
interface { Match(string) result.Result }`. There's a fair amount of
stuttering in `result.Result` and maybe we should go with something like
`ignoreresult.R` or so, leaving this open for discussion.

Tests refactored to suit, I think this change is in fact quite well
covered by the existing ones...

Also some noise because a few of the changed files were quite old and
got the `gofumpt` treatment by my editor. Sorry not sorry.

---------

Co-authored-by: Simon Frei <freisim93@gmail.com>
This commit is contained in:
Jakob Borg 2024-01-15 11:13:22 +01:00 committed by GitHub
parent 445e8cc532
commit 3297624037
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 114 additions and 67 deletions

View File

@ -37,18 +37,14 @@ func (f *BasicFilesystem) Watch(name string, ignore Matcher, ctx context.Context
eventMask |= permEventMask eventMask |= permEventMask
} }
if ignore.SkipIgnoredDirs() { absShouldIgnore := func(absPath string) bool {
absShouldIgnore := func(absPath string) bool { rel, err := f.unrootedChecked(absPath, roots)
rel, err := f.unrootedChecked(absPath, roots) if err != nil {
if err != nil { return true
return true
}
return ignore.Match(rel).IsIgnored()
} }
err = notify.WatchWithFilter(watchPath, backendChan, absShouldIgnore, eventMask) return ignore.Match(rel).CanSkipDir()
} else {
err = notify.Watch(watchPath, backendChan, eventMask)
} }
err = notify.WatchWithFilter(watchPath, backendChan, absShouldIgnore, eventMask)
if err != nil { if err != nil {
notify.Stop(backendChan) notify.Stop(backendChan)
if reachedMaxUserWatches(err) { if reachedMaxUserWatches(err) {

View File

@ -129,7 +129,6 @@ type Usage struct {
type Matcher interface { type Matcher interface {
Match(name string) ignoreresult.R Match(name string) ignoreresult.R
SkipIgnoredDirs() bool
} }
type Event struct { type Event struct {

View File

@ -79,11 +79,17 @@ func (p Pattern) allowsSkippingIgnoredDirs() bool {
if p.pattern[0] != '/' { if p.pattern[0] != '/' {
return false return false
} }
if strings.Contains(p.pattern[1:], "/") { // A "/**" at the end is allowed and doesn't have any bearing on the
// below checks; remove it before checking.
pattern := strings.TrimSuffix(p.pattern, "/**")
if len(pattern) == 0 {
return true
}
if strings.Contains(pattern[1:], "/") {
return false return false
} }
// Double asterisk everywhere in the path except at the end is bad // Double asterisk everywhere in the path except at the end is bad
return !strings.Contains(strings.TrimSuffix(p.pattern, "**"), "**") return !strings.Contains(strings.TrimSuffix(pattern, "**"), "**")
} }
// The ChangeDetector is responsible for determining if files have changed // The ChangeDetector is responsible for determining if files have changed
@ -99,16 +105,15 @@ type ChangeDetector interface {
} }
type Matcher struct { type Matcher struct {
fs fs.Filesystem fs fs.Filesystem
lines []string // exact lines read from .stignore lines []string // exact lines read from .stignore
patterns []Pattern // patterns including those from included files patterns []Pattern // patterns including those from included files
withCache bool withCache bool
matches *cache matches *cache
curHash string curHash string
stop chan struct{} stop chan struct{}
changeDetector ChangeDetector changeDetector ChangeDetector
skipIgnoredDirs bool mut sync.Mutex
mut sync.Mutex
} }
// An Option can be passed to New() // An Option can be passed to New()
@ -131,10 +136,9 @@ func WithChangeDetector(cd ChangeDetector) Option {
func New(fs fs.Filesystem, opts ...Option) *Matcher { func New(fs fs.Filesystem, opts ...Option) *Matcher {
m := &Matcher{ m := &Matcher{
fs: fs, fs: fs,
stop: make(chan struct{}), stop: make(chan struct{}),
mut: sync.NewMutex(), mut: sync.NewMutex(),
skipIgnoredDirs: true,
} }
for _, opt := range opts { for _, opt := range opts {
opt(m) opt(m)
@ -198,23 +202,6 @@ func (m *Matcher) parseLocked(r io.Reader, file string) error {
return err return err
} }
m.skipIgnoredDirs = true
var previous string
for _, p := range patterns {
// We automatically add patterns with a /** suffix, which normally
// means that we cannot skip directories. However if the same
// pattern without the /** already exists (which is true for
// automatically added patterns) we can skip.
if l := len(p.pattern); l > 3 && p.pattern[:len(p.pattern)-3] == previous {
continue
}
if !p.allowsSkippingIgnoredDirs() {
m.skipIgnoredDirs = false
break
}
previous = p.pattern
}
m.curHash = newHash m.curHash = newHash
m.patterns = patterns m.patterns = patterns
if m.withCache { if m.withCache {
@ -228,10 +215,10 @@ func (m *Matcher) parseLocked(r io.Reader, file string) error {
func (m *Matcher) Match(file string) (result ignoreresult.R) { func (m *Matcher) Match(file string) (result ignoreresult.R) {
switch { switch {
case fs.IsTemporary(file): case fs.IsTemporary(file):
return ignoreresult.Ignored return ignoreresult.IgnoreAndSkip
case fs.IsInternal(file): case fs.IsInternal(file):
return ignoreresult.Ignored return ignoreresult.IgnoreAndSkip
case file == ".": case file == ".":
return ignoreresult.NotIgnored return ignoreresult.NotIgnored
@ -257,19 +244,31 @@ func (m *Matcher) Match(file string) (result ignoreresult.R) {
}() }()
} }
// Check all the patterns for a match. // Check all the patterns for a match. Track whether the patterns so far
// allow skipping matched directories or not. As soon as we hit an
// exclude pattern (with some exceptions), we can't skip directories
// anymore.
file = filepath.ToSlash(file) file = filepath.ToSlash(file)
var lowercaseFile string var lowercaseFile string
canSkipDir := true
for _, pattern := range m.patterns { for _, pattern := range m.patterns {
if canSkipDir && !pattern.allowsSkippingIgnoredDirs() {
canSkipDir = false
}
res := pattern.result
if canSkipDir {
res = res.WithSkipDir()
}
if pattern.result.IsCaseFolded() { if pattern.result.IsCaseFolded() {
if lowercaseFile == "" { if lowercaseFile == "" {
lowercaseFile = strings.ToLower(file) lowercaseFile = strings.ToLower(file)
} }
if pattern.match.Match(lowercaseFile) { if pattern.match.Match(lowercaseFile) {
return pattern.result return res
} }
} else if pattern.match.Match(file) { } else if pattern.match.Match(file) {
return pattern.result return res
} }
} }
@ -327,12 +326,6 @@ func (m *Matcher) clean(d time.Duration) {
} }
} }
func (m *Matcher) SkipIgnoredDirs() bool {
m.mut.Lock()
defer m.mut.Unlock()
return m.skipIgnoredDirs
}
func hashPatterns(patterns []Pattern) string { func hashPatterns(patterns []Pattern) string {
h := sha256.New() h := sha256.New()
for _, pat := range patterns { for _, pat := range patterns {

View File

@ -1122,8 +1122,8 @@ func TestIssue5009(t *testing.T) {
if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil { if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil {
t.Fatal(err) t.Fatal(err)
} }
if !pats.skipIgnoredDirs { if m := pats.Match("ign2"); !m.CanSkipDir() {
t.Error("skipIgnoredDirs should be true without includes") t.Error("CanSkipDir should be true without excludes")
} }
stignore = ` stignore = `
@ -1138,8 +1138,8 @@ func TestIssue5009(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
if pats.skipIgnoredDirs { if m := pats.Match("ign2"); m.CanSkipDir() {
t.Error("skipIgnoredDirs should not be true with includes") t.Error("CanSkipDir should not be true with excludes")
} }
} }
@ -1272,8 +1272,8 @@ func TestSkipIgnoredDirs(t *testing.T) {
if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil { if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil {
t.Fatal(err) t.Fatal(err)
} }
if !pats.SkipIgnoredDirs() { if m := pats.Match("whatever"); !m.CanSkipDir() {
t.Error("SkipIgnoredDirs should be true") t.Error("CanSkipDir should be true")
} }
stignore = ` stignore = `
@ -1283,8 +1283,8 @@ func TestSkipIgnoredDirs(t *testing.T) {
if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil { if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil {
t.Fatal(err) t.Fatal(err)
} }
if pats.SkipIgnoredDirs() { if m := pats.Match("whatever"); m.CanSkipDir() {
t.Error("SkipIgnoredDirs should be false") t.Error("CanSkipDir should be false")
} }
} }

View File

@ -12,6 +12,7 @@ const (
NotIgnored R = 0 NotIgnored R = 0
// `Ignored` is defined in platform specific files // `Ignored` is defined in platform specific files
IgnoredDeletable = Ignored | deletableBit IgnoredDeletable = Ignored | deletableBit
IgnoreAndSkip = Ignored | canSkipDirBit
) )
const ( const (
@ -19,6 +20,7 @@ const (
ignoreBit R = 1 << iota ignoreBit R = 1 << iota
deletableBit deletableBit
foldCaseBit foldCaseBit
canSkipDirBit
) )
type R uint8 type R uint8
@ -38,6 +40,15 @@ func (r R) IsCaseFolded() bool {
return r&foldCaseBit != 0 return r&foldCaseBit != 0
} }
// CanSkipDir returns true if the result is ignored and the directory can be
// skipped (no need to recurse deeper). Note that ignore matches are textual
// and based on the name only -- this being true does not mean that the
// matched item is a directory, merely that *if* it is a directory, it can
// be skipped.
//
// Both conditions must hold: a result that carries the skip dir bit but is
// not ignored reports false, so callers can use CanSkipDir() on its own
// without also checking IsIgnored().
func (r R) CanSkipDir() bool {
return r.IsIgnored() && r&canSkipDirBit != 0
}
// ToggleIgnored returns a copy of the result with the ignored bit toggled. // ToggleIgnored returns a copy of the result with the ignored bit toggled.
func (r R) ToggleIgnored() R { func (r R) ToggleIgnored() R {
return r ^ ignoreBit return r ^ ignoreBit
@ -53,6 +64,11 @@ func (r R) WithFoldCase() R {
return r | foldCaseBit return r | foldCaseBit
} }
// WithSkipDir returns a copy of the result with the skip dir bit set. All
// other bits are left as they are; in particular the ignored bit is not
// touched, so setting the skip dir bit on a non-ignored result does not by
// itself make CanSkipDir() return true.
func (r R) WithSkipDir() R {
return r | canSkipDirBit
}
// String returns a human readable representation of the result flags. // String returns a human readable representation of the result flags.
func (r R) String() string { func (r R) String() string {
var s string var s string
@ -71,5 +87,10 @@ func (r R) String() string {
} else { } else {
s += "-" s += "-"
} }
if r&canSkipDirBit != 0 {
s += "s"
} else {
s += "-"
}
return s return s
} }

View File

@ -0,0 +1,38 @@
// Copyright (C) 2024 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package ignoreresult_test
import (
"testing"
"github.com/syncthing/syncthing/lib/ignore/ignoreresult"
)
// TestFlagCanSkipDir checks CanSkipDir() against a table of results with and
// without the ignored and skip dir flags.
func TestFlagCanSkipDir(t *testing.T) {
// Verify that CanSkipDir() means that something is both ignored and can
// be skipped as a directory, so that it's legitimate to say
// Match(...).CanSkipDir() instead of having to create a temporary
// variable and check both Match(...).IsIgnored() and
// Match(...).CanSkipDir().
cases := []struct {
res ignoreresult.R
canSkipDir bool
}{
// Zero value, no flags set at all.
{0, false},
// Explicitly not ignored.
{ignoreresult.NotIgnored, false},
// Skip dir flag set, but the result is not ignored.
{ignoreresult.NotIgnored.WithSkipDir(), false},
// Ignored, but without the skip dir flag.
{ignoreresult.Ignored, false},
// Ignored and skip dir flag set: the only case that may be skipped.
{ignoreresult.IgnoreAndSkip, true},
}
for _, tc := range cases {
if tc.res.CanSkipDir() != tc.canSkipDir {
t.Errorf("%v.CanSkipDir() != %v", tc.res, tc.canSkipDir)
}
}
}

View File

@ -295,10 +295,10 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco
return skip return skip
} }
if w.Matcher.Match(path).IsIgnored() { if m := w.Matcher.Match(path); m.IsIgnored() {
l.Debugln(w, "ignored (patterns):", path) l.Debugln(w, "ignored (patterns):", path)
// Only descend if matcher says so and the current file is not a symlink. // Only descend if matcher says so and the current file is not a symlink.
if err != nil || w.Matcher.SkipIgnoredDirs() || info.IsSymlink() { if err != nil || m.CanSkipDir() || info.IsSymlink() {
return skip return skip
} }
// If the parent wasn't ignored already, set this path as the "highest" ignored parent // If the parent wasn't ignored already, set this path as the "highest" ignored parent

View File

@ -873,8 +873,8 @@ func TestSkipIgnoredDirs(t *testing.T) {
if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil { if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil {
t.Fatal(err) t.Fatal(err)
} }
if !pats.SkipIgnoredDirs() { if m := pats.Match("whatever"); !m.CanSkipDir() {
t.Error("SkipIgnoredDirs should be true") t.Error("CanSkipDir should be true", m)
} }
w.Matcher = pats w.Matcher = pats