lib/ignore: Optimise ignoring directories for filesystem watcher (fixes #9339) (#9340)

This improves the ignore handling so that directories can be fully
ignored (skipped in the watcher) in more cases. Specifically, where the
previous rule was that any complex `!`-pattern would disable skipping
directories, the new rule is that only matches on patterns *after* such
a `!`-pattern disable skipping. That is, the following now does the
intuitive thing:

```
/foo
/bar
!whatever
*
```

- `/foo/**` and `/bar/**` are completely skipped, since there is no
chance anything underneath them could ever be not-ignored
- `!whatever` toggles the "can't skip directories any more" flag
- Anything that matches `*` can't skip directories, because it's
possible we can have `whatever` match something deeper.

To enable this, some refactoring was necessary:

- The "can skip dirs" flag is now a property of the match result, not of
the pattern set as a whole.
- That meant returning a boolean is not good enough, we need to actually
return the entire `Result` (or, like, two booleans but that seemed
uglier and more annoying to use)
- `ShouldIgnore(string) boolean` went away with
`Match(string).IsIgnored()` being the obvious replacement (API
simplification!)
- The watcher then needed to import the `ignore` package (for the
`Result` type), but `fs` imports the watcher and `ignore` imports `fs`.
That's a cycle, so I broke out `Result` into a package of its own so
that it can be safely imported everywhere in things like `type Matcher
interface { Match(string) result.Result }`. There's a fair amount of
stuttering in `result.Result` and maybe we should go with something like
`ignoreresult.R` or so, leaving this open for discussion.

Tests refactored to suit, I think this change is in fact quite well
covered by the existing ones...

Also some noise because a few of the changed files were quite old and
got the `gofumpt` treatment by my editor. Sorry not sorry.

---------

Co-authored-by: Simon Frei <freisim93@gmail.com>
This commit is contained in:
Jakob Borg 2024-01-15 11:13:22 +01:00 committed by GitHub
parent 445e8cc532
commit 3297624037
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 114 additions and 67 deletions

View File

@ -37,18 +37,14 @@ func (f *BasicFilesystem) Watch(name string, ignore Matcher, ctx context.Context
eventMask |= permEventMask
}
if ignore.SkipIgnoredDirs() {
absShouldIgnore := func(absPath string) bool {
rel, err := f.unrootedChecked(absPath, roots)
if err != nil {
return true
}
return ignore.Match(rel).IsIgnored()
absShouldIgnore := func(absPath string) bool {
rel, err := f.unrootedChecked(absPath, roots)
if err != nil {
return true
}
err = notify.WatchWithFilter(watchPath, backendChan, absShouldIgnore, eventMask)
} else {
err = notify.Watch(watchPath, backendChan, eventMask)
return ignore.Match(rel).CanSkipDir()
}
err = notify.WatchWithFilter(watchPath, backendChan, absShouldIgnore, eventMask)
if err != nil {
notify.Stop(backendChan)
if reachedMaxUserWatches(err) {

View File

@ -129,7 +129,6 @@ type Usage struct {
type Matcher interface {
Match(name string) ignoreresult.R
SkipIgnoredDirs() bool
}
type Event struct {

View File

@ -79,11 +79,17 @@ func (p Pattern) allowsSkippingIgnoredDirs() bool {
if p.pattern[0] != '/' {
return false
}
if strings.Contains(p.pattern[1:], "/") {
// A "/**" at the end is allowed and doesn't have any bearing on the
// below checks; remove it before checking.
pattern := strings.TrimSuffix(p.pattern, "/**")
if len(pattern) == 0 {
return true
}
if strings.Contains(pattern[1:], "/") {
return false
}
// Double asterisk everywhere in the path except at the end is bad
return !strings.Contains(strings.TrimSuffix(p.pattern, "**"), "**")
return !strings.Contains(strings.TrimSuffix(pattern, "**"), "**")
}
// The ChangeDetector is responsible for determining if files have changed
@ -99,16 +105,15 @@ type ChangeDetector interface {
}
type Matcher struct {
fs fs.Filesystem
lines []string // exact lines read from .stignore
patterns []Pattern // patterns including those from included files
withCache bool
matches *cache
curHash string
stop chan struct{}
changeDetector ChangeDetector
skipIgnoredDirs bool
mut sync.Mutex
fs fs.Filesystem
lines []string // exact lines read from .stignore
patterns []Pattern // patterns including those from included files
withCache bool
matches *cache
curHash string
stop chan struct{}
changeDetector ChangeDetector
mut sync.Mutex
}
// An Option can be passed to New()
@ -131,10 +136,9 @@ func WithChangeDetector(cd ChangeDetector) Option {
func New(fs fs.Filesystem, opts ...Option) *Matcher {
m := &Matcher{
fs: fs,
stop: make(chan struct{}),
mut: sync.NewMutex(),
skipIgnoredDirs: true,
fs: fs,
stop: make(chan struct{}),
mut: sync.NewMutex(),
}
for _, opt := range opts {
opt(m)
@ -198,23 +202,6 @@ func (m *Matcher) parseLocked(r io.Reader, file string) error {
return err
}
m.skipIgnoredDirs = true
var previous string
for _, p := range patterns {
// We automatically add patterns with a /** suffix, which normally
// means that we cannot skip directories. However if the same
// pattern without the /** already exists (which is true for
// automatically added patterns) we can skip.
if l := len(p.pattern); l > 3 && p.pattern[:len(p.pattern)-3] == previous {
continue
}
if !p.allowsSkippingIgnoredDirs() {
m.skipIgnoredDirs = false
break
}
previous = p.pattern
}
m.curHash = newHash
m.patterns = patterns
if m.withCache {
@ -228,10 +215,10 @@ func (m *Matcher) parseLocked(r io.Reader, file string) error {
func (m *Matcher) Match(file string) (result ignoreresult.R) {
switch {
case fs.IsTemporary(file):
return ignoreresult.Ignored
return ignoreresult.IgnoreAndSkip
case fs.IsInternal(file):
return ignoreresult.Ignored
return ignoreresult.IgnoreAndSkip
case file == ".":
return ignoreresult.NotIgnored
@ -257,19 +244,31 @@ func (m *Matcher) Match(file string) (result ignoreresult.R) {
}()
}
// Check all the patterns for a match.
// Check all the patterns for a match. Track whether the patterns so far
// allow skipping matched directories or not. As soon as we hit an
// exclude pattern (with some exceptions), we can't skip directories
// anymore.
file = filepath.ToSlash(file)
var lowercaseFile string
canSkipDir := true
for _, pattern := range m.patterns {
if canSkipDir && !pattern.allowsSkippingIgnoredDirs() {
canSkipDir = false
}
res := pattern.result
if canSkipDir {
res = res.WithSkipDir()
}
if pattern.result.IsCaseFolded() {
if lowercaseFile == "" {
lowercaseFile = strings.ToLower(file)
}
if pattern.match.Match(lowercaseFile) {
return pattern.result
return res
}
} else if pattern.match.Match(file) {
return pattern.result
return res
}
}
@ -327,12 +326,6 @@ func (m *Matcher) clean(d time.Duration) {
}
}
func (m *Matcher) SkipIgnoredDirs() bool {
m.mut.Lock()
defer m.mut.Unlock()
return m.skipIgnoredDirs
}
func hashPatterns(patterns []Pattern) string {
h := sha256.New()
for _, pat := range patterns {

View File

@ -1122,8 +1122,8 @@ func TestIssue5009(t *testing.T) {
if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil {
t.Fatal(err)
}
if !pats.skipIgnoredDirs {
t.Error("skipIgnoredDirs should be true without includes")
if m := pats.Match("ign2"); !m.CanSkipDir() {
t.Error("CanSkipDir should be true without excludes")
}
stignore = `
@ -1138,8 +1138,8 @@ func TestIssue5009(t *testing.T) {
t.Fatal(err)
}
if pats.skipIgnoredDirs {
t.Error("skipIgnoredDirs should not be true with includes")
if m := pats.Match("ign2"); m.CanSkipDir() {
t.Error("CanSkipDir should not be true with excludes")
}
}
@ -1272,8 +1272,8 @@ func TestSkipIgnoredDirs(t *testing.T) {
if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil {
t.Fatal(err)
}
if !pats.SkipIgnoredDirs() {
t.Error("SkipIgnoredDirs should be true")
if m := pats.Match("whatever"); !m.CanSkipDir() {
t.Error("CanSkipDir should be true")
}
stignore = `
@ -1283,8 +1283,8 @@ func TestSkipIgnoredDirs(t *testing.T) {
if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil {
t.Fatal(err)
}
if pats.SkipIgnoredDirs() {
t.Error("SkipIgnoredDirs should be false")
if m := pats.Match("whatever"); m.CanSkipDir() {
t.Error("CanSkipDir should be false")
}
}

View File

@ -12,6 +12,7 @@ const (
NotIgnored R = 0
// `Ignored` is defined in platform specific files
IgnoredDeletable = Ignored | deletableBit
IgnoreAndSkip = Ignored | canSkipDirBit
)
const (
@ -19,6 +20,7 @@ const (
ignoreBit R = 1 << iota
deletableBit
foldCaseBit
canSkipDirBit
)
type R uint8
@ -38,6 +40,15 @@ func (r R) IsCaseFolded() bool {
return r&foldCaseBit != 0
}
// CanSkipDir returns true if the result is ignored and the directory can be
// skipped (no need to recurse deeper). Note that ignore matches are textual
// and based on the name only -- this being true does not mean that the
// matched item is a directory, merely that *if* it is a directory, it can
// be skipped.
func (r R) CanSkipDir() bool {
return r.IsIgnored() && r&canSkipDirBit != 0
}
// ToggleIgnored returns a copy of the result with the ignored bit toggled.
func (r R) ToggleIgnored() R {
return r ^ ignoreBit
@ -53,6 +64,11 @@ func (r R) WithFoldCase() R {
return r | foldCaseBit
}
// WithSkipDir returns a copy of the result with the skip dir bit set.
func (r R) WithSkipDir() R {
return r | canSkipDirBit
}
// String returns a human readable representation of the result flags.
func (r R) String() string {
var s string
@ -71,5 +87,10 @@ func (r R) String() string {
} else {
s += "-"
}
if r&canSkipDirBit != 0 {
s += "s"
} else {
s += "-"
}
return s
}

View File

@ -0,0 +1,38 @@
// Copyright (C) 2024 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package ignoreresult_test
import (
"testing"
"github.com/syncthing/syncthing/lib/ignore/ignoreresult"
)
func TestFlagCanSkipDir(t *testing.T) {
// Verify that CanSkipDir() means that something is both ignored and can
// be skipped as a directory, so that it's legitimate to say
// Match(...).CanSkipDir() instead of having to create a temporary
// variable and check both Match(...).IsIgnored() and
// Match(...).CanSkipDir().
cases := []struct {
res ignoreresult.R
canSkipDir bool
}{
{0, false},
{ignoreresult.NotIgnored, false},
{ignoreresult.NotIgnored.WithSkipDir(), false},
{ignoreresult.Ignored, false},
{ignoreresult.IgnoreAndSkip, true},
}
for _, tc := range cases {
if tc.res.CanSkipDir() != tc.canSkipDir {
t.Errorf("%v.CanSkipDir() != %v", tc.res, tc.canSkipDir)
}
}
}

View File

@ -295,10 +295,10 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco
return skip
}
if w.Matcher.Match(path).IsIgnored() {
if m := w.Matcher.Match(path); m.IsIgnored() {
l.Debugln(w, "ignored (patterns):", path)
// Only descend if matcher says so and the current file is not a symlink.
if err != nil || w.Matcher.SkipIgnoredDirs() || info.IsSymlink() {
if err != nil || m.CanSkipDir() || info.IsSymlink() {
return skip
}
// If the parent wasn't ignored already, set this path as the "highest" ignored parent

View File

@ -873,8 +873,8 @@ func TestSkipIgnoredDirs(t *testing.T) {
if err := pats.Parse(bytes.NewBufferString(stignore), ".stignore"); err != nil {
t.Fatal(err)
}
if !pats.SkipIgnoredDirs() {
t.Error("SkipIgnoredDirs should be true")
if m := pats.Match("whatever"); !m.CanSkipDir() {
t.Error("CanSkipDir should be true", m)
}
w.Matcher = pats