Refactor ignore handling (...)

This uses persistent Matcher objects that can reload their content and
provide a hash string that can be used to check if it's changed. The
cache is local to each Matcher object instead of kept globally.
This commit is contained in:
Jakob Borg 2014-12-23 10:05:08 +01:00
parent 597011e3a9
commit 2c89f04be7
5 changed files with 239 additions and 120 deletions

View File

@ -15,26 +15,11 @@
package ignore
import (
"sync"
"time"
)
var (
caches = make(map[string]*cache)
cacheMut sync.Mutex
)
func init() {
// Periodically go through the cache and remove cache entries that have
// not been touched in the last two hours.
go cleanIgnoreCaches(2 * time.Hour)
}
import "time"
type cache struct {
patterns []Pattern
entries map[string]cacheEntry
mut sync.Mutex
}
type cacheEntry struct {
@ -50,46 +35,27 @@ func newCache(patterns []Pattern) *cache {
}
func (c *cache) clean(d time.Duration) {
c.mut.Lock()
for k, v := range c.entries {
if time.Since(v.access) > d {
delete(c.entries, k)
}
}
c.mut.Unlock()
}
func (c *cache) get(key string) (result, ok bool) {
c.mut.Lock()
res, ok := c.entries[key]
if ok {
res.access = time.Now()
c.entries[key] = res
}
c.mut.Unlock()
return res.value, ok
}
func (c *cache) set(key string, val bool) {
c.mut.Lock()
c.entries[key] = cacheEntry{val, time.Now()}
c.mut.Unlock()
}
func (c *cache) len() int {
c.mut.Lock()
l := len(c.entries)
c.mut.Unlock()
return l
}
func cleanIgnoreCaches(dur time.Duration) {
for {
time.Sleep(dur)
cacheMut.Lock()
for _, v := range caches {
v.clean(dur)
}
cacheMut.Unlock()
}
}

View File

@ -17,6 +17,8 @@ package ignore
import (
"bufio"
"bytes"
"crypto/md5"
"fmt"
"io"
"os"
@ -24,6 +26,7 @@ import (
"regexp"
"strings"
"sync"
"time"
"github.com/syncthing/syncthing/internal/fnmatch"
)
@ -33,51 +36,76 @@ type Pattern struct {
include bool
}
func (p Pattern) String() string {
if p.include {
return p.match.String()
} else {
return "(?exclude)" + p.match.String()
}
}
type Matcher struct {
patterns []Pattern
matches *cache
mut sync.Mutex
patterns []Pattern
withCache bool
matches *cache
curHash string
stop chan struct{}
mut sync.Mutex
}
func Load(file string, cache bool) (*Matcher, error) {
seen := make(map[string]bool)
matcher, err := loadIgnoreFile(file, seen)
if !cache || err != nil {
return matcher, err
func New(withCache bool) *Matcher {
m := &Matcher{
withCache: withCache,
stop: make(chan struct{}),
}
cacheMut.Lock()
defer cacheMut.Unlock()
// Get the current cache object for the given file
cached, ok := caches[file]
if !ok || !patternsEqual(cached.patterns, matcher.patterns) {
// Nothing in cache or a cache mismatch, create a new cache which will
// store matches for the given set of patterns.
// Initialize oldMatches to indicate that we are interested in
// caching.
cached = newCache(matcher.patterns)
matcher.matches = cached
caches[file] = cached
return matcher, nil
if withCache {
go m.clean(2 * time.Hour)
}
// Patterns haven't changed, so we can reuse the old matches, create a new
// matches map and update the pointer. (This prevents matches map from
// growing indefinately, as we only cache whatever we've matched in the last
// iteration, rather than through runtime history)
matcher.matches = cached
return matcher, nil
return m
}
func Parse(r io.Reader, file string) (*Matcher, error) {
seen := map[string]bool{
file: true,
func (m *Matcher) Load(file string) error {
// No locking, Parse() does the locking
fd, err := os.Open(file)
if err != nil {
// We do a parse with empty patterns to clear out the hash, cache etc.
m.Parse(&bytes.Buffer{}, file)
return err
}
return parseIgnoreFile(r, file, seen)
defer fd.Close()
return m.Parse(fd, file)
}
func (m *Matcher) Parse(r io.Reader, file string) error {
m.mut.Lock()
defer m.mut.Unlock()
seen := map[string]bool{file: true}
patterns, err := parseIgnoreFile(r, file, seen)
// Error is saved and returned at the end. We process the patterns
// (possibly blank) anyway.
newHash := hashPatterns(patterns)
if newHash == m.curHash {
// We've already loaded exactly these patterns.
return err
}
m.curHash = newHash
m.patterns = patterns
if m.withCache {
m.matches = newCache(patterns)
}
return err
}
func (m *Matcher) Match(file string) (result bool) {
m.mut.Lock()
defer m.mut.Unlock()
if len(m.patterns) == 0 {
return false
}
@ -108,18 +136,53 @@ func (m *Matcher) Match(file string) (result bool) {
// Patterns return a list of the loaded regexp patterns, as strings
func (m *Matcher) Patterns() []string {
m.mut.Lock()
defer m.mut.Unlock()
patterns := make([]string, len(m.patterns))
for i, pat := range m.patterns {
if pat.include {
patterns[i] = pat.match.String()
} else {
patterns[i] = "(?exclude)" + pat.match.String()
}
patterns[i] = pat.String()
}
return patterns
}
func loadIgnoreFile(file string, seen map[string]bool) (*Matcher, error) {
func (m *Matcher) Hash() string {
m.mut.Lock()
defer m.mut.Unlock()
return m.curHash
}
func (m *Matcher) Stop() {
close(m.stop)
}
func (m *Matcher) clean(d time.Duration) {
t := time.NewTimer(d / 2)
for {
select {
case <-m.stop:
return
case <-t.C:
m.mut.Lock()
if m.matches != nil {
m.matches.clean(d)
}
t.Reset(d / 2)
m.mut.Unlock()
}
}
}
func hashPatterns(patterns []Pattern) string {
h := md5.New()
for _, pat := range patterns {
h.Write([]byte(pat.String()))
h.Write([]byte("\n"))
}
return fmt.Sprintf("%x", h.Sum(nil))
}
func loadIgnoreFile(file string, seen map[string]bool) ([]Pattern, error) {
if seen[file] {
return nil, fmt.Errorf("Multiple include of ignore file %q", file)
}
@ -134,8 +197,8 @@ func loadIgnoreFile(file string, seen map[string]bool) (*Matcher, error) {
return parseIgnoreFile(fd, file, seen)
}
func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*Matcher, error) {
var exps Matcher
func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) ([]Pattern, error) {
var patterns []Pattern
addPattern := func(line string) error {
include := true
@ -150,27 +213,27 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M
if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line)
}
exps.patterns = append(exps.patterns, Pattern{exp, include})
patterns = append(patterns, Pattern{exp, include})
} else if strings.HasPrefix(line, "**/") {
// Add the pattern as is, and without **/ so it matches in current dir
exp, err := fnmatch.Convert(line, fnmatch.FNM_PATHNAME)
if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line)
}
exps.patterns = append(exps.patterns, Pattern{exp, include})
patterns = append(patterns, Pattern{exp, include})
exp, err = fnmatch.Convert(line[3:], fnmatch.FNM_PATHNAME)
if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line)
}
exps.patterns = append(exps.patterns, Pattern{exp, include})
patterns = append(patterns, Pattern{exp, include})
} else if strings.HasPrefix(line, "#include ") {
includeFile := filepath.Join(filepath.Dir(currentFile), line[len("#include "):])
includes, err := loadIgnoreFile(includeFile, seen)
if err != nil {
return err
}
exps.patterns = append(exps.patterns, includes.patterns...)
patterns = append(patterns, includes...)
} else {
// Path name or pattern, add it so it matches files both in
// current directory and subdirs.
@ -178,13 +241,13 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M
if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line)
}
exps.patterns = append(exps.patterns, Pattern{exp, include})
patterns = append(patterns, Pattern{exp, include})
exp, err = fnmatch.Convert("**/"+line, fnmatch.FNM_PATHNAME)
if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line)
}
exps.patterns = append(exps.patterns, Pattern{exp, include})
patterns = append(patterns, Pattern{exp, include})
}
return nil
}
@ -218,17 +281,5 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M
}
}
return &exps, nil
}
func patternsEqual(a, b []Pattern) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if a[i].include != b[i].include || a[i].match.String() != b[i].match.String() {
return false
}
}
return true
return patterns, nil
}

View File

@ -25,7 +25,8 @@ import (
)
func TestIgnore(t *testing.T) {
pats, err := Load("testdata/.stignore", true)
pats := New(true)
err := pats.Load("testdata/.stignore")
if err != nil {
t.Fatal(err)
}
@ -74,7 +75,8 @@ func TestExcludes(t *testing.T) {
i*2
!ign2
`
pats, err := Parse(bytes.NewBufferString(stignore), ".stignore")
pats := New(true)
err := pats.Parse(bytes.NewBufferString(stignore), ".stignore")
if err != nil {
t.Fatal(err)
}
@ -114,15 +116,19 @@ func TestBadPatterns(t *testing.T) {
}
for _, pat := range badPatterns {
parsed, err := Parse(bytes.NewBufferString(pat), ".stignore")
err := New(true).Parse(bytes.NewBufferString(pat), ".stignore")
if err == nil {
t.Errorf("No error for pattern %q: %v", pat, parsed)
t.Errorf("No error for pattern %q", pat)
}
}
}
func TestCaseSensitivity(t *testing.T) {
ign, _ := Parse(bytes.NewBufferString("test"), ".stignore")
ign := New(true)
err := ign.Parse(bytes.NewBufferString("test"), ".stignore")
if err != nil {
t.Error(err)
}
match := []string{"test"}
dontMatch := []string{"foo"}
@ -170,7 +176,8 @@ func TestCaching(t *testing.T) {
fd2.WriteString("/y/\n")
pats, err := Load(fd1.Name(), true)
pats := New(true)
err = pats.Load(fd1.Name())
if err != nil {
t.Fatal(err)
}
@ -193,9 +200,9 @@ func TestCaching(t *testing.T) {
t.Fatal("Expected 4 cached results")
}
// Reload file, expect old outcomes to be provided
// Reload file, expect old outcomes to be preserved
pats, err = Load(fd1.Name(), true)
err = pats.Load(fd1.Name())
if err != nil {
t.Fatal(err)
}
@ -207,7 +214,7 @@ func TestCaching(t *testing.T) {
fd2.WriteString("/z/\n")
pats, err = Load(fd1.Name(), true)
err = pats.Load(fd1.Name())
if err != nil {
t.Fatal(err)
}
@ -222,9 +229,9 @@ func TestCaching(t *testing.T) {
pats.Match(letter)
}
// Verify that outcomes provided on next laod
// Verify that outcomes preserved on next laod
pats, err = Load(fd1.Name(), true)
err = pats.Load(fd1.Name())
if err != nil {
t.Fatal(err)
}
@ -236,7 +243,7 @@ func TestCaching(t *testing.T) {
fd1.WriteString("/a/\n")
pats, err = Load(fd1.Name(), true)
err = pats.Load(fd1.Name())
if err != nil {
t.Fatal(err)
}
@ -252,7 +259,7 @@ func TestCaching(t *testing.T) {
// Verify that outcomes provided on next laod
pats, err = Load(fd1.Name(), true)
err = pats.Load(fd1.Name())
if err != nil {
t.Fatal(err)
}
@ -273,7 +280,11 @@ func TestCommentsAndBlankLines(t *testing.T) {
`
pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore")
pats := New(true)
err := pats.Parse(bytes.NewBufferString(stignore), ".stignore")
if err != nil {
t.Error(err)
}
if len(pats.patterns) > 0 {
t.Errorf("Expected no patterns")
}
@ -297,7 +308,11 @@ flamingo
*.crow
*.crow
`
pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore")
pats := New(false)
err := pats.Parse(bytes.NewBufferString(stignore), ".stignore")
if err != nil {
b.Error(err)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
@ -335,7 +350,8 @@ flamingo
}
// Load the patterns
pats, err := Load(fd.Name(), true)
pats := New(true)
err = pats.Load(fd.Name())
if err != nil {
b.Fatal(err)
}
@ -344,7 +360,7 @@ flamingo
// This load should now load the cached outcomes as the set of patterns
// has not changed.
pats, err = Load(fd.Name(), true)
err = pats.Load(fd.Name())
if err != nil {
b.Fatal(err)
}
@ -370,7 +386,8 @@ func TestCacheReload(t *testing.T) {
t.Fatal(err)
}
pats, err := Load(fd.Name(), true)
pats := New(true)
err = pats.Load(fd.Name())
if err != nil {
t.Fatal(err)
}
@ -402,7 +419,7 @@ func TestCacheReload(t *testing.T) {
t.Fatal(err)
}
pats, err = Load(fd.Name(), true)
err = pats.Load(fd.Name())
if err != nil {
t.Fatal(err)
}
@ -419,3 +436,85 @@ func TestCacheReload(t *testing.T) {
t.Error("Unexpected non-match for f3")
}
}
func TestHash(t *testing.T) {
p1 := New(true)
err := p1.Load("testdata/.stignore")
if err != nil {
t.Fatal(err)
}
// Same list of patterns as testdata/.stignore, after expansion
stignore := `
dir2/dfile
dir3
bfile
dir1/cfile
**/efile
/ffile
lost+found
`
p2 := New(true)
err = p2.Parse(bytes.NewBufferString(stignore), ".stignore")
if err != nil {
t.Fatal(err)
}
// Not same list of patterns
stignore = `
dir2/dfile
dir3
bfile
dir1/cfile
/ffile
lost+found
`
p3 := New(true)
err = p3.Parse(bytes.NewBufferString(stignore), ".stignore")
if err != nil {
t.Fatal(err)
}
if p1.Hash() == "" {
t.Error("p1 hash blank")
}
if p2.Hash() == "" {
t.Error("p2 hash blank")
}
if p3.Hash() == "" {
t.Error("p3 hash blank")
}
if p1.Hash() != p2.Hash() {
t.Error("p1-p2 hashes differ")
}
if p1.Hash() == p3.Hash() {
t.Error("p1-p3 hashes same")
}
}
func TestHashOfEmpty(t *testing.T) {
p1 := New(true)
err := p1.Load("testdata/.stignore")
if err != nil {
t.Fatal(err)
}
firstHash := p1.Hash()
// Reloading with a non-existent file should empty the patterns and
// recalculate the hash. d41d8cd98f00b204e9800998ecf8427e is the md5 of
// nothing.
p1.Load("file/does/not/exist")
secondHash := p1.Hash()
if firstHash == secondHash {
t.Error("hash did not change")
}
if secondHash != "d41d8cd98f00b204e9800998ecf8427e" {
t.Error("second hash is not hash of empty string")
}
if len(p1.patterns) != 0 {
t.Error("there are more than zero patterns")
}
}

View File

@ -1040,7 +1040,8 @@ func (m *Model) AddFolder(cfg config.FolderConfiguration) {
m.deviceFolders[device.DeviceID] = append(m.deviceFolders[device.DeviceID], cfg.ID)
}
ignores, _ := ignore.Load(filepath.Join(cfg.Path, ".stignore"), m.cfg.Options().CacheIgnoredFiles)
ignores := ignore.New(m.cfg.Options().CacheIgnoredFiles)
_ = ignores.Load(filepath.Join(cfg.Path, ".stignore")) // Ignore error, there might not be an .stignore
m.folderIgnores[cfg.ID] = ignores
m.addedFolder = true
@ -1083,8 +1084,8 @@ func (m *Model) ScanFolderSub(folder, sub string) error {
fs, ok := m.folderFiles[folder]
dir := m.folderCfgs[folder].Path
ignores, _ := ignore.Load(filepath.Join(dir, ".stignore"), m.cfg.Options().CacheIgnoredFiles)
m.folderIgnores[folder] = ignores
ignores := m.folderIgnores[folder]
_ = ignores.Load(filepath.Join(dir, ".stignore")) // Ignore error, there might not be an .stignore
w := &scanner.Walker{
Dir: dir,

View File

@ -58,7 +58,8 @@ func init() {
}
func TestWalkSub(t *testing.T) {
ignores, err := ignore.Load("testdata/.stignore", false)
ignores := ignore.New(false)
err := ignores.Load("testdata/.stignore")
if err != nil {
t.Fatal(err)
}
@ -93,7 +94,8 @@ func TestWalkSub(t *testing.T) {
}
func TestWalk(t *testing.T) {
ignores, err := ignore.Load("testdata/.stignore", false)
ignores := ignore.New(false)
err := ignores.Load("testdata/.stignore")
if err != nil {
t.Fatal(err)
}