lib/fs: More efficient casefs cache (#6974)

This changes the cache to cache fewer things, yet retain the required
efficiency for our walk use case. This uses less memory.

Specifically, instead of keeping result and child caches for each path
level, only keep a single cached child. In practice our operations are
depth-first, or almost depth-first, and then we retain the same hit
ratio for a smaller cache size.

I improved the benchmark so that it counts the Lstat and DirNames
operations performed, and they do not change significantly. The amount
of allocated memory is reduced by 20% and the walk itself is actually
slightly faster.

This also removes the clear based on the number of cached names (as that
is not a thing any more) and the timer-based clear (which was unused).
This means we'll retain the last cache state forever until it's cleared
by a write operation, but we did that before too and that state is now a
lot smaller...

The overhead compared to not using a casefs, for our typical "double
walk" (walk the tree, then stat everything again), is 2x the DirNames
calls we would otherwise make, and no overhead on the stats (unchanged
from the old implementation).

```
name                         old time/op         new time/op         delta
WalkCaseFakeFS100k/rawfs-8           306ms ± 1%          305ms ± 2%     ~     (p=0.182 n=9+10)
WalkCaseFakeFS100k/casefs-8          579ms ± 5%          557ms ± 1%   -3.77%  (p=0.000 n=10+10)

name                         old B/entry         new B/entry         delta
WalkCaseFakeFS100k/rawfs-8             590 ± 0%            590 ± 0%     ~     (all equal)
WalkCaseFakeFS100k/casefs-8          1.09k ± 0%          0.87k ± 0%  -19.98%  (p=0.000 n=10+10)

name                         old DirNames/entry  new DirNames/entry  delta
WalkCaseFakeFS100k/rawfs-8            0.51 ± 0%           0.51 ± 0%     ~     (all equal)
WalkCaseFakeFS100k/casefs-8           1.02 ± 0%           1.02 ± 0%     ~     (all equal)

name                         old DirNames/op     new DirNames/op     delta
WalkCaseFakeFS100k/rawfs-8           51.2k ± 0%          51.2k ± 0%     ~     (all equal)
WalkCaseFakeFS100k/casefs-8           102k ± 0%           102k ± 0%     ~     (all equal)

name                         old Lstat/entry     new Lstat/entry     delta
WalkCaseFakeFS100k/rawfs-8            3.02 ± 0%           3.02 ± 0%     ~     (all equal)
WalkCaseFakeFS100k/casefs-8           3.02 ± 0%           3.02 ± 0%     ~     (all equal)

name                         old Lstat/op        new Lstat/op        delta
WalkCaseFakeFS100k/rawfs-8            302k ± 0%           302k ± 0%     ~     (all equal)
WalkCaseFakeFS100k/casefs-8           302k ± 0%           302k ± 0%     ~     (all equal)

name                         old allocs/entry    new allocs/entry    delta
WalkCaseFakeFS100k/rawfs-8            15.7 ± 0%           15.7 ± 0%     ~     (all equal)
WalkCaseFakeFS100k/casefs-8           27.5 ± 0%           26.1 ± 0%   -5.09%  (p=0.000 n=10+10)

name                         old ns/entry        new ns/entry        delta
WalkCaseFakeFS100k/rawfs-8           2.02k ± 1%          2.02k ± 2%     ~     (p=0.163 n=9+10)
WalkCaseFakeFS100k/casefs-8          3.83k ± 5%          3.68k ± 1%   -3.77%  (p=0.000 n=10+10)

name                         old alloc/op        new alloc/op        delta
WalkCaseFakeFS100k/rawfs-8          89.2MB ± 0%         89.2MB ± 0%     ~     (p=0.364 n=9+10)
WalkCaseFakeFS100k/casefs-8          164MB ± 0%          131MB ± 0%  -19.97%  (p=0.000 n=10+10)

name                         old allocs/op       new allocs/op       delta
WalkCaseFakeFS100k/rawfs-8           2.38M ± 0%          2.38M ± 0%     ~     (all equal)
WalkCaseFakeFS100k/casefs-8          4.16M ± 0%          3.95M ± 0%   -5.05%  (p=0.000 n=10+10)
```
This commit is contained in:
Jakob Borg 2020-09-09 14:38:39 +02:00 committed by GitHub
parent e3cd9219b8
commit 780fb3bac1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 157 additions and 121 deletions

View File

@ -16,12 +16,9 @@ import (
"time"
)
// Both values were chosen by magic.
const (
// How long to consider cached dirnames valid
caseCacheTimeout = time.Second
// When the number of names (all lengths of []string from DirNames)
// exceeds this, we drop the cache.
caseMaxCachedNames = 1 << 20
)
type ErrCaseConflict struct {
@ -47,10 +44,45 @@ type fskey struct {
uri string
}
var (
caseFilesystems = make(map[fskey]Filesystem)
caseFilesystemsMut sync.Mutex
)
// caseFilesystemRegistry keeps one shared caseFilesystem per underlying
// filesystem and owns the background routine that periodically drops their
// caches.
type caseFilesystemRegistry struct {
	// mut is held by get and cleaner while they access fss.
	mut sync.Mutex

	// fss maps a filesystem's type and URI (as an fskey) to the
	// caseFilesystem wrapping it.
	fss map[fskey]*caseFilesystem

	// startCleaner makes sure the cleaner goroutine is launched only once.
	startCleaner sync.Once
}
// get returns the caseFilesystem registered for fs, identified by its type
// and URI. When none is registered yet, a new one is created and stored, and
// the cleaner goroutine is started if that has not already happened.
func (r *caseFilesystemRegistry) get(fs Filesystem) *caseFilesystem {
	r.mut.Lock()
	defer r.mut.Unlock()

	key := fskey{fs.Type(), fs.URI()}
	if existing, found := r.fss[key]; found {
		return existing
	}

	created := &caseFilesystem{
		Filesystem: fs,
		realCaser:  newDefaultRealCaser(fs),
	}
	r.fss[key] = created

	// Only reached when a filesystem is newly registered; the Once turns
	// every call after the first into a no-op.
	r.startCleaner.Do(func() {
		go r.cleaner()
	})
	return created
}
// cleaner drops the cache of every registered caseFilesystem once a minute.
// It never returns, so it is meant to be run in its own goroutine.
func (r *caseFilesystemRegistry) cleaner() {
	ticker := time.NewTicker(time.Minute)
	for {
		<-ticker.C

		// Hold the registry lock so the map is not modified by get while
		// we iterate over it.
		r.mut.Lock()
		for _, registered := range r.fss {
			registered.dropCache()
		}
		r.mut.Unlock()
	}
}
// globalCaseFilesystemRegistry is the package-wide registry through which
// NewCaseFilesystem obtains its case filesystems. It starts out with an
// empty, non-nil map.
var globalCaseFilesystemRegistry = caseFilesystemRegistry{fss: make(map[fskey]*caseFilesystem)}
// caseFilesystem is a BasicFilesystem with additional checks to make a
// potentially case insensitive underlying FS behave like it's case-sensitive.
@ -66,18 +98,7 @@ type caseFilesystem struct {
// case-sensitive one. However it will add some overhead and thus shouldn't be
// used if the filesystem is known to already behave case-sensitively.
func NewCaseFilesystem(fs Filesystem) Filesystem {
caseFilesystemsMut.Lock()
defer caseFilesystemsMut.Unlock()
k := fskey{fs.Type(), fs.URI()}
if caseFs, ok := caseFilesystems[k]; ok {
return caseFs
}
caseFs := &caseFilesystem{
Filesystem: fs,
realCaser: newDefaultRealCaser(fs),
}
caseFilesystems[k] = caseFs
return caseFs
return globalCaseFilesystemRegistry.get(fs)
}
func (f *caseFilesystem) Chmod(name string, mode FileMode) error {
@ -308,21 +329,16 @@ func (f *caseFilesystem) checkCaseExisting(name string) error {
}
type defaultRealCaser struct {
fs Filesystem
root *caseNode
count int
timer *time.Timer
timerStop chan struct{}
mut sync.RWMutex
fs Filesystem
root *caseNode
mut sync.RWMutex
}
func newDefaultRealCaser(fs Filesystem) *defaultRealCaser {
caser := &defaultRealCaser{
fs: fs,
root: &caseNode{name: "."},
timer: time.NewTimer(0),
fs: fs,
root: &caseNode{name: "."},
}
<-caser.timer.C
return caser
}
@ -333,84 +349,49 @@ func (r *defaultRealCaser) realCase(name string) (string, error) {
}
r.mut.Lock()
defer func() {
if r.count > caseMaxCachedNames {
select {
case r.timerStop <- struct{}{}:
default:
}
r.dropCacheLocked()
}
r.mut.Unlock()
}()
defer r.mut.Unlock()
node := r.root
for _, comp := range strings.Split(name, string(PathSeparator)) {
if node.dirNames == nil {
// Haven't called DirNames yet
if node.dirNames == nil || node.expires.Before(time.Now()) {
// Haven't called DirNames yet, or the node has expired
var err error
node.dirNames, err = r.fs.DirNames(out)
if err != nil {
return "", err
}
node.dirNamesLower = make([]string, len(node.dirNames))
for i, n := range node.dirNames {
node.dirNamesLower[i] = UnicodeLowercase(n)
}
node.children = make(map[string]*caseNode)
node.results = make(map[string]*caseNode)
r.count += len(node.dirNames)
} else if child, ok := node.results[comp]; ok {
// Check if this exact name has been queried before to shortcut
node = child
out = filepath.Join(out, child.name)
continue
node.expires = time.Now().Add(caseCacheTimeout)
node.child = nil
}
// Actually loop dirNames to search for a match
n, err := findCaseInsensitiveMatch(comp, node.dirNames, node.dirNamesLower)
if err != nil {
return "", err
// If we don't already have a correct cached child, try to find it.
if node.child == nil || node.child.name != comp {
// Actually loop dirNames to search for a match.
n, err := findCaseInsensitiveMatch(comp, node.dirNames, node.dirNamesLower)
if err != nil {
return "", err
}
node.child = &caseNode{name: n}
}
child, ok := node.children[n]
if !ok {
child = &caseNode{name: n}
}
node.results[comp] = child
node.children[n] = child
node = child
out = filepath.Join(out, n)
node = node.child
out = filepath.Join(out, node.name)
}
return out, nil
}
func (r *defaultRealCaser) startCaseResetTimerLocked() {
r.timerStop = make(chan struct{})
r.timer.Reset(caseCacheTimeout)
go func() {
select {
case <-r.timer.C:
r.dropCache()
case <-r.timerStop:
if !r.timer.Stop() {
<-r.timer.C
}
r.mut.Lock()
r.timerStop = nil
r.mut.Unlock()
}
}()
}
func (r *defaultRealCaser) dropCache() {
r.mut.Lock()
r.dropCacheLocked()
r.mut.Unlock()
}
func (r *defaultRealCaser) dropCacheLocked() {
r.root = &caseNode{name: "."}
r.count = 0
r.mut.Unlock()
}
// Both name and the key to children are "Real", case resolved names of the path
@ -419,10 +400,10 @@ func (r *defaultRealCaser) dropCacheLocked() {
// case resolved.
type caseNode struct {
name string
expires time.Time
dirNames []string
dirNamesLower []string
children map[string]*caseNode
results map[string]*caseNode
child *caseNode
}
func findCaseInsensitiveMatch(name string, names, namesLower []string) (string, error) {

View File

@ -12,6 +12,7 @@ import (
"os"
"path/filepath"
"runtime"
"sort"
"strings"
"testing"
"time"
@ -153,29 +154,31 @@ func testCaseFSStat(t *testing.T, fsys Filesystem) {
}
}
func BenchmarkWalkCaseFakeFS10k(b *testing.B) {
fsys, paths, err := fakefsForBenchmark(10_000, 0)
func BenchmarkWalkCaseFakeFS100k(b *testing.B) {
const entries = 100_000
fsys, paths, err := fakefsForBenchmark(entries, 0)
if err != nil {
b.Fatal(err)
}
slowsys, paths, err := fakefsForBenchmark(10_000, 100*time.Microsecond)
if err != nil {
b.Fatal(err)
}
b.Run("raw-fastfs", func(b *testing.B) {
b.Run("rawfs", func(b *testing.B) {
fakefs := unwrapFilesystem(fsys).(*fakefs)
fakefs.resetCounters()
benchmarkWalkFakeFS(b, fsys, paths)
fakefs.reportMetricsPerOp(b)
fakefs.reportMetricsPer(b, entries, "entry")
b.ReportAllocs()
})
b.Run("case-fastfs", func(b *testing.B) {
benchmarkWalkFakeFS(b, NewCaseFilesystem(fsys), paths)
b.ReportAllocs()
})
b.Run("raw-slowfs", func(b *testing.B) {
benchmarkWalkFakeFS(b, slowsys, paths)
b.ReportAllocs()
})
b.Run("case-slowfs", func(b *testing.B) {
benchmarkWalkFakeFS(b, NewCaseFilesystem(slowsys), paths)
b.Run("casefs", func(b *testing.B) {
// Construct the casefs manually or it will get cached and the benchmark is invalid.
casefs := &caseFilesystem{
Filesystem: fsys,
realCaser: newDefaultRealCaser(fsys),
}
fakefs := unwrapFilesystem(fsys).(*fakefs)
fakefs.resetCounters()
benchmarkWalkFakeFS(b, casefs, paths)
fakefs.reportMetricsPerOp(b)
fakefs.reportMetricsPer(b, entries, "entry")
b.ReportAllocs()
})
}
@ -275,5 +278,7 @@ func fakefsForBenchmark(nfiles int, latency time.Duration) (Filesystem, []string
return nil, nil, errors.New("didn't find enough stuff")
}
sort.Strings(paths)
return fsys, paths, nil
}

View File

@ -29,19 +29,3 @@ func DebugSymlinkForTestsOnly(oldFs, newFs Filesystem, oldname, newname string)
}
return nil
}
// unwrapFilesystem removes "wrapping" filesystems to expose the underlying
// filesystem. It repeatedly strips *logFilesystem, *walkFilesystem and
// *MtimeFS layers and returns the first filesystem that is none of those.
func unwrapFilesystem(fs Filesystem) Filesystem {
for {
switch sfs := fs.(type) {
case *logFilesystem:
fs = sfs.Filesystem
case *walkFilesystem:
fs = sfs.Filesystem
case *MtimeFS:
fs = sfs.Filesystem
default:
// Not a known wrapper: this is the innermost filesystem.
return sfs
}
}
}

View File

@ -20,6 +20,7 @@ import (
"strconv"
"strings"
"sync"
"testing"
"time"
)
@ -53,6 +54,7 @@ const randomBlockShift = 14 // 128k
// - Two fakefs:s pointing at the same root path see the same files.
//
type fakefs struct {
counters fakefsCounters
uri string
mut sync.Mutex
root *fakeEntry
@ -61,6 +63,23 @@ type fakefs struct {
latency time.Duration
}
// fakefsCounters holds one call counter per counted fakefs operation. Each
// field is named after the operation that increments it.
type fakefsCounters struct {
	Chmod, Lchown, Chtimes int64

	Create, DirNames, Lstat int64

	Mkdir, MkdirAll int64

	Open, OpenFile, ReadSymlink int64

	Remove, RemoveAll, Rename int64
}
var (
fakefsMut sync.Mutex
fakefsFs = make(map[string]*fakefs)
@ -194,6 +213,7 @@ func (fs *fakefs) entryForName(name string) *fakeEntry {
func (fs *fakefs) Chmod(name string, mode FileMode) error {
fs.mut.Lock()
defer fs.mut.Unlock()
fs.counters.Chmod++
time.Sleep(fs.latency)
entry := fs.entryForName(name)
if entry == nil {
@ -206,6 +226,7 @@ func (fs *fakefs) Chmod(name string, mode FileMode) error {
func (fs *fakefs) Lchown(name string, uid, gid int) error {
fs.mut.Lock()
defer fs.mut.Unlock()
fs.counters.Lchown++
time.Sleep(fs.latency)
entry := fs.entryForName(name)
if entry == nil {
@ -219,6 +240,7 @@ func (fs *fakefs) Lchown(name string, uid, gid int) error {
func (fs *fakefs) Chtimes(name string, atime time.Time, mtime time.Time) error {
fs.mut.Lock()
defer fs.mut.Unlock()
fs.counters.Chtimes++
time.Sleep(fs.latency)
entry := fs.entryForName(name)
if entry == nil {
@ -231,6 +253,7 @@ func (fs *fakefs) Chtimes(name string, atime time.Time, mtime time.Time) error {
func (fs *fakefs) create(name string) (*fakeEntry, error) {
fs.mut.Lock()
defer fs.mut.Unlock()
fs.counters.Create++
time.Sleep(fs.latency)
if entry := fs.entryForName(name); entry != nil {
@ -297,6 +320,7 @@ func (fs *fakefs) CreateSymlink(target, name string) error {
func (fs *fakefs) DirNames(name string) ([]string, error) {
fs.mut.Lock()
defer fs.mut.Unlock()
fs.counters.DirNames++
time.Sleep(fs.latency)
entry := fs.entryForName(name)
@ -315,6 +339,7 @@ func (fs *fakefs) DirNames(name string) ([]string, error) {
func (fs *fakefs) Lstat(name string) (FileInfo, error) {
fs.mut.Lock()
defer fs.mut.Unlock()
fs.counters.Lstat++
time.Sleep(fs.latency)
entry := fs.entryForName(name)
@ -333,6 +358,7 @@ func (fs *fakefs) Lstat(name string) (FileInfo, error) {
func (fs *fakefs) Mkdir(name string, perm FileMode) error {
fs.mut.Lock()
defer fs.mut.Unlock()
fs.counters.Mkdir++
time.Sleep(fs.latency)
dir := filepath.Dir(name)
@ -366,6 +392,7 @@ func (fs *fakefs) Mkdir(name string, perm FileMode) error {
func (fs *fakefs) MkdirAll(name string, perm FileMode) error {
fs.mut.Lock()
defer fs.mut.Unlock()
fs.counters.MkdirAll++
time.Sleep(fs.latency)
name = filepath.ToSlash(name)
@ -402,6 +429,7 @@ func (fs *fakefs) MkdirAll(name string, perm FileMode) error {
func (fs *fakefs) Open(name string) (File, error) {
fs.mut.Lock()
defer fs.mut.Unlock()
fs.counters.Open++
time.Sleep(fs.latency)
entry := fs.entryForName(name)
@ -422,6 +450,7 @@ func (fs *fakefs) OpenFile(name string, flags int, mode FileMode) (File, error)
fs.mut.Lock()
defer fs.mut.Unlock()
fs.counters.OpenFile++
time.Sleep(fs.latency)
dir := filepath.Dir(name)
@ -460,6 +489,7 @@ func (fs *fakefs) OpenFile(name string, flags int, mode FileMode) (File, error)
func (fs *fakefs) ReadSymlink(name string) (string, error) {
fs.mut.Lock()
defer fs.mut.Unlock()
fs.counters.ReadSymlink++
time.Sleep(fs.latency)
entry := fs.entryForName(name)
@ -474,6 +504,7 @@ func (fs *fakefs) ReadSymlink(name string) (string, error) {
func (fs *fakefs) Remove(name string) error {
fs.mut.Lock()
defer fs.mut.Unlock()
fs.counters.Remove++
time.Sleep(fs.latency)
if fs.insens {
@ -496,6 +527,7 @@ func (fs *fakefs) Remove(name string) error {
func (fs *fakefs) RemoveAll(name string) error {
fs.mut.Lock()
defer fs.mut.Unlock()
fs.counters.RemoveAll++
time.Sleep(fs.latency)
if fs.insens {
@ -516,6 +548,7 @@ func (fs *fakefs) RemoveAll(name string) error {
func (fs *fakefs) Rename(oldname, newname string) error {
fs.mut.Lock()
defer fs.mut.Unlock()
fs.counters.Rename++
time.Sleep(fs.latency)
oldKey := filepath.Base(oldname)
@ -622,6 +655,23 @@ func (fs *fakefs) SameFile(fi1, fi2 FileInfo) bool {
return ok && fi1.ModTime().Equal(fi2.ModTime()) && fi1.Mode() == fi2.Mode() && fi1.IsDir() == fi2.IsDir() && fi1.IsRegular() == fi2.IsRegular() && fi1.IsSymlink() == fi2.IsSymlink() && fi1.Owner() == fi2.Owner() && fi1.Group() == fi2.Group()
}
// resetCounters sets every operation counter back to zero while holding the
// filesystem mutex.
func (fs *fakefs) resetCounters() {
	fs.mut.Lock()
	defer fs.mut.Unlock()

	var zeroed fakefsCounters
	fs.counters = zeroed
}
// reportMetricsPerOp reports the counted operations with the unit suffix
// "op", using a divisor of one so the counts are only scaled by b.N.
func (fs *fakefs) reportMetricsPerOp(b *testing.B) {
	const perOp = 1
	fs.reportMetricsPer(b, perOp, "op")
}
// reportMetricsPer reports the Lstat and DirNames counters as benchmark
// metrics named "Lstat/<unit>" and "DirNames/<unit>". Each count is divided
// by divisor and then by b.N before being reported.
func (fs *fakefs) reportMetricsPer(b *testing.B, divisor float64, unit string) {
	fs.mut.Lock()
	defer fs.mut.Unlock()

	// scaled normalises a raw counter per divisor and per benchmark
	// iteration.
	scaled := func(count int64) float64 {
		return float64(count) / divisor / float64(b.N)
	}

	b.ReportMetric(scaled(fs.counters.Lstat), "Lstat/"+unit)
	b.ReportMetric(scaled(fs.counters.DirNames), "DirNames/"+unit)
}
// fakeFile is the representation of an open file. We don't care if it's
// opened for reading or writing, it's all good.
type fakeFile struct {

View File

@ -259,3 +259,19 @@ func Canonicalize(file string) (string, error) {
return file, nil
}
// unwrapFilesystem peels off "wrapping" filesystems (*logFilesystem,
// *walkFilesystem and *MtimeFS) one layer at a time and returns the first
// filesystem that is not one of those wrappers.
func unwrapFilesystem(fs Filesystem) Filesystem {
	for {
		var inner Filesystem
		switch wrapper := fs.(type) {
		case *logFilesystem:
			inner = wrapper.Filesystem
		case *walkFilesystem:
			inner = wrapper.Filesystem
		case *MtimeFS:
			inner = wrapper.Filesystem
		default:
			// Not a known wrapper: this is the underlying filesystem.
			return fs
		}
		fs = inner
	}
}