diff --git a/lib/fs/basicfs_windows.go b/lib/fs/basicfs_windows.go
index 1788caf20..e4924da0e 100644
--- a/lib/fs/basicfs_windows.go
+++ b/lib/fs/basicfs_windows.go
@@ -157,9 +157,9 @@ func (f *BasicFilesystem) Roots() ([]string, error) {
 // pathseparator.
 func (f *BasicFilesystem) unrootedChecked(absPath string, roots []string) (string, error) {
 	absPath = f.resolveWin83(absPath)
-	lowerAbsPath := UnicodeLowercase(absPath)
+	lowerAbsPath := UnicodeLowercaseNormalized(absPath)
 	for _, root := range roots {
-		lowerRoot := UnicodeLowercase(root)
+		lowerRoot := UnicodeLowercaseNormalized(root)
 		if lowerAbsPath+string(PathSeparator) == lowerRoot {
 			return ".", nil
 		}
@@ -171,7 +171,7 @@ func (f *BasicFilesystem) unrootedChecked(absPath string, roots []string) (strin
 }
 
 func rel(path, prefix string) string {
-	lowerRel := strings.TrimPrefix(strings.TrimPrefix(UnicodeLowercase(path), UnicodeLowercase(prefix)), string(PathSeparator))
+	lowerRel := strings.TrimPrefix(strings.TrimPrefix(UnicodeLowercaseNormalized(path), UnicodeLowercaseNormalized(prefix)), string(PathSeparator))
 	return path[len(path)-len(lowerRel):]
 }
 
@@ -193,8 +193,8 @@ func (f *BasicFilesystem) resolveWin83(absPath string) string {
 	}
 	// Failed getting the long path. Return the part of the path which is
 	// already a long path.
-	lowerRoot := UnicodeLowercase(f.root)
-	for absPath = filepath.Dir(absPath); strings.HasPrefix(UnicodeLowercase(absPath), lowerRoot); absPath = filepath.Dir(absPath) {
+	lowerRoot := UnicodeLowercaseNormalized(f.root)
+	for absPath = filepath.Dir(absPath); strings.HasPrefix(UnicodeLowercaseNormalized(absPath), lowerRoot); absPath = filepath.Dir(absPath) {
 		if !isMaybeWin83(absPath) {
 			return absPath
 		}
diff --git a/lib/fs/casefs.go b/lib/fs/casefs.go
index e04a11753..ef98c79e1 100644
--- a/lib/fs/casefs.go
+++ b/lib/fs/casefs.go
@@ -15,6 +15,7 @@ import (
 	"time"
 
 	lru "github.com/hashicorp/golang-lru"
+	"golang.org/x/text/unicode/norm"
 )
 
 const (
@@ -375,7 +376,10 @@ func (f *caseFilesystem) checkCaseExisting(name string) error {
 	if err != nil {
 		return err
 	}
-	if realName != name {
+	// We normalize the normalization (hah!) of the strings before
+	// comparing, as we don't want to treat a normalization difference as a
+	// case conflict.
+	if norm.NFC.String(realName) != norm.NFC.String(name) {
 		return &ErrCaseConflict{name, realName}
 	}
 	return nil
@@ -424,7 +428,7 @@ func (r *defaultRealCaser) realCase(name string) (string, error) {
 			lastLower := ""
 			for _, n := range dirNames {
 				node.children[n] = struct{}{}
-				lower := UnicodeLowercase(n)
+				lower := UnicodeLowercaseNormalized(n)
 				if lower != lastLower {
 					node.lowerToReal[lower] = n
 					lastLower = n
@@ -437,7 +441,7 @@ func (r *defaultRealCaser) realCase(name string) (string, error) {
 
 		// Try to find a direct or case match
 		if _, ok := node.children[comp]; !ok {
-			comp, ok = node.lowerToReal[UnicodeLowercase(comp)]
+			comp, ok = node.lowerToReal[UnicodeLowercaseNormalized(comp)]
 			if !ok {
 				return "", ErrNotExist
 			}
diff --git a/lib/fs/fakefs.go b/lib/fs/fakefs.go
index 5e259272d..006cae4b9 100644
--- a/lib/fs/fakefs.go
+++ b/lib/fs/fakefs.go
@@ -186,7 +186,7 @@ type fakeEntry struct {
 func (fs *fakeFS) entryForName(name string) *fakeEntry {
 	// bug: lookup doesn't work through symlinks.
 	if fs.insens {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 
 	name = filepath.ToSlash(name)
@@ -285,7 +285,7 @@ func (fs *fakeFS) create(name string) (*fakeEntry, error) {
 	}
 
 	if fs.insens {
-		base = UnicodeLowercase(base)
+		base = UnicodeLowercaseNormalized(base)
 	}
 
 	if fs.withContent {
@@ -373,7 +373,7 @@ func (fs *fakeFS) Mkdir(name string, perm FileMode) error {
 		return os.ErrExist
 	}
 	if fs.insens {
-		key = UnicodeLowercase(key)
+		key = UnicodeLowercaseNormalized(key)
 	}
 	if _, ok := entry.children[key]; ok {
 		return os.ErrExist
@@ -402,7 +402,7 @@ func (fs *fakeFS) MkdirAll(name string, perm FileMode) error {
 	for _, comp := range comps {
 		key := comp
 		if fs.insens {
-			key = UnicodeLowercase(key)
+			key = UnicodeLowercaseNormalized(key)
 		}
 
 		next, ok := entry.children[key]
@@ -465,7 +465,7 @@ func (fs *fakeFS) OpenFile(name string, flags int, mode FileMode) (File, error)
 	}
 
 	if fs.insens {
-		key = UnicodeLowercase(key)
+		key = UnicodeLowercaseNormalized(key)
 	}
 	if flags&os.O_EXCL != 0 {
 		if _, ok := entry.children[key]; ok {
@@ -508,7 +508,7 @@ func (fs *fakeFS) Remove(name string) error {
 	time.Sleep(fs.latency)
 
 	if fs.insens {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 
 	entry := fs.entryForName(name)
@@ -531,7 +531,7 @@ func (fs *fakeFS) RemoveAll(name string) error {
 	time.Sleep(fs.latency)
 
 	if fs.insens {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 
 	entry := fs.entryForName(filepath.Dir(name))
@@ -555,8 +555,8 @@ func (fs *fakeFS) Rename(oldname, newname string) error {
 	newKey := filepath.Base(newname)
 
 	if fs.insens {
-		oldKey = UnicodeLowercase(oldKey)
-		newKey = UnicodeLowercase(newKey)
+		oldKey = UnicodeLowercaseNormalized(oldKey)
+		newKey = UnicodeLowercaseNormalized(newKey)
 	}
 
 	p0 := fs.entryForName(filepath.Dir(oldname))
@@ -651,7 +651,7 @@ func (fs *fakeFS) SameFile(fi1, fi2 FileInfo) bool {
 	// where ModTime is not that precise
 	var ok bool
 	if fs.insens {
-		ok = UnicodeLowercase(fi1.Name()) == UnicodeLowercase(fi2.Name())
+		ok = UnicodeLowercaseNormalized(fi1.Name()) == UnicodeLowercaseNormalized(fi2.Name())
 	} else {
 		ok = fi1.Name() == fi2.Name()
 	}
diff --git a/lib/fs/folding.go b/lib/fs/folding.go
index 9f95f3d36..5ba6d4530 100644
--- a/lib/fs/folding.go
+++ b/lib/fs/folding.go
@@ -10,9 +10,14 @@ import (
 	"strings"
 	"unicode"
 	"unicode/utf8"
+
+	"golang.org/x/text/unicode/norm"
 )
 
-func UnicodeLowercase(s string) string {
+// UnicodeLowercaseNormalized returns the Unicode lower case variant of s,
+// having also normalized it to normalization form C.
+func UnicodeLowercaseNormalized(s string) string {
 	i := firstCaseChange(s)
 	if i == -1 {
-		return s
+		// No rune needs folding, but s may still be in a non-NFC form.
+		return norm.NFC.String(s)
 	}
@@ -28,7 +33,7 @@ func UnicodeLowercase(s string) string {
 	for _, r := range s[i:] {
 		rs.WriteRune(unicode.ToLower(unicode.ToUpper(r)))
 	}
-	return rs.String()
+	return norm.NFC.String(rs.String())
 }
 
 // Byte index of the first rune r s.t. lower(upper(r)) != r.
diff --git a/lib/fs/folding_test.go b/lib/fs/folding_test.go
index d00fef26c..f19cfb4f1 100644
--- a/lib/fs/folding_test.go
+++ b/lib/fs/folding_test.go
@@ -44,13 +44,16 @@ var caseCases = [][2]string{
 	{"チャーハン", "チャーハン"},
 	// Some special Unicode characters, however, are folded by OSes.
 	{"\u212A", "k"},
+	// Folding renormalizes to NFC
+	{"A\xCC\x88", "\xC3\xA4"}, // ä
+	{"a\xCC\x88", "\xC3\xA4"}, // ä, already lower case so no folding happens
 }
 
-func TestUnicodeLowercase(t *testing.T) {
+func TestUnicodeLowercaseNormalized(t *testing.T) {
 	for _, tc := range caseCases {
-		res := UnicodeLowercase(tc[0])
+		res := UnicodeLowercaseNormalized(tc[0])
 		if res != tc[1] {
-			t.Errorf("UnicodeLowercase(%q) => %q, expected %q", tc[0], res, tc[1])
+			t.Errorf("UnicodeLowercaseNormalized(%q) => %q, expected %q", tc[0], res, tc[1])
 		}
 	}
 }
@@ -60,7 +63,7 @@ func BenchmarkUnicodeLowercaseMaybeChange(b *testing.B) {
 
 	for i := 0; i < b.N; i++ {
 		for _, s := range caseCases {
-			UnicodeLowercase(s[0])
+			UnicodeLowercaseNormalized(s[0])
 		}
 	}
 }
@@ -70,7 +73,7 @@ func BenchmarkUnicodeLowercaseNoChange(b *testing.B) {
 
 	for i := 0; i < b.N; i++ {
 		for _, s := range caseCases {
-			UnicodeLowercase(s[1])
+			UnicodeLowercaseNormalized(s[1])
 		}
 	}
 }
diff --git a/lib/fs/mtimefs.go b/lib/fs/mtimefs.go
index f298b7957..5113fc0b7 100644
--- a/lib/fs/mtimefs.go
+++ b/lib/fs/mtimefs.go
@@ -157,7 +157,7 @@ func (f *mtimeFS) wrapperType() filesystemWrapperType {
 
 func (f *mtimeFS) save(name string, real, virtual time.Time) {
 	if f.caseInsensitive {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 
 	if real.Equal(virtual) {
@@ -177,7 +177,7 @@ func (f *mtimeFS) save(name string, real, virtual time.Time) {
 
 func (f *mtimeFS) load(name string) (MtimeMapping, error) {
 	if f.caseInsensitive {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 
 	data, exists, err := f.db.Bytes(name)
diff --git a/lib/scanner/walk_test.go b/lib/scanner/walk_test.go
index 06f313646..ba7e9a331 100644
--- a/lib/scanner/walk_test.go
+++ b/lib/scanner/walk_test.go
@@ -251,6 +251,63 @@ func TestNormalization(t *testing.T) {
 	}
 }
 
+func TestNormalizationDarwinCaseFS(t *testing.T) {
+	// This tests that normalization works on Darwin, through a CaseFS.
+
+	if runtime.GOOS != "darwin" {
+		t.Skip("Normalization test not possible on non-Darwin")
+	}
+
+	testFs := fs.NewCaseFilesystem(testFs)
+
+	testFs.RemoveAll("normalization")
+	defer testFs.RemoveAll("normalization")
+	if err := testFs.MkdirAll("normalization", 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	const (
+		inNFC = "\xC3\x84"
+		inNFD = "\x41\xCC\x88"
+	)
+
+	// Create dir in NFC
+	if err := testFs.Mkdir(filepath.Join("normalization", "dir-"+inNFC), 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create file in NFC
+	fd, err := testFs.Create(filepath.Join("normalization", "dir-"+inNFC, "file-"+inNFC))
+	if err != nil {
+		t.Fatal(err)
+	}
+	fd.Close()
+
+	// Walk, which should normalize and return
+	walkDir(testFs, "normalization", nil, nil, 0)
+	tmp := walkDir(testFs, "normalization", nil, nil, 0)
+	if len(tmp) != 3 {
+		t.Errorf("Expected 3 scanned entries, got %d", len(tmp))
+	}
+
+	// Verify we see the normalized entries in the result
+	foundFile := false
+	foundDir := false
+	for _, f := range tmp {
+		if f.Name == filepath.Join("normalization", "dir-"+inNFD) {
+			foundDir = true
+			continue
+		}
+		if f.Name == filepath.Join("normalization", "dir-"+inNFD, "file-"+inNFD) {
+			foundFile = true
+			continue
+		}
+	}
+	if !foundFile || !foundDir {
+		t.Error("Didn't find expected normalization form")
+	}
+}
+
 func TestIssue1507(t *testing.T) {
 	w := &walker{}
 	w.Matcher = ignore.New(w.Filesystem)