lib/fs: Ignore normalization differences in case insensitive lookup (fixes #7677) (#7678)

This commit is contained in:
Jakob Borg 2021-05-17 12:35:03 +02:00 committed by GitHub
parent 5b90a98650
commit 97437cad64
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 93 additions and 27 deletions

View File

@ -157,9 +157,9 @@ func (f *BasicFilesystem) Roots() ([]string, error) {
// pathseparator.
func (f *BasicFilesystem) unrootedChecked(absPath string, roots []string) (string, error) {
absPath = f.resolveWin83(absPath)
lowerAbsPath := UnicodeLowercase(absPath)
lowerAbsPath := UnicodeLowercaseNormalized(absPath)
for _, root := range roots {
lowerRoot := UnicodeLowercase(root)
lowerRoot := UnicodeLowercaseNormalized(root)
if lowerAbsPath+string(PathSeparator) == lowerRoot {
return ".", nil
}
@ -171,7 +171,7 @@ func (f *BasicFilesystem) unrootedChecked(absPath string, roots []string) (strin
}
// rel returns the trailing portion of path that remains after removing
// prefix and one leading path separator. The match is done on the lower
// cased form of both strings, and the result is then cut from the original
// path by taking as many trailing bytes as the lower cased remainder has.
func rel(path, prefix string) string {
lowerRel := strings.TrimPrefix(strings.TrimPrefix(UnicodeLowercase(path), UnicodeLowercase(prefix)), string(PathSeparator))
lowerRel := strings.TrimPrefix(strings.TrimPrefix(UnicodeLowercaseNormalized(path), UnicodeLowercaseNormalized(prefix)), string(PathSeparator))
// NOTE(review): the slice below assumes the folded remainder has the same
// byte length as the corresponding tail of path. Case folding and NFC
// normalization can both change byte length (e.g. "\u212A" folds to "k",
// "A\xCC\x88" composes to "\xC3\xA4"), so this may cut at the wrong offset
// for such names -- confirm whether callers can hit that.
return path[len(path)-len(lowerRel):]
}
@ -193,8 +193,8 @@ func (f *BasicFilesystem) resolveWin83(absPath string) string {
}
// Failed getting the long path. Return the part of the path which is
// already a long path.
lowerRoot := UnicodeLowercase(f.root)
for absPath = filepath.Dir(absPath); strings.HasPrefix(UnicodeLowercase(absPath), lowerRoot); absPath = filepath.Dir(absPath) {
lowerRoot := UnicodeLowercaseNormalized(f.root)
for absPath = filepath.Dir(absPath); strings.HasPrefix(UnicodeLowercaseNormalized(absPath), lowerRoot); absPath = filepath.Dir(absPath) {
if !isMaybeWin83(absPath) {
return absPath
}

View File

@ -15,6 +15,7 @@ import (
"time"
lru "github.com/hashicorp/golang-lru"
"golang.org/x/text/unicode/norm"
)
const (
@ -375,7 +376,10 @@ func (f *caseFilesystem) checkCaseExisting(name string) error {
if err != nil {
return err
}
if realName != name {
// We normalize the normalization (hah!) of the strings before
// comparing, as we don't want to treat a normalization difference as a
// case conflict.
if norm.NFC.String(realName) != norm.NFC.String(name) {
return &ErrCaseConflict{name, realName}
}
return nil
@ -424,7 +428,7 @@ func (r *defaultRealCaser) realCase(name string) (string, error) {
lastLower := ""
for _, n := range dirNames {
node.children[n] = struct{}{}
lower := UnicodeLowercase(n)
lower := UnicodeLowercaseNormalized(n)
if lower != lastLower {
node.lowerToReal[lower] = n
lastLower = n
@ -437,7 +441,7 @@ func (r *defaultRealCaser) realCase(name string) (string, error) {
// Try to find a direct or case match
if _, ok := node.children[comp]; !ok {
comp, ok = node.lowerToReal[UnicodeLowercase(comp)]
comp, ok = node.lowerToReal[UnicodeLowercaseNormalized(comp)]
if !ok {
return "", ErrNotExist
}

View File

@ -186,7 +186,7 @@ type fakeEntry struct {
func (fs *fakeFS) entryForName(name string) *fakeEntry {
// bug: lookup doesn't work through symlinks.
if fs.insens {
name = UnicodeLowercase(name)
name = UnicodeLowercaseNormalized(name)
}
name = filepath.ToSlash(name)
@ -285,7 +285,7 @@ func (fs *fakeFS) create(name string) (*fakeEntry, error) {
}
if fs.insens {
base = UnicodeLowercase(base)
base = UnicodeLowercaseNormalized(base)
}
if fs.withContent {
@ -373,7 +373,7 @@ func (fs *fakeFS) Mkdir(name string, perm FileMode) error {
return os.ErrExist
}
if fs.insens {
key = UnicodeLowercase(key)
key = UnicodeLowercaseNormalized(key)
}
if _, ok := entry.children[key]; ok {
return os.ErrExist
@ -402,7 +402,7 @@ func (fs *fakeFS) MkdirAll(name string, perm FileMode) error {
for _, comp := range comps {
key := comp
if fs.insens {
key = UnicodeLowercase(key)
key = UnicodeLowercaseNormalized(key)
}
next, ok := entry.children[key]
@ -465,7 +465,7 @@ func (fs *fakeFS) OpenFile(name string, flags int, mode FileMode) (File, error)
}
if fs.insens {
key = UnicodeLowercase(key)
key = UnicodeLowercaseNormalized(key)
}
if flags&os.O_EXCL != 0 {
if _, ok := entry.children[key]; ok {
@ -508,7 +508,7 @@ func (fs *fakeFS) Remove(name string) error {
time.Sleep(fs.latency)
if fs.insens {
name = UnicodeLowercase(name)
name = UnicodeLowercaseNormalized(name)
}
entry := fs.entryForName(name)
@ -531,7 +531,7 @@ func (fs *fakeFS) RemoveAll(name string) error {
time.Sleep(fs.latency)
if fs.insens {
name = UnicodeLowercase(name)
name = UnicodeLowercaseNormalized(name)
}
entry := fs.entryForName(filepath.Dir(name))
@ -555,8 +555,8 @@ func (fs *fakeFS) Rename(oldname, newname string) error {
newKey := filepath.Base(newname)
if fs.insens {
oldKey = UnicodeLowercase(oldKey)
newKey = UnicodeLowercase(newKey)
oldKey = UnicodeLowercaseNormalized(oldKey)
newKey = UnicodeLowercaseNormalized(newKey)
}
p0 := fs.entryForName(filepath.Dir(oldname))
@ -651,7 +651,7 @@ func (fs *fakeFS) SameFile(fi1, fi2 FileInfo) bool {
// where ModTime is not that precise
var ok bool
if fs.insens {
ok = UnicodeLowercase(fi1.Name()) == UnicodeLowercase(fi2.Name())
ok = UnicodeLowercaseNormalized(fi1.Name()) == UnicodeLowercaseNormalized(fi2.Name())
} else {
ok = fi1.Name() == fi2.Name()
}

View File

@ -10,9 +10,13 @@ import (
"strings"
"unicode"
"unicode/utf8"
"golang.org/x/text/unicode/norm"
)
func UnicodeLowercase(s string) string {
// UnicodeLowercaseNormalized returns the Unicode lower case variant of s,
// having also normalized it to normalization form C.
func UnicodeLowercaseNormalized(s string) string {
i := firstCaseChange(s)
if i == -1 {
return s
@ -28,7 +32,7 @@ func UnicodeLowercase(s string) string {
for _, r := range s[i:] {
rs.WriteRune(unicode.ToLower(unicode.ToUpper(r)))
}
return rs.String()
return norm.NFC.String(rs.String())
}
// Byte index of the first rune r s.t. lower(upper(r)) != r.

View File

@ -44,13 +44,15 @@ var caseCases = [][2]string{
{"チャーハン", "チャーハン"},
// Some special Unicode characters, however, are folded by OSes.
{"\u212A", "k"},
// Folding renormalizes to NFC
{"A\xCC\x88", "\xC3\xA4"}, // ä
}
// TestUnicodeLowercaseNormalized feeds the first element of every pair in
// caseCases through the folding function and reports an error whenever the
// result differs from the pair's second element.
func TestUnicodeLowercase(t *testing.T) {
func TestUnicodeLowercaseNormalized(t *testing.T) {
for _, tc := range caseCases {
res := UnicodeLowercase(tc[0])
res := UnicodeLowercaseNormalized(tc[0])
if res != tc[1] {
t.Errorf("UnicodeLowercase(%q) => %q, expected %q", tc[0], res, tc[1])
t.Errorf("UnicodeLowercaseNormalized(%q) => %q, expected %q", tc[0], res, tc[1])
}
}
}
@ -60,7 +62,7 @@ func BenchmarkUnicodeLowercaseMaybeChange(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, s := range caseCases {
UnicodeLowercase(s[0])
UnicodeLowercaseNormalized(s[0])
}
}
}
@ -70,7 +72,7 @@ func BenchmarkUnicodeLowercaseNoChange(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, s := range caseCases {
UnicodeLowercase(s[1])
UnicodeLowercaseNormalized(s[1])
}
}
}

View File

@ -157,7 +157,7 @@ func (f *mtimeFS) wrapperType() filesystemWrapperType {
func (f *mtimeFS) save(name string, real, virtual time.Time) {
if f.caseInsensitive {
name = UnicodeLowercase(name)
name = UnicodeLowercaseNormalized(name)
}
if real.Equal(virtual) {
@ -177,7 +177,7 @@ func (f *mtimeFS) save(name string, real, virtual time.Time) {
func (f *mtimeFS) load(name string) (MtimeMapping, error) {
if f.caseInsensitive {
name = UnicodeLowercase(name)
name = UnicodeLowercaseNormalized(name)
}
data, exists, err := f.db.Bytes(name)

View File

@ -251,6 +251,62 @@ func TestNormalization(t *testing.T) {
}
}
// TestNormalizationDarwinCaseFS checks that, on Darwin, a directory and a
// file created with NFC names through a case filesystem wrapper are reported
// by walkDir with their names in NFD form. The test is skipped on every
// other operating system.
func TestNormalizationDarwinCaseFS(t *testing.T) {
	// This tests that normalization works on Darwin, through a CaseFS.
	if runtime.GOOS != "darwin" {
		// Skip stops the test by itself; no explicit return is needed.
		t.Skip("Normalization test not possible on non-Darwin")
	}

	// Use a distinct name so the wrapped filesystem does not shadow the
	// outer testFs it is built from.
	caseFs := fs.NewCaseFilesystem(testFs)

	// Best-effort cleanup of leftovers from a previous run, and again when
	// this test finishes.
	caseFs.RemoveAll("normalization")
	defer caseFs.RemoveAll("normalization")
	if err := caseFs.MkdirAll("normalization", 0755); err != nil {
		t.Fatal(err)
	}

	const (
		inNFC = "\xC3\x84"
		inNFD = "\x41\xCC\x88"
	)

	dirNFC := filepath.Join("normalization", "dir-"+inNFC)
	fileNFC := filepath.Join(dirNFC, "file-"+inNFC)
	dirNFD := filepath.Join("normalization", "dir-"+inNFD)
	fileNFD := filepath.Join(dirNFD, "file-"+inNFD)

	// Create dir in NFC
	if err := caseFs.Mkdir(dirNFC, 0755); err != nil {
		t.Fatal(err)
	}

	// Create file in NFC
	fd, err := caseFs.Create(fileNFC)
	if err != nil {
		t.Fatal(err)
	}
	if err := fd.Close(); err != nil {
		t.Fatal(err)
	}

	// Walk twice and only inspect the second result.
	// NOTE(review): presumably the first pass performs the normalization and
	// the second pass observes the normalized names -- confirm against
	// walkDir.
	walkDir(caseFs, "normalization", nil, nil, 0)
	tmp := walkDir(caseFs, "normalization", nil, nil, 0)
	if len(tmp) != 3 {
		t.Errorf("Expected 3 scanned entries, got %d", len(tmp))
	}

	// Verify we see the normalized entries in the result
	foundDir, foundFile := false, false
	for _, f := range tmp {
		switch f.Name {
		case dirNFD:
			foundDir = true
		case fileNFD:
			foundFile = true
		}
	}
	if !foundDir {
		t.Errorf("Didn't find expected normalization form for dir %q", dirNFD)
	}
	if !foundFile {
		t.Errorf("Didn't find expected normalization form for file %q", fileNFD)
	}
}
func TestIssue1507(t *testing.T) {
w := &walker{}
w.Matcher = ignore.New(w.Filesystem)