lib/model: Harden sanitizePath (#6533)

In particular, non-printable runes and non-UTF8 sequences are no longer
allowed. Linux systems will happily creates filenames containing these.
This commit is contained in:
greatroar 2020-04-14 20:26:26 +02:00 committed by GitHub
parent 82fbcb96f8
commit 81ff31b8fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 49 additions and 5 deletions

View File

@ -14,11 +14,11 @@ import (
"net"
"path/filepath"
"reflect"
"regexp"
"runtime"
"strings"
stdsync "sync"
"time"
"unicode"
"github.com/pkg/errors"
"github.com/thejerf/suture"
@ -2697,8 +2697,11 @@ func (m *syncMutexMap) Get(key string) sync.Mutex {
// sanitizePath takes a string that might contain all kinds of special
// characters and makes a valid, similar, path name out of it.
//
// Spans of invalid characters are replaced by a single space. Invalid
// characters are control characters, the things not allowed in file names
// Spans of invalid characters, whitespace and/or non-UTF-8 sequences are
// replaced by a single space. The result is always UTF-8 and contains only
// printable characters, as determined by unicode.IsPrint.
//
// Invalid characters are non-printing runes, things not allowed in file names
// in Windows, and common shell metacharacters. Even if asterisks and pipes
// and stuff are allowed on Unixes in general they might not be allowed by
// the filesystem and may surprise the user and cause shell oddness. This
@ -2709,6 +2712,20 @@ func (m *syncMutexMap) Get(key string) sync.Mutex {
// whitespace is collapsed to a single space. Additionally, whitespace at
// either end is removed.
func sanitizePath(path string) string {
invalid := regexp.MustCompile(`([[:cntrl:]]|[<>:"'/\\|?*\n\r\t \[\]\{\};:!@$%&^#])+`)
return strings.TrimSpace(invalid.ReplaceAllString(path, " "))
var b strings.Builder
prev := ' '
for _, c := range path {
if !unicode.IsPrint(c) || c == unicode.ReplacementChar ||
strings.ContainsRune(`<>:"'/\|?*[]{};:!@$%&^#`, c) {
c = ' '
}
if !(c == ' ' && prev == ' ') {
b.WriteRune(c)
}
prev = c
}
return strings.TrimSpace(b.String())
}

View File

@ -24,6 +24,8 @@ import (
"sync/atomic"
"testing"
"time"
"unicode"
"unicode/utf8"
"github.com/syncthing/syncthing/lib/config"
"github.com/syncthing/syncthing/lib/db"
@ -3306,6 +3308,9 @@ func TestSanitizePath(t *testing.T) {
{`Räk \/ smörgås`, "Räk smörgås"},
{"هذا هو *\x07?اسم الملف", "هذا هو اسم الملف"},
{`../foo.txt`, `.. foo.txt`},
{" \t \n filename in \t space\r", "filename in space"},
{"你\xff好", `你 好`},
{"\000 foo", "foo"},
}
for _, tc := range cases {
@ -3316,6 +3321,28 @@ func TestSanitizePath(t *testing.T) {
}
}
// Fuzz test: sanitizePath must always return strings of printable UTF-8
// characters when fed random data.
//
// Note that space is considered printable, but other whitespace runes are not.
func TestSanitizePathFuzz(t *testing.T) {
buf := make([]byte, 128)
for i := 0; i < 100; i++ {
rand.Read(buf)
path := sanitizePath(string(buf))
if !utf8.ValidString(path) {
t.Errorf("sanitizePath(%q) => %q, not valid UTF-8", buf, path)
continue
}
for _, c := range path {
if !unicode.IsPrint(c) {
t.Errorf("non-printable rune %q in sanitized path", c)
}
}
}
}
// TestConnCloseOnRestart checks that there is no deadlock when calling Close
// on a protocol connection that has a blocking reader (blocking writer can't
// be done as the test requires clusterconfigs to go through).