lib/fs: Add fakefs (#5235)

* lib/fs: Add fakefs

This adds a new fake filesystem type. It's described rather extensively
in fakefs.go, but the main point is that it's for testing: when you want
to spin up a Syncthing and have a terabyte or two of random files that
can be synced somewhere, or an infinitely large filesystem to sync files
into.

It has pseudorandom properties such that data read from one fakefs can
be written into another fakefs and read back and it will look
consistent, without any of the data actually being stored.

To use:

    <folder id="default" path="whatever" ...>
        <filesystemType>fake</filesystemType>

This will create an empty fake filesystem. You can also specify that it
should be prefilled with files:

    <folder id="default" path="whatever?maxsize=2000000" ...>
        <filesystemType>fake</filesystemType>

This will create a filesystem filled with 2TB of random data that can be
scanned and synced. There are more options, see fakefs.go.

Prefilled data is based on a deterministic seed, so you can index the
data and restart Syncthing and the index is still correct for all the
stored data.
This commit is contained in:
Jakob Borg 2018-10-02 20:29:06 +02:00 committed by Audrius Butkevicius
parent 3d74ff97af
commit d27463268d
4 changed files with 834 additions and 0 deletions

673
lib/fs/fakefs.go Normal file
View File

@ -0,0 +1,673 @@
// Copyright (C) 2018 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package fs
import (
"context"
"errors"
"fmt"
"hash/fnv"
"io"
"io/ioutil"
"math/rand"
"net/url"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
)
// randomBlockShift is the log2 of the block size used when generating file
// contents: offsets are shifted right by this amount to get a block number.
// With a shift of 14 each block is 1<<14 bytes, i.e. 16 KiB. See
// readShortAt().
const randomBlockShift = 14 // 16 KiB
// fakefs is a fake filesystem for testing and benchmarking. It has the
// following properties:
//
// - File metadata is kept in RAM. Specifically, we remember which files and
//   directories exist, their dates, permissions and sizes. Symlinks are
//   not supported.
//
// - File contents are generated pseudorandomly with just the file name as
//   seed. Writes are discarded, other than having the effect of increasing
//   the file size. If you only write data that you've read from a file with
//   the same name on a different fakefs, you'll never know the difference...
//
// - We totally ignore permissions - pretend you are root.
//
// - The root path can contain URL query-style parameters that prepopulate
//   the filesystem at creation with a certain amount of random data:
//
//     files=n    to generate n random files (default 0)
//     maxsize=n  to generate files up to a total of n MiB (default 0)
//     sizeavg=n  to set the average size of random files, in bytes (default 1<<20)
//     seed=n     to set the initial random seed (default 0)
//
//   The parameters only take effect the first time a given root path is
//   used; later requests for the same path get the existing instance.
//
// - Two fakefs:s pointing at the same root path see the same files.
//
type fakefs struct {
// mut guards the entry tree; most methods lock it for their duration.
mut sync.Mutex
// root is the top-level directory entry; all other entries are reached
// through its children.
root *fakeEntry
}
var (
// fakefsMut is held by newFakeFilesystem while it consults and updates
// fakefsFs.
fakefsMut sync.Mutex
// fakefsFs holds every fake filesystem created so far, keyed by root
// path, so that repeated requests for one path share a single instance.
fakefsFs = make(map[string]*fakefs)
)
// newFakeFilesystem returns the fake filesystem for the given root, creating
// and registering it on first use. The root may carry URL query-style
// parameters (files, maxsize, sizeavg, seed) that prepopulate a newly
// created filesystem with deterministic pseudorandom files; they are ignored
// when a filesystem for the same path already exists. A ".stfolder"
// directory is always created in a new filesystem.
func newFakeFilesystem(root string) *fakefs {
	fakefsMut.Lock()
	defer fakefsMut.Unlock()

	// Split "path?params" into its parts. If the root does not parse as a
	// URL it is used verbatim and no parameters apply.
	var params url.Values
	uri, err := url.Parse(root)
	if err == nil {
		root = uri.Path
		params = uri.Query()
	}

	if fs, ok := fakefsFs[root]; ok {
		// Already have an fs at this path
		return fs
	}

	fs := &fakefs{
		root: &fakeEntry{
			name:     "/",
			isdir:    true,
			mode:     0700,
			mtime:    time.Now(),
			children: make(map[string]*fakeEntry),
		},
	}

	// Missing or unparseable parameters deliberately fall back to zero.
	files, _ := strconv.Atoi(params.Get("files"))
	maxsize, _ := strconv.Atoi(params.Get("maxsize"))
	sizeavg, _ := strconv.Atoi(params.Get("sizeavg"))
	seed, _ := strconv.Atoi(params.Get("seed"))

	if sizeavg <= 0 {
		// Zero means "not set". Negative values get the default as well,
		// since rng.Intn below panics on a non-positive argument.
		sizeavg = 1 << 20
	}

	if files > 0 || maxsize > 0 {
		// Generate initial data according to specs. Operations in here
		// *look* like file I/O, but they are not. Do not worry that they
		// might fail.
		rng := rand.New(rand.NewSource(int64(seed)))
		var createdFiles int
		var writtenData int64
		// Stop at whichever limit is reached first; a limit of zero means
		// that limit is not in effect. maxsize is in MiB, hence the shift.
		for (files == 0 || createdFiles < files) && (maxsize == 0 || writtenData>>20 < int64(maxsize)) {
			dir := filepath.Join(fmt.Sprintf("%02x", rng.Intn(255)), fmt.Sprintf("%02x", rng.Intn(255)))
			file := fmt.Sprintf("%016x", rng.Int63())
			path := filepath.Join(dir, file)
			fs.MkdirAll(dir, 0755)

			fd, _ := fs.Create(path)
			createdFiles++

			// Sizes fall in [sizeavg/2, 3*sizeavg/2).
			fsize := int64(sizeavg/2 + rng.Intn(sizeavg))
			fd.Truncate(fsize)
			writtenData += fsize

			// Modification times are spread over ten years from Unix
			// time 1000000000, so they are as deterministic as the rest.
			ftime := time.Unix(1000000000+rng.Int63n(10*365*86400), 0)
			fs.Chtimes(path, ftime, ftime)
		}
	}

	// Also create a default folder marker for good measure
	fs.Mkdir(".stfolder", 0700)

	fakefsFs[root] = fs
	return fs
}
// fakeEntry is an entry (file or directory) in the fake filesystem
type fakeEntry struct {
// name is the base name of the entry ("/" for the root).
name string
// isdir is true for directories and false for regular files.
isdir bool
// size is the file length in bytes; it is the only record of written data.
size int64
mode FileMode
mtime time.Time
// children maps base names to entries. It is allocated for directories
// and left nil for files.
children map[string]*fakeEntry
}
// entryForName resolves name to its entry by walking down from the root one
// path component at a time. "." and "/" resolve to the root itself. It
// returns nil as soon as a component cannot be found. It does no locking of
// its own.
func (fs *fakefs) entryForName(name string) *fakeEntry {
	slashed := filepath.ToSlash(name)
	switch slashed {
	case ".", "/":
		return fs.root
	}

	cur := fs.root
	for _, part := range strings.Split(strings.Trim(slashed, "/"), "/") {
		child, found := cur.children[part]
		if !found {
			return nil
		}
		cur = child
	}
	return cur
}
// Chmod records mode on the named entry. It returns os.ErrNotExist when
// there is no such entry.
func (fs *fakefs) Chmod(name string, mode FileMode) error {
	fs.mut.Lock()
	defer fs.mut.Unlock()

	if target := fs.entryForName(name); target != nil {
		target.mode = mode
		return nil
	}
	return os.ErrNotExist
}
// Chtimes sets the modification time of the named entry. The access time is
// not tracked and atime is ignored. It returns os.ErrNotExist when there is
// no such entry.
func (fs *fakefs) Chtimes(name string, atime time.Time, mtime time.Time) error {
	fs.mut.Lock()
	defer fs.mut.Unlock()

	if target := fs.entryForName(name); target != nil {
		target.mtime = mtime
		return nil
	}
	return os.ErrNotExist
}
// Create creates the named file, or truncates it to zero length if it
// already exists, and returns a handle to it. It returns os.ErrExist when
// name is an existing directory, os.ErrNotExist when the parent directory
// is missing, and an error when the parent is not a directory.
func (fs *fakefs) Create(name string) (File, error) {
	fs.mut.Lock()
	defer fs.mut.Unlock()

	if entry := fs.entryForName(name); entry != nil {
		if entry.isdir {
			return nil, os.ErrExist
		}
		// Existing file: reset it in place so that already open handles
		// keep referring to the same entry.
		entry.size = 0
		entry.mtime = time.Now()
		entry.mode = 0666
		return &fakeFile{fakeEntry: entry}, nil
	}

	parent := fs.entryForName(filepath.Dir(name))
	if parent == nil {
		return nil, os.ErrNotExist
	}
	if !parent.isdir {
		// Files have a nil children map; inserting into it would panic.
		return nil, errors.New("not a directory")
	}

	base := filepath.Base(name)
	created := &fakeEntry{
		name:  base,
		mode:  0666,
		mtime: time.Now(),
	}
	parent.children[base] = created
	return &fakeFile{fakeEntry: created}, nil
}
// CreateSymlink always fails; the fake filesystem has no symlinks.
func (fs *fakefs) CreateSymlink(target, name string) error {
return errors.New("not implemented")
}
// DirNames lists the names of the children of the named entry, in no
// particular order. It returns os.ErrNotExist when there is no such entry.
func (fs *fakefs) DirNames(name string) ([]string, error) {
	fs.mut.Lock()
	defer fs.mut.Unlock()

	dir := fs.entryForName(name)
	if dir == nil {
		return nil, os.ErrNotExist
	}

	result := make([]string, 0, len(dir.children))
	for child := range dir.children {
		result = append(result, child)
	}
	return result, nil
}
// Lstat returns a snapshot of the metadata of the named entry, or
// os.ErrNotExist when there is no such entry. The result is a copy and does
// not follow later changes to the entry.
func (fs *fakefs) Lstat(name string) (FileInfo, error) {
	fs.mut.Lock()
	defer fs.mut.Unlock()

	if found := fs.entryForName(name); found != nil {
		snapshot := fakeFileInfo{*found}
		return &snapshot, nil
	}
	return nil, os.ErrNotExist
}
// Mkdir creates a single directory with the given permissions. The parent
// must already exist. It returns os.ErrNotExist when the parent is missing,
// an error when the parent is not a directory, and os.ErrExist when
// something with that name is already there.
func (fs *fakefs) Mkdir(name string, perm FileMode) error {
	fs.mut.Lock()
	defer fs.mut.Unlock()

	base := filepath.Base(name)
	parent := fs.entryForName(filepath.Dir(name))
	if parent == nil {
		return os.ErrNotExist
	}
	if !parent.isdir {
		// Files have a nil children map; inserting into it would panic.
		return errors.New("not a directory")
	}
	if _, ok := parent.children[base]; ok {
		return os.ErrExist
	}

	parent.children[base] = &fakeEntry{
		name:     base,
		isdir:    true,
		mode:     perm,
		mtime:    time.Now(),
		children: make(map[string]*fakeEntry),
	}
	return nil
}
// MkdirAll creates the named directory together with any missing parents,
// all with the given permissions. Existing directories along the way are
// left untouched. It returns an error when a path component exists but is
// not a directory.
func (fs *fakefs) MkdirAll(name string, perm FileMode) error {
	// The tree is modified below, so take the same lock as every other
	// method that touches it.
	fs.mut.Lock()
	defer fs.mut.Unlock()

	name = strings.Trim(filepath.ToSlash(name), "/")
	if name == "" || name == "." {
		// This is the root, which always exists. Without this check we
		// would create a bogus child called "" or ".".
		return nil
	}

	entry := fs.root
	for _, comp := range strings.Split(name, "/") {
		next, ok := entry.children[comp]
		switch {
		case !ok:
			next = &fakeEntry{
				name:     comp,
				isdir:    true,
				mode:     perm,
				mtime:    time.Now(),
				children: make(map[string]*fakeEntry),
			}
			entry.children[comp] = next
		case !next.isdir:
			return errors.New("not a directory")
		}
		entry = next
	}
	return nil
}
// Open returns a handle to the named entry, which may be a file or a
// directory. It returns os.ErrNotExist when there is no such entry.
func (fs *fakefs) Open(name string) (File, error) {
	fs.mut.Lock()
	defer fs.mut.Unlock()

	if found := fs.entryForName(name); found != nil {
		return &fakeFile{fakeEntry: found}, nil
	}
	return nil, os.ErrNotExist
}
// OpenFile opens the named file according to flags. Only os.O_CREATE,
// os.O_EXCL and os.O_TRUNC are looked at:
//
//   - Without O_CREATE this is the same as Open.
//   - With O_CREATE a missing file is created with the given mode and an
//     existing entry is opened as is, or truncated when O_TRUNC is also set
//     and the entry is a file.
//   - With O_CREATE and O_EXCL an existing entry results in os.ErrExist.
//
// It returns os.ErrNotExist when the parent directory is missing and an
// error when the parent is not a directory.
func (fs *fakefs) OpenFile(name string, flags int, mode FileMode) (File, error) {
	if flags&os.O_CREATE == 0 {
		// Open takes fs.mut itself and sync.Mutex is not reentrant, so
		// this must happen before we lock.
		return fs.Open(name)
	}

	fs.mut.Lock()
	defer fs.mut.Unlock()

	base := filepath.Base(name)
	parent := fs.entryForName(filepath.Dir(name))
	if parent == nil {
		return nil, os.ErrNotExist
	}
	if !parent.isdir {
		// Files have a nil children map; inserting into it would panic.
		return nil, errors.New("not a directory")
	}

	if existing, ok := parent.children[base]; ok {
		if flags&os.O_EXCL != 0 {
			return nil, os.ErrExist
		}
		// Reuse the existing entry instead of replacing it, so that its
		// size (or, for a directory, its whole subtree) is not lost.
		if flags&os.O_TRUNC != 0 && !existing.isdir {
			existing.size = 0
			existing.mtime = time.Now()
		}
		return &fakeFile{fakeEntry: existing}, nil
	}

	created := &fakeEntry{
		name:  base,
		mode:  mode,
		mtime: time.Now(),
	}
	parent.children[base] = created
	return &fakeFile{fakeEntry: created}, nil
}
// ReadSymlink always fails; the fake filesystem has no symlinks.
func (fs *fakefs) ReadSymlink(name string) (string, error) {
return "", errors.New("not implemented")
}
// Remove deletes the named file or empty directory. It returns
// os.ErrNotExist when there is no such entry and an error when the entry
// still has children.
func (fs *fakefs) Remove(name string) error {
	fs.mut.Lock()
	defer fs.mut.Unlock()

	target := fs.entryForName(name)
	switch {
	case target == nil:
		return os.ErrNotExist
	case len(target.children) > 0:
		return errors.New("not empty")
	}

	// Unlink the entry from its parent directory.
	parent := fs.entryForName(filepath.Dir(name))
	delete(parent.children, filepath.Base(name))
	return nil
}
// RemoveAll deletes the named entry and everything below it. It returns
// os.ErrNotExist when the parent directory is missing; removing a name that
// does not exist in an existing parent is not an error.
func (fs *fakefs) RemoveAll(name string) error {
	fs.mut.Lock()
	defer fs.mut.Unlock()

	if parent := fs.entryForName(filepath.Dir(name)); parent != nil {
		// Dropping the reference from the parent is all it takes: the
		// garbage collector reclaims the subtree, and handles that are
		// still open keep their entry alive.
		delete(parent.children, filepath.Base(name))
		return nil
	}
	return os.ErrNotExist
}
// Rename moves the entry at oldname to newname, replacing an existing file
// at the destination. Renaming an entry onto itself is a no-op. It returns
// os.ErrNotExist when the source or either parent directory is missing, and
// an error when the destination parent is not a directory or the
// destination is an existing directory.
func (fs *fakefs) Rename(oldname, newname string) error {
	fs.mut.Lock()
	defer fs.mut.Unlock()

	oldBase := filepath.Base(oldname)
	newBase := filepath.Base(newname)

	p0 := fs.entryForName(filepath.Dir(oldname))
	if p0 == nil {
		return os.ErrNotExist
	}
	entry := p0.children[oldBase]
	if entry == nil {
		return os.ErrNotExist
	}

	p1 := fs.entryForName(filepath.Dir(newname))
	if p1 == nil {
		return os.ErrNotExist
	}
	if !p1.isdir {
		// Files have a nil children map; inserting into it would panic.
		return errors.New("not a directory")
	}

	if p0 == p1 && oldBase == newBase {
		// Same source and destination. Going through the insert and
		// delete below would remove the entry altogether.
		return nil
	}

	if dst, ok := p1.children[newBase]; ok && dst.isdir {
		return errors.New("is a directory")
	}

	p1.children[newBase] = entry
	// Keep the entry's own name in step with the key it is stored under,
	// so that Stat and the name-seeded contents reflect the new name.
	entry.name = newBase
	delete(p0.children, oldBase)
	return nil
}
// Stat is the same as Lstat, as there are no symlinks to follow.
func (fs *fakefs) Stat(name string) (FileInfo, error) {
return fs.Lstat(name)
}
// SymlinksSupported always reports false.
func (fs *fakefs) SymlinksSupported() bool {
return false
}
// Walk is not implemented and always returns an error.
func (fs *fakefs) Walk(name string, walkFn WalkFunc) error {
return errors.New("not implemented")
}
// Watch is not supported and always returns ErrWatchNotSupported.
func (fs *fakefs) Watch(path string, ignore Matcher, ctx context.Context, ignorePerms bool) (<-chan Event, error) {
return nil, ErrWatchNotSupported
}
// Hide does nothing and reports success.
func (fs *fakefs) Hide(name string) error {
return nil
}
// Unhide does nothing and reports success.
func (fs *fakefs) Unhide(name string) error {
return nil
}
// Glob is not implemented and always returns an error.
func (fs *fakefs) Glob(pattern string) ([]string, error) {
// gnnh we don't seem to actually require this in practice
return nil, errors.New("not implemented")
}
// Roots returns the single root "/".
func (fs *fakefs) Roots() ([]string, error) {
return []string{"/"}, nil
}
// Usage is not implemented and always returns an error.
func (fs *fakefs) Usage(name string) (Usage, error) {
return Usage{}, errors.New("not implemented")
}
// Type returns FilesystemTypeFake.
func (fs *fakefs) Type() FilesystemType {
return FilesystemTypeFake
}
// URI returns "fake://" followed by the name of the root entry.
// NOTE(review): newFakeFilesystem names the root entry "/", not the root
// path it was given, so the path is not part of the result - confirm that
// this is intended.
func (fs *fakefs) URI() string {
return "fake://" + fs.root.name
}
// SameFile reports whether fi1 and fi2 have the same name, which is the
// only identity comparison made here.
func (fs *fakefs) SameFile(fi1, fi2 FileInfo) bool {
	return fi1.Name() == fi2.Name()
}
// fakeFile is the representation of an open file. We don't care if it's
// opened for reading or writing, it's all good.
type fakeFile struct {
// The entry this handle refers to; size and metadata changes made via the
// handle are made on the entry itself.
*fakeEntry
// mut is held by Read, ReadAt, Seek, WriteAt and Truncate.
mut sync.Mutex
// rng generates the data for the block currently being read. It is nil
// when a new generator must be set up on the next read.
rng io.Reader
// seed is the FNV hash of the file name, computed on first read. Zero
// means it has not been computed yet.
seed int64
// offset is the current position in the file.
offset int64
// seedOffs is the number of the block that rng was created for.
seedOffs int64
}
// Close does nothing and reports success.
func (f *fakeFile) Close() error {
return nil
}
// Read reads from the current offset into p and advances the offset. A
// single call never crosses a random block boundary, so it may return fewer
// bytes than len(p) even when not at the end of the file.
func (f *fakeFile) Read(p []byte) (int, error) {
	f.mut.Lock()
	defer f.mut.Unlock()

	n, err := f.readShortAt(p, f.offset)
	return n, err
}
// ReadAt fills p with data starting at offs, returning early only on EOF or
// error.
func (f *fakeFile) ReadAt(p []byte, offs int64) (int, error) {
	f.mut.Lock()
	defer f.mut.Unlock()

	// ReadAt must deliver a full buffer unless it hits EOF or an error,
	// while readShortAt stops at block boundaries, hence the loop. Strictly
	// speaking ReadAt should also leave the seek position alone. Doing so
	// would complicate the RNG bookkeeping and nothing currently depends
	// on it, so the position does move.
	total := 0
	for {
		n, err := f.readShortAt(p[total:], offs+int64(total))
		total += n
		switch {
		case err != nil:
			return total, err
		case total == len(p):
			return total, nil
		}
	}
}
// readShortAt reads into p from the given offset, returning at most the data
// up to the end of the current random block or the end of the file,
// whichever comes first. It returns io.EOF when offs is at or beyond the end
// of the file and an error for directories. It updates f.offset to just past
// the data returned. It does no locking of its own.
func (f *fakeFile) readShortAt(p []byte, offs int64) (int, error) {
// Here be a certain amount of magic... We want to return pseudorandom,
// predictable data so that a read from the same offset in the same file
// always returns the same data. But the RNG is a stream, and reads can
// be random.
//
// We split the file into "blocks" numbered by "seedNo", where each
// block becomes an instantiation of the RNG, seeded with the hash of
// the file name plus the seedNo (block number). We keep the RNG
// around in the hope that the next read will be sequential to this one
// and we can continue reading from the same RNG.
//
// When that's not the case we create a new RNG for the block we are in,
// read as many bytes from it as necessary to get to the right offset,
// and then serve the read from there. We limit the length of the read
// to the end of the block, as another RNG needs to be created to serve
// the next block.
//
// The size of the blocks are a matter of taste... Larger blocks give
// better performance for sequential reads, but worse for random reads
// as we often need to generate and throw away a lot of data at the
// start of the block to serve a given read. The block size is
// 1<<randomBlockShift bytes.
if f.isdir {
return 0, errors.New("is a directory")
}
if offs >= f.size {
return 0, io.EOF
}
// Lazily calculate our main seed, a simple 64 bit FNV hash of our file
// name.
if f.seed == 0 {
hf := fnv.New64()
hf.Write([]byte(f.name))
f.seed = int64(hf.Sum64())
}
// Check whether the read is a continuation of an RNG we already have or
// we need to set up a new one.
seedNo := offs >> randomBlockShift
minOffs := seedNo << randomBlockShift
nextBlockOffs := (seedNo + 1) << randomBlockShift
if f.rng == nil || f.offset != offs || seedNo != f.seedOffs {
// This is not a straight read continuing from a previous one
f.rng = rand.New(rand.NewSource(f.seed + seedNo))
// If the read is not at the start of the block, discard data
// accordingly.
diff := offs - minOffs
if diff > 0 {
lr := io.LimitReader(f.rng, diff)
io.Copy(ioutil.Discard, lr)
}
f.offset = offs
f.seedOffs = seedNo
}
size := len(p)
// Don't read past the end of the file
if offs+int64(size) > f.size {
size = int(f.size - offs)
}
// Don't read across the block boundary
if offs+int64(size) > nextBlockOffs {
size = int(nextBlockOffs - offs)
}
// Advance the position first; the read below then fills p[:size] from
// the generator.
f.offset += int64(size)
return f.rng.Read(p[:size])
}
// Seek sets the offset for the next Read or Write, interpreted according to
// whence: io.SeekStart is relative to the start of the file, io.SeekCurrent
// to the current offset and io.SeekEnd to the end of the file (so negative
// offsets address data before the end). It returns the new offset.
//
// The resulting offset is clamped to the range [0, size]: seeking before
// the start returns an error and seeking past the end returns io.EOF.
// Directories cannot be seeked in, and an unknown whence is an error that
// leaves the offset unchanged.
func (f *fakeFile) Seek(offset int64, whence int) (int64, error) {
	f.mut.Lock()
	defer f.mut.Unlock()

	if f.isdir {
		return 0, errors.New("is a directory")
	}

	// Whatever happens next, the current generator no longer matches the
	// position, so force a new one on the next read.
	f.rng = nil

	switch whence {
	case io.SeekCurrent:
		f.offset += offset
	case io.SeekEnd:
		f.offset = f.size + offset
	case io.SeekStart:
		f.offset = offset
	default:
		return f.offset, errors.New("invalid whence")
	}

	if f.offset < 0 {
		f.offset = 0
		return f.offset, errors.New("seek before start")
	}
	if f.offset > f.size {
		f.offset = f.size
		return f.offset, io.EOF
	}
	return f.offset, nil
}
// Write "writes" p at the current offset by handing it to WriteAt, which
// discards the data and moves the offset past it.
func (f *fakeFile) Write(p []byte) (int, error) {
	pos := f.offset
	return f.WriteAt(p, pos)
}
// WriteAt pretends to write p at offset off. The data itself is discarded;
// the file grows if the write extends past its end and the offset is left
// just after the written range. Writing to a directory is an error.
func (f *fakeFile) WriteAt(p []byte, off int64) (int, error) {
	f.mut.Lock()
	defer f.mut.Unlock()

	if f.isdir {
		return 0, errors.New("is a directory")
	}

	written := len(p)
	end := off + int64(written)

	// The position changes, so the current generator is no longer valid.
	f.rng = nil
	f.offset = end
	if end > f.size {
		f.size = end
	}
	return written, nil
}
// Name returns the name of the underlying entry, which is its base name
// rather than a full path.
func (f *fakeFile) Name() string {
return f.name
}
// Truncate sets the file size to size, growing or shrinking as needed. If
// the current offset ends up beyond the new end it is moved back to it.
func (f *fakeFile) Truncate(size int64) error {
	f.mut.Lock()
	defer f.mut.Unlock()

	// Force a new generator on the next read.
	f.rng = nil
	f.size = size
	if size < f.offset {
		f.offset = size
	}
	return nil
}
// Stat returns a snapshot of the metadata of the underlying entry. The
// result is a copy and does not follow later changes.
func (f *fakeFile) Stat() (FileInfo, error) {
return &fakeFileInfo{*f.fakeEntry}, nil
}
// Sync does nothing and reports success.
func (f *fakeFile) Sync() error {
return nil
}
// fakeFileInfo is the stat result. It holds a copy of the entry as it was
// when the stat was made.
type fakeFileInfo struct {
fakeEntry // intentionally a copy of the struct
}
// Name returns the base name of the entry.
func (f *fakeFileInfo) Name() string {
return f.name
}
// Mode returns the mode recorded for the entry.
func (f *fakeFileInfo) Mode() FileMode {
return f.mode
}
// Size returns the size of the entry in bytes.
func (f *fakeFileInfo) Size() int64 {
return f.size
}
// ModTime returns the modification time of the entry.
func (f *fakeFileInfo) ModTime() time.Time {
return f.mtime
}
// IsDir reports whether the entry is a directory.
func (f *fakeFileInfo) IsDir() bool {
return f.isdir
}
// IsRegular reports whether the entry is a regular file, which is anything
// that is not a directory.
func (f *fakeFileInfo) IsRegular() bool {
return !f.isdir
}
// IsSymlink always reports false.
func (f *fakeFileInfo) IsSymlink() bool {
return false
}

154
lib/fs/fakefs_test.go Normal file
View File

@ -0,0 +1,154 @@
// Copyright (C) 2018 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package fs
import (
"bytes"
"io/ioutil"
"os"
"testing"
)
// TestFakeFS exercises directory creation, file creation, writing, stat via
// both the handle and the filesystem, seeking, and repeatable reads.
func TestFakeFS(t *testing.T) {
// Test some basic aspects of the fakefs
fs := newFakeFilesystem("/foo/bar/baz")
// MkdirAll
err := fs.MkdirAll("dira/dirb", 0755)
if err != nil {
t.Fatal(err)
}
info, err := fs.Stat("dira/dirb")
if err != nil {
t.Fatal(err)
}
// Mkdir
err = fs.Mkdir("dira/dirb/dirc", 0755)
if err != nil {
t.Fatal(err)
}
info, err = fs.Stat("dira/dirb/dirc")
if err != nil {
t.Fatal(err)
}
// Create
fd, err := fs.Create("/dira/dirb/test")
if err != nil {
t.Fatal(err)
}
// Write
_, err = fd.Write([]byte("hello"))
if err != nil {
t.Fatal(err)
}
// Stat on fd
info, err = fd.Stat()
if err != nil {
t.Fatal(err)
}
if info.Name() != "test" {
t.Error("wrong name:", info.Name())
}
if info.Size() != 5 {
t.Error("wrong size:", info.Size())
}
// Stat on fs
info, err = fs.Stat("dira/dirb/test")
if err != nil {
t.Fatal(err)
}
if info.Name() != "test" {
t.Error("wrong name:", info.Name())
}
if info.Size() != 5 {
t.Error("wrong size:", info.Size())
}
// Seek
_, err = fd.Seek(1, os.SEEK_SET)
if err != nil {
t.Fatal(err)
}
// Read; starting at offset 1 of a five byte file leaves four bytes
bs0, err := ioutil.ReadAll(fd)
if err != nil {
t.Fatal(err)
}
if len(bs0) != 4 {
t.Error("wrong number of bytes:", len(bs0))
}
// Read again, same data hopefully
_, err = fd.Seek(0, os.SEEK_SET)
if err != nil {
t.Fatal(err)
}
bs1, err := ioutil.ReadAll(fd)
if err != nil {
t.Fatal(err)
}
// The first read started one byte in, so skip that byte when comparing
if !bytes.Equal(bs0, bs1[1:]) {
t.Error("wrong data")
}
}
// TestFakeFSRead checks that a file spanning three random blocks returns
// the same data whether it is read in one go, in two differently sized
// pieces, or with a single large ReadAt.
func TestFakeFSRead(t *testing.T) {
// Same root path as in TestFakeFS, so this is the same filesystem
fs := newFakeFilesystem("/foo/bar/baz")
// Create
fd, _ := fs.Create("test")
fd.Truncate(3 * 1 << randomBlockShift)
// Read
fd.Seek(0, 0)
bs0, err := ioutil.ReadAll(fd)
if err != nil {
t.Fatal(err)
}
if len(bs0) != 3*1<<randomBlockShift {
t.Error("wrong number of bytes:", len(bs0))
}
// Read again, this time split at an odd offset inside the first block
fd.Seek(0, 0)
buf0 := make([]byte, 12345)
n, _ := fd.Read(buf0)
if n != len(buf0) {
t.Fatal("short read")
}
buf1, err := ioutil.ReadAll(fd)
if err != nil {
t.Fatal(err)
}
if len(buf1) != 3*1<<randomBlockShift-len(buf0) {
t.Error("wrong number of bytes:", len(buf1))
}
bs1 := append(buf0, buf1...)
if !bytes.Equal(bs0, bs1) {
t.Error("data mismatch")
}
// Read large block with ReadAt
bs2 := make([]byte, 3*1<<randomBlockShift)
_, err = fd.ReadAt(bs2, 0)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(bs0, bs2) {
t.Error("data mismatch")
}
}

View File

@ -167,6 +167,8 @@ func NewFilesystem(fsType FilesystemType, uri string) Filesystem {
switch fsType {
case FilesystemTypeBasic:
fs = newBasicFilesystem(uri)
case FilesystemTypeFake:
fs = newFakeFilesystem(uri)
default:
l.Debugln("Unknown filesystem", fsType, uri)
fs = &errorFilesystem{

View File

@ -10,12 +10,15 @@ type FilesystemType int
const (
FilesystemTypeBasic FilesystemType = iota // default is basic
// FilesystemTypeFake selects the fake filesystem; its text form is "fake".
FilesystemTypeFake
)
func (t FilesystemType) String() string {
switch t {
case FilesystemTypeBasic:
return "basic"
case FilesystemTypeFake:
return "fake"
default:
return "unknown"
}
@ -29,6 +32,8 @@ func (t *FilesystemType) UnmarshalText(bs []byte) error {
switch string(bs) {
case "basic":
*t = FilesystemTypeBasic
case "fake":
*t = FilesystemTypeFake
default:
*t = FilesystemTypeBasic
}