lib/db: Switch to faster blobloom Bloom filter pkg (#6537)

This commit is contained in:
greatroar 2020-04-20 09:02:33 +02:00 committed by GitHub
parent 4aa2199d5b
commit 44b0f0b456
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 21 additions and 19 deletions

7
go.mod
View File

@ -17,6 +17,7 @@ require (
github.com/gobwas/glob v0.2.3
github.com/gogo/protobuf v1.3.1
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6
github.com/greatroar/blobloom v0.0.0-20200416084947-36d5bf1a4e53
github.com/jackpal/gateway v1.0.6
github.com/jackpal/go-nat-pmp v1.0.2
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
@ -33,14 +34,11 @@ require (
github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563
github.com/sasha-s/go-deadlock v0.2.0
github.com/shirou/gopsutil v0.0.0-20190714054239-47ef3260b6bf
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/syncthing/notify v0.0.0-20190709140112-69c7a957d3e2
github.com/syndtr/goleveldb v1.0.1-0.20190923125748-758128399b1d
github.com/thejerf/suture v3.0.2+incompatible
github.com/urfave/cli v1.22.2
github.com/vitrun/qart v0.0.0-20160531060029-bf64b92db6b0
github.com/willf/bitset v1.1.10 // indirect
github.com/willf/bloom v2.0.3+incompatible
golang.org/x/crypto v0.0.0-20200221231518-2aa609cf4a9d
golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297
golang.org/x/sys v0.0.0-20191224085550-c709ea063b76
@ -50,6 +48,3 @@ require (
)
go 1.13
// https://github.com/spaolacci/murmur3/pull/30
replace github.com/spaolacci/murmur3 v1.1.0 => github.com/twmb/murmur3 v1.1.3

12
go.sum
View File

@ -23,8 +23,6 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bkaradzic/go-lz4 v0.0.0-20160924222819-7224d8d8f27e h1:2augTYh6E+XoNrrivZJBadpThP/dsvYKj0nzqfQ8tM4=
github.com/bkaradzic/go-lz4 v0.0.0-20160924222819-7224d8d8f27e/go.mod h1:0YdlkowM3VswSROI7qDxhRvJ3sLhlFrRRwjwegp5jy4=
github.com/calmh/murmur3 v1.1.1-0.20200226160057-74e9af8f47ac h1:mc24tiVsBenJuhJFQzgvTo1ECJxCGXUgNktcfEhJHHo=
github.com/calmh/murmur3 v1.1.1-0.20200226160057-74e9af8f47ac/go.mod h1:nZyyz8Qrw2g3CakiZkVTsiwKlCgQSCf2ZCAFB3DHbaI=
github.com/calmh/xdr v1.1.0 h1:U/Dd4CXNLoo8EiQ4ulJUXkgO1/EyQLgDKLgpY1SOoJE=
github.com/calmh/xdr v1.1.0/go.mod h1:E8sz2ByAdXC8MbANf1LCRYzedSnnc+/sXXJs/PVqoeg=
github.com/ccding/go-stun v0.0.0-20180726100737-be486d185f3d h1:As4937T5NVbJ/DmZT9z33pyLEprMd6CUSfhbmMY57Io=
@ -84,6 +82,8 @@ github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW
github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/greatroar/blobloom v0.0.0-20200416084947-36d5bf1a4e53 h1:A6KOo8OOZb+mjFqwDhiUcNPxZVSsK3GGJtfZWh2SqJk=
github.com/greatroar/blobloom v0.0.0-20200416084947-36d5bf1a4e53/go.mod h1:we9vO6GNYMmsNvCWINtZnQbcGEHUT6hGBAznNHd6RlE=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/jackpal/gateway v1.0.6 h1:/MJORKvJEwNVldtGVJC2p2cwCnsSoLn3hl3zxmZT7tk=
@ -192,24 +192,20 @@ github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/syncthing/notify v0.0.0-20190709140112-69c7a957d3e2 h1:6tuEEEpg+mxM82E0YingzoXzXXISYR/o/7I9n573LWI=
github.com/syncthing/notify v0.0.0-20190709140112-69c7a957d3e2/go.mod h1:Sn4ChoS7e4FxjCN1XHPVBT43AgnRLbuaB8pEc1Zcdjg=
github.com/syndtr/goleveldb v1.0.1-0.20190923125748-758128399b1d h1:gZZadD8H+fF+n9CmNhYL1Y0dJB+kLOmKd7FbPJLeGHs=
github.com/syndtr/goleveldb v1.0.1-0.20190923125748-758128399b1d/go.mod h1:9OrXJhf154huy1nPWmuSrkgjPUtUNhA+Zmy+6AESzuA=
github.com/thejerf/suture v3.0.2+incompatible h1:GtMydYcnK4zBJ0KL6Lx9vLzl6Oozb65wh252FTBxrvM=
github.com/thejerf/suture v3.0.2+incompatible/go.mod h1:ibKwrVj+Uzf3XZdAiNWUouPaAbSoemxOHLmJmwheEMc=
github.com/twmb/murmur3 v1.1.3 h1:D83U0XYKcHRYwYIpBKf3Pks91Z0Byda/9SJ8B6EMRcA=
github.com/twmb/murmur3 v1.1.3/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=
github.com/urfave/cli v1.20.0 h1:fDqGv3UG/4jbVl/QkFwEdddtEDjh/5Ov6X+0B/3bPaw=
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli v1.22.2 h1:gsqYFH8bb9ekPA12kRo0hfjngWQjkJPlN9R0N78BoUo=
github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/vitrun/qart v0.0.0-20160531060029-bf64b92db6b0 h1:okhMind4q9H1OxF44gNegWkiP4H/gsTFLalHFa4OOUI=
github.com/vitrun/qart v0.0.0-20160531060029-bf64b92db6b0/go.mod h1:TTbGUfE+cXXceWtbTHq6lqcTvYPBKLNejBEbnUsQJtU=
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
github.com/willf/bloom v2.0.3+incompatible h1:QDacWdqcAUI1MPOwIQZRy9kOR7yxfyEmxX8Wdm2/JPA=
github.com/willf/bloom v2.0.3+incompatible/go.mod h1:MmAltL9pDMNTrvUkxdg0k0q5I0suxmuwp3KbyrZLOZ8=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=

View File

@ -12,12 +12,12 @@ import (
"encoding/binary"
"time"
"github.com/greatroar/blobloom"
"github.com/syncthing/syncthing/lib/db/backend"
"github.com/syncthing/syncthing/lib/protocol"
"github.com/syncthing/syncthing/lib/sync"
"github.com/syncthing/syncthing/lib/util"
"github.com/thejerf/suture"
"github.com/willf/bloom"
)
const (
@ -27,7 +27,8 @@ const (
// than 100k. For fewer than 100k items we will just get better false
// positive rate instead.
indirectGCBloomCapacity = 100000
indirectGCBloomFalsePositiveRate = 0.01 // 1%
indirectGCBloomFalsePositiveRate = 0.01 // 1%
indirectGCBloomMaxBytes = 32 << 20 // Use at most 32MiB memory, which covers our desired FP rate at 27 M items
indirectGCDefaultInterval = 13 * time.Hour
indirectGCTimeKey = "lastIndirectGCTime"
@ -595,7 +596,11 @@ func (db *Lowlevel) gcIndirect(ctx context.Context) error {
if db.gcKeyCount > capacity {
capacity = db.gcKeyCount
}
blockFilter := bloom.NewWithEstimates(uint(capacity), indirectGCBloomFalsePositiveRate)
blockFilter := blobloom.NewOptimized(blobloom.Config{
FPRate: indirectGCBloomFalsePositiveRate,
MaxBits: 8 * indirectGCBloomMaxBytes,
NKeys: uint64(capacity),
})
// Iterate the FileInfos, unmarshal the block and version hashes and
// add them to the filter.
@ -617,7 +622,7 @@ func (db *Lowlevel) gcIndirect(ctx context.Context) error {
return err
}
if len(bl.BlocksHash) > 0 {
blockFilter.Add(bl.BlocksHash)
blockFilter.Add64(bloomHash(bl.BlocksHash))
}
}
it.Release()
@ -642,7 +647,7 @@ func (db *Lowlevel) gcIndirect(ctx context.Context) error {
}
key := blockListKey(it.Key())
if blockFilter.Test(key.BlocksHash()) {
if blockFilter.Has64(bloomHash(key)) {
matchedBlocks++
continue
}
@ -665,6 +670,12 @@ func (db *Lowlevel) gcIndirect(ctx context.Context) error {
return db.Compact()
}
// Hash function for the bloomfilter: first eight bytes of the SHA-256.
// Big or little-endian makes no difference, as long as we're consistent.
func bloomHash(key blockListKey) uint64 {
return binary.BigEndian.Uint64(key.BlocksHash())
}
// CheckRepair checks folder metadata and sequences for miscellaneous errors.
func (db *Lowlevel) CheckRepair() {
for _, folder := range db.ListFolders() {