about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author n-peugnet <n.peugnet@free.fr> 2021-09-06 16:57:47 +0200
committer n-peugnet <n.peugnet@free.fr> 2021-09-06 17:26:30 +0200
commit e2d64a027a72f7db99d98ab9652f3e509b86d344 (patch)
tree f3d23663568bdf52cd4737e31630e4766910f6be
parent b31f22ff61b69a0cb3e0a9d7955133b5304f4ff7 (diff)
download dna-backup-e2d64a027a72f7db99d98ab9652f3e509b86d344.tar.gz
dna-backup-e2d64a027a72f7db99d98ab9652f3e509b86d344.zip
generate new Polynomial only once
-rw-r--r-- repo.go 22
-rw-r--r-- sketch.go 10
-rw-r--r-- sketch_test.go 4
3 files changed, 23 insertions, 13 deletions
diff --git a/repo.go b/repo.go
index 29f683c..cd5178b 100644
--- a/repo.go
+++ b/repo.go
@@ -49,6 +49,7 @@ type Repo struct {
sketchWSize int
sketchSfCount int
sketchFCount int
+ pol rabinkarp64.Pol
differ Differ
patcher Patcher
fingerprints FingerprintMap
@@ -61,13 +62,22 @@ type File struct {
}
func NewRepo(path string) *Repo {
- os.MkdirAll(path, 0775)
+ err := os.MkdirAll(path, 0775)
+ // if err != nil {
+ // log.Panicln(err)
+ // }
+ var seed int64 = 1
+ p, err := rabinkarp64.RandomPolynomial(seed)
+ if err != nil {
+ log.Panicln(err)
+ }
return &Repo{
path: path,
chunkSize: 8 << 10,
sketchWSize: 32,
sketchSfCount: 3,
sketchFCount: 4,
+ pol: p,
differ: &Bsdiff{},
patcher: &Bsdiff{},
fingerprints: make(FingerprintMap),
@@ -244,7 +254,7 @@ func (r *Repo) loadChunks(versions []string, chunks chan<- IdentifiedChunk) {
// (resemblance hash based on maximal values of regions) are calculated and
// stored in an hashmap.
func (r *Repo) hashChunks(chunks <-chan IdentifiedChunk) {
- hasher := rabinkarp64.New()
+ hasher := rabinkarp64.NewFromPol(r.pol)
for c := range chunks {
r.hashAndStoreChunk(c, hasher)
}
@@ -254,7 +264,7 @@ func (r *Repo) hashAndStoreChunk(chunk IdentifiedChunk, hasher hash.Hash64) {
hasher.Reset()
io.Copy(hasher, chunk.Reader())
fingerprint := hasher.Sum64()
- sketch, _ := SketchChunk(chunk, r.chunkSize, r.sketchWSize, r.sketchSfCount, r.sketchFCount)
+ sketch, _ := SketchChunk(chunk, r.pol, r.chunkSize, r.sketchWSize, r.sketchSfCount, r.sketchFCount)
r.storeChunkId(chunk.Id(), fingerprint, sketch)
}
@@ -282,7 +292,7 @@ func (r *Repo) findSimilarChunk(chunk Chunk) (*ChunkId, bool) {
var similarChunks = make(map[ChunkId]int)
var max int
var similarChunk *ChunkId
- sketch, _ := SketchChunk(chunk, r.chunkSize, r.sketchWSize, r.sketchSfCount, r.sketchFCount)
+ sketch, _ := SketchChunk(chunk, r.pol, r.chunkSize, r.sketchWSize, r.sketchSfCount, r.sketchFCount)
for _, s := range sketch {
chunkIds, exists := r.sketches[s]
if !exists {
@@ -329,7 +339,7 @@ func (r *Repo) encodeTempChunk(temp BufferedChunk, version int, last *uint64) (c
*last++
id := &ChunkId{Ver: version, Idx: *last}
ic := NewLoadedChunk(id, temp.Bytes())
- hasher := rabinkarp64.New()
+ hasher := rabinkarp64.NewFromPol(r.pol)
r.hashAndStoreChunk(ic, hasher)
log.Println("Add new chunk", id)
return ic, false
@@ -368,7 +378,7 @@ func (r *Repo) matchStream(stream io.Reader, version int) []Chunk {
chunks = append(chunks, NewTempChunk(buff[:n]))
return chunks
}
- hasher := rabinkarp64.New()
+ hasher := rabinkarp64.NewFromPol(r.pol)
hasher.Write(buff[:n])
for err != io.EOF {
h := hasher.Sum64()
diff --git a/sketch.go b/sketch.go
index 693cacf..dca813d 100644
--- a/sketch.go
+++ b/sketch.go
@@ -22,7 +22,7 @@ const fBytes = 8
// SketchChunk produces a sketch for a chunk based on wSize: the window size,
// sfCount: the number of super-features, and fCount: the number of feature
// per super-feature
-func SketchChunk(chunk Chunk, chunkSize int, wSize int, sfCount int, fCount int) (Sketch, error) {
+func SketchChunk(chunk Chunk, pol rabinkarp64.Pol, chunkSize int, wSize int, sfCount int, fCount int) (Sketch, error) {
var wg sync.WaitGroup
var fSize = FeatureSize(chunkSize, sfCount, fCount)
superfeatures := make([]uint64, 0, sfCount)
@@ -37,9 +37,9 @@ func SketchChunk(chunk Chunk, chunkSize int, wSize int, sfCount int, fCount int)
}
features = append(features, 0)
wg.Add(1)
- go calcFeature(&wg, &fBuff, wSize, fSize, &features[f])
+ go calcFeature(&wg, pol, &fBuff, wSize, fSize, &features[f])
}
- hasher := rabinkarp64.New()
+ hasher := rabinkarp64.NewFromPol(pol)
wg.Wait()
for sf := 0; sf < len(features)/fCount; sf++ {
for i := 0; i < fCount; i++ {
@@ -52,9 +52,9 @@ func SketchChunk(chunk Chunk, chunkSize int, wSize int, sfCount int, fCount int)
return superfeatures, nil
}
-func calcFeature(wg *sync.WaitGroup, r ReadByteReader, wSize int, fSize int, result *uint64) {
+func calcFeature(wg *sync.WaitGroup, p rabinkarp64.Pol, r ReadByteReader, wSize int, fSize int, result *uint64) {
defer wg.Done()
- hasher := rabinkarp64.New()
+ hasher := rabinkarp64.NewFromPol(p)
n, err := io.CopyN(hasher, r, int64(wSize))
if err != nil {
log.Println(n, err)
diff --git a/sketch_test.go b/sketch_test.go
index d08e2e3..3dce9b5 100644
--- a/sketch_test.go
+++ b/sketch_test.go
@@ -15,7 +15,7 @@ func TestSketchChunk(t *testing.T) {
var i int
for c := range chunks {
if i < 1 {
- sketch, err := SketchChunk(c, 8<<10, 32, 3, 4)
+ sketch, err := SketchChunk(c, repo.pol, 8<<10, 32, 3, 4)
if err != nil {
t.Error(err)
}
@@ -25,7 +25,7 @@ func TestSketchChunk(t *testing.T) {
}
}
if i == 14 {
- sketch, err := SketchChunk(c, 8<<10, 32, 3, 4)
+ sketch, err := SketchChunk(c, repo.pol, 8<<10, 32, 3, 4)
if err != nil {
t.Error(err)
}