diff options
author | n-peugnet <n.peugnet@free.fr> | 2021-09-06 16:57:47 +0200 |
---|---|---|
committer | n-peugnet <n.peugnet@free.fr> | 2021-09-06 17:26:30 +0200 |
commit | e2d64a027a72f7db99d98ab9652f3e509b86d344 (patch) | |
tree | f3d23663568bdf52cd4737e31630e4766910f6be | |
parent | b31f22ff61b69a0cb3e0a9d7955133b5304f4ff7 (diff) | |
download | dna-backup-e2d64a027a72f7db99d98ab9652f3e509b86d344.tar.gz dna-backup-e2d64a027a72f7db99d98ab9652f3e509b86d344.zip |
generate new Polynomial only once
-rw-r--r-- | repo.go | 22 | ||||
-rw-r--r-- | sketch.go | 10 | ||||
-rw-r--r-- | sketch_test.go | 4 |
3 files changed, 23 insertions, 13 deletions
```diff
@@ -49,6 +49,7 @@ type Repo struct {
 	sketchWSize int
 	sketchSfCount int
 	sketchFCount int
+	pol rabinkarp64.Pol
 	differ Differ
 	patcher Patcher
 	fingerprints FingerprintMap
@@ -61,13 +62,22 @@ type File struct {
 }
 
 func NewRepo(path string) *Repo {
-	os.MkdirAll(path, 0775)
+	err := os.MkdirAll(path, 0775)
+	// if err != nil {
+	// log.Panicln(err)
+	// }
+	var seed int64 = 1
+	p, err := rabinkarp64.RandomPolynomial(seed)
+	if err != nil {
+		log.Panicln(err)
+	}
 	return &Repo{
 		path: path,
 		chunkSize: 8 << 10,
 		sketchWSize: 32,
 		sketchSfCount: 3,
 		sketchFCount: 4,
+		pol: p,
 		differ: &Bsdiff{},
 		patcher: &Bsdiff{},
 		fingerprints: make(FingerprintMap),
@@ -244,7 +254,7 @@ func (r *Repo) loadChunks(versions []string, chunks chan<- IdentifiedChunk) {
 // (resemblance hash based on maximal values of regions) are calculated and
 // stored in an hashmap.
 func (r *Repo) hashChunks(chunks <-chan IdentifiedChunk) {
-	hasher := rabinkarp64.New()
+	hasher := rabinkarp64.NewFromPol(r.pol)
 	for c := range chunks {
 		r.hashAndStoreChunk(c, hasher)
 	}
@@ -254,7 +264,7 @@ func (r *Repo) hashAndStoreChunk(chunk IdentifiedChunk, hasher hash.Hash64) {
 	hasher.Reset()
 	io.Copy(hasher, chunk.Reader())
 	fingerprint := hasher.Sum64()
-	sketch, _ := SketchChunk(chunk, r.chunkSize, r.sketchWSize, r.sketchSfCount, r.sketchFCount)
+	sketch, _ := SketchChunk(chunk, r.pol, r.chunkSize, r.sketchWSize, r.sketchSfCount, r.sketchFCount)
 	r.storeChunkId(chunk.Id(), fingerprint, sketch)
 }
 
@@ -282,7 +292,7 @@ func (r *Repo) findSimilarChunk(chunk Chunk) (*ChunkId, bool) {
 	var similarChunks = make(map[ChunkId]int)
 	var max int
 	var similarChunk *ChunkId
-	sketch, _ := SketchChunk(chunk, r.chunkSize, r.sketchWSize, r.sketchSfCount, r.sketchFCount)
+	sketch, _ := SketchChunk(chunk, r.pol, r.chunkSize, r.sketchWSize, r.sketchSfCount, r.sketchFCount)
 	for _, s := range sketch {
 		chunkIds, exists := r.sketches[s]
 		if !exists {
@@ -329,7 +339,7 @@ func (r *Repo) encodeTempChunk(temp BufferedChunk, version int, last *uint64) (c
 		*last++
 		id := &ChunkId{Ver: version, Idx: *last}
 		ic := NewLoadedChunk(id, temp.Bytes())
-		hasher := rabinkarp64.New()
+		hasher := rabinkarp64.NewFromPol(r.pol)
 		r.hashAndStoreChunk(ic, hasher)
 		log.Println("Add new chunk", id)
 		return ic, false
@@ -368,7 +378,7 @@ func (r *Repo) matchStream(stream io.Reader, version int) []Chunk {
 		chunks = append(chunks, NewTempChunk(buff[:n]))
 		return chunks
 	}
-	hasher := rabinkarp64.New()
+	hasher := rabinkarp64.NewFromPol(r.pol)
 	hasher.Write(buff[:n])
 	for err != io.EOF {
 		h := hasher.Sum64()
@@ -22,7 +22,7 @@ const fBytes = 8
 // SketchChunk produces a sketch for a chunk based on wSize: the window size,
 // sfCount: the number of super-features, and fCount: the number of feature
 // per super-feature
-func SketchChunk(chunk Chunk, chunkSize int, wSize int, sfCount int, fCount int) (Sketch, error) {
+func SketchChunk(chunk Chunk, pol rabinkarp64.Pol, chunkSize int, wSize int, sfCount int, fCount int) (Sketch, error) {
 	var wg sync.WaitGroup
 	var fSize = FeatureSize(chunkSize, sfCount, fCount)
 	superfeatures := make([]uint64, 0, sfCount)
@@ -37,9 +37,9 @@ func SketchChunk(chunk Chunk, chunkSize int, wSize int, sfCount int, fCount int)
 		}
 		features = append(features, 0)
 		wg.Add(1)
-		go calcFeature(&wg, &fBuff, wSize, fSize, &features[f])
+		go calcFeature(&wg, pol, &fBuff, wSize, fSize, &features[f])
 	}
-	hasher := rabinkarp64.New()
+	hasher := rabinkarp64.NewFromPol(pol)
 	wg.Wait()
 	for sf := 0; sf < len(features)/fCount; sf++ {
 		for i := 0; i < fCount; i++ {
@@ -52,9 +52,9 @@ func SketchChunk(chunk Chunk, chunkSize int, wSize int, sfCount int, fCount int)
 	return superfeatures, nil
 }
 
-func calcFeature(wg *sync.WaitGroup, r ReadByteReader, wSize int, fSize int, result *uint64) {
+func calcFeature(wg *sync.WaitGroup, p rabinkarp64.Pol, r ReadByteReader, wSize int, fSize int, result *uint64) {
 	defer wg.Done()
-	hasher := rabinkarp64.New()
+	hasher := rabinkarp64.NewFromPol(p)
 	n, err := io.CopyN(hasher, r, int64(wSize))
 	if err != nil {
 		log.Println(n, err)
diff --git a/sketch_test.go b/sketch_test.go
index d08e2e3..3dce9b5 100644
--- a/sketch_test.go
+++ b/sketch_test.go
@@ -15,7 +15,7 @@ func TestSketchChunk(t *testing.T) {
 	var i int
 	for c := range chunks {
 		if i < 1 {
-			sketch, err := SketchChunk(c, 8<<10, 32, 3, 4)
+			sketch, err := SketchChunk(c, repo.pol, 8<<10, 32, 3, 4)
 			if err != nil {
 				t.Error(err)
 			}
@@ -25,7 +25,7 @@ func TestSketchChunk(t *testing.T) {
 			}
 		}
 		if i == 14 {
-			sketch, err := SketchChunk(c, 8<<10, 32, 3, 4)
+			sketch, err := SketchChunk(c, repo.pol, 8<<10, 32, 3, 4)
 			if err != nil {
 				t.Error(err)
 			}
```