diff options
author | n-peugnet <n.peugnet@free.fr> | 2021-08-23 11:44:54 +0200 |
---|---|---|
committer | n-peugnet <n.peugnet@free.fr> | 2021-08-23 11:44:54 +0200 |
commit | 97638899fa9a160b1cc83a57d1c41d89f5763f49 (patch) | |
tree | e4f21aa69d2643f4f4293a0dc449db9993d0cba9 /repo.go | |
parent | 212ce5c37857a2d64c56e3df8f415f48b8698462 (diff) | |
download | dna-backup-97638899fa9a160b1cc83a57d1c41d89f5763f49.tar.gz dna-backup-97638899fa9a160b1cc83a57d1c41d89f5763f49.zip |
better rolling hash match
If a chunk matched, directly skip to the next chunk instead of
calculating every single byte by byte checksum in it.
Diffstat (limited to 'repo.go')
-rw-r--r-- | repo.go | 22 |
1 files changed, 17 insertions, 5 deletions
@@ -208,18 +208,30 @@ func MatchChunks(chunks <-chan []byte, hashes map[uint64]uint64) { hasher := rabinkarp64.New() hasher.Write(<-chunks) - var i uint64 = 0 + var i uint64 + var offset int + var prefill int + var postfill int for c := range chunks { - for offset, b := range c { - + // Pre fill the window with the rest of the previous chunk + for prefill = 0; prefill < offset; prefill++ { + hasher.Roll(c[prefill]) + } + // Fill the window with the current chunk and match hash byte by byte + for ; offset < len(c); offset++ { h := hasher.Sum64() chunk, exists := hashes[h] if exists { fmt.Printf("Found existing chunk. New{id:%d, offset:%d}, Old: %d\n", i, offset, chunk) + break } - // Roll the incoming byte in rolling - hasher.Roll(b) + hasher.Roll(c[offset]) + } + // Fill the window with the rest of the current chunk if it matched early + for postfill = offset; postfill < len(c); postfill++ { + hasher.Roll(c[postfill]) } + offset %= chunkSize i++ } } |