aboutsummaryrefslogtreecommitdiff
path: root/repo.go
diff options
context:
space:
mode:
authorn-peugnet <n.peugnet@free.fr>2021-08-23 11:44:54 +0200
committern-peugnet <n.peugnet@free.fr>2021-08-23 11:44:54 +0200
commit97638899fa9a160b1cc83a57d1c41d89f5763f49 (patch)
treee4f21aa69d2643f4f4293a0dc449db9993d0cba9 /repo.go
parent212ce5c37857a2d64c56e3df8f415f48b8698462 (diff)
downloaddna-backup-97638899fa9a160b1cc83a57d1c41d89f5763f49.tar.gz
dna-backup-97638899fa9a160b1cc83a57d1c41d89f5763f49.zip
better rolling hash match
If a chunk matched, directly skip to the next chunk instead of calculating every single byte by byte checksum in it.
Diffstat (limited to 'repo.go')
-rw-r--r--repo.go22
1 files changed, 17 insertions, 5 deletions
diff --git a/repo.go b/repo.go
index daf6a0e..37f0eda 100644
--- a/repo.go
+++ b/repo.go
@@ -208,18 +208,30 @@ func MatchChunks(chunks <-chan []byte, hashes map[uint64]uint64) {
hasher := rabinkarp64.New()
hasher.Write(<-chunks)
- var i uint64 = 0
+ var i uint64
+ var offset int
+ var prefill int
+ var postfill int
for c := range chunks {
- for offset, b := range c {
-
+ // Pre fill the window with the rest of the previous chunk
+ for prefill = 0; prefill < offset; prefill++ {
+ hasher.Roll(c[prefill])
+ }
+ // Fill the window with the current chunk and match hash byte by byte
+ for ; offset < len(c); offset++ {
h := hasher.Sum64()
chunk, exists := hashes[h]
if exists {
fmt.Printf("Found existing chunk. New{id:%d, offset:%d}, Old: %d\n", i, offset, chunk)
+ break
}
- // Roll the incoming byte in rolling
- hasher.Roll(b)
+ hasher.Roll(c[offset])
+ }
+ // Fill the window with the rest of the current chunk if it matched early
+ for postfill = offset; postfill < len(c); postfill++ {
+ hasher.Roll(c[postfill])
}
+ offset %= chunkSize
i++
}
}