better rolling hash match

If a chunk matched, directly skip to the next chunk instead of calculating every single byte by byte checksum in it.
author: n-peugnet <n.peugnet@free.fr> 2021-08-23 11:44:54 +0200
committer: n-peugnet <n.peugnet@free.fr> 2021-08-23 11:44:54 +0200
commit: 97638899fa9a160b1cc83a57d1c41d89f5763f49 (patch)
tree: e4f21aa69d2643f4f4293a0dc449db9993d0cba9 /repo.go
parent: 212ce5c37857a2d64c56e3df8f415f48b8698462 (diff)
download: dna-backup-97638899fa9a160b1cc83a57d1c41d89f5763f49.tar.gz
dna-backup-97638899fa9a160b1cc83a57d1c41d89f5763f49.zip
1 files changed, 17 insertions, 5 deletions
diff --git a/repo.go b/repo.go
index daf6a0e..37f0eda 100644
--- a/repo.go
+++ b/repo.go
@@ -208,18 +208,30 @@ func MatchChunks(chunks <-chan []byte, hashes map[uint64]uint64) {
 	hasher := rabinkarp64.New()
 	hasher.Write(<-chunks)
 
-	var i uint64 = 0
+	var i uint64
+	var offset int
+	var prefill int
+	var postfill int
 	for c := range chunks {
-		for offset, b := range c {
-
+		// Pre fill the window with the rest of the previous chunk
+		for prefill = 0; prefill < offset; prefill++ {
+			hasher.Roll(c[prefill])
+		}
+		// Fill the window with the current chunk and match hash byte by byte
+		for ; offset < len(c); offset++ {
 			h := hasher.Sum64()
 			chunk, exists := hashes[h]
 			if exists {
 				fmt.Printf("Found existing chunk. New{id:%d, offset:%d}, Old: %d\n", i, offset, chunk)
+				break
 			}
-			// Roll the incoming byte in rolling
-			hasher.Roll(b)
+			hasher.Roll(c[offset])
+		}
+		// Fill the window with the rest of the current chunk if it matched early
+		for postfill = offset; postfill < len(c); postfill++ {
+			hasher.Roll(c[postfill])
 		}
+		offset %= chunkSize
 		i++
 	}
 }
author	n-peugnet <n.peugnet@free.fr>	2021-08-23 11:44:54 +0200
committer	n-peugnet <n.peugnet@free.fr>	2021-08-23 11:44:54 +0200
commit	97638899fa9a160b1cc83a57d1c41d89f5763f49 (patch)
tree	e4f21aa69d2643f4f4293a0dc449db9993d0cba9 /repo.go
parent	212ce5c37857a2d64c56e3df8f415f48b8698462 (diff)
download	dna-backup-97638899fa9a160b1cc83a57d1c41d89f5763f49.tar.gz dna-backup-97638899fa9a160b1cc83a57d1c41d89f5763f49.zip