From 97638899fa9a160b1cc83a57d1c41d89f5763f49 Mon Sep 17 00:00:00 2001
From: n-peugnet <n.peugnet@free.fr>
Date: Mon, 23 Aug 2021 11:44:54 +0200
Subject: better rolling hash match

If a chunk matched, directly skip to the next chunk instead of
calculating every single byte by byte checksum in it.
---
 repo.go | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/repo.go b/repo.go
index daf6a0e..37f0eda 100644
--- a/repo.go
+++ b/repo.go
@@ -208,18 +208,30 @@ func MatchChunks(chunks <-chan []byte, hashes map[uint64]uint64) {
 	hasher := rabinkarp64.New()
 	hasher.Write(<-chunks)
 
-	var i uint64 = 0
+	var i uint64
+	var offset int
+	var prefill int
+	var postfill int
 	for c := range chunks {
-		for offset, b := range c {
-
+		// Pre fill the window with the rest of the previous chunk
+		for prefill = 0; prefill < offset; prefill++ {
+			hasher.Roll(c[prefill])
+		}
+		// Fill the window with the current chunk and match hash byte by byte
+		for ; offset < len(c); offset++ {
 			h := hasher.Sum64()
 			chunk, exists := hashes[h]
 			if exists {
 				fmt.Printf("Found existing chunk. New{id:%d, offset:%d}, Old: %d\n", i, offset, chunk)
+				break
 			}
-			// Roll the incoming byte in rolling
-			hasher.Roll(b)
+			hasher.Roll(c[offset])
+		}
+		// Fill the window with the rest of the current chunk if it matched early
+		for postfill = offset; postfill < len(c); postfill++ {
+			hasher.Roll(c[postfill])
 		}
+		offset %= chunkSize
 		i++
 	}
 }
-- 
cgit v1.2.3