From ea34c64c5cd6d62af487a0db62bd64a2b467ec25 Mon Sep 17 00:00:00 2001 From: n-peugnet Date: Mon, 30 Aug 2021 14:23:28 +0200 Subject: findSimilarChunk returns an Id if found --- repo.go | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'repo.go') diff --git a/repo.go b/repo.go index 599b8da..bf8b167 100644 --- a/repo.go +++ b/repo.go @@ -229,17 +229,26 @@ func hashChunks(chunks <-chan StoredChunk) (FingerprintMap, SketchMap) { return fingerprints, sketches } -func findSimilarChunks(chunks []Chunk, sketches SketchMap) { - for i, c := range chunks { - log.Println("New chunk:", i) - sketch, _ := SketchChunk(c, 32, 3, 4) - for _, s := range sketch { - chunkId, exists := sketches[s] - if exists { - log.Println("Found similar chunks: ", chunkId) +func findSimilarChunk(chunk Chunk, sketches SketchMap) (*ChunkId, bool) { + var similarChunks = make(map[ChunkId]int) + var max int + var similarChunk *ChunkId + sketch, _ := SketchChunk(chunk, 32, 3, 4) + for _, s := range sketch { + chunkIds, exists := sketches[s] + if !exists { + continue + } + for _, id := range chunkIds { + count := similarChunks[*id] + count += 1 + if count > max { + similarChunk = id } + similarChunks[*id] = count } } + return similarChunk, similarChunk != nil } func readChunk(stream io.Reader) ([]byte, error) { -- cgit v1.2.3