diff options
author | n-peugnet <n.peugnet@free.fr> | 2021-08-30 14:23:28 +0200 |
---|---|---|
committer | n-peugnet <n.peugnet@free.fr> | 2021-08-30 14:23:28 +0200 |
commit | ea34c64c5cd6d62af487a0db62bd64a2b467ec25 (patch) | |
tree | bd223ce337552b344c3842d923855d640e1a6f68 | |
parent | 2cd98a3bcd18870da4404693212119dd8ccfaf68 (diff) | |
download | dna-backup-ea34c64c5cd6d62af487a0db62bd64a2b467ec25.tar.gz dna-backup-ea34c64c5cd6d62af487a0db62bd64a2b467ec25.zip |
findSimilarChunk returns an Id if found
-rw-r--r-- | TODO.md | 1 | ||||
-rw-r--r-- | repo.go | 25 | ||||
-rw-r--r-- | repo_test.go | 6 |
3 files changed, 22 insertions, 10 deletions
@@ -2,6 +2,7 @@ priority 1
 ----------
 - join non-deduplicated chunks
 - choose when and how to
+- read from repo
 
 priority 2
 ----------
@@ -229,17 +229,26 @@ func hashChunks(chunks <-chan StoredChunk) (FingerprintMap, SketchMap) {
 	return fingerprints, sketches
 }
 
-func findSimilarChunks(chunks []Chunk, sketches SketchMap) {
-	for i, c := range chunks {
-		log.Println("New chunk:", i)
-		sketch, _ := SketchChunk(c, 32, 3, 4)
-		for _, s := range sketch {
-			chunkId, exists := sketches[s]
-			if exists {
-				log.Println("Found similar chunks: ", chunkId)
+func findSimilarChunk(chunk Chunk, sketches SketchMap) (*ChunkId, bool) {
+	var similarChunks = make(map[ChunkId]int)
+	var max int
+	var similarChunk *ChunkId
+	sketch, _ := SketchChunk(chunk, 32, 3, 4)
+	for _, s := range sketch {
+		chunkIds, exists := sketches[s]
+		if !exists {
+			continue
+		}
+		for _, id := range chunkIds {
+			count := similarChunks[*id]
+			count += 1
+			if count > max {
+				similarChunk = id
 			}
+			similarChunks[*id] = count
 		}
 	}
+	return similarChunk, similarChunk != nil
 }
 
 func readChunk(stream io.Reader) ([]byte, error) {
diff --git a/repo_test.go b/repo_test.go
index 40d4374..3a4af68 100644
--- a/repo_test.go
+++ b/repo_test.go
@@ -200,7 +200,9 @@ func TestBsdiff(t *testing.T) {
 	}
 	newChunks := extractNewChunks(recipe)
 	log.Println("Checking new chunks:", len(newChunks[0]))
-	for _, c := range newChunks {
-		findSimilarChunks(c, sketches)
+	for _, chunks := range newChunks {
+		for _, c := range chunks {
+			log.Println(findSimilarChunk(c, sketches))
+		}
 	}
 }