aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorn-peugnet <n.peugnet@free.fr>2021-08-30 14:23:28 +0200
committern-peugnet <n.peugnet@free.fr>2021-08-30 14:23:28 +0200
commitea34c64c5cd6d62af487a0db62bd64a2b467ec25 (patch)
treebd223ce337552b344c3842d923855d640e1a6f68
parent2cd98a3bcd18870da4404693212119dd8ccfaf68 (diff)
downloaddna-backup-ea34c64c5cd6d62af487a0db62bd64a2b467ec25.tar.gz
dna-backup-ea34c64c5cd6d62af487a0db62bd64a2b467ec25.zip
findSimilarChunk returns an Id if found
-rw-r--r--TODO.md1
-rw-r--r--repo.go25
-rw-r--r--repo_test.go6
3 files changed, 22 insertions, 10 deletions
diff --git a/TODO.md b/TODO.md
index a55e618..6c62b4c 100644
--- a/TODO.md
+++ b/TODO.md
@@ -2,6 +2,7 @@ priority 1
----------
- join non-deduplicated chunks
- choose when and how to
+- read from repo
priority 2
----------
diff --git a/repo.go b/repo.go
index 599b8da..bf8b167 100644
--- a/repo.go
+++ b/repo.go
@@ -229,17 +229,26 @@ func hashChunks(chunks <-chan StoredChunk) (FingerprintMap, SketchMap) {
return fingerprints, sketches
}
-func findSimilarChunks(chunks []Chunk, sketches SketchMap) {
- for i, c := range chunks {
- log.Println("New chunk:", i)
- sketch, _ := SketchChunk(c, 32, 3, 4)
- for _, s := range sketch {
- chunkId, exists := sketches[s]
- if exists {
- log.Println("Found similar chunks: ", chunkId)
+// findSimilarChunk looks for the stored chunk that resembles chunk the most.
+//
+// It sketches chunk with SketchChunk and, for every sketch value that is
+// present in sketches, counts one hit for each chunk id listed under that
+// value. It returns the id with the highest hit count and true, or nil and
+// false when no sketch value of chunk is present in sketches. On equal
+// counts the id that reached that count first is kept.
+func findSimilarChunk(chunk Chunk, sketches SketchMap) (*ChunkId, bool) {
+	var similarChunks = make(map[ChunkId]int)
+	var maxCount int
+	var similarChunk *ChunkId
+	// NOTE(review): the error returned by SketchChunk is discarded here;
+	// confirm whether a failed sketch should be reported to the caller.
+	sketch, _ := SketchChunk(chunk, 32, 3, 4)
+	for _, s := range sketch {
+		chunkIds, exists := sketches[s]
+		if !exists {
+			continue
+		}
+		for _, id := range chunkIds {
+			count := similarChunks[*id]
+			count += 1
+			if count > maxCount {
+				// Remember the new best count: if it were never updated,
+				// this test would always succeed and the last id seen
+				// would win regardless of its hit count.
+				maxCount = count
+				similarChunk = id
 			}
+			similarChunks[*id] = count
 		}
 	}
+	return similarChunk, similarChunk != nil
 }
func readChunk(stream io.Reader) ([]byte, error) {
diff --git a/repo_test.go b/repo_test.go
index 40d4374..3a4af68 100644
--- a/repo_test.go
+++ b/repo_test.go
@@ -200,7 +200,9 @@ func TestBsdiff(t *testing.T) {
}
newChunks := extractNewChunks(recipe)
log.Println("Checking new chunks:", len(newChunks[0]))
- for _, c := range newChunks {
- findSimilarChunks(c, sketches)
+ for _, chunks := range newChunks {
+ for _, c := range chunks {
+ log.Println(findSimilarChunk(c, sketches))
+ }
}
}