author    n-peugnet <n.peugnet@free.fr>  2021-09-10 14:37:32 +0200
committer n-peugnet <n.peugnet@free.fr>  2021-09-10 14:37:32 +0200
commit    13e45db0878bc2dc36ad4578c0758dd05879cded (patch)
tree      942d332e3b513cd25f869f3900d8f9ba21d3f1dd
parent    bea4f87f1e4528630d326699a9c7fd6133ee7806 (diff)
use the reader interface more
-rw-r--r--  TODO.md | 16
-rw-r--r--  repo.go | 17
2 files changed, 20 insertions, 13 deletions
diff --git a/TODO.md b/TODO.md
index 9d2c75f..084dfd6 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,7 +1,10 @@
priority 1
----------
- [x] add deltaEncode chunks function
- - [x] do not merge consecutive smaller chunks, as these could be stored as chunks if no similar chunk is found. Thus they will need to be of `chunkSize` or less, otherwise they could not possibly be used for deduplication.
+ - [x] do not merge consecutive smaller chunks, as these could be stored as
+   chunks if no similar chunk is found. Thus they will need to be of
+   `chunkSize` or less, otherwise they could not possibly be used for
+   deduplication.
```
for each new chunk:
find similar in sketchMap
@@ -12,11 +15,11 @@ priority 1
store in fingerprintMap
store in sketchMap
```
-- [ ] read from repo (Restore function)
+- [x] read from repo (Restore function)
- [x] store recipe
- [x] load recipe
- [x] read chunks in-order into a stream
- - [ ] read individual files
+- [ ] read individual files
- [ ] properly store information to be DNA-encoded
- [ ] tar the source to keep file metadata?
- [ ] store chunks compressed
@@ -28,11 +31,14 @@ priority 1
priority 2
----------
-- [x] make more use of the `Reader` API (which is analogous to `IOStream` in Java)
+- [ ] make more use of the `Reader` API (which is analogous to `IOStream` in Java)
- [ ] refactor `matchStream`, as it is currently quite complex
- [x] better test for `(*Repo).matchStream`
-- [ ] tail packing of PartialChunks (this struct does not exist yet, as it is in fact just `TempChunks` for now)
+- [ ] tail packing of PartialChunks (this struct does not exist yet, as it is
+  in fact just `TempChunks` for now)
- [ ] option to commit without deltas to save new base chunks
+- [ ] custom binary marshal and unmarshal for chunks
+- [ ] use `loadChunkContent` in `loadChunks`
meeting 7/09
------------
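
The pseudocode block in the TODO.md hunk above outlines the per-chunk matching flow. Below is a minimal, hypothetical Go sketch of that flow; the `index` type, its `sketchMap` and `fingerprintMap` fields, and the helper methods are illustrative stand-ins, not dna-backup's actual types or API.

```go
// Hypothetical sketch of the TODO.md pseudocode: for each new chunk, first
// look for a similar chunk by sketch, then for an identical one by
// fingerprint, and only store new content when neither matches.
package dedup

type ChunkId struct {
	Ver int
	Idx uint64
}

type Chunk struct {
	Id          ChunkId
	Fingerprint uint64
	Sketch      []uint64
	Data        []byte
}

type index struct {
	sketchMap      map[uint64][]ChunkId // sketch feature -> similar chunk candidates
	fingerprintMap map[uint64]ChunkId   // exact-content fingerprint -> stored chunk
}

func (x *index) process(c Chunk) {
	// 1. Similar chunk found by sketch: store only a delta against it.
	for _, feature := range c.Sketch {
		if candidates := x.sketchMap[feature]; len(candidates) > 0 {
			x.deltaEncode(candidates[0], c)
			return
		}
	}
	// 2. Identical chunk already stored: deduplicate, store nothing.
	if _, ok := x.fingerprintMap[c.Fingerprint]; ok {
		return
	}
	// 3. Genuinely new content: store it and register it in both maps
	//    so later chunks can match against it.
	x.storeNew(c)
	x.fingerprintMap[c.Fingerprint] = c.Id
	for _, feature := range c.Sketch {
		x.sketchMap[feature] = append(x.sketchMap[feature], c.Id)
	}
}

func (x *index) deltaEncode(base ChunkId, c Chunk) { /* delta against base, omitted */ }
func (x *index) storeNew(c Chunk)                  { /* write chunk content, omitted */ }
```
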
diff --git a/repo.go b/repo.go
index ce8d890..21faa0a 100644
--- a/repo.go
+++ b/repo.go
@@ -313,16 +313,18 @@ func (r *Repo) chunkMinLen() int {
func (r *Repo) hashChunks(chunks <-chan IdentifiedChunk) {
hasher := rabinkarp64.NewFromPol(r.pol)
for c := range chunks {
- r.hashAndStoreChunk(c, hasher)
+ r.hashAndStoreChunk(c.GetId(), c.Reader(), hasher)
}
}
-func (r *Repo) hashAndStoreChunk(chunk IdentifiedChunk, hasher hash.Hash64) {
+func (r *Repo) hashAndStoreChunk(id *ChunkId, reader io.Reader, hasher hash.Hash64) {
+ var chunk bytes.Buffer
hasher.Reset()
- io.Copy(hasher, chunk.Reader())
+ reader = io.TeeReader(reader, &chunk)
+ io.Copy(hasher, reader)
fingerprint := hasher.Sum64()
- sketch, _ := sketch.SketchChunk(chunk.Reader(), r.pol, r.chunkSize, r.sketchWSize, r.sketchSfCount, r.sketchFCount)
- r.storeChunkId(chunk.GetId(), fingerprint, sketch)
+ sketch, _ := sketch.SketchChunk(&chunk, r.pol, r.chunkSize, r.sketchWSize, r.sketchSfCount, r.sketchFCount)
+ r.storeChunkId(id, fingerprint, sketch)
}
func (r *Repo) storeChunkId(id *ChunkId, fingerprint uint64, sketch []uint64) {
@@ -397,10 +399,9 @@ func (r *Repo) encodeTempChunk(temp BufferedChunk, version int, last *uint64) (c
if chunk.Len() == r.chunkSize {
id := &ChunkId{Ver: version, Idx: *last}
*last++
- ic := NewLoadedChunk(id, temp.Bytes())
hasher := rabinkarp64.NewFromPol(r.pol)
- r.hashAndStoreChunk(ic, hasher)
- r.StoreChunkContent(id, ic.Reader())
+ r.hashAndStoreChunk(id, temp.Reader(), hasher)
+ r.StoreChunkContent(id, temp.Reader())
log.Println("Add new chunk", id)
return NewStoredChunk(r, id), false
}
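
The key change in `hashAndStoreChunk` is that the chunk is now read only once: an `io.TeeReader` copies every byte consumed by the hasher into a buffer, which is then handed to `sketch.SketchChunk` for a second pass. Here is a self-contained sketch of that pattern, using a standard-library FNV hash in place of the repository's `rabinkarp64` hasher:

```go
package main

import (
	"bytes"
	"fmt"
	"hash/fnv"
	"io"
	"strings"
)

func main() {
	// Stand-in for the chunk's Reader(): a stream that can only be
	// consumed once.
	src := strings.NewReader("chunk content that is read a single time")

	// TeeReader mirrors every byte read by the hasher into buf, so the
	// buffered copy can be consumed again afterwards (in the commit it
	// is passed to sketch.SketchChunk).
	var buf bytes.Buffer
	hasher := fnv.New64()
	tee := io.TeeReader(src, &buf)

	if _, err := io.Copy(hasher, tee); err != nil {
		panic(err)
	}

	fmt.Printf("fingerprint: %x\n", hasher.Sum64())
	fmt.Printf("buffered %d bytes for the second pass\n", buf.Len())
}
```

The same single-pass idea explains why `encodeTempChunk` can call `temp.Reader()` for both hashing and `StoreChunkContent`, as long as the buffered chunk returns a fresh reader each time.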