move old funcs to repo_test and add roundtrip test

author: n-peugnet <n.peugnet@free.fr> 2021-09-29 16:31:28 +0200
committer: n-peugnet <n.peugnet@free.fr> 2021-09-29 16:31:28 +0200
commit: c2cc8f8a6fb65488f19a5addf47d83e19aff6f4b (patch)
tree: 08300725c0bc0395e2b9c9bb9b511cf7b13a919a /repo_test.go
parent: 5dad27953b9050f097b53227cfc29e3d3373fd97 (diff)
download: dna-backup-c2cc8f8a6fb65488f19a5addf47d83e19aff6f4b.tar.gz
dna-backup-c2cc8f8a6fb65488f19a5addf47d83e19aff6f4b.zip
1 files changed, 60 insertions, 0 deletions
diff --git a/repo_test.go b/repo_test.go
index 4b5c09b..35b3101 100644
--- a/repo_test.go
+++ b/repo_test.go
@@ -8,9 +8,12 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+	"sync"
 	"testing"
 
+	"github.com/chmduquesne/rollinghash/rabinkarp64"
 	"github.com/n-peugnet/dna-backup/logger"
+	"github.com/n-peugnet/dna-backup/sketch"
 	"github.com/n-peugnet/dna-backup/testutils"
 	"github.com/n-peugnet/dna-backup/utils"
 )
@@ -96,6 +99,47 @@ func storeChunks(dest string, chunks <-chan []byte) {
 	}
 }
 
+// hashChunks calculates the hashes for a channel of chunks.
+// For each chunk, both a fingerprint (hash over the full content) and a sketch
+// (resemblance hash based on maximal values of regions) are calculated and
+// stored in a hashmap.
+func (r *Repo) hashChunks(chunks <-chan IdentifiedChunk) {
+	for c := range chunks {
+		r.hashChunk(c.GetId(), c.Reader())
+	}
+}
+
+// hashChunk calculates the hashes for a chunk and stores them in the repo hashmaps.
+func (r *Repo) hashChunk(id *ChunkId, reader io.Reader) (fp uint64, sk []uint64) {
+	var buffSk bytes.Buffer
+	var buffFp bytes.Buffer
+	var wg sync.WaitGroup
+	reader = io.TeeReader(reader, &buffSk)
+	io.Copy(&buffFp, reader)
+	wg.Add(2)
+	go r.makeFingerprint(id, &buffFp, &wg, &fp)
+	go r.makeSketch(id, &buffSk, &wg, &sk)
+	wg.Wait()
+	if _, e := r.fingerprints[fp]; e {
+		logger.Error(fp, " already exists in fingerprints map")
+	}
+	r.fingerprints[fp] = id
+	r.sketches.Set(sk, id)
+	return
+}
+
+func (r *Repo) makeFingerprint(id *ChunkId, reader io.Reader, wg *sync.WaitGroup, ret *uint64) {
+	defer wg.Done()
+	hasher := rabinkarp64.NewFromPol(r.pol)
+	io.Copy(hasher, reader)
+	*ret = hasher.Sum64()
+}
+
+func (r *Repo) makeSketch(id *ChunkId, reader io.Reader, wg *sync.WaitGroup, ret *[]uint64) {
+	defer wg.Done()
+	*ret, _ = sketch.SketchChunk(reader, r.pol, r.chunkSize, r.sketchWSize, r.sketchSfCount, r.sketchFCount)
+}
+
 func TestReadFiles1(t *testing.T) {
 	tmpDir := t.TempDir()
 	repo := NewRepo(tmpDir)
@@ -308,6 +352,22 @@ func TestRestoreZlib(t *testing.T) {
 	assertSameTree(t, testutils.AssertSameFile, expected, dest, "Restore")
 }
 
+func TestRoundtrip(t *testing.T) {
+	temp := t.TempDir()
+	dest := t.TempDir()
+	source := filepath.Join("testdata", "logs")
+	repo1 := NewRepo(temp)
+	repo2 := NewRepo(temp)
+
+	repo1.Commit(source)
+	// Commit a second version, just to see if it does not destroy everything
+	// TODO: check that the second version is indeed empty
+	repo1.Commit(source)
+	repo2.Restore(dest)
+
+	assertSameTree(t, assertCompatibleRepoFile, source, dest, "Commit")
+}
+
 func TestHashes(t *testing.T) {
 	dest := t.TempDir()
 	source := filepath.Join("testdata", "repo_8k")
author	n-peugnet <n.peugnet@free.fr>	2021-09-29 16:31:28 +0200
committer	n-peugnet <n.peugnet@free.fr>	2021-09-29 16:31:28 +0200
commit	c2cc8f8a6fb65488f19a5addf47d83e19aff6f4b (patch)
tree	08300725c0bc0395e2b9c9bb9b511cf7b13a919a /repo_test.go
parent	5dad27953b9050f097b53227cfc29e3d3373fd97 (diff)
download	dna-backup-c2cc8f8a6fb65488f19a5addf47d83e19aff6f4b.tar.gz dna-backup-c2cc8f8a6fb65488f19a5addf47d83e19aff6f4b.zip