aboutsummaryrefslogtreecommitdiff
path: root/repo_test.go
diff options
context:
space:
mode:
author: n-peugnet <n.peugnet@free.fr> 2021-09-29 16:31:28 +0200
committer: n-peugnet <n.peugnet@free.fr> 2021-09-29 16:31:28 +0200
commit: c2cc8f8a6fb65488f19a5addf47d83e19aff6f4b (patch)
tree: 08300725c0bc0395e2b9c9bb9b511cf7b13a919a /repo_test.go
parent: 5dad27953b9050f097b53227cfc29e3d3373fd97 (diff)
download: dna-backup-c2cc8f8a6fb65488f19a5addf47d83e19aff6f4b.tar.gz
dna-backup-c2cc8f8a6fb65488f19a5addf47d83e19aff6f4b.zip
move old funcs to repo_test and add roundtrip test
Diffstat (limited to 'repo_test.go')
-rw-r--r-- repo_test.go 60
1 files changed, 60 insertions, 0 deletions
diff --git a/repo_test.go b/repo_test.go
index 4b5c09b..35b3101 100644
--- a/repo_test.go
+++ b/repo_test.go
@@ -8,9 +8,12 @@ import (
"os"
"path/filepath"
"strings"
+ "sync"
"testing"
+ "github.com/chmduquesne/rollinghash/rabinkarp64"
"github.com/n-peugnet/dna-backup/logger"
+ "github.com/n-peugnet/dna-backup/sketch"
"github.com/n-peugnet/dna-backup/testutils"
"github.com/n-peugnet/dna-backup/utils"
)
@@ -96,6 +99,47 @@ func storeChunks(dest string, chunks <-chan []byte) {
}
}
+// hashChunks drains the chunks channel and hashes every chunk it receives.
+// Each chunk is handed to hashChunk, which calculates both a fingerprint (hash
+// over the full content) and a sketch (resemblance hash based on maximal
+// values of regions) and stores them in the repo hashmaps.
+func (r *Repo) hashChunks(chunks <-chan IdentifiedChunk) {
+	for chunk := range chunks {
+		id, content := chunk.GetId(), chunk.Reader()
+		r.hashChunk(id, content)
+	}
+}
+
+// hashChunk calculates the hashes for a chunk and stores them in the repo
+// hashmaps.
+//
+// The chunk is read from reader exactly once; its content is duplicated into
+// two buffers so that the fingerprint and the sketch can be computed
+// concurrently, each from its own copy. The returned fp is the fingerprint and
+// sk is the sketch; both are also recorded under id in r.fingerprints and
+// r.sketches.
+//
+// A read error is logged and the hashes are then computed over whatever was
+// read before the error. A fingerprint that is already present in the map is
+// logged as an error and then overwritten with id.
+func (r *Repo) hashChunk(id *ChunkId, reader io.Reader) (fp uint64, sk []uint64) {
+	var buffFp, buffSk bytes.Buffer
+	// Every byte copied into buffFp is mirrored into buffSk by the TeeReader.
+	if _, err := io.Copy(&buffFp, io.TeeReader(reader, &buffSk)); err != nil {
+		logger.Error("reading chunk for hashing: ", err)
+	}
+
+	var wg sync.WaitGroup
+	wg.Add(2)
+	go r.makeFingerprint(id, &buffFp, &wg, &fp)
+	go r.makeSketch(id, &buffSk, &wg, &sk)
+	// fp and sk are only safe to read once both goroutines are done.
+	wg.Wait()
+
+	if _, exists := r.fingerprints[fp]; exists {
+		logger.Error(fp, " already exists in fingerprints map")
+	}
+	r.fingerprints[fp] = id
+	r.sketches.Set(sk, id)
+	return fp, sk
+}
+
+// makeFingerprint feeds everything read from reader into a rabinkarp64 hasher
+// created from r.pol and writes the resulting 64-bit sum to ret.
+// It calls wg.Done when it returns, so the caller can wait on wg before
+// reading ret. The id parameter is not used by this function.
+func (r *Repo) makeFingerprint(id *ChunkId, reader io.Reader, wg *sync.WaitGroup, ret *uint64) {
+	defer wg.Done()
+	h := rabinkarp64.NewFromPol(r.pol)
+	io.Copy(h, reader)
+	sum := h.Sum64()
+	*ret = sum
+}
+
+// makeSketch computes the sketch of the data read from reader with
+// sketch.SketchChunk, using the polynomial, chunk size and sketch settings of
+// the repo, and writes it to ret.
+// It calls wg.Done when it returns, so the caller can wait on wg before
+// reading ret. The id parameter is not used by this function.
+func (r *Repo) makeSketch(id *ChunkId, reader io.Reader, wg *sync.WaitGroup, ret *[]uint64) {
+	defer wg.Done()
+	// The second value returned by SketchChunk is intentionally discarded.
+	result, _ := sketch.SketchChunk(reader, r.pol, r.chunkSize, r.sketchWSize, r.sketchSfCount, r.sketchFCount)
+	*ret = result
+}
+
func TestReadFiles1(t *testing.T) {
tmpDir := t.TempDir()
repo := NewRepo(tmpDir)
@@ -308,6 +352,22 @@ func TestRestoreZlib(t *testing.T) {
assertSameTree(t, testutils.AssertSameFile, expected, dest, "Restore")
}
+// TestRoundtrip commits testdata/logs twice through one Repo value, restores
+// it through a second Repo value created on the same directory, and checks
+// that the restored tree matches the source.
+func TestRoundtrip(t *testing.T) {
+	repoDir := t.TempDir()
+	restoreDir := t.TempDir()
+	source := filepath.Join("testdata", "logs")
+	committer := NewRepo(repoDir)
+	restorer := NewRepo(repoDir)
+
+	// The source is committed a second time, just to see if it does not
+	// destroy everything.
+	// TODO: check that the second version is indeed empty
+	for i := 0; i < 2; i++ {
+		committer.Commit(source)
+	}
+	restorer.Restore(restoreDir)
+
+	assertSameTree(t, assertCompatibleRepoFile, source, restoreDir, "Commit")
+}
+
func TestHashes(t *testing.T) {
dest := t.TempDir()
source := filepath.Join("testdata", "repo_8k")