Diffstat:
-rw-r--r--  TODO.md                        |   3
-rw-r--r--  repo.go                        |  27
-rw-r--r--  repo_test.go                   |  71
-rw-r--r--  testdata/repo_8k/00000/files   | bin 208 -> 155 bytes
-rw-r--r--  utils/fileutils.go             |  10
-rw-r--r--  utils/fileutils_test.go        |  15
6 files changed, 99 insertions, 27 deletions
diff --git a/TODO.md b/TODO.md
index bb95b39..9d2c75f 100644
--- a/TODO.md
+++ b/TODO.md
@@ -24,12 +24,13 @@ priority 1
- [ ] uncompress before loading
- [ ] store compressed chunks into tracks of trackSize (1024 bytes)
- [x] add chunk cache... what was it for again ??
+- [x] better tests for `(*Repo).Commit`
priority 2
----------
- [x] use the `Reader` API more (which is analogous to the `IOStream` in Java)
- [ ] refactor matchStream as right now it is quite complex
-- [ ] better test for `Repo.matchStream`
+- [x] better test for `(*Repo).matchStream`
- [ ] tail packing of PartialChunks (this Struct does not exist yet as it is in fact just `TempChunks` for now)
- [ ] option to commit without deltas to save new base chunks
diff --git a/repo.go b/repo.go
index 585e505..ce8d890 100644
--- a/repo.go
+++ b/repo.go
@@ -38,10 +38,12 @@ import (
"path"
"path/filepath"
"reflect"
+ "strings"
"github.com/chmduquesne/rollinghash/rabinkarp64"
"github.com/n-peugnet/dna-backup/cache"
"github.com/n-peugnet/dna-backup/sketch"
+ "github.com/n-peugnet/dna-backup/utils"
)
type FingerprintMap map[uint64]*ChunkId
@@ -100,6 +102,7 @@ func (r *Repo) Patcher() Patcher {
}
func (r *Repo) Commit(source string) {
+ source = utils.TrimTrailingSeparator(source)
versions := r.loadVersions()
newVersion := len(versions) // TODO: add newVersion function
newPath := path.Join(r.path, fmt.Sprintf(versionFmt, newVersion))
@@ -116,7 +119,7 @@ func (r *Repo) Commit(source string) {
r.hashChunks(oldChunks)
recipe := r.matchStream(reader, newVersion)
storeRecipe(newRecipePath, recipe)
- storeFileList(newFilesPath, files)
+ storeFileList(newFilesPath, unprefixFiles(files, source))
fmt.Println(files)
}
@@ -177,6 +180,20 @@ func listFiles(path string) []File {
return files
}
+func unprefixFiles(files []File, prefix string) (ret []File) {
+ ret = make([]File, len(files))
+ preSize := len(prefix)
+ for i, f := range files {
+ if !strings.HasPrefix(f.Path, prefix) {
+ log.Println("Warning", f.Path, "is not prefixed by", prefix)
+ } else {
+ f.Path = f.Path[preSize:]
+ }
+ ret[i] = f
+ }
+ return
+}
+
func concatFiles(files []File, stream io.WriteCloser) {
for _, f := range files {
file, err := os.Open(f.Path)
@@ -189,10 +206,6 @@ func concatFiles(files []File, stream io.WriteCloser) {
stream.Close()
}
-func (r *Repo) chunkMinLen() int {
- return sketch.SuperFeatureSize(r.chunkSize, r.sketchSfCount, r.sketchFCount)
-}
-
func storeFileList(dest string, files []File) {
file, err := os.Create(dest)
if err == nil {
@@ -288,6 +301,10 @@ func (r *Repo) loadChunks(versions []string, chunks chan<- IdentifiedChunk) {
close(chunks)
}
+func (r *Repo) chunkMinLen() int {
+ return sketch.SuperFeatureSize(r.chunkSize, r.sketchSfCount, r.sketchFCount)
+}
+
// hashChunks calculates the hashes for a channel of chunks.
//
// For each chunk, both a fingerprint (hash over the full content) and a sketch
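
Note (not part of the patch): the effect of the new `unprefixFiles` helper can be sketched standalone. The `File` type below is a minimal stand-in carrying only the field the diff touches, and the sample path is made up:

    // Standalone sketch of the prefix stripping that Commit now applies
    // before storing the file list.
    package main

    import (
        "fmt"
        "log"
        "strings"
    )

    // File is a minimal stand-in for the repo's File struct; only Path is
    // needed to illustrate the behaviour.
    type File struct {
        Path string
    }

    func unprefixFiles(files []File, prefix string) (ret []File) {
        ret = make([]File, len(files))
        for i, f := range files {
            if !strings.HasPrefix(f.Path, prefix) {
                log.Println("Warning", f.Path, "is not prefixed by", prefix)
            } else {
                f.Path = f.Path[len(prefix):]
            }
            ret[i] = f
        }
        return
    }

    func main() {
        files := []File{{Path: "testdata/logs/1/example.log"}} // made-up path
        // With the source prefix trimmed of its trailing separator, the
        // stored paths keep a leading "/".
        fmt.Println(unprefixFiles(files, "testdata/logs")) // [{/1/example.log}]
    }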
diff --git a/repo_test.go b/repo_test.go
index 5127263..cd8a88e 100644
--- a/repo_test.go
+++ b/repo_test.go
@@ -231,41 +231,70 @@ func TestBsdiff(t *testing.T) {
func TestCommit(t *testing.T) {
dest := t.TempDir()
source := path.Join("testdata", "logs")
+ expected := path.Join("testdata", "repo_8k")
repo := NewRepo(dest)
repo.Commit(source)
- recipe := loadRecipe(path.Join(dest, "00000", recipeName))
- log.Println(recipe)
+ assertSameTree(t, assertCompatibleRepoFile, expected, dest, "Commit")
}
func TestRestore(t *testing.T) {
dest := t.TempDir()
source := path.Join("testdata", "repo_8k")
+ expected := path.Join("testdata", "logs")
repo := NewRepo(source)
repo.Restore(dest)
- destFiles := listFiles(dest)
- sourceFiles := listFiles(path.Join("testdata", "logs"))
- sfCount := len(sourceFiles)
- if sfCount <= 0 {
- t.Fatalf("No source files: %d", sfCount)
+ assertSameTree(t, assertSameFile, expected, dest, "Restore")
+}
+
+func assertSameTree(t *testing.T, apply func(t *testing.T, expected string, actual string, prefix string), expected string, actual string, prefix string) {
+ actualFiles := listFiles(actual)
+ expectedFiles := listFiles(expected)
+ efCount := len(expectedFiles)
+ if efCount <= 0 {
+ t.Fatalf("No expected files: %d", efCount)
}
- dfCount := len(destFiles)
- if sfCount != dfCount {
- t.Fatalf("Incorrect number for destination files: %d, should be %d", dfCount, sfCount)
+ afCount := len(actualFiles)
+ if efCount != afCount {
+ t.Fatalf("Incorrect number of files: %d, should be %d", afCount, efCount)
}
- for i, sf := range sourceFiles {
- sfContent, err := os.ReadFile(sf.Path)
- if err != nil {
- t.Fatalf("Error reading from source file '%s': %s", sf.Path, err)
+ for i, ef := range expectedFiles {
+ af := actualFiles[i]
+ efRelPath := ef.Path[len(expected):]
+ afRelPath := af.Path[len(actual):]
+ if efRelPath != afRelPath {
+ t.Fatalf("File path '%s' does not match '%s'", afRelPath, efRelPath)
}
- df := destFiles[i]
- dfContent, err := os.ReadFile(df.Path)
- if err != nil {
- t.Fatalf("Error reading from source file '%s': %s", df.Path, err)
- }
- if bytes.Compare(sfContent, dfContent) != 0 {
- t.Errorf("File content of '%s' does not match '%s'", df.Path, sf.Path)
+ apply(t, ef.Path, af.Path, prefix)
+ }
+}
+
+func assertCompatibleRepoFile(t *testing.T, expected string, actual string, prefix string) {
+ if path.Base(expected) == filesName {
+ eFiles := loadFileList(expected)
+ aFiles := loadFileList(actual)
+ assertLen(t, len(eFiles), aFiles, prefix)
+ for i := 0; i < len(eFiles); i++ {
+ if eFiles[i] != aFiles[i] {
t.Fatal(prefix, "file entries do not match:", aFiles[i], ", expected:", eFiles[i])
+ }
}
+ } else if path.Base(expected) == recipeName {
+ // TODO: check recipes equality
+ } else {
+ assertSameFile(t, expected, actual, prefix)
+ }
+}
+
+func assertSameFile(t *testing.T, expected string, actual string, prefix string) {
+ efContent, err := os.ReadFile(expected)
+ if err != nil {
+ t.Fatalf("%s Error reading from expected file '%s': %s", prefix, expected, err)
+ }
+ afContent, err := os.ReadFile(actual)
+ if err != nil {
+ t.Fatalf("%s Error reading from actual file '%s': %s", prefix, actual, err)
}
+ assertSameSlice(t, efContent, afContent, prefix+" files")
}
func assertLen(t *testing.T, expected int, actual interface{}, prefix string) {
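
Note (not part of the patch): since `assertSameTree` takes the per-file assertion as a parameter, other checks can be plugged into the same tree walk. A purely hypothetical example, comparing only file sizes and assuming the `os` and `testing` imports already present in repo_test.go:

    // Hypothetical per-file assertion, not in this commit: compares sizes only.
    func assertSameSize(t *testing.T, expected string, actual string, prefix string) {
        eInfo, err := os.Stat(expected)
        if err != nil {
            t.Fatalf("%s Error stating expected file '%s': %s", prefix, expected, err)
        }
        aInfo, err := os.Stat(actual)
        if err != nil {
            t.Fatalf("%s Error stating actual file '%s': %s", prefix, actual, err)
        }
        if eInfo.Size() != aInfo.Size() {
            t.Errorf("%s size of '%s' (%d) does not match '%s' (%d)",
                prefix, actual, aInfo.Size(), expected, eInfo.Size())
        }
    }

    // It would be used exactly like the two assertions above:
    //   assertSameTree(t, assertSameSize, expected, dest, "Restore")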
diff --git a/testdata/repo_8k/00000/files b/testdata/repo_8k/00000/files
index 20d5905..5a7b1da 100644
--- a/testdata/repo_8k/00000/files
+++ b/testdata/repo_8k/00000/files
Binary files differ
diff --git a/utils/fileutils.go b/utils/fileutils.go
new file mode 100644
index 0000000..6c269ca
--- /dev/null
+++ b/utils/fileutils.go
@@ -0,0 +1,10 @@
+package utils
+
+import (
+ "path/filepath"
+ "strings"
+)
+
+func TrimTrailingSeparator(path string) string {
+ return strings.TrimSuffix(path, string(filepath.Separator))
+}
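
Note (not part of the patch): the reason `Commit` needs this trim is the prefix stripping above. Assuming `listFiles` returns cleaned paths (as `filepath.Walk` yields for entries under the root), the relative paths stored in the file list would otherwise depend on whether the caller wrote the source with or without a trailing separator. A small standalone sketch, using a made-up path and a Unix separator:

    // Standalone illustration of why the trailing separator is trimmed
    // before unprefixFiles is applied.
    package main

    import (
        "fmt"
        "strings"
    )

    func main() {
        walked := "testdata/logs/1/example.log" // made-up path, already cleaned
        // Untrimmed prefix: the leading separator is consumed with the prefix.
        fmt.Println(strings.TrimPrefix(walked, "testdata/logs/")) // 1/example.log
        // Trimmed prefix (what utils.TrimTrailingSeparator yields on Unix):
        // the stored path keeps its leading "/", the same as when Commit is
        // called with "testdata/logs" directly.
        trimmed := strings.TrimSuffix("testdata/logs/", "/")
        fmt.Println(strings.TrimPrefix(walked, trimmed)) // /1/example.log
    }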
diff --git a/utils/fileutils_test.go b/utils/fileutils_test.go
new file mode 100644
index 0000000..176d856
--- /dev/null
+++ b/utils/fileutils_test.go
@@ -0,0 +1,15 @@
+package utils
+
+import (
+ "path/filepath"
+ "testing"
+)
+
+func TestTrimTrailingSeparator(t *testing.T) {
+ if TrimTrailingSeparator("test"+string(filepath.Separator)) != "test" {
+ t.Error("Seprator should have been trimmed")
+ }
+ if TrimTrailingSeparator("test") != "test" {
+ t.Error("Path should not have changed")
+ }
+}