aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--TODO.md7
-rw-r--r--chunk.go9
-rw-r--r--repo.go65
-rw-r--r--repo_test.go44
-rw-r--r--testdata/repo_8k_zlib/00000/chunks/000000000000000bin0 -> 1077 bytes
-rw-r--r--testdata/repo_8k_zlib/00000/chunks/000000000000001bin0 -> 985 bytes
-rw-r--r--testdata/repo_8k_zlib/00000/chunks/000000000000002bin0 -> 1118 bytes
-rw-r--r--testdata/repo_8k_zlib/00000/chunks/000000000000003bin0 -> 1343 bytes
-rw-r--r--testdata/repo_8k_zlib/00000/chunks/000000000000004bin0 -> 1366 bytes
-rw-r--r--testdata/repo_8k_zlib/00000/chunks/000000000000005bin0 -> 1350 bytes
-rw-r--r--testdata/repo_8k_zlib/00000/chunks/000000000000006bin0 -> 1360 bytes
-rw-r--r--testdata/repo_8k_zlib/00000/chunks/000000000000007bin0 -> 1373 bytes
-rw-r--r--testdata/repo_8k_zlib/00000/chunks/000000000000008bin0 -> 1254 bytes
-rw-r--r--testdata/repo_8k_zlib/00000/chunks/000000000000009bin0 -> 1350 bytes
-rw-r--r--testdata/repo_8k_zlib/00000/chunks/000000000000010bin0 -> 1173 bytes
-rw-r--r--testdata/repo_8k_zlib/00000/chunks/000000000000011bin0 -> 1356 bytes
-rw-r--r--testdata/repo_8k_zlib/00000/chunks/000000000000012bin0 -> 1252 bytes
-rw-r--r--testdata/repo_8k_zlib/00000/filesbin0 -> 155 bytes
-rw-r--r--testdata/repo_8k_zlib/00000/recipebin0 -> 6570 bytes
-rw-r--r--utils/ioutils.go28
20 files changed, 117 insertions, 36 deletions
diff --git a/TODO.md b/TODO.md
index 084dfd6..43d684f 100644
--- a/TODO.md
+++ b/TODO.md
@@ -22,9 +22,9 @@ priority 1
- [ ] read individual files
- [ ] properly store information to be DNA encoded
- [ ] tar source to keep files metadata ?
- - [ ] store chunks compressed
- - [ ] compress before storing
- - [ ] uncompress before loading
+ - [x] store chunks compressed
+ - [x] compress before storing
+ - [x] uncompress before loading
- [ ] store compressed chunks into tracks of trackSize (1024o)
- [x] add chunk cache... what was it for again ??
- [x] better tests for `(*Repo).Commit`
@@ -34,6 +34,7 @@ priority 2
- [ ] make more use of the `Reader` API (which is analogous to the `IOStream` in Java)
- [ ] refactor matchStream as right now it is quite complex
- [x] better test for `(*Repo).matchStream`
+- [ ] compress partial chunks (`TempChunks` for now)
- [ ] tail packing of PartialChunks (this Struct does not exist yet as it is in
fact just `TempChunks` for now)
- [ ] option to commit without deltas to save new base chunks
diff --git a/chunk.go b/chunk.go
index a1cf3cd..14b8cfd 100644
--- a/chunk.go
+++ b/chunk.go
@@ -4,8 +4,6 @@ import (
"bytes"
"fmt"
"io"
- "log"
- "os"
"path"
)
@@ -87,12 +85,7 @@ func (c *StoredChunk) Reader() io.ReadSeeker {
}
func (c *StoredChunk) Len() int {
- path := c.Id.Path(c.repo.path)
- info, err := os.Stat(path)
- if err != nil {
- log.Println("Chunk: could not stat file:", path)
- }
- return int(info.Size())
+ return c.repo.chunkSize
}
func NewTempChunk(value []byte) *TempChunk {
diff --git a/repo.go b/repo.go
index 21faa0a..b572233 100644
--- a/repo.go
+++ b/repo.go
@@ -50,17 +50,19 @@ type FingerprintMap map[uint64]*ChunkId
type SketchMap map[uint64][]*ChunkId
type Repo struct {
- path string
- chunkSize int
- sketchWSize int
- sketchSfCount int
- sketchFCount int
- pol rabinkarp64.Pol
- differ Differ
- patcher Patcher
- fingerprints FingerprintMap
- sketches SketchMap
- chunkCache cache.Cacher
+ path string
+ chunkSize int
+ sketchWSize int
+ sketchSfCount int
+ sketchFCount int
+ pol rabinkarp64.Pol
+ differ Differ
+ patcher Patcher
+ fingerprints FingerprintMap
+ sketches SketchMap
+ chunkCache cache.Cacher
+ chunkReadWrapper func(r io.Reader) (io.ReadCloser, error)
+ chunkWriteWrapper func(w io.Writer) io.WriteCloser
}
type File struct {
@@ -79,17 +81,19 @@ func NewRepo(path string) *Repo {
log.Panicln(err)
}
return &Repo{
- path: path,
- chunkSize: 8 << 10,
- sketchWSize: 32,
- sketchSfCount: 3,
- sketchFCount: 4,
- pol: p,
- differ: &Bsdiff{},
- patcher: &Bsdiff{},
- fingerprints: make(FingerprintMap),
- sketches: make(SketchMap),
- chunkCache: cache.NewFifoCache(1000),
+ path: path,
+ chunkSize: 8 << 10,
+ sketchWSize: 32,
+ sketchSfCount: 3,
+ sketchFCount: 4,
+ pol: p,
+ differ: &Bsdiff{},
+ patcher: &Bsdiff{},
+ fingerprints: make(FingerprintMap),
+ sketches: make(SketchMap),
+ chunkCache: cache.NewFifoCache(1000),
+ chunkReadWrapper: utils.ZlibReader,
+ chunkWriteWrapper: utils.ZlibWriter,
}
}
@@ -242,10 +246,14 @@ func (r *Repo) StoreChunkContent(id *ChunkId, reader io.Reader) error {
if err != nil {
return errors.New(fmt.Sprintf("Error creating chunk for '%s'; %s\n", path, err))
}
- n, err := io.Copy(file, reader)
+ wrapper := r.chunkWriteWrapper(file)
+ n, err := io.Copy(wrapper, reader)
if err != nil {
return errors.New(fmt.Sprintf("Error writing chunk content for '%s', written %d bytes: %s\n", path, n, err))
}
+ if err := wrapper.Close(); err != nil {
+ return errors.New(fmt.Sprintf("Error closing write wrapper for '%s': %s\n", path, err))
+ }
if err := file.Close(); err != nil {
return errors.New(fmt.Sprintf("Error closing chunk for '%s': %s\n", path, err))
}
@@ -262,7 +270,11 @@ func (r *Repo) LoadChunkContent(id *ChunkId) *bytes.Reader {
if err != nil {
log.Printf("Cannot open chunk '%s': %s\n", path, err)
}
- value, err = io.ReadAll(f)
+ wrapper, err := r.chunkReadWrapper(f)
+ if err != nil {
+ log.Printf("Cannot create read wrapper for chunk '%s': %s\n", path, err)
+ }
+ value, err = io.ReadAll(wrapper)
if err != nil {
log.Panicf("Could not read from chunk '%s': %s\n", path, err)
}
@@ -401,7 +413,10 @@ func (r *Repo) encodeTempChunk(temp BufferedChunk, version int, last *uint64) (c
*last++
hasher := rabinkarp64.NewFromPol(r.pol)
r.hashAndStoreChunk(id, temp.Reader(), hasher)
- r.StoreChunkContent(id, temp.Reader())
+ err := r.StoreChunkContent(id, temp.Reader())
+ if err != nil {
+ log.Println(err)
+ }
log.Println("Add new chunk", id)
return NewStoredChunk(r, id), false
}
diff --git a/repo_test.go b/repo_test.go
index cd8a88e..93b1e75 100644
--- a/repo_test.go
+++ b/repo_test.go
@@ -10,6 +10,8 @@ import (
"path"
"reflect"
"testing"
+
+ "github.com/n-peugnet/dna-backup/utils"
)
func chunkCompare(t *testing.T, dataDir string, repo *Repo, testFiles []string, chunkCount int) {
@@ -193,6 +195,14 @@ func getDataStream(dataDir string, streamFunc func([]File, io.WriteCloser)) io.R
return reader
}
+func dummyReader(r io.Reader) (io.ReadCloser, error) {
+ return io.NopCloser(r), nil
+}
+
+func dummyWriter(w io.Writer) io.WriteCloser {
+ return utils.NopCloser(w)
+}
+
func TestBsdiff(t *testing.T) {
resultDir := t.TempDir()
repo := NewRepo(resultDir)
@@ -208,6 +218,10 @@ func TestBsdiff(t *testing.T) {
ioutil.WriteFile(addedFile2, make([]byte, 4000), 0664)
defer os.Remove(addedFile2)
+ // configure repo
+ repo.chunkReadWrapper = dummyReader
+ repo.chunkWriteWrapper = dummyWriter
+
// Load previously stored chunks
oldChunks := make(chan IdentifiedChunk, 16)
versions := repo.loadVersions()
@@ -233,6 +247,21 @@ func TestCommit(t *testing.T) {
source := path.Join("testdata", "logs")
expected := path.Join("testdata", "repo_8k")
repo := NewRepo(dest)
+ repo.chunkReadWrapper = dummyReader
+ repo.chunkWriteWrapper = dummyWriter
+
+ repo.Commit(source)
+ assertSameTree(t, assertCompatibleRepoFile, expected, dest, "Commit")
+}
+
+func TestCommitZlib(t *testing.T) {
+ dest := t.TempDir()
+ source := path.Join("testdata", "logs")
+ expected := path.Join("testdata", "repo_8k_zlib")
+ repo := NewRepo(dest)
+ repo.chunkReadWrapper = utils.ZlibReader
+ repo.chunkWriteWrapper = utils.ZlibWriter
+
repo.Commit(source)
assertSameTree(t, assertCompatibleRepoFile, expected, dest, "Commit")
}
@@ -242,6 +271,21 @@ func TestRestore(t *testing.T) {
source := path.Join("testdata", "repo_8k")
expected := path.Join("testdata", "logs")
repo := NewRepo(source)
+ repo.chunkReadWrapper = dummyReader
+ repo.chunkWriteWrapper = dummyWriter
+
+ repo.Restore(dest)
+ assertSameTree(t, assertSameFile, expected, dest, "Restore")
+}
+
+func TestRestoreZlib(t *testing.T) {
+ dest := t.TempDir()
+ source := path.Join("testdata", "repo_8k_zlib")
+ expected := path.Join("testdata", "logs")
+ repo := NewRepo(source)
+ repo.chunkReadWrapper = utils.ZlibReader
+ repo.chunkWriteWrapper = utils.ZlibWriter
+
repo.Restore(dest)
assertSameTree(t, assertSameFile, expected, dest, "Restore")
}
diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000000 b/testdata/repo_8k_zlib/00000/chunks/000000000000000
new file mode 100644
index 0000000..11c1cab
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/chunks/000000000000000
Binary files differ
diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000001 b/testdata/repo_8k_zlib/00000/chunks/000000000000001
new file mode 100644
index 0000000..b212097
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/chunks/000000000000001
Binary files differ
diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000002 b/testdata/repo_8k_zlib/00000/chunks/000000000000002
new file mode 100644
index 0000000..15d2a66
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/chunks/000000000000002
Binary files differ
diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000003 b/testdata/repo_8k_zlib/00000/chunks/000000000000003
new file mode 100644
index 0000000..d3c7997
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/chunks/000000000000003
Binary files differ
diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000004 b/testdata/repo_8k_zlib/00000/chunks/000000000000004
new file mode 100644
index 0000000..be13110
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/chunks/000000000000004
Binary files differ
diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000005 b/testdata/repo_8k_zlib/00000/chunks/000000000000005
new file mode 100644
index 0000000..a92e0a6
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/chunks/000000000000005
Binary files differ
diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000006 b/testdata/repo_8k_zlib/00000/chunks/000000000000006
new file mode 100644
index 0000000..47e1d74
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/chunks/000000000000006
Binary files differ
diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000007 b/testdata/repo_8k_zlib/00000/chunks/000000000000007
new file mode 100644
index 0000000..2428c10
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/chunks/000000000000007
Binary files differ
diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000008 b/testdata/repo_8k_zlib/00000/chunks/000000000000008
new file mode 100644
index 0000000..0524676
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/chunks/000000000000008
Binary files differ
diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000009 b/testdata/repo_8k_zlib/00000/chunks/000000000000009
new file mode 100644
index 0000000..e89cb70
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/chunks/000000000000009
Binary files differ
diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000010 b/testdata/repo_8k_zlib/00000/chunks/000000000000010
new file mode 100644
index 0000000..140ca5f
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/chunks/000000000000010
Binary files differ
diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000011 b/testdata/repo_8k_zlib/00000/chunks/000000000000011
new file mode 100644
index 0000000..b61efae
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/chunks/000000000000011
Binary files differ
diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000012 b/testdata/repo_8k_zlib/00000/chunks/000000000000012
new file mode 100644
index 0000000..677a3a8
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/chunks/000000000000012
Binary files differ
diff --git a/testdata/repo_8k_zlib/00000/files b/testdata/repo_8k_zlib/00000/files
new file mode 100644
index 0000000..5a7b1da
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/files
Binary files differ
diff --git a/testdata/repo_8k_zlib/00000/recipe b/testdata/repo_8k_zlib/00000/recipe
new file mode 100644
index 0000000..1d94f3a
--- /dev/null
+++ b/testdata/repo_8k_zlib/00000/recipe
Binary files differ
diff --git a/utils/ioutils.go b/utils/ioutils.go
new file mode 100644
index 0000000..a861995
--- /dev/null
+++ b/utils/ioutils.go
@@ -0,0 +1,28 @@
+package utils
+
+import (
+ "compress/zlib"
+ "io"
+)
+
+// NopCloser returns a WriteCloser with a no-op Close method wrapping
+// the provided Writer w.
+func NopCloser(w io.Writer) io.WriteCloser {
+ return nopCloser{w}
+}
+
+type nopCloser struct {
+ io.Writer
+}
+
+func (nopCloser) Close() error { return nil }
+
+// ZlibReader wraps a reader with a new zlib.Reader.
+func ZlibReader(r io.Reader) (io.ReadCloser, error) {
+ return zlib.NewReader(r)
+}
+
+// ZlibWriter wraps a writer with a new zlib.Writer.
+func ZlibWriter(w io.Writer) io.WriteCloser {
+ return zlib.NewWriter(w)
+}