diff options
20 files changed, 117 insertions, 36 deletions
@@ -22,9 +22,9 @@ priority 1 - [ ] read individual files - [ ] properly store informations to be DNA encoded - [ ] tar source to keep files metadata ? - - [ ] store chunks compressed - - [ ] compress before storing - - [ ] uncompress before loading + - [x] store chunks compressed + - [x] compress before storing + - [x] uncompress before loading - [ ] store compressed chunks into tracks of trackSize (1024o) - [x] add chunk cache... what was it for again ?? - [x] better tests for `(*Repo).Commit` @@ -34,6 +34,7 @@ priority 2 - [ ] use more the `Reader` API (which is analoguous to the `IOStream` in Java) - [ ] refactor matchStream as right now it is quite complex - [x] better test for `(*Repo).matchStream` +- [ ] compress partial chunks (`TempChunks` for now) - [ ] tail packing of PartialChunks (this Struct does not exist yet as it is in fact just `TempChunks` for now) - [ ] option to commit without deltas to save new base chunks @@ -4,8 +4,6 @@ import ( "bytes" "fmt" "io" - "log" - "os" "path" ) @@ -87,12 +85,7 @@ func (c *StoredChunk) Reader() io.ReadSeeker { } func (c *StoredChunk) Len() int { - path := c.Id.Path(c.repo.path) - info, err := os.Stat(path) - if err != nil { - log.Println("Chunk: could not stat file:", path) - } - return int(info.Size()) + return c.repo.chunkSize } func NewTempChunk(value []byte) *TempChunk { @@ -50,17 +50,19 @@ type FingerprintMap map[uint64]*ChunkId type SketchMap map[uint64][]*ChunkId type Repo struct { - path string - chunkSize int - sketchWSize int - sketchSfCount int - sketchFCount int - pol rabinkarp64.Pol - differ Differ - patcher Patcher - fingerprints FingerprintMap - sketches SketchMap - chunkCache cache.Cacher + path string + chunkSize int + sketchWSize int + sketchSfCount int + sketchFCount int + pol rabinkarp64.Pol + differ Differ + patcher Patcher + fingerprints FingerprintMap + sketches SketchMap + chunkCache cache.Cacher + chunkReadWrapper func(r io.Reader) (io.ReadCloser, error) + chunkWriteWrapper func(w io.Writer) 
io.WriteCloser } type File struct { @@ -79,17 +81,19 @@ func NewRepo(path string) *Repo { log.Panicln(err) } return &Repo{ - path: path, - chunkSize: 8 << 10, - sketchWSize: 32, - sketchSfCount: 3, - sketchFCount: 4, - pol: p, - differ: &Bsdiff{}, - patcher: &Bsdiff{}, - fingerprints: make(FingerprintMap), - sketches: make(SketchMap), - chunkCache: cache.NewFifoCache(1000), + path: path, + chunkSize: 8 << 10, + sketchWSize: 32, + sketchSfCount: 3, + sketchFCount: 4, + pol: p, + differ: &Bsdiff{}, + patcher: &Bsdiff{}, + fingerprints: make(FingerprintMap), + sketches: make(SketchMap), + chunkCache: cache.NewFifoCache(1000), + chunkReadWrapper: utils.ZlibReader, + chunkWriteWrapper: utils.ZlibWriter, } } @@ -242,10 +246,14 @@ func (r *Repo) StoreChunkContent(id *ChunkId, reader io.Reader) error { if err != nil { return errors.New(fmt.Sprintf("Error creating chunk for '%s'; %s\n", path, err)) } - n, err := io.Copy(file, reader) + wrapper := r.chunkWriteWrapper(file) + n, err := io.Copy(wrapper, reader) if err != nil { return errors.New(fmt.Sprintf("Error writing chunk content for '%s', written %d bytes: %s\n", path, n, err)) } + if err := wrapper.Close(); err != nil { + return errors.New(fmt.Sprintf("Error closing write wrapper for '%s': %s\n", path, err)) + } if err := file.Close(); err != nil { return errors.New(fmt.Sprintf("Error closing chunk for '%s': %s\n", path, err)) } @@ -262,7 +270,11 @@ func (r *Repo) LoadChunkContent(id *ChunkId) *bytes.Reader { if err != nil { log.Printf("Cannot open chunk '%s': %s\n", path, err) } - value, err = io.ReadAll(f) + wrapper, err := r.chunkReadWrapper(f) + if err != nil { + log.Printf("Cannot create read wrapper for chunk '%s': %s\n", path, err) + } + value, err = io.ReadAll(wrapper) if err != nil { log.Panicf("Could not read from chunk '%s': %s\n", path, err) } @@ -401,7 +413,10 @@ func (r *Repo) encodeTempChunk(temp BufferedChunk, version int, last *uint64) (c *last++ hasher := rabinkarp64.NewFromPol(r.pol) 
r.hashAndStoreChunk(id, temp.Reader(), hasher) - r.StoreChunkContent(id, temp.Reader()) + err := r.StoreChunkContent(id, temp.Reader()) + if err != nil { + log.Println(err) + } log.Println("Add new chunk", id) return NewStoredChunk(r, id), false } diff --git a/repo_test.go b/repo_test.go index cd8a88e..93b1e75 100644 --- a/repo_test.go +++ b/repo_test.go @@ -10,6 +10,8 @@ import ( "path" "reflect" "testing" + + "github.com/n-peugnet/dna-backup/utils" ) func chunkCompare(t *testing.T, dataDir string, repo *Repo, testFiles []string, chunkCount int) { @@ -193,6 +195,14 @@ func getDataStream(dataDir string, streamFunc func([]File, io.WriteCloser)) io.R return reader } +func dummyReader(r io.Reader) (io.ReadCloser, error) { + return io.NopCloser(r), nil +} + +func dummyWriter(w io.Writer) io.WriteCloser { + return utils.NopCloser(w) +} + func TestBsdiff(t *testing.T) { resultDir := t.TempDir() repo := NewRepo(resultDir) @@ -208,6 +218,10 @@ func TestBsdiff(t *testing.T) { ioutil.WriteFile(addedFile2, make([]byte, 4000), 0664) defer os.Remove(addedFile2) + // configure repo + repo.chunkReadWrapper = dummyReader + repo.chunkWriteWrapper = dummyWriter + // Load previously stored chunks oldChunks := make(chan IdentifiedChunk, 16) versions := repo.loadVersions() @@ -233,6 +247,21 @@ func TestCommit(t *testing.T) { source := path.Join("testdata", "logs") expected := path.Join("testdata", "repo_8k") repo := NewRepo(dest) + repo.chunkReadWrapper = dummyReader + repo.chunkWriteWrapper = dummyWriter + + repo.Commit(source) + assertSameTree(t, assertCompatibleRepoFile, expected, dest, "Commit") +} + +func TestCommitZlib(t *testing.T) { + dest := t.TempDir() + source := path.Join("testdata", "logs") + expected := path.Join("testdata", "repo_8k_zlib") + repo := NewRepo(dest) + repo.chunkReadWrapper = utils.ZlibReader + repo.chunkWriteWrapper = utils.ZlibWriter + repo.Commit(source) assertSameTree(t, assertCompatibleRepoFile, expected, dest, "Commit") } @@ -242,6 +271,21 @@ func 
TestRestore(t *testing.T) { source := path.Join("testdata", "repo_8k") expected := path.Join("testdata", "logs") repo := NewRepo(source) + repo.chunkReadWrapper = dummyReader + repo.chunkWriteWrapper = dummyWriter + + repo.Restore(dest) + assertSameTree(t, assertSameFile, expected, dest, "Restore") +} + +func TestRestoreZlib(t *testing.T) { + dest := t.TempDir() + source := path.Join("testdata", "repo_8k_zlib") + expected := path.Join("testdata", "logs") + repo := NewRepo(source) + repo.chunkReadWrapper = utils.ZlibReader + repo.chunkWriteWrapper = utils.ZlibWriter + repo.Restore(dest) assertSameTree(t, assertSameFile, expected, dest, "Restore") } diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000000 b/testdata/repo_8k_zlib/00000/chunks/000000000000000 Binary files differnew file mode 100644 index 0000000..11c1cab --- /dev/null +++ b/testdata/repo_8k_zlib/00000/chunks/000000000000000 diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000001 b/testdata/repo_8k_zlib/00000/chunks/000000000000001 Binary files differnew file mode 100644 index 0000000..b212097 --- /dev/null +++ b/testdata/repo_8k_zlib/00000/chunks/000000000000001 diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000002 b/testdata/repo_8k_zlib/00000/chunks/000000000000002 Binary files differnew file mode 100644 index 0000000..15d2a66 --- /dev/null +++ b/testdata/repo_8k_zlib/00000/chunks/000000000000002 diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000003 b/testdata/repo_8k_zlib/00000/chunks/000000000000003 Binary files differnew file mode 100644 index 0000000..d3c7997 --- /dev/null +++ b/testdata/repo_8k_zlib/00000/chunks/000000000000003 diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000004 b/testdata/repo_8k_zlib/00000/chunks/000000000000004 Binary files differnew file mode 100644 index 0000000..be13110 --- /dev/null +++ b/testdata/repo_8k_zlib/00000/chunks/000000000000004 diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000005 
b/testdata/repo_8k_zlib/00000/chunks/000000000000005 Binary files differnew file mode 100644 index 0000000..a92e0a6 --- /dev/null +++ b/testdata/repo_8k_zlib/00000/chunks/000000000000005 diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000006 b/testdata/repo_8k_zlib/00000/chunks/000000000000006 Binary files differnew file mode 100644 index 0000000..47e1d74 --- /dev/null +++ b/testdata/repo_8k_zlib/00000/chunks/000000000000006 diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000007 b/testdata/repo_8k_zlib/00000/chunks/000000000000007 Binary files differnew file mode 100644 index 0000000..2428c10 --- /dev/null +++ b/testdata/repo_8k_zlib/00000/chunks/000000000000007 diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000008 b/testdata/repo_8k_zlib/00000/chunks/000000000000008 Binary files differnew file mode 100644 index 0000000..0524676 --- /dev/null +++ b/testdata/repo_8k_zlib/00000/chunks/000000000000008 diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000009 b/testdata/repo_8k_zlib/00000/chunks/000000000000009 Binary files differnew file mode 100644 index 0000000..e89cb70 --- /dev/null +++ b/testdata/repo_8k_zlib/00000/chunks/000000000000009 diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000010 b/testdata/repo_8k_zlib/00000/chunks/000000000000010 Binary files differnew file mode 100644 index 0000000..140ca5f --- /dev/null +++ b/testdata/repo_8k_zlib/00000/chunks/000000000000010 diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000011 b/testdata/repo_8k_zlib/00000/chunks/000000000000011 Binary files differnew file mode 100644 index 0000000..b61efae --- /dev/null +++ b/testdata/repo_8k_zlib/00000/chunks/000000000000011 diff --git a/testdata/repo_8k_zlib/00000/chunks/000000000000012 b/testdata/repo_8k_zlib/00000/chunks/000000000000012 Binary files differnew file mode 100644 index 0000000..677a3a8 --- /dev/null +++ b/testdata/repo_8k_zlib/00000/chunks/000000000000012 diff --git a/testdata/repo_8k_zlib/00000/files 
b/testdata/repo_8k_zlib/00000/files Binary files differnew file mode 100644 index 0000000..5a7b1da --- /dev/null +++ b/testdata/repo_8k_zlib/00000/files diff --git a/testdata/repo_8k_zlib/00000/recipe b/testdata/repo_8k_zlib/00000/recipe Binary files differnew file mode 100644 index 0000000..1d94f3a --- /dev/null +++ b/testdata/repo_8k_zlib/00000/recipe diff --git a/utils/ioutils.go b/utils/ioutils.go new file mode 100644 index 0000000..a861995 --- /dev/null +++ b/utils/ioutils.go @@ -0,0 +1,28 @@ +package utils + +import ( + "compress/zlib" + "io" +) + +// NopCloser returns a WriteCloser with a no-op Close method wrapping +// the provided Writer w. +func NopCloser(w io.Writer) io.WriteCloser { + return nopCloser{w} +} + +type nopCloser struct { + io.Writer +} + +func (nopCloser) Close() error { return nil } + +// ZlibReader wraps a reader with a new zlib.Reader. +func ZlibReader(r io.Reader) (io.ReadCloser, error) { + return zlib.NewReader(r) +} + +// ZlibWriter wraps a writer with a new zlib.Writer. +func ZlibWriter(w io.Writer) io.WriteCloser { + return zlib.NewWriter(w) +} |