From a07bbe4ef22d642347d18f0e0a62d24868500d23 Mon Sep 17 00:00:00 2001 From: n-peugnet Date: Thu, 23 Sep 2021 18:10:30 +0200 Subject: Testing with Fdelta as the differ --- TODO.md | 6 ++++++ delta.go | 43 +++++++++++++++++++++++++++++++++++-------- go.mod | 1 + go.sum | 12 ++++++++++++ repo.go | 10 +++++++--- repo_test.go | 10 ++++++++++ 6 files changed, 71 insertions(+), 11 deletions(-) diff --git a/TODO.md b/TODO.md index b154718..0353b41 100644 --- a/TODO.md +++ b/TODO.md @@ -42,6 +42,12 @@ priority 2 is implemented - [ ] keep hash workers so that they reuse the same hasher and reset it instead of creating a new one each time. This could save some processing time +- [ ] support links (symbolic mainly and also hard) + - [ ] store this metadata somewhere, tar could be the solution, but this + would bury the metadata down into the chunks, storing it into the files + listing could be another solution but with this approach we would have + to think about what other metadata we want to store + - [ ] use a symlink aware Walk function (easy enough) reunion 7/09 ------------ diff --git a/delta.go b/delta.go index ff42e86..a127580 100644 --- a/delta.go +++ b/delta.go @@ -5,13 +5,9 @@ import ( "github.com/gabstv/go-bsdiff/pkg/bsdiff" "github.com/gabstv/go-bsdiff/pkg/bspatch" + "github.com/mdvan/fdelta" ) -type DeltaCodec interface { - Differ - Patcher -} - type Differ interface { Diff(source io.Reader, target io.Reader, patch io.Writer) error } @@ -20,13 +16,44 @@ type Patcher interface { Patch(source io.Reader, target io.Writer, patch io.Reader) error } -// TODO: maybe move this in its own file ? type Bsdiff struct{} -func (*Bsdiff) Diff(source io.Reader, target io.Reader, patch io.Writer) error { +func (Bsdiff) Diff(source io.Reader, target io.Reader, patch io.Writer) error { return bsdiff.Reader(source, target, patch) } -func (*Bsdiff) Patch(source io.Reader, target io.Writer, patch io.Reader) error { +func (Bsdiff) Patch(source io.Reader, target io.Writer, patch io.Reader) error { return bspatch.Reader(source, target, patch) } + +type Fdelta struct{} + +func (Fdelta) Diff(source io.Reader, target io.Reader, patch io.Writer) error { + sourceBuf, err := io.ReadAll(source) + if err != nil { + return err + } + targetBuf, err := io.ReadAll(target) + if err != nil { + return err + } + _, err = patch.Write(fdelta.Create(sourceBuf, targetBuf)) + return err +} + +func (Fdelta) Patch(source io.Reader, target io.Writer, patch io.Reader) error { + sourceBuf, err := io.ReadAll(source) + if err != nil { + return err + } + patchBuf, err := io.ReadAll(patch) + if err != nil { + return err + } + targetBuf, err := fdelta.Apply(sourceBuf, patchBuf) + if err != nil { + return err + } + _, err = target.Write(targetBuf) + return err +} diff --git a/go.mod b/go.mod index 59fed50..c6e7ed9 100644 --- a/go.mod +++ b/go.mod @@ -5,4 +5,5 @@ go 1.16 require ( github.com/chmduquesne/rollinghash v4.0.0+incompatible github.com/gabstv/go-bsdiff v1.0.5 + github.com/mdvan/fdelta v0.0.0-20200114160834-373fc49c9ba9 ) diff --git a/go.sum b/go.sum index 24e690c..57b8aa2 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,18 @@ github.com/chmduquesne/rollinghash v4.0.0+incompatible h1:hnREQO+DXjqIw3rUTzWN7/+Dpw+N5Um8zpKV0JOEgbo= github.com/chmduquesne/rollinghash v4.0.0+incompatible/go.mod h1:Uc2I36RRfTAf7Dge82bi3RU0OQUmXT9iweIcPqvr8A0= +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dsnet/compress v0.0.0-20171208185109-cc9eb1d7ad76 h1:eX+pdPPlD279OWgdx7f6KqIRSONuK7egk+jDx7OM3Ac= github.com/dsnet/compress v0.0.0-20171208185109-cc9eb1d7ad76/go.mod h1:KjxHHirfLaw19iGT70HvVjHQsL1vq1SRQB4yOsAfy2s= github.com/gabstv/go-bsdiff v1.0.5 h1:g29MC/38Eaig+iAobW10/CiFvPtin8U3Jj4yNLcNG9k= github.com/gabstv/go-bsdiff v1.0.5/go.mod h1:/Zz6GK+/f/TMylRtVaW3uwZlb0FZITILfA0q12XKGwg= +github.com/mdvan/fdelta v0.0.0-20200114160834-373fc49c9ba9 h1:r8h5Vudlg1u/k3DUKPMTuPkRHWksN750rs7lP6JfZJk= +github.com/mdvan/fdelta v0.0.0-20200114160834-373fc49c9ba9/go.mod h1:bx2hYg4PdjDEw+dOcIQrU7VlDndO2yRZe31UiFX40hg= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/repo.go b/repo.go index 8c3fa5f..06524aa 100644 --- a/repo.go +++ b/repo.go @@ -125,8 +125,8 @@ func NewRepo(path string) *Repo { sketchSfCount: 3, sketchFCount: 4, pol: p, - differ: &Bsdiff{}, - patcher: &Bsdiff{}, + differ: Fdelta{}, + patcher: Fdelta{}, fingerprints: make(FingerprintMap), sketches: make(SketchMap), chunkCache: cache.NewFifoCache(10000), @@ -154,9 +154,13 @@ func (r *Repo) Commit(source string) { newChunkPath := filepath.Join(newPath, chunksName) os.Mkdir(newPath, 0775) // TODO: handle errors os.Mkdir(newChunkPath, 0775) // TODO: handle errors + logger.Info("listing files") files := listFiles(source) + logger.Info("loading previous hashes") r.loadHashes(versions) + logger.Info("loading previous file lists") r.loadFileLists(versions) + logger.Info("loading previous recipies") r.loadRecipes(versions) storeQueue := make(chan chunkData, 32) storeEnd := make(chan bool) @@ -164,7 +168,7 @@ func (r *Repo) Commit(source string) { var last, nlast, pass uint64 var recipe []Chunk for ; nlast > last || pass == 0; pass++ { - logger.Infof("pass number %d", pass+1) + logger.Infof("matcher pass number %d", pass+1) last = nlast reader, writer := io.Pipe() go concatFiles(&files, writer) diff --git a/repo_test.go b/repo_test.go index 2d1c6e2..2205e63 100644 --- a/repo_test.go +++ b/repo_test.go @@ -234,6 +234,8 @@ func TestBsdiff(t *testing.T) { defer os.Remove(addedFile2) // configure repo + repo.patcher = Bsdiff{} + repo.differ = Bsdiff{} repo.chunkReadWrapper = utils.NopReadWrapper repo.chunkWriteWrapper = utils.NopWriteWrapper @@ -269,6 +271,8 @@ func TestCommit(t *testing.T) { source := filepath.Join("testdata", "logs") expected := filepath.Join("testdata", "repo_8k") repo := NewRepo(dest) + repo.patcher = Bsdiff{} + repo.differ = Bsdiff{} repo.chunkReadWrapper = utils.NopReadWrapper repo.chunkWriteWrapper = utils.NopWriteWrapper @@ -281,6 +285,8 @@ func TestCommitZlib(t *testing.T) { source := filepath.Join("testdata", "logs") expected := filepath.Join("testdata", "repo_8k_zlib") repo := NewRepo(dest) + repo.patcher = Bsdiff{} + repo.differ = Bsdiff{} repo.chunkReadWrapper = utils.ZlibReader repo.chunkWriteWrapper = utils.ZlibWriter @@ -293,6 +299,8 @@ func TestRestore(t *testing.T) { source := filepath.Join("testdata", "repo_8k") expected := filepath.Join("testdata", "logs") repo := NewRepo(source) + repo.patcher = Bsdiff{} + repo.differ = Bsdiff{} repo.chunkReadWrapper = utils.NopReadWrapper repo.chunkWriteWrapper = utils.NopWriteWrapper @@ -305,6 +313,8 @@ func TestRestoreZlib(t *testing.T) { source := filepath.Join("testdata", "repo_8k_zlib") expected := filepath.Join("testdata", "logs") repo := NewRepo(source) + repo.patcher = Bsdiff{} + repo.differ = Bsdiff{} repo.chunkReadWrapper = utils.ZlibReader repo.chunkWriteWrapper = utils.ZlibWriter -- cgit v1.2.3