From be1ad1c114b77b654bd3e79fdb18b5d5cf93ddd1 Mon Sep 17 00:00:00 2001 From: n-peugnet Date: Fri, 8 Oct 2021 12:08:22 +0200 Subject: add chunkSize parameter to cli --- repo/repo.go | 16 +++++++++------- repo/repo_test.go | 22 +++++++++++----------- 2 files changed, 20 insertions(+), 18 deletions(-) (limited to 'repo') diff --git a/repo/repo.go b/repo/repo.go index 678b091..27df54e 100644 --- a/repo/repo.go +++ b/repo/repo.go @@ -104,7 +104,7 @@ type File struct { Link string } -func NewRepo(path string) *Repo { +func NewRepo(path string, chunkSize int) *Repo { var err error path, err = filepath.Abs(path) if err != nil { @@ -121,7 +121,7 @@ func NewRepo(path string) *Repo { } return &Repo{ path: path, - chunkSize: 8 << 10, + chunkSize: chunkSize, sketchWSize: 32, sketchSfCount: 3, sketchFCount: 4, @@ -155,13 +155,9 @@ func (r *Repo) Commit(source string) { newChunkPath := filepath.Join(newPath, chunksName) os.Mkdir(newPath, 0775) // TODO: handle errors os.Mkdir(newChunkPath, 0775) // TODO: handle errors - logger.Info("listing files") files := listFiles(source) - logger.Info("loading previous hashes") r.loadHashes(versions) - logger.Info("loading previous file lists") r.loadFileLists(versions) - logger.Info("loading previous recipies") r.loadRecipes(versions) storeQueue := make(chan chunkData, 32) storeEnd := make(chan bool) @@ -183,7 +179,6 @@ func (r *Repo) Commit(source string) { func (r *Repo) Restore(destination string) { versions := r.loadVersions() - logger.Info("loading previous file lists") r.loadFileLists(versions) logger.Info("loading previous recipies") r.loadRecipes(versions) @@ -233,6 +228,7 @@ func (r *Repo) loadVersions() []string { } func listFiles(path string) []File { + logger.Infof("list files from %s", path) var files []File err := filepath.Walk(path, func(p string, i fs.FileInfo, err error) error { if err != nil { @@ -353,6 +349,7 @@ func storeDelta(prevRaw []byte, curr interface{}, dest string, differ delta.Diff if err = encoder.Encode(curr); err != nil { logger.Panic(err) } + logger.Infof("store before delta: %d", currBuff.Len()) file, err := os.Create(dest) if err != nil { logger.Panic(err) @@ -412,12 +409,14 @@ func loadDeltas(target interface{}, versions []string, patcher delta.Patcher, wr // storeFileList stores the given list in the repo dir as a delta against the // previous version's one. func (r *Repo) storeFileList(version int, list []File) { + logger.Info("store files") dest := filepath.Join(r.path, fmt.Sprintf(versionFmt, version), filesName) storeDelta(r.filesRaw, list, dest, r.differ, r.chunkWriteWrapper) } // loadFileLists loads incrementally the file lists' delta of each given version. func (r *Repo) loadFileLists(versions []string) { + logger.Info("load previous file lists") var files []File r.filesRaw = loadDeltas(&files, versions, r.patcher, r.chunkReadWrapper, filesName) r.files = files @@ -516,6 +515,7 @@ func (r *Repo) loadChunks(versions []string, chunks chan<- IdentifiedChunk) { // loadHashes loads and aggregates the hashes stored for each given version and // stores them in the repo maps. func (r *Repo) loadHashes(versions []string) { + logger.Info("load previous hashes") for i, v := range versions { path := filepath.Join(v, hashesName) file, err := os.Open(path) @@ -740,11 +740,13 @@ func (r *Repo) restoreStream(stream io.WriteCloser, recipe []Chunk) { } func (r *Repo) storeRecipe(version int, recipe []Chunk) { + logger.Info("store recipe") dest := filepath.Join(r.path, fmt.Sprintf(versionFmt, version), recipeName) storeDelta(r.recipeRaw, recipe, dest, r.differ, r.chunkWriteWrapper) } func (r *Repo) loadRecipes(versions []string) { + logger.Info("load previous recipies") var recipe []Chunk r.recipeRaw = loadDeltas(&recipe, versions, r.patcher, r.chunkReadWrapper, recipeName) for _, c := range recipe { diff --git a/repo/repo_test.go b/repo/repo_test.go index 36cc1bd..e3a49c7 100644 --- a/repo/repo_test.go +++ b/repo/repo_test.go @@ -157,7 +157,7 @@ func (r *Repo) makeSketch(id *ChunkId, reader io.Reader, wg *sync.WaitGroup, ret func TestReadFiles1(t *testing.T) { tmpDir := t.TempDir() - repo := NewRepo(tmpDir) + repo := NewRepo(tmpDir, 8<<10) chunkCount := 590/repo.chunkSize + 1 dataDir := filepath.Join("testdata", "logs", "1") files := []string{"logTest.log"} @@ -166,7 +166,7 @@ func TestReadFiles1(t *testing.T) { func TestReadFiles2(t *testing.T) { tmpDir := t.TempDir() - repo := NewRepo(tmpDir) + repo := NewRepo(tmpDir, 8<<10) chunkCount := 22899/repo.chunkSize + 1 dataDir := filepath.Join("testdata", "logs", "2") files := []string{"csvParserTest.log", "slipdb.log"} @@ -175,7 +175,7 @@ func TestReadFiles2(t *testing.T) { func TestReadFiles3(t *testing.T) { tmpDir := t.TempDir() - repo := NewRepo(tmpDir) + repo := NewRepo(tmpDir, 8<<10) chunkCount := 119398/repo.chunkSize + 1 dataDir := filepath.Join("testdata", "logs") files := []string{ @@ -234,7 +234,7 @@ func TestSymlinks(t *testing.T) { func TestLoadChunks(t *testing.T) { resultDir := t.TempDir() dataDir := filepath.Join("testdata", "logs") - repo := NewRepo(resultDir) + repo := NewRepo(resultDir, 8<<10) repo.chunkReadWrapper = utils.NopReadWrapper repo.chunkWriteWrapper = utils.NopWriteWrapper resultVersion := filepath.Join(resultDir, "00000") @@ -291,7 +291,7 @@ func TestBsdiff(t *testing.T) { logger.SetLevel(3) defer logger.SetLevel(4) resultDir := t.TempDir() - repo := NewRepo(resultDir) + repo := NewRepo(resultDir, 8<<10) dataDir := filepath.Join("testdata", "logs") addedFile1 := filepath.Join(dataDir, "2", "slogTest.log") addedFile2 := filepath.Join(dataDir, "3", "slogTest.log") @@ -341,7 +341,7 @@ func TestCommitZlib(t *testing.T) { dest := t.TempDir() source := filepath.Join("testdata", "logs") expected := filepath.Join("testdata", "repo_8k_zlib") - repo := NewRepo(dest) + repo := NewRepo(dest, 8<<10) repo.patcher = delta.Fdelta{} repo.differ = delta.Fdelta{} repo.chunkReadWrapper = utils.ZlibReader @@ -357,7 +357,7 @@ func TestRestoreZlib(t *testing.T) { dest := t.TempDir() source := filepath.Join("testdata", "repo_8k_zlib") expected := filepath.Join("testdata", "logs") - repo := NewRepo(source) + repo := NewRepo(source, 8<<10) repo.patcher = delta.Fdelta{} repo.differ = delta.Fdelta{} repo.chunkReadWrapper = utils.ZlibReader @@ -373,8 +373,8 @@ func TestRoundtrip(t *testing.T) { temp := t.TempDir() dest := t.TempDir() source := filepath.Join("testdata", "logs") - repo1 := NewRepo(temp) - repo2 := NewRepo(temp) + repo1 := NewRepo(temp, 8<<10) + repo2 := NewRepo(temp, 8<<10) repo1.Commit(source) // Commit a second version, just to see if it does not destroy everything @@ -393,7 +393,7 @@ func TestHashes(t *testing.T) { storeQueue := make(chan chunkData, 16) storeEnd := make(chan bool) - repo1 := NewRepo(source) + repo1 := NewRepo(source, 8<<10) repo1.chunkReadWrapper = utils.NopReadWrapper repo1.chunkWriteWrapper = utils.NopWriteWrapper go repo1.loadChunks([]string{filepath.Join(source, "00000")}, chunks) @@ -409,7 +409,7 @@ func TestHashes(t *testing.T) { id: c.GetId(), } } - repo2 := NewRepo(dest) + repo2 := NewRepo(dest, 8<<10) repo2.chunkReadWrapper = utils.NopReadWrapper repo2.chunkWriteWrapper = utils.NopWriteWrapper os.MkdirAll(filepath.Join(dest, "00000", chunksName), 0775) -- cgit v1.2.3