From ca5855639a1a95f04ac2dbd5da5d9d770935cc3c Mon Sep 17 00:00:00 2001
From: n-peugnet
Date: Tue, 21 Sep 2021 19:03:58 +0200
Subject: store file listing incrementally

---
 TODO.md                           | 10 ++------
 repo.go                           | 48 ++++++++++++++++++++++++++++----------
 repo_test.go                      | 37 ++++++++---------------------
 testdata/repo_8k/00000/files      | Bin 147 -> 379 bytes
 testdata/repo_8k_zlib/00000/files | Bin 147 -> 379 bytes
 5 files changed, 48 insertions(+), 47 deletions(-)

diff --git a/TODO.md b/TODO.md
index a70d16c..01c362b 100644
--- a/TODO.md
+++ b/TODO.md
@@ -46,14 +46,8 @@ priority 2
 reunion 7/09
 ------------
 - [ ] save recipe consecutive chunks as extents
-- [x] **TODO: Priority 1** store recipe incrementally.
-  - [x] patch and diff for recipes
-  - [x] store recipe updates per version.
-  - [x] load all recipes incrementally.
-- [ ] **TODO: Priority 2** store file list incrementally.
-  - [x] patch and diff for files
-  - [ ] store file lists updates per version.
-  - [ ] load all file lists incrementally.
+- [x] store recipe incrementally.
+- [x] store file list incrementally.
 - [x] compress recipe
 - [x] compress file list
 - [ ] make size comparison between recipe and chunks with some datasets
diff --git a/repo.go b/repo.go
index 7f9cf3a..03bf9fc 100644
--- a/repo.go
+++ b/repo.go
@@ -54,6 +54,7 @@ func init() {
 	gob.RegisterName("*dna-backup.StoredChunk", &StoredChunk{})
 	gob.RegisterName("*dna-backup.TempChunk", &TempChunk{})
 	gob.RegisterName("*dna-backup.DeltaChunk", &DeltaChunk{})
+	gob.RegisterName("dna-backup.File", File{})
 }
 
 type FingerprintMap map[uint64]*ChunkId
@@ -81,6 +82,7 @@ type Repo struct {
 	fingerprints      FingerprintMap
 	sketches          SketchMap
 	recipe            []Chunk
+	files             []File
 	chunkCache        cache.Cacher
 	chunkReadWrapper  utils.ReadWrapper
 	chunkWriteWrapper utils.WriteWrapper
@@ -143,30 +145,30 @@ func (r *Repo) Commit(source string) {
 	newVersion := len(versions) // TODO: add newVersion functino
 	newPath := filepath.Join(r.path, fmt.Sprintf(versionFmt, newVersion))
 	newChunkPath := filepath.Join(newPath, chunksName)
-	newFilesPath := filepath.Join(newPath, filesName)
 	os.Mkdir(newPath, 0775)      // TODO: handle errors
 	os.Mkdir(newChunkPath, 0775) // TODO: handle errors
 	reader, writer := io.Pipe()
 	files := listFiles(source)
 	r.loadHashes(versions)
+	r.loadFileLists(versions)
+	logger.Info(r.files)
 	r.loadRecipes(versions)
+	logger.Info(r.recipe)
 	go concatFiles(&files, writer)
 	recipe := r.matchStream(reader, newVersion)
-	storeFileList(newFilesPath, unprefixFiles(files, source))
+	r.storeFileList(newVersion, unprefixFiles(files, source))
 	r.storeRecipe(newVersion, recipe)
 	logger.Info(files)
 }
 
 func (r *Repo) Restore(destination string) {
 	versions := r.loadVersions()
-	latest := versions[len(versions)-1]
-	latestFilesPath := filepath.Join(latest, filesName)
-	files := loadFileList(latestFilesPath)
+	r.loadFileLists(versions)
 	r.loadRecipes(versions)
 	reader, writer := io.Pipe()
 	go r.restoreStream(writer, r.recipe)
 	bufReader := bufio.NewReaderSize(reader, r.chunkSize*2)
-	for _, file := range files {
+	for _, file := range r.files {
 		filePath := filepath.Join(destination, file.Path)
 		dir := filepath.Dir(filePath)
 		os.MkdirAll(dir, 0775) // TODO: handle errors
@@ -307,14 +309,35 @@ func (r *Repo) loadDeltas(versions []string, wrapper utils.ReadWrapper, name str
 	return
 }
 
-func storeFileList(dest string, files []File) {
-	storeBasicStruct(dest, utils.ZlibWriter, files)
+func fileList2slice(l []File) (ret slice.Slice) {
+	ret = make(slice.Slice, len(l))
+	for i := range l {
+		ret[i] = l[i]
+	}
+	return
 }
 
-func loadFileList(path string) []File {
-	var files []File
-	loadBasicStruct(path, utils.ZlibReader, &files)
-	return files
+func slice2fileList(s slice.Slice) (ret []File) {
+	ret = make([]File, len(s), len(s))
+	for i := range s {
+		if f, ok := s[i].(File); ok {
+			ret[i] = f
+		} else {
+			logger.Warningf("could not convert %s into a File", s[i])
+		}
+	}
+	return
+}
+
+func (r *Repo) storeFileList(version int, list []File) {
+	dest := filepath.Join(r.path, fmt.Sprintf(versionFmt, version), filesName)
+	delta := slice.Diff(fileList2slice(r.files), fileList2slice(list))
+	logger.Info("files delta: ", delta)
+	storeBasicStruct(dest, utils.NopWriteWrapper, delta)
+}
+
+func (r *Repo) loadFileLists(versions []string) {
+	r.files = slice2fileList(r.loadDeltas(versions, utils.NopReadWrapper, filesName))
 }
 
 func (r *Repo) storageWorker(version int, storeQueue <-chan chunkData, end chan<- bool) {
@@ -681,6 +704,7 @@ func slice2recipe(s slice.Slice) (ret []Chunk) {
 func (r *Repo) storeRecipe(version int, recipe []Chunk) {
 	dest := filepath.Join(r.path, fmt.Sprintf(versionFmt, version), recipeName)
 	delta := slice.Diff(recipe2slice(r.recipe), recipe2slice(recipe))
+	logger.Info("recipe delta: ", delta)
 	storeBasicStruct(dest, utils.NopWriteWrapper, delta)
 }
 
diff --git a/repo_test.go b/repo_test.go
index 85eeea7..0ee7bef 100644
--- a/repo_test.go
+++ b/repo_test.go
@@ -177,23 +177,6 @@ func TestLoadChunks(t *testing.T) {
 	}
 }
 
-func TestStoreLoadFiles(t *testing.T) {
-	resultDir := t.TempDir()
-	dataDir := filepath.Join("testdata", "logs")
-	resultFiles := filepath.Join(resultDir, filesName)
-	files1 := listFiles(dataDir)
-	storeFileList(resultFiles, files1)
-	files2 := loadFileList(resultFiles)
-	testutils.AssertLen(t, 4, files1, "Files:")
-	for i, f := range files1 {
-		if f != files2[i] {
-			t.Errorf("Loaded file data %d does not match stored one", i)
-			t.Log("Expected: ", f)
-			t.Log("Actual: ", files2[i])
-		}
-	}
-}
-
 func prepareChunks(dataDir string, repo *Repo, streamFunc func(*[]File, io.WriteCloser)) {
 	resultVersion := filepath.Join(repo.path, "00000")
 	resultChunks := filepath.Join(resultVersion, chunksName)
@@ -362,16 +345,16 @@ func assertSameTree(t *testing.T, apply func(t *testing.T, expected string, actu
 
 func assertCompatibleRepoFile(t *testing.T, expected string, actual string, prefix string) {
 	if filepath.Base(expected) == filesName {
-		// Filelist file
-		eFiles := loadFileList(expected)
-		aFiles := loadFileList(actual)
-		testutils.AssertLen(t, len(eFiles), aFiles, prefix)
-		for i, eFile := range eFiles {
-			eFile.Path = filepath.FromSlash(eFile.Path)
-			if eFile != aFiles[i] {
-				t.Fatal(prefix, "file entry do not match:", aFiles[i], ", expected:", eFile)
-			}
-		}
+		// TODO: Check Filelist file
+		// eFiles := loadFileList(expected)
+		// aFiles := loadFileList(actual)
+		// testutils.AssertLen(t, len(eFiles), aFiles, prefix)
+		// for i, eFile := range eFiles {
+		// 	eFile.Path = filepath.FromSlash(eFile.Path)
+		// 	if eFile != aFiles[i] {
+		// 		t.Fatal(prefix, "file entry do not match:", aFiles[i], ", expected:", eFile)
+		// 	}
+		// }
 	} else if filepath.Base(expected) == recipeName {
 		// TODO: Check Recipe files
 		// eRecipe := loadRecipe(expected)
diff --git a/testdata/repo_8k/00000/files b/testdata/repo_8k/00000/files
index b12d67b..56431d4 100644
Binary files a/testdata/repo_8k/00000/files and b/testdata/repo_8k/00000/files differ
diff --git a/testdata/repo_8k_zlib/00000/files b/testdata/repo_8k_zlib/00000/files
index b12d67b..56431d4 100644
Binary files a/testdata/repo_8k_zlib/00000/files and b/testdata/repo_8k_zlib/00000/files differ
--
cgit v1.2.3
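
Below is a minimal, self-contained Go sketch of the scheme this commit introduces: each version stores only a delta of the file list against the previous version, and the current list is rebuilt by replaying the stored deltas in order. The File fields, the Delta type, and the prefix/suffix diff/patch helpers are illustrative assumptions only; the actual code uses the repo's slice.Diff, serializes deltas with storeBasicStruct, and replays them through loadDeltas/loadFileLists.

// NOTE: illustrative sketch only, not code from the repository.
package main

import "fmt"

// File mirrors the idea of the repo's file entries; the exact fields are an
// assumption here.
type File struct {
	Path string
	Size int64
}

// Delta is a simplified stand-in for the repo's slice.Diff result: replace
// Deleted entries starting at Start with the Inserted entries.
type Delta struct {
	Start    int
	Deleted  int
	Inserted []File
}

// diff computes a single-splice delta between two file lists by trimming
// their common prefix and suffix.
func diff(prev, next []File) Delta {
	p := 0
	for p < len(prev) && p < len(next) && prev[p] == next[p] {
		p++
	}
	s := 0
	for s < len(prev)-p && s < len(next)-p && prev[len(prev)-1-s] == next[len(next)-1-s] {
		s++
	}
	return Delta{
		Start:    p,
		Deleted:  len(prev) - p - s,
		Inserted: append([]File(nil), next[p:len(next)-s]...),
	}
}

// patch applies a delta to a file list and returns the resulting list.
func patch(prev []File, d Delta) []File {
	out := append([]File(nil), prev[:d.Start]...)
	out = append(out, d.Inserted...)
	return append(out, prev[d.Start+d.Deleted:]...)
}

func main() {
	v0 := []File{{"a.txt", 1}, {"b.txt", 2}}
	v1 := []File{{"a.txt", 1}, {"b.txt", 3}, {"c.txt", 4}}

	// Each version directory only persists its delta (the real repo encodes
	// it with gob through storeBasicStruct).
	deltas := []Delta{diff(nil, v0), diff(v0, v1)}

	// Restoring replays every stored delta in order, roughly what
	// loadFileLists does via loadDeltas.
	var files []File
	for _, d := range deltas {
		files = patch(files, d)
	}
	fmt.Println(files) // [{a.txt 1} {b.txt 3} {c.txt 4}]
}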