diff options
author | n-peugnet <n.peugnet@free.fr> | 2021-10-14 18:07:17 +0200 |
---|---|---|
committer | n-peugnet <n.peugnet@free.fr> | 2021-10-15 16:11:43 +0200 |
commit | 89fb0a85f78a415477e450b0091d8c2b994b687d (patch) | |
tree | ee8283bd91998b65f1a5056f254467a616964653 | |
parent | 347c626f4103f7afd494031cdf9f9fa7868f8e59 (diff) | |
download | dna-backup-89fb0a85f78a415477e450b0091d8c2b994b687d.tar.gz dna-backup-89fb0a85f78a415477e450b0091d8c2b994b687d.zip |
start dir exporter
-rw-r--r-- | dna/drive.go | 30 | ||||
-rw-r--r-- | dna/writer.go | 45 | ||||
-rw-r--r-- | main.go | 42 | ||||
-rw-r--r-- | repo/export_dir.go | 88 | ||||
-rw-r--r-- | repo/repo.go | 57 | ||||
-rw-r--r-- | repo/repo_test.go | 19 | ||||
-rw-r--r-- | utils/io.go | 19 |
7 files changed, 255 insertions, 45 deletions
diff --git a/dna/drive.go b/dna/drive.go new file mode 100644 index 0000000..f99e7d4 --- /dev/null +++ b/dna/drive.go @@ -0,0 +1,30 @@ +/* Copyright (C) 2021 Nicolas Peugnet <n.peugnet@free.fr> + + This file is part of dna-backup. + + dna-backup is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + dna-backup is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with dna-backup. If not, see <https://www.gnu.org/licenses/>. */ + +package dna + +import "io" + +type DnaDrive struct { + poolCount int + trackSize int + tracksPerPool int +} + +func (d *DnaDrive) Writer(w io.Writer) io.WriteCloser { + return NewWriter(w, d.trackSize) +} diff --git a/dna/writer.go b/dna/writer.go new file mode 100644 index 0000000..6b232cd --- /dev/null +++ b/dna/writer.go @@ -0,0 +1,45 @@ +/* Copyright (C) 2021 Nicolas Peugnet <n.peugnet@free.fr> + + This file is part of dna-backup. + + dna-backup is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + dna-backup is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with dna-backup. If not, see <https://www.gnu.org/licenses/>. */ + +package dna + +import ( + "io" + + "github.com/n-peugnet/dna-backup/utils" +) + +type writer struct { + *utils.WriteCounter + trackSize int +} + +func NewWriter(w io.Writer, trackSize int) io.WriteCloser { + return &writer{ + WriteCounter: utils.NewWriteCounter(w), + trackSize: trackSize, + } +} + +func (d *writer) Close() (err error) { + // add padding for the last track + padding := make([]byte, d.trackSize-d.Count()%d.trackSize) + if _, err = d.Write(padding); err != nil { + return err + } + return nil +} @@ -42,19 +42,25 @@ var ( logLevel int chunkSize int format string + trackSize int ) -var commit = command{flag.NewFlagSet("commit", flag.ExitOnError), commitMain, +var Commit = command{flag.NewFlagSet("commit", flag.ExitOnError), commitMain, "[<options>] [--] <source> <dest>", "Create a new version of folder <source> into repo <dest>", } -var restore = command{flag.NewFlagSet("restore", flag.ExitOnError), restoreMain, +var Restore = command{flag.NewFlagSet("restore", flag.ExitOnError), restoreMain, "[<options>] [--] <source> <dest>", "Restore the last version from repo <source> into folder <dest>", } +var Export = command{flag.NewFlagSet("export", flag.ExitOnError), exportMain, + "[<options>] [--] <source> <dest>", + "Export versions from repo <source> into folder <dest>", +} var subcommands = map[string]command{ - commit.Flag.Name(): commit, - restore.Flag.Name(): restore, + Commit.Flag.Name(): Commit, + Restore.Flag.Name(): Restore, + Export.Flag.Name(): Export, } func init() { @@ -71,6 +77,8 @@ func init() { s.Flag.IntVar(&logLevel, "v", 3, "log verbosity level (0-4)") s.Flag.IntVar(&chunkSize, "c", 8<<10, "chunk size") } + Export.Flag.StringVar(&format, "format", "dir", "format of the export (dir, csv)") + Export.Flag.IntVar(&trackSize, "track", 1020, "size of a DNA track") } func main() { @@ -104,8 +112,8 @@ func commitMain(args []string) error { } source := args[0] dest := args[1] - repo := repo.NewRepo(dest, chunkSize) - repo.Commit(source) + r := repo.NewRepo(dest, chunkSize) + r.Commit(source) return nil } @@ -115,7 +123,25 @@ func restoreMain(args []string) error { } source := args[0] dest := args[1] - repo := repo.NewRepo(source, chunkSize) - repo.Restore(dest) + r := repo.NewRepo(source, chunkSize) + r.Restore(dest) + return nil +} + +func exportMain(args []string) error { + if len(args) != 2 { + return fmt.Errorf("wrong number args") + } + source := args[0] + dest := args[1] + r := repo.NewRepo(source, chunkSize) + switch format { + case "dir": + r.ExportDir(dest, trackSize) + case "csv": + fmt.Println("csv") + default: + logger.Errorf("unknown format %s", format) + } return nil } diff --git a/repo/export_dir.go b/repo/export_dir.go new file mode 100644 index 0000000..8c63fdb --- /dev/null +++ b/repo/export_dir.go @@ -0,0 +1,88 @@ +/* Copyright (C) 2021 Nicolas Peugnet <n.peugnet@free.fr> + + This file is part of dna-backup. + + dna-backup is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + dna-backup is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with dna-backup. If not, see <https://www.gnu.org/licenses/>. */ + +package repo + +import ( + "bytes" + "compress/zlib" + "encoding/binary" + "io" + + "github.com/n-peugnet/dna-backup/dna" + "github.com/n-peugnet/dna-backup/logger" + "github.com/n-peugnet/dna-backup/utils" +) + +type Version struct { + Chunks uint64 + Recipe uint64 + Files uint64 +} + +func (r *Repo) ExportDir(dest string, trackSize int) { + r.Init() + versions := make([]Version, len(r.versions)) + chunks := r.loadChunks(r.versions) + for i := range versions { + var count int64 + var content bytes.Buffer // replace with a reader capable of switching files + var recipe, fileList []byte + var err error + tracker := dna.NewWriter(&content, trackSize) + counter := utils.NewWriteCounter(tracker) + compressor := zlib.NewWriter(counter) + for _, c := range chunks[i] { + n, err := io.Copy(compressor, c.Reader()) + if err != nil { + logger.Error(err) + } + count += n + } + compressor.Close() + tracker.Close() + readDelta(r.versions[i], recipeName, utils.NopReadWrapper, func(rc io.ReadCloser) { + recipe, err = io.ReadAll(rc) + if err != nil { + logger.Error("load recipe ", err) + } + }) + readDelta(r.versions[i], filesName, utils.NopReadWrapper, func(rc io.ReadCloser) { + fileList, err = io.ReadAll(rc) + if err != nil { + logger.Error("load files ", err) + } + }) + versions[i] = Version{ + uint64(counter.Count()), + uint64(len(recipe)), + uint64(len(fileList)), + } + header := versions[i].createHeader() + logger.Info(header) + } +} + +func (v Version) createHeader() []byte { + buf := make([]byte, binary.MaxVarintLen64*3) + i := 0 + for _, x := range []uint64{v.Chunks, v.Recipe, v.Files} { + n := binary.PutUvarint(buf[i:], x) + i += n + } + return buf[:i] +} diff --git a/repo/repo.go b/repo/repo.go index d3a5904..d12c72d 100644 --- a/repo/repo.go +++ b/repo/repo.go @@ -380,31 +380,36 @@ func storeDelta(prevRaw []byte, curr interface{}, dest string, differ delta.Diff } } +func readDelta(version string, name string, wrapper utils.ReadWrapper, callback func(io.ReadCloser)) { + path := filepath.Join(version, name) + file, err := os.Open(path) + if err != nil { + logger.Panic(err) + } + in, err := wrapper(file) + if err != nil { + logger.Panic(err) + } + callback(in) + if err = in.Close(); err != nil { + logger.Panic(err) + } + if err = file.Close(); err != nil { + logger.Panic(err) + } +} + func loadDeltas(target interface{}, versions []string, patcher delta.Patcher, wrapper utils.ReadWrapper, name string) (ret []byte) { var prev bytes.Buffer var err error - for _, v := range versions { - var curr bytes.Buffer - path := filepath.Join(v, name) - file, err := os.Open(path) - if err != nil { - logger.Panic(err) - } - in, err := wrapper(file) - if err != nil { - logger.Panic(err) - } - if err = patcher.Patch(&prev, &curr, in); err != nil { - logger.Panic(err) - } - prev = curr - if err = in.Close(); err != nil { - logger.Panic(err) - } - if err = file.Close(); err != nil { - logger.Panic(err) - } + readDelta(v, name, wrapper, func(in io.ReadCloser) { + var curr bytes.Buffer + if err = patcher.Patch(&prev, &curr, in); err != nil { + logger.Panic(err) + } + prev = curr + }) } ret = prev.Bytes() if len(ret) == 0 { @@ -505,8 +510,9 @@ func (r *Repo) LoadChunkContent(id *ChunkId) *bytes.Reader { } // TODO: use atoi for chunkid ? -func (r *Repo) LoadChunks(chunks chan<- IdentifiedChunk) { - for i, v := range r.versions { +func (r *Repo) loadChunks(versions []string) (chunks [][]IdentifiedChunk) { + for i, v := range versions { + vc := make([]IdentifiedChunk, 0) p := filepath.Join(v, chunksName) entries, err := os.ReadDir(p) if err != nil { @@ -518,10 +524,11 @@ func (r *Repo) LoadChunks(chunks chan<- IdentifiedChunk) { } id := &ChunkId{Ver: i, Idx: uint64(j)} c := NewStoredChunk(r, id) - chunks <- c + vc = append(vc, c) } + chunks = append(chunks, vc) } - close(chunks) + return chunks } // loadHashes loads and aggregates the hashes stored for each given version and diff --git a/repo/repo_test.go b/repo/repo_test.go index 38219e3..6338425 100644 --- a/repo/repo_test.go +++ b/repo/repo_test.go @@ -135,8 +135,8 @@ func storeChunks(dest string, chunks <-chan []byte) { // For each chunk, both a fingerprint (hash over the full content) and a sketch // (resemblance hash based on maximal values of regions) are calculated and // stored in an hashmap. -func (r *Repo) hashChunks(chunks <-chan IdentifiedChunk) { - for c := range chunks { +func (r *Repo) hashChunks(chunks []IdentifiedChunk) { + for _, c := range chunks { r.hashChunk(c.GetId(), c.Reader()) } } @@ -260,7 +260,6 @@ func TestLoadChunks(t *testing.T) { reader2, writer2 := io.Pipe() chunks1 := make(chan []byte, 16) chunks2 := make(chan []byte, 16) - chunks3 := make(chan IdentifiedChunk, 16) files := listFiles(dataDir) go concatFiles(&files, writer1) go concatFiles(&files, writer2) @@ -268,11 +267,11 @@ func TestLoadChunks(t *testing.T) { go repo.chunkStream(reader2, chunks2) storeChunks(resultChunks, chunks1) repo.versions = []string{resultVersion} - go repo.LoadChunks(chunks3) + chunks3 := repo.loadChunks(repo.versions) i := 0 for c2 := range chunks2 { - c3 := <-chunks3 + c3 := chunks3[0][i] buff, err := io.ReadAll(c3.Reader()) if err != nil { t.Errorf("Error reading from chunk %d: %s\n", c3, err) @@ -285,7 +284,6 @@ func TestLoadChunks(t *testing.T) { i++ } } - func prepareChunks(dataDir string, repo *Repo, streamFunc func(*[]File, io.WriteCloser)) { resultVersion := filepath.Join(repo.path, "00000") resultChunks := filepath.Join(resultVersion, chunksName) @@ -327,10 +325,8 @@ func TestBsdiff(t *testing.T) { repo.chunkWriteWrapper = utils.NopWriteWrapper // Load previously stored chunks - oldChunks := make(chan IdentifiedChunk, 16) repo.loadVersions() - go repo.LoadChunks(oldChunks) - repo.hashChunks(oldChunks) + repo.hashChunks(repo.loadChunks(repo.versions)[0]) // Read new data newVersion := len(repo.versions) @@ -405,7 +401,6 @@ func TestHashes(t *testing.T) { dest := t.TempDir() source := filepath.Join("testdata", "repo_8k_zlib") - chunks := make(chan IdentifiedChunk, 16) storeQueue := make(chan chunkData, 16) storeEnd := make(chan bool) @@ -413,8 +408,8 @@ func TestHashes(t *testing.T) { repo1.chunkReadWrapper = utils.ZlibReader repo1.chunkWriteWrapper = utils.ZlibWriter repo1.versions = []string{filepath.Join(source, "00000")} - go repo1.LoadChunks(chunks) - for c := range chunks { + chunks := repo1.loadChunks(repo1.versions) + for _, c := range chunks[0] { fp, sk := repo1.hashChunk(c.GetId(), c.Reader()) content, err := io.ReadAll(c.Reader()) if err != nil { diff --git a/utils/io.go b/utils/io.go index a0aa70c..3adc5f2 100644 --- a/utils/io.go +++ b/utils/io.go @@ -54,3 +54,22 @@ func NopReadWrapper(r io.Reader) (io.ReadCloser, error) { func NopWriteWrapper(w io.Writer) io.WriteCloser { return NopCloser(w) } + +type WriteCounter struct { + w io.Writer + count int +} + +func NewWriteCounter(writer io.Writer) *WriteCounter { + return &WriteCounter{w: writer} +} + +func (c *WriteCounter) Write(p []byte) (n int, err error) { + n, err = c.w.Write(p) + c.count += n + return +} + +func (c *WriteCounter) Count() int { + return c.count +} |