aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorn-peugnet <n.peugnet@free.fr>2021-10-14 18:07:17 +0200
committern-peugnet <n.peugnet@free.fr>2021-10-15 16:11:43 +0200
commit89fb0a85f78a415477e450b0091d8c2b994b687d (patch)
treeee8283bd91998b65f1a5056f254467a616964653
parent347c626f4103f7afd494031cdf9f9fa7868f8e59 (diff)
downloaddna-backup-89fb0a85f78a415477e450b0091d8c2b994b687d.tar.gz
dna-backup-89fb0a85f78a415477e450b0091d8c2b994b687d.zip
start dir exporter
-rw-r--r--dna/drive.go30
-rw-r--r--dna/writer.go45
-rw-r--r--main.go42
-rw-r--r--repo/export_dir.go88
-rw-r--r--repo/repo.go57
-rw-r--r--repo/repo_test.go19
-rw-r--r--utils/io.go19
7 files changed, 255 insertions, 45 deletions
diff --git a/dna/drive.go b/dna/drive.go
new file mode 100644
index 0000000..f99e7d4
--- /dev/null
+++ b/dna/drive.go
@@ -0,0 +1,30 @@
+/* Copyright (C) 2021 Nicolas Peugnet <n.peugnet@free.fr>
+
+ This file is part of dna-backup.
+
+ dna-backup is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ dna-backup is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with dna-backup. If not, see <https://www.gnu.org/licenses/>. */
+
+package dna
+
+import "io"
+
+// DnaDrive groups the integer parameters describing a DNA drive.
+//
+// trackSize is the value handed to NewWriter by the Writer method; poolCount
+// and tracksPerPool are declared but not read by any code in dna/drive.go.
+type DnaDrive struct {
+	poolCount, trackSize, tracksPerPool int
+}
+
+// Writer wraps w with NewWriter, using the drive's track size, and returns
+// the resulting io.WriteCloser.
+func (d *DnaDrive) Writer(w io.Writer) io.WriteCloser {
+	tracked := NewWriter(w, d.trackSize)
+	return tracked
+}
diff --git a/dna/writer.go b/dna/writer.go
new file mode 100644
index 0000000..6b232cd
--- /dev/null
+++ b/dna/writer.go
@@ -0,0 +1,45 @@
+/* Copyright (C) 2021 Nicolas Peugnet <n.peugnet@free.fr>
+
+ This file is part of dna-backup.
+
+ dna-backup is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ dna-backup is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with dna-backup. If not, see <https://www.gnu.org/licenses/>. */
+
+package dna
+
+import (
+ "io"
+
+ "github.com/n-peugnet/dna-backup/utils"
+)
+
+// writer is the io.WriteCloser returned by NewWriter. Writes go through the
+// embedded WriteCounter, whose byte count is used by Close to pad the output
+// up to a multiple of trackSize.
+type writer struct {
+ // WriteCounter forwards writes to the wrapped writer and counts them.
+ *utils.WriteCounter
+ // trackSize is the size of one track, in bytes.
+ trackSize int
+}
+
+// NewWriter returns an io.WriteCloser that forwards writes to w while
+// counting the bytes written. Closing it appends padding so that the total
+// number of bytes written becomes a multiple of trackSize; w itself is never
+// closed by the returned writer.
+func NewWriter(w io.Writer, trackSize int) io.WriteCloser {
+	tracked := new(writer)
+	tracked.WriteCounter = utils.NewWriteCounter(w)
+	tracked.trackSize = trackSize
+	return tracked
+}
+
+// Close pads the output with zero bytes so that the total number of bytes
+// written is a multiple of the track size. The wrapped writer is not closed.
+//
+// No padding is written when the output already ends on a track boundary
+// (this includes the case where nothing was written at all); previously a
+// whole extra track of padding was appended in that situation. A track size
+// that is not strictly positive means there is no track structure to align
+// to, so nothing is written instead of panicking on the modulo or on a
+// negative slice length.
+//
+// It returns the error of the padding write, if any.
+func (d *writer) Close() (err error) {
+	if d.trackSize <= 0 {
+		return nil
+	}
+	rem := d.Count() % d.trackSize
+	if rem == 0 {
+		// Last track is already full (or there is no data at all).
+		return nil
+	}
+	_, err = d.Write(make([]byte, d.trackSize-rem))
+	return err
+}
diff --git a/main.go b/main.go
index ada0252..ae180b9 100644
--- a/main.go
+++ b/main.go
@@ -42,19 +42,25 @@ var (
logLevel int
chunkSize int
format string
+ trackSize int
)
-var commit = command{flag.NewFlagSet("commit", flag.ExitOnError), commitMain,
+var Commit = command{flag.NewFlagSet("commit", flag.ExitOnError), commitMain,
"[<options>] [--] <source> <dest>",
"Create a new version of folder <source> into repo <dest>",
}
-var restore = command{flag.NewFlagSet("restore", flag.ExitOnError), restoreMain,
+var Restore = command{flag.NewFlagSet("restore", flag.ExitOnError), restoreMain,
"[<options>] [--] <source> <dest>",
"Restore the last version from repo <source> into folder <dest>",
}
+var Export = command{flag.NewFlagSet("export", flag.ExitOnError), exportMain,
+ "[<options>] [--] <source> <dest>",
+ "Export versions from repo <source> into folder <dest>",
+}
var subcommands = map[string]command{
- commit.Flag.Name(): commit,
- restore.Flag.Name(): restore,
+ Commit.Flag.Name(): Commit,
+ Restore.Flag.Name(): Restore,
+ Export.Flag.Name(): Export,
}
func init() {
@@ -71,6 +77,8 @@ func init() {
s.Flag.IntVar(&logLevel, "v", 3, "log verbosity level (0-4)")
s.Flag.IntVar(&chunkSize, "c", 8<<10, "chunk size")
}
+ Export.Flag.StringVar(&format, "format", "dir", "format of the export (dir, csv)")
+ Export.Flag.IntVar(&trackSize, "track", 1020, "size of a DNA track")
}
func main() {
@@ -104,8 +112,8 @@ func commitMain(args []string) error {
}
source := args[0]
dest := args[1]
- repo := repo.NewRepo(dest, chunkSize)
- repo.Commit(source)
+ r := repo.NewRepo(dest, chunkSize)
+ r.Commit(source)
return nil
}
@@ -115,7 +123,25 @@ func restoreMain(args []string) error {
}
source := args[0]
dest := args[1]
- repo := repo.NewRepo(source, chunkSize)
- repo.Restore(dest)
+ r := repo.NewRepo(source, chunkSize)
+ r.Restore(dest)
+ return nil
+}
+
+// exportMain implements the "export" subcommand.
+//
+// args must hold exactly two positional arguments: the source repo and the
+// destination folder. The export itself is selected by the -format flag:
+// "dir" calls Repo.ExportDir with the -track size, "csv" is a placeholder
+// that only prints "csv".
+//
+// It returns an error when the number of arguments is wrong, when the track
+// size is not strictly positive for the "dir" format, or when the format is
+// unknown, so that the caller sees the failure instead of a nil result.
+func exportMain(args []string) error {
+	if len(args) != 2 {
+		return fmt.Errorf("wrong number args")
+	}
+	source := args[0]
+	dest := args[1]
+	r := repo.NewRepo(source, chunkSize)
+	switch format {
+	case "dir":
+		// A track size of zero or less cannot be used to align tracks.
+		if trackSize <= 0 {
+			return fmt.Errorf("invalid track size %d", trackSize)
+		}
+		r.ExportDir(dest, trackSize)
+	case "csv":
+		// Not implemented yet.
+		fmt.Println("csv")
+	default:
+		return fmt.Errorf("unknown format %s", format)
+	}
+	return nil
+}
diff --git a/repo/export_dir.go b/repo/export_dir.go
new file mode 100644
index 0000000..8c63fdb
--- /dev/null
+++ b/repo/export_dir.go
@@ -0,0 +1,88 @@
+/* Copyright (C) 2021 Nicolas Peugnet <n.peugnet@free.fr>
+
+ This file is part of dna-backup.
+
+ dna-backup is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ dna-backup is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with dna-backup. If not, see <https://www.gnu.org/licenses/>. */
+
+package repo
+
+import (
+ "bytes"
+ "compress/zlib"
+ "encoding/binary"
+ "io"
+
+ "github.com/n-peugnet/dna-backup/dna"
+ "github.com/n-peugnet/dna-backup/logger"
+ "github.com/n-peugnet/dna-backup/utils"
+)
+
+// Version holds the three sizes that make up the header of an exported
+// version (see createHeader). ExportDir fills them with, in order: the
+// number of compressed chunk bytes written, the length of the recipe data
+// and the length of the file list data.
+type Version struct {
+	Chunks, Recipe, Files uint64
+}
+
+// ExportDir walks every version of the repo and prepares its export in the
+// "dir" format, using tracks of trackSize bytes.
+//
+// For each version, the chunks are compressed with zlib and written through
+// a track writer into an in-memory buffer, then the recipe and file list
+// deltas of that version are read. The three resulting sizes are stored in a
+// Version whose header is logged. dest is not used yet: nothing is written
+// to the destination folder at this stage.
+//
+// Errors while copying chunks, closing the writers or reading the deltas are
+// logged and do not stop the export.
+func (r *Repo) ExportDir(dest string, trackSize int) {
+	r.Init()
+	versions := make([]Version, len(r.versions))
+	chunks := r.loadChunks(r.versions)
+	for i := range versions {
+		var content bytes.Buffer // replace with a reader capable of switching files
+		var recipe, fileList []byte
+		tracker := dna.NewWriter(&content, trackSize)
+		counter := utils.NewWriteCounter(tracker)
+		compressor := zlib.NewWriter(counter)
+		for _, c := range chunks[i] {
+			if _, err := io.Copy(compressor, c.Reader()); err != nil {
+				logger.Error(err)
+			}
+		}
+		// The compressor must be closed first: it flushes the remaining
+		// compressed data, which has to be counted and written before the
+		// track writer pads the last track.
+		if err := compressor.Close(); err != nil {
+			logger.Error("close compressor ", err)
+		}
+		if err := tracker.Close(); err != nil {
+			logger.Error("close tracker ", err)
+		}
+		readDelta(r.versions[i], recipeName, utils.NopReadWrapper, func(rc io.ReadCloser) {
+			var err error
+			if recipe, err = io.ReadAll(rc); err != nil {
+				logger.Error("load recipe ", err)
+			}
+		})
+		readDelta(r.versions[i], filesName, utils.NopReadWrapper, func(rc io.ReadCloser) {
+			var err error
+			if fileList, err = io.ReadAll(rc); err != nil {
+				logger.Error("load files ", err)
+			}
+		})
+		versions[i] = Version{
+			uint64(counter.Count()),
+			uint64(len(recipe)),
+			uint64(len(fileList)),
+		}
+		header := versions[i].createHeader()
+		logger.Info(header)
+	}
+}
+
+// createHeader encodes the Chunks, Recipe and Files sizes, in that order, as
+// three consecutive unsigned varints and returns the encoded bytes.
+func (v Version) createHeader() []byte {
+	fields := [...]uint64{v.Chunks, v.Recipe, v.Files}
+	// Worst case: every field needs the maximum varint length.
+	header := make([]byte, binary.MaxVarintLen64*3)
+	written := 0
+	for idx := 0; idx < len(fields); idx++ {
+		written += binary.PutUvarint(header[written:], fields[idx])
+	}
+	return header[:written]
+}
diff --git a/repo/repo.go b/repo/repo.go
index d3a5904..d12c72d 100644
--- a/repo/repo.go
+++ b/repo/repo.go
@@ -380,31 +380,36 @@ func storeDelta(prevRaw []byte, curr interface{}, dest string, differ delta.Diff
}
}
+// readDelta opens the file called name inside the version directory, wraps
+// it with wrapper and hands the wrapped reader to callback. Once callback
+// has returned, the wrapped reader and then the file are closed.
+//
+// Every error (open, wrap, either close) is reported through logger.Panic.
+func readDelta(version string, name string, wrapper utils.ReadWrapper, callback func(io.ReadCloser)) {
+	f, err := os.Open(filepath.Join(version, name))
+	if err != nil {
+		logger.Panic(err)
+	}
+	wrapped, err := wrapper(f)
+	if err != nil {
+		logger.Panic(err)
+	}
+	callback(wrapped)
+	if closeErr := wrapped.Close(); closeErr != nil {
+		logger.Panic(closeErr)
+	}
+	if closeErr := f.Close(); closeErr != nil {
+		logger.Panic(closeErr)
+	}
+}
+
func loadDeltas(target interface{}, versions []string, patcher delta.Patcher, wrapper utils.ReadWrapper, name string) (ret []byte) {
var prev bytes.Buffer
var err error
-
for _, v := range versions {
- var curr bytes.Buffer
- path := filepath.Join(v, name)
- file, err := os.Open(path)
- if err != nil {
- logger.Panic(err)
- }
- in, err := wrapper(file)
- if err != nil {
- logger.Panic(err)
- }
- if err = patcher.Patch(&prev, &curr, in); err != nil {
- logger.Panic(err)
- }
- prev = curr
- if err = in.Close(); err != nil {
- logger.Panic(err)
- }
- if err = file.Close(); err != nil {
- logger.Panic(err)
- }
+ readDelta(v, name, wrapper, func(in io.ReadCloser) {
+ var curr bytes.Buffer
+ if err = patcher.Patch(&prev, &curr, in); err != nil {
+ logger.Panic(err)
+ }
+ prev = curr
+ })
}
ret = prev.Bytes()
if len(ret) == 0 {
@@ -505,8 +510,9 @@ func (r *Repo) LoadChunkContent(id *ChunkId) *bytes.Reader {
}
// TODO: use atoi for chunkid ?
-func (r *Repo) LoadChunks(chunks chan<- IdentifiedChunk) {
- for i, v := range r.versions {
+func (r *Repo) loadChunks(versions []string) (chunks [][]IdentifiedChunk) {
+ for i, v := range versions {
+ vc := make([]IdentifiedChunk, 0)
p := filepath.Join(v, chunksName)
entries, err := os.ReadDir(p)
if err != nil {
@@ -518,10 +524,11 @@ func (r *Repo) LoadChunks(chunks chan<- IdentifiedChunk) {
}
id := &ChunkId{Ver: i, Idx: uint64(j)}
c := NewStoredChunk(r, id)
- chunks <- c
+ vc = append(vc, c)
}
+ chunks = append(chunks, vc)
}
- close(chunks)
+ return chunks
}
// loadHashes loads and aggregates the hashes stored for each given version and
diff --git a/repo/repo_test.go b/repo/repo_test.go
index 38219e3..6338425 100644
--- a/repo/repo_test.go
+++ b/repo/repo_test.go
@@ -135,8 +135,8 @@ func storeChunks(dest string, chunks <-chan []byte) {
// For each chunk, both a fingerprint (hash over the full content) and a sketch
// (resemblance hash based on maximal values of regions) are calculated and
// stored in an hashmap.
-func (r *Repo) hashChunks(chunks <-chan IdentifiedChunk) {
- for c := range chunks {
+func (r *Repo) hashChunks(chunks []IdentifiedChunk) {
+ for _, c := range chunks {
r.hashChunk(c.GetId(), c.Reader())
}
}
@@ -260,7 +260,6 @@ func TestLoadChunks(t *testing.T) {
reader2, writer2 := io.Pipe()
chunks1 := make(chan []byte, 16)
chunks2 := make(chan []byte, 16)
- chunks3 := make(chan IdentifiedChunk, 16)
files := listFiles(dataDir)
go concatFiles(&files, writer1)
go concatFiles(&files, writer2)
@@ -268,11 +267,11 @@ func TestLoadChunks(t *testing.T) {
go repo.chunkStream(reader2, chunks2)
storeChunks(resultChunks, chunks1)
repo.versions = []string{resultVersion}
- go repo.LoadChunks(chunks3)
+ chunks3 := repo.loadChunks(repo.versions)
i := 0
for c2 := range chunks2 {
- c3 := <-chunks3
+ c3 := chunks3[0][i]
buff, err := io.ReadAll(c3.Reader())
if err != nil {
t.Errorf("Error reading from chunk %d: %s\n", c3, err)
@@ -285,7 +284,6 @@ func TestLoadChunks(t *testing.T) {
i++
}
}
-
func prepareChunks(dataDir string, repo *Repo, streamFunc func(*[]File, io.WriteCloser)) {
resultVersion := filepath.Join(repo.path, "00000")
resultChunks := filepath.Join(resultVersion, chunksName)
@@ -327,10 +325,8 @@ func TestBsdiff(t *testing.T) {
repo.chunkWriteWrapper = utils.NopWriteWrapper
// Load previously stored chunks
- oldChunks := make(chan IdentifiedChunk, 16)
repo.loadVersions()
- go repo.LoadChunks(oldChunks)
- repo.hashChunks(oldChunks)
+ repo.hashChunks(repo.loadChunks(repo.versions)[0])
// Read new data
newVersion := len(repo.versions)
@@ -405,7 +401,6 @@ func TestHashes(t *testing.T) {
dest := t.TempDir()
source := filepath.Join("testdata", "repo_8k_zlib")
- chunks := make(chan IdentifiedChunk, 16)
storeQueue := make(chan chunkData, 16)
storeEnd := make(chan bool)
@@ -413,8 +408,8 @@ func TestHashes(t *testing.T) {
repo1.chunkReadWrapper = utils.ZlibReader
repo1.chunkWriteWrapper = utils.ZlibWriter
repo1.versions = []string{filepath.Join(source, "00000")}
- go repo1.LoadChunks(chunks)
- for c := range chunks {
+ chunks := repo1.loadChunks(repo1.versions)
+ for _, c := range chunks[0] {
fp, sk := repo1.hashChunk(c.GetId(), c.Reader())
content, err := io.ReadAll(c.Reader())
if err != nil {
diff --git a/utils/io.go b/utils/io.go
index a0aa70c..3adc5f2 100644
--- a/utils/io.go
+++ b/utils/io.go
@@ -54,3 +54,22 @@ func NopReadWrapper(r io.Reader) (io.ReadCloser, error) {
func NopWriteWrapper(w io.Writer) io.WriteCloser {
return NopCloser(w)
}
+
+// WriteCounter is an io.Writer that forwards every write to an underlying
+// writer and keeps a running total of the bytes that writer reported as
+// written.
+type WriteCounter struct {
+	w     io.Writer
+	count int
+}
+
+// NewWriteCounter returns a WriteCounter wrapping writer, with a count of
+// zero.
+func NewWriteCounter(writer io.Writer) *WriteCounter {
+	c := new(WriteCounter)
+	c.w = writer
+	return c
+}
+
+// Write writes p to the underlying writer and adds the number of bytes it
+// reports as written to the count, even when an error is returned alongside
+// a partial write. The underlying writer's results are returned unchanged.
+func (c *WriteCounter) Write(p []byte) (n int, err error) {
+	written, werr := c.w.Write(p)
+	c.count += written
+	return written, werr
+}
+
+// Count returns the total number of bytes written so far.
+func (c *WriteCounter) Count() int {
+	total := c.count
+	return total
+}