aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorn-peugnet <n.peugnet@free.fr>2021-08-23 15:59:49 +0200
committern-peugnet <n.peugnet@free.fr>2021-08-23 15:59:49 +0200
commita67dfdd993dee93950c4b60e99ab6ca92d842072 (patch)
treef2363c729e890b57a6f87a9c4ae147a668c92a64
parente0b1733b3d7c88c79d4d92d8c6fac97bbd73f06b (diff)
downloaddna-backup-a67dfdd993dee93950c4b60e99ab6ca92d842072.tar.gz
dna-backup-a67dfdd993dee93950c4b60e99ab6ca92d842072.zip
add ChunkId that contains version number
-rw-r--r--go.mod2
-rw-r--r--repo.go97
-rw-r--r--repo_test.go42
3 files changed, 85 insertions, 56 deletions
diff --git a/go.mod b/go.mod
index d606d25..26bdbbc 100644
--- a/go.mod
+++ b/go.mod
@@ -2,4 +2,4 @@ module github.com/n-peugnet/dna-backup
go 1.16
-require github.com/chmduquesne/rollinghash v4.0.0+incompatible // indirect
+require github.com/chmduquesne/rollinghash v4.0.0+incompatible
diff --git a/repo.go b/repo.go
index 37f0eda..19caf0d 100644
--- a/repo.go
+++ b/repo.go
@@ -30,12 +30,10 @@ import (
"hash"
"io"
"io/fs"
- "io/ioutil"
"log"
"os"
"path"
"path/filepath"
- "strconv"
"github.com/chmduquesne/rollinghash/rabinkarp64"
)
@@ -47,18 +45,28 @@ type File struct {
Size int64
}
+type ChunkId struct {
+ Ver int
+ Idx uint64
+}
+
+type Chunk struct {
+ Id ChunkId
+ Value []byte
+}
+
func Commit(source string, repo string) {
- latest := GetLastVersion(repo)
- new := latest + 1
- newPath := path.Join(repo, fmt.Sprintf("%05d", new))
+ versions := LoadVersions(repo)
+ newVersion := len(versions)
+ newPath := path.Join(repo, fmt.Sprintf("%05d", newVersion))
newChunkPath := path.Join(newPath, "chunks")
// newFilesPath := path.Join(newPath, "files")
os.Mkdir(newPath, 0775)
os.Mkdir(newChunkPath, 0775)
newChunks := make(chan []byte, 16)
- oldChunks := make(chan []byte, 16)
+ oldChunks := make(chan Chunk, 16)
files := ListFiles(source)
- go LoadChunks(repo, oldChunks)
+ go LoadChunks(versions, oldChunks)
go ReadFiles(files, newChunks)
hashes := HashChunks(oldChunks)
MatchChunks(newChunks, hashes)
@@ -67,9 +75,9 @@ func Commit(source string, repo string) {
fmt.Println(files)
}
-func GetLastVersion(repo string) int {
- v := -1
- files, err := ioutil.ReadDir(repo)
+func LoadVersions(repo string) []string {
+ versions := make([]string, 0)
+ files, err := os.ReadDir(repo)
if err != nil {
log.Fatalln(err)
}
@@ -77,16 +85,9 @@ func GetLastVersion(repo string) int {
if !f.IsDir() {
continue
}
- num, err := strconv.Atoi(f.Name())
- if err != nil {
- log.Println(err)
- continue
- }
- if num > v {
- v = num
- }
+ versions = append(versions, path.Join(repo, f.Name()))
}
- return v
+ return versions
}
func ListFiles(path string) []File {
@@ -146,9 +147,9 @@ func StoreFiles(dest string, files []File) {
}
}
-func LoadFiles(repo string) []File {
+func LoadFiles(path string) []File {
files := make([]File, 0)
- err := readFile(repo, &files)
+ err := readFile(path, &files)
if err != nil {
log.Println(err)
}
@@ -165,46 +166,56 @@ func StoreChunks(dest string, chunks <-chan []byte) {
i := 0
for c := range chunks {
path := path.Join(dest, fmt.Sprintf("%015d", i))
- os.WriteFile(path, c, 0664)
+ err := os.WriteFile(path, c, 0664)
+ if err != nil {
+ log.Println(err)
+ }
i++
}
}
-func LoadChunks(repo string, chunks chan<- []byte) {
- err := filepath.WalkDir(repo,
- func(p string, e fs.DirEntry, err error) error {
+func LoadChunks(versions []string, chunks chan<- Chunk) {
+ for i, v := range versions {
+ p := path.Join(v, "chunks")
+ entries, err := os.ReadDir(p)
+ if err != nil {
+ log.Printf("Error reading version '%05d' in '%s' chunks: %s", i, v, err)
+ }
+ for j, e := range entries {
+ if e.IsDir() {
+ continue
+ }
+ f := path.Join(p, e.Name())
+ buff, err := os.ReadFile(f)
if err != nil {
- log.Println(err)
- return err
+ log.Printf("Error reading chunk '%s': %s", f, err.Error())
}
- if e.IsDir() {
- return nil
+ c := Chunk{
+ Id: ChunkId{
+ Ver: i,
+ Idx: uint64(j),
+ },
+ Value: buff,
}
- buff, err := os.ReadFile(p)
- chunks <- buff
- return nil
- })
- if err != nil {
- log.Println(err)
+ chunks <- c
+ }
}
close(chunks)
}
-func HashChunks(chunks <-chan []byte) map[uint64]uint64 {
- hashes := make(map[uint64]uint64)
+func HashChunks(chunks <-chan Chunk) map[uint64]ChunkId {
+ hashes := make(map[uint64]ChunkId)
hasher := hash.Hash64(rabinkarp64.New())
- var i uint64 = 0
for c := range chunks {
hasher.Reset()
- hasher.Write(c)
+ hasher.Write(c.Value)
h := hasher.Sum64()
- hashes[h] = i
- i++
+ hashes[h] = c.Id
}
return hashes
}
-func MatchChunks(chunks <-chan []byte, hashes map[uint64]uint64) {
+func MatchChunks(chunks <-chan []byte, hashes map[uint64]ChunkId) {
hasher := rabinkarp64.New()
hasher.Write(<-chunks)
@@ -222,7 +233,7 @@ func MatchChunks(chunks <-chan []byte, hashes map[uint64]uint64) {
h := hasher.Sum64()
chunk, exists := hashes[h]
if exists {
- fmt.Printf("Found existing chunk. New{id:%d, offset:%d}, Old: %d\n", i, offset, chunk)
+ fmt.Printf("Found existing chunk: New{id:%d, offset:%d} Old%d\n", i, offset, chunk)
break
}
hasher.Roll(c[offset])
diff --git a/repo_test.go b/repo_test.go
index 3be4835..6475fc2 100644
--- a/repo_test.go
+++ b/repo_test.go
@@ -2,15 +2,30 @@ package main
import (
"bytes"
+ "log"
"os"
"path"
"testing"
)
-func prepareResult() {
+func TestMain(m *testing.M) {
+ setup()
+ code := m.Run()
+ shutdown()
+ os.Exit(code)
+}
+
+func setup() {
+ log.SetFlags(log.Lshortfile)
+}
+
+func shutdown() {}
+
+func prepareResult() string {
result := path.Join("test", "result")
os.RemoveAll(result)
os.MkdirAll(result, 0775)
+ return result
}
func chunkCompare(t *testing.T, dataDir string, testFiles []string, chunkCount int) {
@@ -40,7 +55,7 @@ func chunkCompare(t *testing.T, dataDir string, testFiles []string, chunkCount i
}
if bytes.Compare(c, content) != 0 {
t.Errorf("Chunk %d does not match file content", i)
- t.Log("Expected: ", c)
+ t.Log("Expected: ", c[:10], "...")
t.Log("Result:", content)
}
i++
@@ -77,34 +92,37 @@ func TestReadFiles3(t *testing.T) {
}
func TestLoadChunks(t *testing.T) {
- prepareResult()
+ resultDir := prepareResult()
dataDir := path.Join("test", "data")
- resultDir := path.Join("test", "result")
+ resultVersion := path.Join(resultDir, "00000")
+ resultChunks := path.Join(resultVersion, "chunks")
+ os.MkdirAll(resultChunks, 0775)
chunks1 := make(chan []byte, 16)
chunks2 := make(chan []byte, 16)
- chunks3 := make(chan []byte, 16)
+ chunks3 := make(chan Chunk, 16)
files := ListFiles(dataDir)
go ReadFiles(files, chunks1)
go ReadFiles(files, chunks2)
- StoreChunks(resultDir, chunks1)
- go LoadChunks(resultDir, chunks3)
+ StoreChunks(resultChunks, chunks1)
+ versions := []string{resultVersion}
+ go LoadChunks(versions, chunks3)
i := 0
for c2 := range chunks2 {
c3 := <-chunks3
- if bytes.Compare(c2, c3) != 0 {
+ if bytes.Compare(c2, c3.Value) != 0 {
t.Errorf("Chunk %d does not match file content", i)
- t.Log("Expected: ", c2)
- t.Log("Result:", c3)
+ t.Log("Expected: ", c2[:10], "...")
+ t.Log("Result:", c3.Value)
}
i++
}
}
func TestStoreLoadFiles(t *testing.T) {
- prepareResult()
+ resultDir := prepareResult()
dataDir := path.Join("test", "data")
- resultFiles := path.Join("test", "result", "files")
+ resultFiles := path.Join(resultDir, "files")
files1 := ListFiles(dataDir)
StoreFiles(resultFiles, files1)
files2 := LoadFiles(resultFiles)