diff options
author | n-peugnet <n.peugnet@free.fr> | 2021-08-23 15:59:49 +0200 |
---|---|---|
committer | n-peugnet <n.peugnet@free.fr> | 2021-08-23 15:59:49 +0200 |
commit | a67dfdd993dee93950c4b60e99ab6ca92d842072 (patch) | |
tree | f2363c729e890b57a6f87a9c4ae147a668c92a64 | |
parent | e0b1733b3d7c88c79d4d92d8c6fac97bbd73f06b (diff) | |
download | dna-backup-a67dfdd993dee93950c4b60e99ab6ca92d842072.tar.gz dna-backup-a67dfdd993dee93950c4b60e99ab6ca92d842072.zip |
add ChunkId that contains version number
-rw-r--r-- | go.mod | 2 | ||||
-rw-r--r-- | repo.go | 97 | ||||
-rw-r--r-- | repo_test.go | 42 |
3 files changed, 85 insertions, 56 deletions
@@ -2,4 +2,4 @@ module github.com/n-peugnet/dna-backup go 1.16 -require github.com/chmduquesne/rollinghash v4.0.0+incompatible // indirect +require github.com/chmduquesne/rollinghash v4.0.0+incompatible @@ -30,12 +30,10 @@ import ( "hash" "io" "io/fs" - "io/ioutil" "log" "os" "path" "path/filepath" - "strconv" "github.com/chmduquesne/rollinghash/rabinkarp64" ) @@ -47,18 +45,28 @@ type File struct { Size int64 } +type ChunkId struct { + Ver int + Idx uint64 +} + +type Chunk struct { + Id ChunkId + Value []byte +} + func Commit(source string, repo string) { - latest := GetLastVersion(repo) - new := latest + 1 - newPath := path.Join(repo, fmt.Sprintf("%05d", new)) + versions := LoadVersions(repo) + newVersion := len(versions) + newPath := path.Join(repo, fmt.Sprintf("%05d", newVersion)) newChunkPath := path.Join(newPath, "chunks") // newFilesPath := path.Join(newPath, "files") os.Mkdir(newPath, 0775) os.Mkdir(newChunkPath, 0775) newChunks := make(chan []byte, 16) - oldChunks := make(chan []byte, 16) + oldChunks := make(chan Chunk, 16) files := ListFiles(source) - go LoadChunks(repo, oldChunks) + go LoadChunks(versions, oldChunks) go ReadFiles(files, newChunks) hashes := HashChunks(oldChunks) MatchChunks(newChunks, hashes) @@ -67,9 +75,9 @@ func Commit(source string, repo string) { fmt.Println(files) } -func GetLastVersion(repo string) int { - v := -1 - files, err := ioutil.ReadDir(repo) +func LoadVersions(repo string) []string { + versions := make([]string, 0) + files, err := os.ReadDir(repo) if err != nil { log.Fatalln(err) } @@ -77,16 +85,9 @@ func GetLastVersion(repo string) int { if !f.IsDir() { continue } - num, err := strconv.Atoi(f.Name()) - if err != nil { - log.Println(err) - continue - } - if num > v { - v = num - } + versions = append(versions, path.Join(repo, f.Name())) } - return v + return versions } func ListFiles(path string) []File { @@ -146,9 +147,9 @@ func StoreFiles(dest string, files []File) { } } -func LoadFiles(repo string) []File { +func LoadFiles(path string) []File { files := make([]File, 0) - err := readFile(repo, &files) + err := readFile(path, &files) if err != nil { log.Println(err) } @@ -165,46 +166,56 @@ func StoreChunks(dest string, chunks <-chan []byte) { i := 0 for c := range chunks { path := path.Join(dest, fmt.Sprintf("%015d", i)) - os.WriteFile(path, c, 0664) + err := os.WriteFile(path, c, 0664) + if err != nil { + log.Println(err) + } i++ } } -func LoadChunks(repo string, chunks chan<- []byte) { - err := filepath.WalkDir(repo, - func(p string, e fs.DirEntry, err error) error { +func LoadChunks(versions []string, chunks chan<- Chunk) { + for i, v := range versions { + p := path.Join(v, "chunks") + entries, err := os.ReadDir(p) + if err != nil { + log.Printf("Error reading version '%05d' in '%s' chunks: %s", i, v, err) + } + for j, e := range entries { + if e.IsDir() { + continue + } + f := path.Join(p, e.Name()) + buff, err := os.ReadFile(f) if err != nil { - log.Println(err) - return err + log.Printf("Error reading chunk '%s': %s", f, err.Error()) } - if e.IsDir() { - return nil + c := Chunk{ + Id: ChunkId{ + Ver: i, + Idx: uint64(j), + }, + Value: buff, } - buff, err := os.ReadFile(p) - chunks <- buff - return nil - }) - if err != nil { - log.Println(err) + chunks <- c + } } close(chunks) } -func HashChunks(chunks <-chan []byte) map[uint64]uint64 { - hashes := make(map[uint64]uint64) +func HashChunks(chunks <-chan Chunk) map[uint64]ChunkId { + hashes := make(map[uint64]ChunkId) hasher := hash.Hash64(rabinkarp64.New()) - var i uint64 = 0 for c := range chunks { hasher.Reset() - hasher.Write(c) + hasher.Write(c.Value) h := hasher.Sum64() - hashes[h] = i - i++ + hashes[h] = c.Id } return hashes } -func MatchChunks(chunks <-chan []byte, hashes map[uint64]uint64) { +func MatchChunks(chunks <-chan []byte, hashes map[uint64]ChunkId) { hasher := rabinkarp64.New() hasher.Write(<-chunks) @@ -222,7 +233,7 @@ func MatchChunks(chunks <-chan []byte, hashes map[uint64]uint64) { h := hasher.Sum64() chunk, exists := hashes[h] if exists { - fmt.Printf("Found existing chunk. New{id:%d, offset:%d}, Old: %d\n", i, offset, chunk) + fmt.Printf("Found existing chunk: New{id:%d, offset:%d} Old%d\n", i, offset, chunk) break } hasher.Roll(c[offset]) diff --git a/repo_test.go b/repo_test.go index 3be4835..6475fc2 100644 --- a/repo_test.go +++ b/repo_test.go @@ -2,15 +2,30 @@ package main import ( "bytes" + "log" "os" "path" "testing" ) -func prepareResult() { +func TestMain(m *testing.M) { + setup() + code := m.Run() + shutdown() + os.Exit(code) +} + +func setup() { + log.SetFlags(log.Lshortfile) +} + +func shutdown() {} + +func prepareResult() string { result := path.Join("test", "result") os.RemoveAll(result) os.MkdirAll(result, 0775) + return result } func chunkCompare(t *testing.T, dataDir string, testFiles []string, chunkCount int) { @@ -40,7 +55,7 @@ func chunkCompare(t *testing.T, dataDir string, testFiles []string, chunkCount i } if bytes.Compare(c, content) != 0 { t.Errorf("Chunk %d does not match file content", i) - t.Log("Expected: ", c) + t.Log("Expected: ", c[:10], "...") t.Log("Result:", content) } i++ @@ -77,34 +92,37 @@ func TestReadFiles3(t *testing.T) { } func TestLoadChunks(t *testing.T) { - prepareResult() + resultDir := prepareResult() dataDir := path.Join("test", "data") - resultDir := path.Join("test", "result") + resultVersion := path.Join(resultDir, "00000") + resultChunks := path.Join(resultVersion, "chunks") + os.MkdirAll(resultChunks, 0775) chunks1 := make(chan []byte, 16) chunks2 := make(chan []byte, 16) - chunks3 := make(chan []byte, 16) + chunks3 := make(chan Chunk, 16) files := ListFiles(dataDir) go ReadFiles(files, chunks1) go ReadFiles(files, chunks2) - StoreChunks(resultDir, chunks1) - go LoadChunks(resultDir, chunks3) + StoreChunks(resultChunks, chunks1) + versions := []string{resultVersion} + go LoadChunks(versions, chunks3) i := 0 for c2 := range chunks2 { c3 := <-chunks3 - if bytes.Compare(c2, c3) != 0 { + if bytes.Compare(c2, c3.Value) != 0 { t.Errorf("Chunk %d does not match file content", i) - t.Log("Expected: ", c2) - t.Log("Result:", c3) + t.Log("Expected: ", c2[:10], "...") + t.Log("Result:", c3.Value) } i++ } } func TestStoreLoadFiles(t *testing.T) { - prepareResult() + resultDir := prepareResult() dataDir := path.Join("test", "data") - resultFiles := path.Join("test", "result", "files") + resultFiles := path.Join(resultDir, "files") files1 := ListFiles(dataDir) StoreFiles(resultFiles, files1) files2 := LoadFiles(resultFiles) |