aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--go.mod2
-rw-r--r--go.sum2
-rw-r--r--repo.go45
3 files changed, 46 insertions, 3 deletions
diff --git a/go.mod b/go.mod
index cdc2a48..d606d25 100644
--- a/go.mod
+++ b/go.mod
@@ -1,3 +1,5 @@
module github.com/n-peugnet/dna-backup
go 1.16
+
+require github.com/chmduquesne/rollinghash v4.0.0+incompatible // indirect
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..c9d9a48
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,2 @@
+github.com/chmduquesne/rollinghash v4.0.0+incompatible h1:hnREQO+DXjqIw3rUTzWN7/+Dpw+N5Um8zpKV0JOEgbo=
+github.com/chmduquesne/rollinghash v4.0.0+incompatible/go.mod h1:Uc2I36RRfTAf7Dge82bi3RU0OQUmXT9iweIcPqvr8A0=
diff --git a/repo.go b/repo.go
index 3a91407..daf6a0e 100644
--- a/repo.go
+++ b/repo.go
@@ -27,6 +27,7 @@ package main
import (
"encoding/gob"
"fmt"
+ "hash"
"io"
"io/fs"
"io/ioutil"
@@ -35,6 +36,8 @@ import (
"path"
"path/filepath"
"strconv"
+
+ "github.com/chmduquesne/rollinghash/rabinkarp64"
)
var chunkSize = 8 << 10
@@ -49,7 +52,7 @@ func Commit(source string, repo string) {
new := latest + 1
newPath := path.Join(repo, fmt.Sprintf("%05d", new))
newChunkPath := path.Join(newPath, "chunks")
- newFilesPath := path.Join(newPath, "files")
+ // newFilesPath := path.Join(newPath, "files")
os.Mkdir(newPath, 0775)
os.Mkdir(newChunkPath, 0775)
newChunks := make(chan []byte, 16)
@@ -57,8 +60,10 @@ func Commit(source string, repo string) {
files := ListFiles(source)
go LoadChunks(repo, oldChunks)
go ReadFiles(files, newChunks)
- StoreChunks(newChunkPath, newChunks)
- StoreFiles(newFilesPath, files)
+ hashes := HashChunks(oldChunks)
+ MatchChunks(newChunks, hashes)
+ // StoreChunks(newChunkPath, newChunks)
+ // StoreFiles(newFilesPath, files)
fmt.Println(files)
}
@@ -185,6 +190,40 @@ func LoadChunks(repo string, chunks chan<- []byte) {
close(chunks)
}
+func HashChunks(chunks <-chan []byte) map[uint64]uint64 {
+ hashes := make(map[uint64]uint64)
+ hasher := hash.Hash64(rabinkarp64.New())
+ var i uint64 = 0
+ for c := range chunks {
+ hasher.Reset()
+ hasher.Write(c)
+ h := hasher.Sum64()
+ hashes[h] = i
+ i++
+ }
+ return hashes
+}
+
+func MatchChunks(chunks <-chan []byte, hashes map[uint64]uint64) {
+ hasher := rabinkarp64.New()
+ hasher.Write(<-chunks)
+
+ var i uint64 = 0
+ for c := range chunks {
+ for offset, b := range c {
+
+ h := hasher.Sum64()
+ chunk, exists := hashes[h]
+ if exists {
+ fmt.Printf("Found existing chunk. New{id:%d, offset:%d}, Old: %d\n", i, offset, chunk)
+ }
+ // Roll the incoming byte in rolling
+ hasher.Roll(b)
+ }
+ i++
+ }
+}
+
func writeFile(filePath string, object interface{}) error {
file, err := os.Create(filePath)
if err == nil {