diff options
author | n-peugnet <n.peugnet@free.fr> | 2021-08-23 11:19:23 +0200 |
---|---|---|
committer | n-peugnet <n.peugnet@free.fr> | 2021-08-23 11:19:23 +0200 |
commit | 212ce5c37857a2d64c56e3df8f415f48b8698462 (patch) | |
tree | 3e3afef77617534ed6cd4e7e49690cb738009322 | |
parent | f63d18f08e86b4135e466d5e88e03dda65466e6b (diff) | |
download | dna-backup-212ce5c37857a2d64c56e3df8f415f48b8698462.tar.gz dna-backup-212ce5c37857a2d64c56e3df8f415f48b8698462.zip |
initial rolling hash matching
-rw-r--r-- | go.mod | 2 | ||||
-rw-r--r-- | go.sum | 2 | ||||
-rw-r--r-- | repo.go | 45 |
3 files changed, 46 insertions, 3 deletions
@@ -1,3 +1,5 @@ module github.com/n-peugnet/dna-backup go 1.16 + +require github.com/chmduquesne/rollinghash v4.0.0+incompatible // indirect @@ -0,0 +1,2 @@ +github.com/chmduquesne/rollinghash v4.0.0+incompatible h1:hnREQO+DXjqIw3rUTzWN7/+Dpw+N5Um8zpKV0JOEgbo= +github.com/chmduquesne/rollinghash v4.0.0+incompatible/go.mod h1:Uc2I36RRfTAf7Dge82bi3RU0OQUmXT9iweIcPqvr8A0= @@ -27,6 +27,7 @@ package main import ( "encoding/gob" "fmt" + "hash" "io" "io/fs" "io/ioutil" @@ -35,6 +36,8 @@ import ( "path" "path/filepath" "strconv" + + "github.com/chmduquesne/rollinghash/rabinkarp64" ) var chunkSize = 8 << 10 @@ -49,7 +52,7 @@ func Commit(source string, repo string) { new := latest + 1 newPath := path.Join(repo, fmt.Sprintf("%05d", new)) newChunkPath := path.Join(newPath, "chunks") - newFilesPath := path.Join(newPath, "files") + // newFilesPath := path.Join(newPath, "files") os.Mkdir(newPath, 0775) os.Mkdir(newChunkPath, 0775) newChunks := make(chan []byte, 16) @@ -57,8 +60,10 @@ func Commit(source string, repo string) { files := ListFiles(source) go LoadChunks(repo, oldChunks) go ReadFiles(files, newChunks) - StoreChunks(newChunkPath, newChunks) - StoreFiles(newFilesPath, files) + hashes := HashChunks(oldChunks) + MatchChunks(newChunks, hashes) + // StoreChunks(newChunkPath, newChunks) + // StoreFiles(newFilesPath, files) fmt.Println(files) } @@ -185,6 +190,40 @@ func LoadChunks(repo string, chunks chan<- []byte) { close(chunks) } +func HashChunks(chunks <-chan []byte) map[uint64]uint64 { + hashes := make(map[uint64]uint64) + hasher := hash.Hash64(rabinkarp64.New()) + var i uint64 = 0 + for c := range chunks { + hasher.Reset() + hasher.Write(c) + h := hasher.Sum64() + hashes[h] = i + i++ + } + return hashes +} + +func MatchChunks(chunks <-chan []byte, hashes map[uint64]uint64) { + hasher := rabinkarp64.New() + hasher.Write(<-chunks) + + var i uint64 = 0 + for c := range chunks { + for offset, b := range c { + + h := hasher.Sum64() + chunk, exists := hashes[h] + if exists { + fmt.Printf("Found existing chunk. New{id:%d, offset:%d}, Old: %d\n", i, offset, chunk) + } + // Roll the incoming byte in rolling + hasher.Roll(b) + } + i++ + } +} + func writeFile(filePath string, object interface{}) error { file, err := os.Create(filePath) if err == nil { |