move some consts into repo

author: n-peugnet <n.peugnet@free.fr> 2021-09-01 19:07:35 +0200
committer: n-peugnet <n.peugnet@free.fr> 2021-09-01 19:07:35 +0200
commit: db40818ef79ccb3f5f9232623f57ad284a4af7d0 (patch)
tree: 6b1b1a7b6169eb19f6ca17ff87f1075b41bd513f /repo.go
parent: 34a84a44b4dfa513d8ceb1cfeec50ac78fb311e0 (diff)
download: dna-backup-db40818ef79ccb3f5f9232623f57ad284a4af7d0.tar.gz
dna-backup-db40818ef79ccb3f5f9232623f57ad284a4af7d0.zip
1 files changed, 41 insertions, 31 deletions
diff --git a/repo.go b/repo.go
index ea27fb4..6f177c2 100644
--- a/repo.go
+++ b/repo.go
@@ -43,7 +43,11 @@ type FingerprintMap map[uint64]*ChunkId
 type SketchMap map[uint64][]*ChunkId
 
 type Repo struct {
-	path string
+	path          string
+	chunkSize     int
+	sketchWSize   int
+	sketchSfCount int
+	sketchFCount  int
 }
 
 type File struct {
@@ -53,7 +57,13 @@ type File struct {
 
 func NewRepo(path string) *Repo {
 	os.MkdirAll(path, 0775)
-	return &Repo{path}
+	return &Repo{
+		path:          path,
+		chunkSize:     8 << 10,
+		sketchWSize:   32,
+		sketchSfCount: 3,
+		sketchFCount:  4,
+	}
 }
 
 func (r *Repo) Commit(source string) {
@@ -69,7 +79,7 @@ func (r *Repo) Commit(source string) {
 	files := listFiles(source)
 	go r.loadChunks(versions, oldChunks)
 	go concatFiles(files, writer)
-	fingerprints, _ := hashChunks(oldChunks)
+	fingerprints, _ := r.hashChunks(oldChunks)
 	chunks := r.matchStream(reader, fingerprints)
 	extractTempChunks(chunks)
 	// storeChunks(newChunkPath, newChunks)
@@ -124,14 +134,14 @@ func concatFiles(files []File, stream io.WriteCloser) {
 	stream.Close()
 }
 
-func chunkStream(stream io.Reader, chunks chan<- []byte) {
+func (r *Repo) chunkStream(stream io.Reader, chunks chan<- []byte) {
 	var buff []byte
-	var prev, read = chunkSize, 0
+	var prev, read = r.chunkSize, 0
 	var err error
 
 	for err != io.EOF {
-		if prev == chunkSize {
-			buff = make([]byte, chunkSize)
+		if prev == r.chunkSize {
+			buff = make([]byte, r.chunkSize)
 			prev, err = stream.Read(buff)
 		} else {
 			read, err = stream.Read(buff[prev:])
@@ -140,11 +150,11 @@ func chunkStream(stream io.Reader, chunks chan<- []byte) {
 		if err != nil && err != io.EOF {
 			log.Println(err)
 		}
-		if prev == chunkSize {
+		if prev == r.chunkSize {
 			chunks <- buff
 		}
 	}
-	if prev != chunkSize {
+	if prev != r.chunkSize {
 		chunks <- buff[:prev]
 	}
 	close(chunks)
@@ -212,7 +222,7 @@ func (r *Repo) loadChunks(versions []string, chunks chan<- StoredChunk) {
 // For each chunk, both a fingerprint (hash over the full content) and a sketch
 // (resemblance hash based on maximal values of regions) are calculated and
 // stored in an hashmap which are then returned.
-func hashChunks(chunks <-chan StoredChunk) (FingerprintMap, SketchMap) {
+func (r *Repo) hashChunks(chunks <-chan StoredChunk) (FingerprintMap, SketchMap) {
 	fingerprints := make(FingerprintMap)
 	sketches := make(SketchMap)
 	hasher := hash.Hash64(rabinkarp64.New())
@@ -221,7 +231,7 @@ func hashChunks(chunks <-chan StoredChunk) (FingerprintMap, SketchMap) {
 		io.Copy(hasher, c.Reader())
 		h := hasher.Sum64()
 		fingerprints[h] = c.Id()
-		sketch, _ := SketchChunk(c, sketchWSize, sketchSfCount, sketchFCount)
+		sketch, _ := SketchChunk(c, r.chunkSize, r.sketchWSize, r.sketchSfCount, r.sketchFCount)
 		for _, s := range sketch {
 			prev := sketches[s]
 			if contains(prev, c.Id()) {
@@ -242,11 +252,11 @@ func contains(s []*ChunkId, id *ChunkId) bool {
 	return false
 }
 
-func findSimilarChunk(chunk Chunk, sketches SketchMap) (*ChunkId, bool) {
+func (r *Repo) findSimilarChunk(chunk Chunk, sketches SketchMap) (*ChunkId, bool) {
 	var similarChunks = make(map[ChunkId]int)
 	var max int
 	var similarChunk *ChunkId
-	sketch, _ := SketchChunk(chunk, sketchWSize, sketchSfCount, sketchFCount)
+	sketch, _ := SketchChunk(chunk, r.chunkSize, r.sketchWSize, r.sketchSfCount, r.sketchFCount)
 	for _, s := range sketch {
 		chunkIds, exists := sketches[s]
 		if !exists {
@@ -268,10 +278,10 @@ func findSimilarChunk(chunk Chunk, sketches SketchMap) (*ChunkId, bool) {
 func (r *Repo) matchStream(stream io.Reader, fingerprints FingerprintMap) []Chunk {
 	var b byte
 	var chunks []Chunk
-	bufStream := bufio.NewReaderSize(stream, chunkSize)
-	buff := make([]byte, 0, chunkSize*2)
-	n, err := io.ReadFull(stream, buff[:chunkSize])
-	if n < chunkSize {
+	bufStream := bufio.NewReaderSize(stream, r.chunkSize)
+	buff := make([]byte, 0, r.chunkSize*2)
+	n, err := io.ReadFull(stream, buff[:r.chunkSize])
+	if n < r.chunkSize {
 		chunks = append(chunks, NewTempChunk(buff[:n]))
 		return chunks
 	}
@@ -281,26 +291,26 @@ func (r *Repo) matchStream(stream io.Reader, fingerprints FingerprintMap) []Chun
 		h := hasher.Sum64()
 		chunkId, exists := fingerprints[h]
 		if exists {
-			if len(buff) > chunkSize && len(buff) < chunkSize*2 {
-				size := len(buff) - chunkSize
+			if len(buff) > r.chunkSize && len(buff) < r.chunkSize*2 {
+				size := len(buff) - r.chunkSize
 				log.Println("Add new partial chunk of size:", size)
 				chunks = append(chunks, NewTempChunk(buff[:size]))
 			}
 			log.Printf("Add existing chunk: %d\n", chunkId)
 			chunks = append(chunks, NewChunkFile(r, chunkId))
-			buff = make([]byte, 0, chunkSize*2)
-			for i := 0; i < chunkSize && err == nil; i++ {
+			buff = make([]byte, 0, r.chunkSize*2)
+			for i := 0; i < r.chunkSize && err == nil; i++ {
 				b, err = bufStream.ReadByte()
 				hasher.Roll(b)
 				buff = append(buff, b)
 			}
 			continue
 		}
-		if len(buff) == chunkSize*2 {
+		if len(buff) == r.chunkSize*2 {
 			log.Println("Add new chunk")
-			chunks = append(chunks, NewTempChunk(buff[:chunkSize]))
-			tmp := buff[chunkSize:]
-			buff = make([]byte, 0, chunkSize*2)
+			chunks = append(chunks, NewTempChunk(buff[:r.chunkSize]))
+			tmp := buff[r.chunkSize:]
+			buff = make([]byte, 0, r.chunkSize*2)
 			buff = append(buff, tmp...)
 		}
 		b, err = bufStream.ReadByte()
@@ -309,11 +319,11 @@ func (r *Repo) matchStream(stream io.Reader, fingerprints FingerprintMap) []Chun
 			buff = append(buff, b)
 		}
 	}
-	if len(buff) > chunkSize {
+	if len(buff) > r.chunkSize {
 		log.Println("Add new chunk")
-		chunks = append(chunks, NewTempChunk(buff[:chunkSize]))
-		log.Println("Add new partial chunk of size:", len(buff)-chunkSize)
-		chunks = append(chunks, NewTempChunk(buff[chunkSize:]))
+		chunks = append(chunks, NewTempChunk(buff[:r.chunkSize]))
+		log.Println("Add new partial chunk of size:", len(buff)-r.chunkSize)
+		chunks = append(chunks, NewTempChunk(buff[r.chunkSize:]))
 	} else if len(buff) > 0 {
 		log.Println("Add new partial chunk of size:", len(buff))
 		chunks = append(chunks, NewTempChunk(buff))
@@ -324,13 +334,13 @@ func (r *Repo) matchStream(stream io.Reader, fingerprints FingerprintMap) []Chun
 // mergeTempChunks joins temporary partial chunks from an array of chunks if possible.
 // If a chunk is smaller than the size required to calculate a super-feature,
 // it is then appended to the previous consecutive temporary chunk if it exists.
-func mergeTempChunks(chunks []Chunk) (ret []Chunk) {
+func (r *Repo) mergeTempChunks(chunks []Chunk) (ret []Chunk) {
 	var prev *TempChunk
 	var curr *TempChunk
 	for _, c := range chunks {
 		tmp, isTmp := c.(*TempChunk)
 		if !isTmp {
-			if prev != nil && curr.Len() <= SuperFeatureSize(chunkSize, sketchSfCount, sketchFCount) {
+			if prev != nil && curr.Len() <= SuperFeatureSize(r.chunkSize, r.sketchSfCount, r.sketchFCount) {
 				prev.AppendFrom(curr.Reader())
 			} else if curr != nil {
 				ret = append(ret, curr)
author	n-peugnet <n.peugnet@free.fr>	2021-09-01 19:07:35 +0200
committer	n-peugnet <n.peugnet@free.fr>	2021-09-01 19:07:35 +0200
commit	db40818ef79ccb3f5f9232623f57ad284a4af7d0 (patch)
tree	6b1b1a7b6169eb19f6ca17ff87f1075b41bd513f /repo.go
parent	34a84a44b4dfa513d8ceb1cfeec50ac78fb311e0 (diff)
download	dna-backup-db40818ef79ccb3f5f9232623f57ad284a4af7d0.tar.gz dna-backup-db40818ef79ccb3f5f9232623f57ad284a4af7d0.zip