From 504fe3db47c058807b656a8e63bb27c12420f268 Mon Sep 17 00:00:00 2001 From: n-peugnet Date: Tue, 31 Aug 2021 16:28:07 +0200 Subject: join too small temp chunks with previous one if possible --- repo.go | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'repo.go') diff --git a/repo.go b/repo.go index 55e830f..d32d437 100644 --- a/repo.go +++ b/repo.go @@ -71,7 +71,7 @@ func (r *Repo) Commit(source string) { go concatFiles(files, writer) fingerprints, _ := hashChunks(oldChunks) chunks := r.matchStream(reader, fingerprints) - extractNewChunks(chunks) + extractTempChunks(chunks) // storeChunks(newChunkPath, newChunks) // storeFiles(newFilesPath, files) fmt.Println(files) @@ -327,24 +327,32 @@ func (r *Repo) matchStream(stream io.Reader, fingerprints FingerprintMap) []Chun return chunks } -// extractNewChunks extracts new chunks from an array of chunks and -// returns them in an array of consecutive new chunk's array -func extractNewChunks(chunks []Chunk) (ret [][]Chunk) { - var i int - ret = append(ret, nil) +// extractTempChunks extracts temporary chunks from an array of chunks. +// If a chunk is smaller than the size required to calculate a super-feature, +// it is then appended to the previous consecutive temporary chunk if it exists. +func extractTempChunks(chunks []Chunk) (ret []Chunk) { + var prev *TempChunk + var curr *TempChunk for _, c := range chunks { - _, isTmp := c.(*TempChunk) + tmp, isTmp := c.(*TempChunk) if !isTmp { - if len(ret[i]) != 0 { - i++ - ret = append(ret, nil) + if prev != nil && curr.Len() <= SuperFeatureSize(chunkSize, sketchSfCount, sketchFCount) { + prev.AppendFrom(curr.Reader()) + } else if curr != nil { + ret = append(ret, curr) } + curr = nil + prev = nil } else { - ret[i] = append(ret[i], c) + prev = curr + curr = tmp + if prev != nil { + ret = append(ret, prev) + } } } - if len(ret[i]) == 0 { - ret = ret[:i] + if curr != nil { + ret = append(ret, curr) } return } -- cgit v1.2.3