author    | n-peugnet <n.peugnet@free.fr> | 2021-08-31 12:05:29 +0200
committer | n-peugnet <n.peugnet@free.fr> | 2021-08-31 12:05:29 +0200
commit    | c481eb2b44adf50b62de3b9e3355f64973967d52 (patch)
tree      | 34a218c926f6aa6420c8abfcf703262e6148c0ed /sketch.go
parent    | 36da6832dce67da09d7bcee1a6ab2312e515cb0a (diff)
download  | dna-backup-c481eb2b44adf50b62de3b9e3355f64973967d52.tar.gz
          | dna-backup-c481eb2b44adf50b62de3b9e3355f64973967d52.zip
do not fill partial chunks with padding
this way a partial chunk may have fewer superfeatures than a complete one
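To illustrate the effect of the change, here is a minimal, self-contained sketch of the resulting arithmetic. The helper `superfeatureCount` and the concrete parameter values are hypothetical; only the relation `fSize = chunkSize / (sfCount * fCount)` is taken from `sketch.go`.

```go
package main

import "fmt"

// superfeatureCount mirrors the arithmetic of the patched SketchChunk:
// without padding, the number of features follows the chunk's real length,
// and only complete groups of fCount features become superfeatures.
// The parameter values used below are placeholders, not the project's defaults.
func superfeatureCount(chunkLen, chunkSize, sfCount, fCount int) int {
	fSize := chunkSize / (sfCount * fCount) // feature window size, as in SketchChunk
	features := chunkLen / fSize            // one feature per full fSize slice of the chunk
	return features / fCount                // incomplete groups are dropped, not padded
}

func main() {
	// A full chunk still yields sfCount superfeatures...
	fmt.Println(superfeatureCount(8192, 8192, 4, 4)) // 4
	// ...while a partial chunk yields fewer, as the commit message describes.
	fmt.Println(superfeatureCount(4200, 8192, 4, 4)) // 2
}
```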
Diffstat (limited to 'sketch.go')
-rw-r--r-- | sketch.go | 39 |
1 file changed, 19 insertions, 20 deletions
```diff
@@ -18,31 +18,30 @@ const fBytes = 8
 func SketchChunk(chunk Chunk, wSize int, sfCount int, fCount int) (Sketch, error) {
 	var fSize = chunkSize / (sfCount * fCount)
 	superfeatures := make([]uint64, 0, sfCount)
-	features := make([]uint64, 0, fCount)
+	features := make([]uint64, 0, fCount*sfCount)
 	buff := make([]byte, fBytes*fCount)
 	r := chunk.Reader()
 	hasher := rabinkarp64.New()
-	for sf := 0; sf < sfCount; sf++ {
-		features = features[:0]
-		for f := 0; f < fCount; f++ {
-			hasher.Reset()
-			n, err := io.CopyN(hasher, r, int64(wSize))
-			if err != nil {
-				log.Println(n, err)
-			}
-			max := hasher.Sum64()
-			for w := 0; w < fSize-wSize; w++ {
-				b, _ := r.ReadByte()
-				hasher.Roll(b)
-				h := hasher.Sum64()
-				if h > max {
-					max = h
-				}
+	for f := 0; f < chunk.Len()/fSize; f++ {
+		hasher.Reset()
+		n, err := io.CopyN(hasher, r, int64(wSize))
+		if err != nil {
+			log.Println(n, err)
+		}
+		max := hasher.Sum64()
+		for w := 0; w < fSize-wSize; w++ {
+			b, _ := r.ReadByte()
+			hasher.Roll(b)
+			h := hasher.Sum64()
+			if h > max {
+				max = h
 			}
-			features = append(features, max)
 		}
-		for i, f := range features {
-			binary.LittleEndian.PutUint64(buff[i*fBytes:(i+1)*fBytes], f)
+		features = append(features, max)
+	}
+	for sf := 0; sf < len(features)/fCount; sf++ {
+		for i := 0; i < fCount; i++ {
+			binary.LittleEndian.PutUint64(buff[i*fBytes:(i+1)*fBytes], features[i+sf*fCount])
 		}
 		hasher.Reset()
 		hasher.Write(buff)
```
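For readers skimming the diff, the new grouping step can also be read in isolation. The sketch below reproduces the loop over `len(features)/fCount`, with `hash/fnv` standing in for the project's `rabinkarp64` rolling hash; `groupSuperfeatures` is a hypothetical helper, not code from the repository.

```go
package main

import (
	"encoding/binary"
	"fmt"
	"hash/fnv"
)

// groupSuperfeatures mirrors the new grouping loop from the diff: features are
// packed fCount at a time into a byte buffer and hashed into one superfeature;
// a trailing, incomplete group is simply dropped instead of being padded.
// fnv is used here only as a stand-in for the project's rabinkarp64 hasher.
func groupSuperfeatures(features []uint64, fCount int) []uint64 {
	const fBytes = 8
	buff := make([]byte, fBytes*fCount)
	superfeatures := make([]uint64, 0, len(features)/fCount)
	for sf := 0; sf < len(features)/fCount; sf++ {
		for i := 0; i < fCount; i++ {
			binary.LittleEndian.PutUint64(buff[i*fBytes:(i+1)*fBytes], features[i+sf*fCount])
		}
		hasher := fnv.New64()
		hasher.Write(buff)
		superfeatures = append(superfeatures, hasher.Sum64())
	}
	return superfeatures
}

func main() {
	// 7 features with fCount = 3 yield 2 superfeatures; the trailing feature is
	// ignored, which is exactly the "no padding" behaviour this commit introduces.
	features := []uint64{1, 2, 3, 4, 5, 6, 7}
	fmt.Println(len(groupSuperfeatures(features, 3))) // 2
}
```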