aboutsummaryrefslogtreecommitdiff
path: root/sketch.go
diff options
context:
space:
mode:
author n-peugnet <n.peugnet@free.fr> 2021-08-31 12:05:29 +0200
committer n-peugnet <n.peugnet@free.fr> 2021-08-31 12:05:29 +0200
commitc481eb2b44adf50b62de3b9e3355f64973967d52 (patch)
tree34a218c926f6aa6420c8abfcf703262e6148c0ed /sketch.go
parent36da6832dce67da09d7bcee1a6ab2312e515cb0a (diff)
downloaddna-backup-c481eb2b44adf50b62de3b9e3355f64973967d52.tar.gz
dna-backup-c481eb2b44adf50b62de3b9e3355f64973967d52.zip
do not fill partial chunks with padding
this way a partial chunk may have fewer superfeatures than a complete one
Diffstat (limited to 'sketch.go')
-rw-r--r--sketch.go39
1 files changed, 19 insertions, 20 deletions
diff --git a/sketch.go b/sketch.go
index db7e4e6..9910848 100644
--- a/sketch.go
+++ b/sketch.go
@@ -18,31 +18,30 @@ const fBytes = 8
func SketchChunk(chunk Chunk, wSize int, sfCount int, fCount int) (Sketch, error) {
var fSize = chunkSize / (sfCount * fCount)
superfeatures := make([]uint64, 0, sfCount)
- features := make([]uint64, 0, fCount)
+ features := make([]uint64, 0, fCount*sfCount)
buff := make([]byte, fBytes*fCount)
r := chunk.Reader()
hasher := rabinkarp64.New()
- for sf := 0; sf < sfCount; sf++ {
- features = features[:0]
- for f := 0; f < fCount; f++ {
- hasher.Reset()
- n, err := io.CopyN(hasher, r, int64(wSize))
- if err != nil {
- log.Println(n, err)
- }
- max := hasher.Sum64()
- for w := 0; w < fSize-wSize; w++ {
- b, _ := r.ReadByte()
- hasher.Roll(b)
- h := hasher.Sum64()
- if h > max {
- max = h
- }
+ for f := 0; f < chunk.Len()/fSize; f++ {
+ hasher.Reset()
+ n, err := io.CopyN(hasher, r, int64(wSize))
+ if err != nil {
+ log.Println(n, err)
+ }
+ max := hasher.Sum64()
+ for w := 0; w < fSize-wSize; w++ {
+ b, _ := r.ReadByte()
+ hasher.Roll(b)
+ h := hasher.Sum64()
+ if h > max {
+ max = h
}
- features = append(features, max)
}
- for i, f := range features {
- binary.LittleEndian.PutUint64(buff[i*fBytes:(i+1)*fBytes], f)
+ features = append(features, max)
+ }
+ for sf := 0; sf < len(features)/fCount; sf++ {
+ for i := 0; i < fCount; i++ {
+ binary.LittleEndian.PutUint64(buff[i*fBytes:(i+1)*fBytes], features[i+sf*fCount])
}
hasher.Reset()
hasher.Write(buff)