diff options
author | n-peugnet <n.peugnet@free.fr> | 2021-08-27 18:38:07 +0200 |
---|---|---|
committer | n-peugnet <n.peugnet@free.fr> | 2021-08-27 18:38:16 +0200 |
commit | 129a86b3a6780b7aee5a7469cc5adeaf2ea6c20f (patch) | |
tree | ab8423f6885c380b2bb4d807313428003d8d5e37 /sketch.go | |
parent | 78251f11c91b2504edfc02b760ef53bd352b856c (diff) | |
download | dna-backup-129a86b3a6780b7aee5a7469cc5adeaf2ea6c20f.tar.gz dna-backup-129a86b3a6780b7aee5a7469cc5adeaf2ea6c20f.zip |
add findSimilarChunks method to test sketches
Still missing a real test...
Diffstat (limited to 'sketch.go')
-rw-r--r-- | sketch.go | 16 |
1 file changed, 12 insertions, 4 deletions
@@ -3,18 +3,23 @@ package main import ( "encoding/binary" "io" + "log" "github.com/chmduquesne/rollinghash/rabinkarp64" ) +type Sketch []uint64 + +const fBytes = 8 + // SketchChunk produces a sketch for a chunk based on wSize: the window size, // sfCount: the number of super-features, and fCount: the number of feature // per super-feature -func SketchChunk(chunk Chunk, wSize int, sfCount int, fCount int) ([]uint64, error) { +func SketchChunk(chunk Chunk, wSize int, sfCount int, fCount int) (Sketch, error) { var fSize = chunkSize / (sfCount * fCount) superfeatures := make([]uint64, 0, sfCount) features := make([]uint64, 0, fCount) - buff := make([]byte, 8*fCount) + buff := make([]byte, fBytes*fCount) r, err := chunk.Reader() if err != nil { return nil, err @@ -24,7 +29,10 @@ func SketchChunk(chunk Chunk, wSize int, sfCount int, fCount int) ([]uint64, err features = features[:0] for f := 0; f < fCount; f++ { hasher.Reset() - io.CopyN(hasher, r, int64(wSize)) + n, err := io.CopyN(hasher, r, int64(wSize)) + if err != nil { + log.Println(n, err) + } max := hasher.Sum64() for w := 0; w < fSize-wSize; w++ { b, _ := r.ReadByte() @@ -37,7 +45,7 @@ func SketchChunk(chunk Chunk, wSize int, sfCount int, fCount int) ([]uint64, err features = append(features, max) } for i, f := range features { - binary.LittleEndian.PutUint64(buff[i*8:i*8+8], f) + binary.LittleEndian.PutUint64(buff[i*fBytes:(i+1)*fBytes], f) } hasher.Reset() hasher.Write(buff) |