diff options
Diffstat (limited to 'sketch.go')
-rw-r--r-- | sketch.go | 16 |
1 files changed, 12 insertions, 4 deletions
@@ -3,18 +3,23 @@ package main
 import (
 	"encoding/binary"
 	"io"
+	"log"
 
 	"github.com/chmduquesne/rollinghash/rabinkarp64"
 )
 
+type Sketch []uint64
+
+const fBytes = 8
+
 // SketchChunk produces a sketch for a chunk based on wSize: the window size,
 // sfCount: the number of super-features, and fCount: the number of feature
 // per super-feature
-func SketchChunk(chunk Chunk, wSize int, sfCount int, fCount int) ([]uint64, error) {
+func SketchChunk(chunk Chunk, wSize int, sfCount int, fCount int) (Sketch, error) {
 	var fSize = chunkSize / (sfCount * fCount)
 	superfeatures := make([]uint64, 0, sfCount)
 	features := make([]uint64, 0, fCount)
-	buff := make([]byte, 8*fCount)
+	buff := make([]byte, fBytes*fCount)
 	r, err := chunk.Reader()
 	if err != nil {
 		return nil, err
@@ -24,7 +29,10 @@ func SketchChunk(chunk Chunk, wSize int, sfCount int, fCount int) ([]uint64, err
 		features = features[:0]
 		for f := 0; f < fCount; f++ {
 			hasher.Reset()
-			io.CopyN(hasher, r, int64(wSize))
+			n, err := io.CopyN(hasher, r, int64(wSize))
+			if err != nil {
+				log.Println(n, err)
+			}
 			max := hasher.Sum64()
 			for w := 0; w < fSize-wSize; w++ {
 				b, _ := r.ReadByte()
@@ -37,7 +45,7 @@ func SketchChunk(chunk Chunk, wSize int, sfCount int, fCount int) ([]uint64, err
 			features = append(features, max)
 		}
 		for i, f := range features {
-			binary.LittleEndian.PutUint64(buff[i*8:i*8+8], f)
+			binary.LittleEndian.PutUint64(buff[i*fBytes:(i+1)*fBytes], f)
 		}
 		hasher.Reset()
 		hasher.Write(buff)