about | summary | refs | log | tree | commit | diff
path: root/sketch.go
diff options
context:
space:
mode:
Diffstat (limited to 'sketch.go')
-rw-r--r-- sketch.go 53
1 files changed, 37 insertions, 16 deletions
diff --git a/sketch.go b/sketch.go
index 295a11b..693cacf 100644
--- a/sketch.go
+++ b/sketch.go
@@ -1,55 +1,76 @@
package main
import (
+ "bytes"
"encoding/binary"
"io"
"log"
+ "sync"
"github.com/chmduquesne/rollinghash/rabinkarp64"
)
type Sketch []uint64
+type ReadByteReader interface {
+ io.Reader
+ io.ByteReader
+}
+
const fBytes = 8
// SketchChunk produces a sketch for a chunk based on wSize: the window size,
// sfCount: the number of super-features, and fCount: the number of feature
// per super-feature
func SketchChunk(chunk Chunk, chunkSize int, wSize int, sfCount int, fCount int) (Sketch, error) {
+ var wg sync.WaitGroup
var fSize = FeatureSize(chunkSize, sfCount, fCount)
superfeatures := make([]uint64, 0, sfCount)
features := make([]uint64, 0, fCount*sfCount)
- buff := make([]byte, fBytes*fCount)
+ sfBuff := make([]byte, fBytes*fCount)
r := chunk.Reader()
- hasher := rabinkarp64.New()
for f := 0; f < chunk.Len()/fSize; f++ {
- hasher.Reset()
- n, err := io.CopyN(hasher, r, int64(wSize))
+ var fBuff bytes.Buffer
+ n, err := io.CopyN(&fBuff, r, int64(fSize))
if err != nil {
log.Println(n, err)
}
- max := hasher.Sum64()
- for w := 0; w < fSize-wSize; w++ {
- b, _ := r.ReadByte()
- hasher.Roll(b)
- h := hasher.Sum64()
- if h > max {
- max = h
- }
- }
- features = append(features, max)
+ features = append(features, 0)
+ wg.Add(1)
+ go calcFeature(&wg, &fBuff, wSize, fSize, &features[f])
}
+ hasher := rabinkarp64.New()
+ wg.Wait()
for sf := 0; sf < len(features)/fCount; sf++ {
for i := 0; i < fCount; i++ {
- binary.LittleEndian.PutUint64(buff[i*fBytes:(i+1)*fBytes], features[i+sf*fCount])
+ binary.LittleEndian.PutUint64(sfBuff[i*fBytes:(i+1)*fBytes], features[i+sf*fCount])
}
hasher.Reset()
- hasher.Write(buff)
+ hasher.Write(sfBuff)
superfeatures = append(superfeatures, hasher.Sum64())
}
return superfeatures, nil
}
+func calcFeature(wg *sync.WaitGroup, r ReadByteReader, wSize int, fSize int, result *uint64) {
+ defer wg.Done()
+ hasher := rabinkarp64.New()
+ n, err := io.CopyN(hasher, r, int64(wSize))
+ if err != nil {
+ log.Println(n, err)
+ }
+ max := hasher.Sum64()
+ for w := 0; w < fSize-wSize; w++ {
+ b, _ := r.ReadByte()
+ hasher.Roll(b)
+ h := hasher.Sum64()
+ if h > max {
+ max = h
+ }
+ }
+ *result = max
+}
+
func SuperFeatureSize(chunkSize int, sfCount int, fCount int) int {
return FeatureSize(chunkSize, sfCount, fCount) * sfCount
}