diff options
author | n-peugnet <n.peugnet@free.fr> | 2021-09-08 19:20:07 +0200 |
---|---|---|
committer | n-peugnet <n.peugnet@free.fr> | 2021-09-08 19:20:07 +0200 |
commit | f061a7031181ef53d034c46b696156c143451cce (patch) | |
tree | a9d70773e06e46cc313f92fa000aca1d7471f817 | |
parent | 66cb179e0c751c081fbb9ec769a409a7a8115459 (diff) | |
download | dna-backup-f061a7031181ef53d034c46b696156c143451cce.tar.gz dna-backup-f061a7031181ef53d034c46b696156c143451cce.zip |
start using chunk cache
-rw-r--r-- | TODO.md | 10 |
-rw-r--r-- | chunk.go | 38 |
-rw-r--r-- | repo.go | 39 |
3 files changed, 33 insertions, 54 deletions
@@ -23,15 +23,7 @@ priority 1 - [ ] compress before storing - [ ] uncompress before loading - [ ] store compressed chunks into tracks of trackSize (1024o) -- [ ] add chunk cache that would look like this: - ```go - type ChunkCache map[ChunkId][]byte // Do we really want to only keep the chunk content ? - - type Cache interface { - Get(id ChunkId) Chunk - Set(id ChunkId, Chunk) - } - ``` +- [x] add chunk cache... what was it for again ?? priority 2 ---------- @@ -2,7 +2,6 @@ package main import ( "bytes" - "errors" "fmt" "io" "log" @@ -25,11 +24,6 @@ type BufferedChunk interface { Bytes() []byte } -type StorerChunk interface { - Chunk - Store(path string) error -} - type ChunkId struct { Ver int Idx uint64 @@ -39,15 +33,6 @@ func (i *ChunkId) Path(repo string) string { return path.Join(repo, fmt.Sprintf(versionFmt, i.Ver), chunksName, fmt.Sprintf(chunkIdFmt, i.Idx)) } -func (i *ChunkId) Reader(repo *Repo) io.ReadSeeker { - path := i.Path(repo.path) - f, err := os.Open(path) - if err != nil { - log.Println("Cannot open chunk: ", path) - } - return f -} - func NewLoadedChunk(id *ChunkId, value []byte) *LoadedChunk { return &LoadedChunk{Id: id, value: value} } @@ -74,10 +59,6 @@ func (c *LoadedChunk) Bytes() []byte { return c.value } -func (c *LoadedChunk) Store(path string) error { - return storeChunk(c.Reader(), c.Id.Path(path)) -} - func NewStoredChunk(repo *Repo, id *ChunkId) *StoredChunk { return &StoredChunk{repo: repo, Id: id} } @@ -93,7 +74,7 @@ func (c *StoredChunk) GetId() *ChunkId { func (c *StoredChunk) Reader() io.ReadSeeker { // log.Printf("Chunk %d: Reading from file\n", c.id) - return c.Id.Reader(c.repo) + return c.repo.LoadChunkContent(c.Id) } func (c *StoredChunk) Len() int { @@ -142,7 +123,7 @@ type DeltaChunk struct { func (c *DeltaChunk) Reader() io.ReadSeeker { var buff bytes.Buffer - c.repo.Patcher().Patch(c.Source.Reader(c.repo), &buff, bytes.NewReader(c.Patch)) + c.repo.Patcher().Patch(c.repo.LoadChunkContent(c.Source), &buff, 
bytes.NewReader(c.Patch)) return bytes.NewReader(buff.Bytes()) } @@ -150,18 +131,3 @@ func (c *DeltaChunk) Reader() io.ReadSeeker { func (c *DeltaChunk) Len() int { return c.Size } - -func storeChunk(r io.Reader, path string) error { - file, err := os.Create(path) - if err != nil { - return errors.New(fmt.Sprintf("Error creating chunk for '%s'; %s\n", path, err)) - } - n, err := io.Copy(file, r) - if err != nil { - return errors.New(fmt.Sprintf("Error writing chunk content for '%s', written %d bytes: %s\n", path, n, err)) - } - if err := file.Close(); err != nil { - return errors.New(fmt.Sprintf("Error closing chunk for '%s': %s\n", path, err)) - } - return nil -} @@ -28,6 +28,7 @@ import ( "bufio" "bytes" "encoding/gob" + "errors" "fmt" "hash" "io" @@ -225,19 +226,39 @@ func loadFileList(path string) []File { return files } -// GetChunk loads a chunk from the repo. +func (r *Repo) StoreChunkContent(id *ChunkId, reader io.Reader) error { + path := id.Path(r.path) + file, err := os.Create(path) + if err != nil { + return errors.New(fmt.Sprintf("Error creating chunk for '%s'; %s\n", path, err)) + } + n, err := io.Copy(file, reader) + if err != nil { + return errors.New(fmt.Sprintf("Error writing chunk content for '%s', written %d bytes: %s\n", path, n, err)) + } + if err := file.Close(); err != nil { + return errors.New(fmt.Sprintf("Error closing chunk for '%s': %s\n", path, err)) + } + return nil +} + +// LoadChunkContent loads a chunk from the repo. // If the chunk is in cache, get it from cache, else read it from drive. 
-func (r *Repo) GetChunk(id *ChunkId) *LoadedChunk { - var err error +func (r *Repo) LoadChunkContent(id *ChunkId) io.ReadSeeker { value, exists := r.chunkCache.Get(id) if !exists { - value, err = io.ReadAll(id.Reader(r)) + path := id.Path(r.path) + f, err := os.Open(path) + if err != nil { + log.Printf("Cannot open chunk '%s': %s\n", path, err) + } + value, err = io.ReadAll(f) if err != nil { - log.Panicf("Could not read from chunk %d: %s", id, err) + log.Panicf("Could not read from chunk '%s': %s\n", path, err) } r.chunkCache.Set(id, value) } - return NewLoadedChunk(id, value) + return bytes.NewReader(value) } func storeChunks(dest string, chunks <-chan []byte) { @@ -349,7 +370,7 @@ func (r *Repo) tryDeltaEncodeChunk(temp BufferedChunk) (Chunk, bool) { id, found := r.findSimilarChunk(temp) if found { var buff bytes.Buffer - if err := r.differ.Diff(id.Reader(r), temp.Reader(), &buff); err != nil { + if err := r.differ.Diff(r.LoadChunkContent(id), temp.Reader(), &buff); err != nil { log.Println("Error trying delta encode chunk:", temp, "with source:", id, ":", err) } else { return &DeltaChunk{ @@ -377,9 +398,9 @@ func (r *Repo) encodeTempChunk(temp BufferedChunk, version int, last *uint64) (c ic := NewLoadedChunk(id, temp.Bytes()) hasher := rabinkarp64.NewFromPol(r.pol) r.hashAndStoreChunk(ic, hasher) - ic.Store(r.path) + r.StoreChunkContent(id, ic.Reader()) log.Println("Add new chunk", id) - return ic, false + return NewStoredChunk(r, id), false } log.Println("Add new partial chunk of size:", chunk.Len()) return |