aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorn-peugnet <n.peugnet@free.fr>2021-09-08 19:20:07 +0200
committern-peugnet <n.peugnet@free.fr>2021-09-08 19:20:07 +0200
commitf061a7031181ef53d034c46b696156c143451cce (patch)
treea9d70773e06e46cc313f92fa000aca1d7471f817
parent66cb179e0c751c081fbb9ec769a409a7a8115459 (diff)
downloaddna-backup-f061a7031181ef53d034c46b696156c143451cce.tar.gz
dna-backup-f061a7031181ef53d034c46b696156c143451cce.zip
start using chunk cache
-rw-r--r--TODO.md10
-rw-r--r--chunk.go38
-rw-r--r--repo.go39
3 files changed, 33 insertions, 54 deletions
diff --git a/TODO.md b/TODO.md
index 72301cf..b35bff4 100644
--- a/TODO.md
+++ b/TODO.md
@@ -23,15 +23,7 @@ priority 1
- [ ] compress before storing
- [ ] uncompress before loading
- [ ] store compressed chunks into tracks of trackSize (1024o)
-- [ ] add chunk cache that would look like this:
- ```go
- type ChunkCache map[ChunkId][]byte // Do we really want to only keep the chunk content ?
-
- type Cache interface {
- Get(id ChunkId) Chunk
- Set(id ChunkId, Chunk)
- }
- ```
+- [x] add chunk cache... what was it for again ??
priority 2
----------
diff --git a/chunk.go b/chunk.go
index ba8334b..196152e 100644
--- a/chunk.go
+++ b/chunk.go
@@ -2,7 +2,6 @@ package main
import (
"bytes"
- "errors"
"fmt"
"io"
"log"
@@ -25,11 +24,6 @@ type BufferedChunk interface {
Bytes() []byte
}
-type StorerChunk interface {
- Chunk
- Store(path string) error
-}
-
type ChunkId struct {
Ver int
Idx uint64
@@ -39,15 +33,6 @@ func (i *ChunkId) Path(repo string) string {
return path.Join(repo, fmt.Sprintf(versionFmt, i.Ver), chunksName, fmt.Sprintf(chunkIdFmt, i.Idx))
}
-func (i *ChunkId) Reader(repo *Repo) io.ReadSeeker {
- path := i.Path(repo.path)
- f, err := os.Open(path)
- if err != nil {
- log.Println("Cannot open chunk: ", path)
- }
- return f
-}
-
func NewLoadedChunk(id *ChunkId, value []byte) *LoadedChunk {
return &LoadedChunk{Id: id, value: value}
}
@@ -74,10 +59,6 @@ func (c *LoadedChunk) Bytes() []byte {
return c.value
}
-func (c *LoadedChunk) Store(path string) error {
- return storeChunk(c.Reader(), c.Id.Path(path))
-}
-
func NewStoredChunk(repo *Repo, id *ChunkId) *StoredChunk {
return &StoredChunk{repo: repo, Id: id}
}
@@ -93,7 +74,7 @@ func (c *StoredChunk) GetId() *ChunkId {
func (c *StoredChunk) Reader() io.ReadSeeker {
// log.Printf("Chunk %d: Reading from file\n", c.id)
- return c.Id.Reader(c.repo)
+ return c.repo.LoadChunkContent(c.Id)
}
func (c *StoredChunk) Len() int {
@@ -142,7 +123,7 @@ type DeltaChunk struct {
func (c *DeltaChunk) Reader() io.ReadSeeker {
var buff bytes.Buffer
- c.repo.Patcher().Patch(c.Source.Reader(c.repo), &buff, bytes.NewReader(c.Patch))
+ c.repo.Patcher().Patch(c.repo.LoadChunkContent(c.Source), &buff, bytes.NewReader(c.Patch))
return bytes.NewReader(buff.Bytes())
}
@@ -150,18 +131,3 @@ func (c *DeltaChunk) Reader() io.ReadSeeker {
func (c *DeltaChunk) Len() int {
return c.Size
}
-
-func storeChunk(r io.Reader, path string) error {
- file, err := os.Create(path)
- if err != nil {
- return errors.New(fmt.Sprintf("Error creating chunk for '%s'; %s\n", path, err))
- }
- n, err := io.Copy(file, r)
- if err != nil {
- return errors.New(fmt.Sprintf("Error writing chunk content for '%s', written %d bytes: %s\n", path, n, err))
- }
- if err := file.Close(); err != nil {
- return errors.New(fmt.Sprintf("Error closing chunk for '%s': %s\n", path, err))
- }
- return nil
-}
diff --git a/repo.go b/repo.go
index 49ff088..2be457d 100644
--- a/repo.go
+++ b/repo.go
@@ -28,6 +28,7 @@ import (
"bufio"
"bytes"
"encoding/gob"
+ "errors"
"fmt"
"hash"
"io"
@@ -225,19 +226,39 @@ func loadFileList(path string) []File {
return files
}
-// GetChunk loads a chunk from the repo.
+func (r *Repo) StoreChunkContent(id *ChunkId, reader io.Reader) error {
+ path := id.Path(r.path)
+ file, err := os.Create(path)
+ if err != nil {
+ return errors.New(fmt.Sprintf("Error creating chunk for '%s'; %s\n", path, err))
+ }
+ n, err := io.Copy(file, reader)
+ if err != nil {
+ return errors.New(fmt.Sprintf("Error writing chunk content for '%s', written %d bytes: %s\n", path, n, err))
+ }
+ if err := file.Close(); err != nil {
+ return errors.New(fmt.Sprintf("Error closing chunk for '%s': %s\n", path, err))
+ }
+ return nil
+}
+
+// LoadChunkContent loads a chunk from the repo.
// If the chunk is in cache, get it from cache, else read it from drive.
-func (r *Repo) GetChunk(id *ChunkId) *LoadedChunk {
- var err error
+func (r *Repo) LoadChunkContent(id *ChunkId) io.ReadSeeker {
value, exists := r.chunkCache.Get(id)
if !exists {
- value, err = io.ReadAll(id.Reader(r))
+ path := id.Path(r.path)
+ f, err := os.Open(path)
+ if err != nil {
+ log.Printf("Cannot open chunk '%s': %s\n", path, err)
+ }
+ value, err = io.ReadAll(f)
if err != nil {
- log.Panicf("Could not read from chunk %d: %s", id, err)
+ log.Panicf("Could not read from chunk '%s': %s\n", path, err)
}
r.chunkCache.Set(id, value)
}
- return NewLoadedChunk(id, value)
+ return bytes.NewReader(value)
}
func storeChunks(dest string, chunks <-chan []byte) {
@@ -349,7 +370,7 @@ func (r *Repo) tryDeltaEncodeChunk(temp BufferedChunk) (Chunk, bool) {
id, found := r.findSimilarChunk(temp)
if found {
var buff bytes.Buffer
- if err := r.differ.Diff(id.Reader(r), temp.Reader(), &buff); err != nil {
+ if err := r.differ.Diff(r.LoadChunkContent(id), temp.Reader(), &buff); err != nil {
log.Println("Error trying delta encode chunk:", temp, "with source:", id, ":", err)
} else {
return &DeltaChunk{
@@ -377,9 +398,9 @@ func (r *Repo) encodeTempChunk(temp BufferedChunk, version int, last *uint64) (c
ic := NewLoadedChunk(id, temp.Bytes())
hasher := rabinkarp64.NewFromPol(r.pol)
r.hashAndStoreChunk(ic, hasher)
- ic.Store(r.path)
+ r.StoreChunkContent(id, ic.Reader())
log.Println("Add new chunk", id)
- return ic, false
+ return NewStoredChunk(r, id), false
}
log.Println("Add new partial chunk of size:", chunk.Len())
return