about summary refs log tree commit diff
path: root/exp
diff options
context:
space:
mode:
author n-peugnet <n.peugnet@free.fr> 2021-10-08 15:35:44 +0200
committer n-peugnet <n.peugnet@free.fr> 2021-10-08 15:35:44 +0200
commit b3fee746c053990c88c0e067b1a654acbeb1cd1f (patch)
tree de29e4efaaaccab453d321079f06831c41859b50 /exp
parent be1ad1c114b77b654bd3e79fdb18b5d5cf93ddd1 (diff)
download dna-backup-b3fee746c053990c88c0e067b1a654acbeb1cd1f.tar.gz
dna-backup-b3fee746c053990c88c0e067b1a654acbeb1cd1f.zip
exp: repo 4k, better summary format, fix diff sizes
Diffstat (limited to 'exp')
-rw-r--r-- exp/Makefile 46
-rwxr-xr-x exp/exp.sh 40
2 files changed, 61 insertions, 25 deletions
diff --git a/exp/Makefile b/exp/Makefile
index aaf377c..8fb4ce0 100644
--- a/exp/Makefile
+++ b/exp/Makefile
@@ -1,48 +1,62 @@
REPO_URL := https://club1.fr/~nicolas/git/dna-backup/
+SHELL := /bin/bash
+
+# Reset LC_NUMERIC to make printf thousands separator work as expected
+export LC_NUMERIC :=
+
export DNA_BACKUP ?= ../dna-backup
export MAX_VERSION ?= 5
export COMMITS ?= commits
-export BACKUP ?= backup
+export DNA_4K ?= dna_4k
+export DNA_8K ?= dna_8k
export DIFFS ?= diffs
export REPO_PATH ?= repo
GIT_PATH := git
GITC := git -C $(REPO_PATH)
-DATADIRS := $(BACKUP) $(DIFFS)
-SIZEFILES := $(DATADIRS:%=%-size)
+DNADIRS := $(DNA_4K) $(DNA_8K)
+DATADIRS := $(DNA_4K) $(DNA_8K) $(DIFFS)
+SIZEFILES := $(DATADIRS:%=%.size)
.PHONY: all exp
all exp: summary.csv $(SIZEFILES)
- @echo "============== SUMMARY =============="
- @cat $< | tr ',' '\t'
- @echo "============== TOTAL =============="
- @for i in $(SIZEFILES); do cat $$i | paste -sd+ | bc; done | tr '\n' '\t'
+ @echo "=============== SUMMARY ==============="
+ @head -n 1 $< | while IFS="," read -r $(DATADIRS); do \
+ printf "$(DATADIRS:%=\%13s)\n" $(DATADIRS:%=$$%); \
+ done
+ @tail -n +2 $< | while IFS="," read -r $(DATADIRS); do \
+ printf "$(DATADIRS:%=\%'13d)\n" $(DATADIRS:%=$$%); \
+ done
+ @echo "================ TOTAL ================"
+ @for i in $(SIZEFILES); do cat $$i | paste -sd+ | bc; done | while read j; do \
+ printf "%'13d " $$j; \
+ done
@echo
summary.csv: $(SIZEFILES)
echo $(DATADIRS) | tr ' ' ',' > $@
paste -d "," $^ >> $@
-backup-size: versions
+$(DNADIRS:%=%.size): %.size: %.versions
rm -rf $@
for i in $</*; do \
cut -f1 $$i | paste -sd+ | bc >> $@; \
done
-diffs-size: diffs run
- find $< -type f -exec du -ba {} + | cut -f1 > $@
+%.size: % run
+ find $< -type f -exec du -ba {} + | sort -k2 | cut -f1 > $@
-versions: results
+%.versions: %.results
rm -rf $@
mkdir -p $@
- for i in backup/*; do \
+ for i in $*/*; do \
v=`echo $$i | cut -d / -f2`; \
grep $$i $< > $@/$$v ; \
done
-results: backup run
+%.results: % run
find $< -type f -exec du -ba {} + \
| grep -v hashes \
| sort -k2 \
@@ -53,7 +67,7 @@ run: $(COMMITS) $(DNA_BACKUP) | $(DATADIRS)
./exp.sh
touch $@
-backup diffs:
+$(DATADIRS):
mkdir $@
$(DNA_BACKUP): .FORCE
@@ -75,9 +89,9 @@ clean: mostlyclean
rm -f $(COMMITS)
mostlyclean: resultsclean
- rm -rf $(DATADIRS) versions run
+ rm -rf $(DATADIRS) $(DNADIRS:%=%.versions) run
resultsclean:
- rm -f results summary.csv $(SIZEFILES)
+ rm -f summary.csv $(SIZEFILES)
.FORCE: ;
diff --git a/exp/exp.sh b/exp/exp.sh
index f7a603c..8a7af40 100755
--- a/exp/exp.sh
+++ b/exp/exp.sh
@@ -5,7 +5,8 @@
# - REPO_PATH: the path of the repo the experiment is based on
# - MAX_VERSION: the max number for versions for the experiment
# - COMMITS: the name of the file that contains the lists of versions
-# - BACKUP: the path fo the dna-backup dir
+# - DNA_4K: the path of the dna-backup dir with 4K chunk size
+# - DNA_8K: the path of the dna-backup dir with 8K chunk size
# - DIFFS: the path of the git diff dir
log() {
@@ -22,27 +23,38 @@ last=$(tail --lines=1 $COMMITS | cut -f1)
i=0
cat $COMMITS | while read line
do
+ # Get hash
hash=$(echo "$line" | cut -f1)
+ # Check out repo
log "check out $hash"
$GITC checkout $hash 2> $OUT \
|| (log "error checking out"; cat $OUT; exit 1)
- log "create diff for this version"
+ # Create git diff for this version
+ log "create git diff for this version"
+ diff=$(printf "%s/%05d.diff.gz" $DIFFS $i)
$GITC diff --minimal --binary --unified=0 -l0 $prev \
| gzip \
- > "$DIFFS/$i.diff.gz"
+ > $diff
- log "create backup for this version"
- $DNA_BACKUP commit -v 2 $REPO_PATH $BACKUP
+ # Create 4k dna backup for this version
+ log "create 4k dna backup for this version"
+ $DNA_BACKUP commit -v 2 -c 4096 $REPO_PATH $DNA_4K
+
+ # Create 8k dna backup for this version
+ log "create 8k dna backup for this version"
+ $DNA_BACKUP commit -v 2 $REPO_PATH $DNA_8K
if [[ $(( $i % 4 )) == 0 ]]
then
- log "restore from diffs"
+ # Check restore from git diffs
+ log "restore from git diffs"
TEMP=$(mktemp -d)
for n in $(seq 0 $i)
do
- cat "$DIFFS/$n.diff.gz" \
+ diff=$(printf "%s/%05d.diff.gz" $DIFFS $n)
+ cat $diff \
| gzip --decompress \
| git -C $TEMP apply --binary --unidiff-zero --whitespace=nowarn -
done
@@ -52,9 +64,19 @@ do
|| log "git patchs restore do not match source"
rm -rf $TEMP
- log "restore from backup"
+ # Check restore from 4k dna backup
+ log "restore from 4k dna backup"
+ TEMP=$(mktemp -d)
+ $DNA_BACKUP restore -v 2 -c 4096 $DNA_4K $TEMP
+ log "check restore from backup"
+ diff --brief --recursive $REPO_PATH $TEMP \
+ || log "dna backup restore do not match source"
+ rm -rf $TEMP
+
+ # Check restore from 8k dna backup
+ log "restore from 8k dna backup"
TEMP=$(mktemp -d)
- $DNA_BACKUP restore -v 2 $BACKUP $TEMP
+ $DNA_BACKUP restore -v 2 $DNA_8K $TEMP
log "check restore from backup"
diff --brief --recursive $REPO_PATH $TEMP \
|| log "dna backup restore do not match source"