From b3fee746c053990c88c0e067b1a654acbeb1cd1f Mon Sep 17 00:00:00 2001 From: n-peugnet Date: Fri, 8 Oct 2021 15:35:44 +0200 Subject: exp: repo 4k, better summary format, fix diff sizes --- exp/Makefile | 46 ++++++++++++++++++++++++++++++---------------- exp/exp.sh | 40 +++++++++++++++++++++++++++++++--------- 2 files changed, 61 insertions(+), 25 deletions(-) (limited to 'exp') diff --git a/exp/Makefile b/exp/Makefile index aaf377c..8fb4ce0 100644 --- a/exp/Makefile +++ b/exp/Makefile @@ -1,48 +1,62 @@ REPO_URL := https://club1.fr/~nicolas/git/dna-backup/ +SHELL := /bin/bash + +# Reset LC_NUMERIC to make printf thousands separator work as expected +export LC_NUMERIC := + export DNA_BACKUP ?= ../dna-backup export MAX_VERSION ?= 5 export COMMITS ?= commits -export BACKUP ?= backup +export DNA_4K ?= dna_4k +export DNA_8K ?= dna_8k export DIFFS ?= diffs export REPO_PATH ?= repo GIT_PATH := git GITC := git -C $(REPO_PATH) -DATADIRS := $(BACKUP) $(DIFFS) -SIZEFILES := $(DATADIRS:%=%-size) +DNADIRS := $(DNA_4K) $(DNA_8K) +DATADIRS := $(DNA_4K) $(DNA_8K) $(DIFFS) +SIZEFILES := $(DATADIRS:%=%.size) .PHONY: all exp all exp: summary.csv $(SIZEFILES) - @echo "============== SUMMARY ==============" - @cat $< | tr ',' '\t' - @echo "============== TOTAL ==============" - @for i in $(SIZEFILES); do cat $$i | paste -sd+ | bc; done | tr '\n' '\t' + @echo "=============== SUMMARY ===============" + @head -n 1 $< | while IFS="," read -r $(DATADIRS); do \ + printf "$(DATADIRS:%=\%13s)\n" $(DATADIRS:%=$$%); \ + done + @tail -n +2 $< | while IFS="," read -r $(DATADIRS); do \ + printf "$(DATADIRS:%=\%'13d)\n" $(DATADIRS:%=$$%); \ + done + @echo "================ TOTAL ================" + @for i in $(SIZEFILES); do cat $$i | paste -sd+ | bc; done | while read j; do \ + printf "%'13d " $$j; \ + done @echo summary.csv: $(SIZEFILES) echo $(DATADIRS) | tr ' ' ',' > $@ paste -d "," $^ >> $@ -backup-size: versions +$(DNADIRS:%=%.size): %.size: %.versions rm -rf $@ for i in $> $@; \ done -diffs-size: diffs run - find $< -type f -exec du -ba {} + | cut -f1 > $@ +%.size: % run + find $< -type f -exec du -ba {} + | sort -k2 | cut -f1 > $@ -versions: results +%.versions: %.results rm -rf $@ mkdir -p $@ - for i in backup/*; do \ + for i in $*/*; do \ v=`echo $$i | cut -d / -f2`; \ grep $$i $< > $@/$$v ; \ done -results: backup run +%.results: % run find $< -type f -exec du -ba {} + \ | grep -v hashes \ | sort -k2 \ @@ -53,7 +67,7 @@ run: $(COMMITS) $(DNA_BACKUP) | $(DATADIRS) ./exp.sh touch $@ -backup diffs: +$(DATADIRS): mkdir $@ $(DNA_BACKUP): .FORCE @@ -75,9 +89,9 @@ clean: mostlyclean rm -f $(COMMITS) mostlyclean: resultsclean - rm -rf $(DATADIRS) versions run + rm -rf $(DATADIRS) $(DNADIRS:%=%.versions) run resultsclean: - rm -f results summary.csv $(SIZEFILES) + rm -f summary.csv $(SIZEFILES) .FORCE: ; diff --git a/exp/exp.sh b/exp/exp.sh index f7a603c..8a7af40 100755 --- a/exp/exp.sh +++ b/exp/exp.sh @@ -5,7 +5,8 @@ # - REPO_PATH: the path of the repo the experiment is based on # - MAX_VERSION: the max number for versions for the experiment # - COMMITS: the name of the file that contains the lists of versions -# - BACKUP: the path fo the dna-backup dir +# - DNA_4K: the path fo the dna-backup dir with 4K chunksize +# - DNA_8K: the path fo the dna-backup dir with 8K chunksize # - DIFFS: the path of the git diff dir log() { @@ -22,27 +23,38 @@ last=$(tail --lines=1 $COMMITS | cut -f1) i=0 cat $COMMITS | while read line do + # Get hash hash=$(echo "$line" | cut -f1) + # Check out repo log "check out $hash" $GITC checkout $hash 2> $OUT \ || (log "error checking out"; cat $OUT; exit 1) - log "create diff for this version" + # Create git diff for this version + log "create git diff for this version" + diff=$(printf "%s/%05d.diff.gz" $DIFFS $i) $GITC diff --minimal --binary --unified=0 -l0 $prev \ | gzip \ - > "$DIFFS/$i.diff.gz" + > $diff - log "create backup for this version" - $DNA_BACKUP commit -v 2 $REPO_PATH $BACKUP + # Create 4k dna backup for this version + log "create 4k dna backup for this version" + $DNA_BACKUP commit -v 2 -c 4096 $REPO_PATH $DNA_4K + + # Create 8k dna backup for this version + log "create 8k dna backup for this version" + $DNA_BACKUP commit -v 2 $REPO_PATH $DNA_8K if [[ $(( $i % 4 )) == 0 ]] then - log "restore from diffs" + # Check restore from git diffs + log "restore from git diffs" TEMP=$(mktemp -d) for n in $(seq 0 $i) do - cat "$DIFFS/$n.diff.gz" \ + diff=$(printf "%s/%05d.diff.gz" $DIFFS $n) + cat $diff \ | gzip --decompress \ | git -C $TEMP apply --binary --unidiff-zero --whitespace=nowarn - done @@ -52,9 +64,19 @@ do || log "git patchs restore do not match source" rm -rf $TEMP - log "restore from backup" + # Check restore from 4k dna backup + log "restore from 4k dna backup" + TEMP=$(mktemp -d) + $DNA_BACKUP restore -v 2 -c 4096 $DNA_4K $TEMP + log "check restore from backup" + diff --brief --recursive $REPO_PATH $TEMP \ + || log "dna backup restore do not match source" + rm -rf $TEMP + + # Check restore from 8k dna backup + log "restore from 8k dna backup" TEMP=$(mktemp -d) - $DNA_BACKUP restore -v 2 $BACKUP $TEMP + $DNA_BACKUP restore -v 2 $DNA_8K $TEMP log "check restore from backup" diff --brief --recursive $REPO_PATH $TEMP \ || log "dna backup restore do not match source" -- cgit v1.2.3