aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorn-peugnet <n.peugnet@free.fr>2021-10-11 14:49:16 +0200
committern-peugnet <n.peugnet@free.fr>2021-10-11 14:49:16 +0200
commitb703ef5246014d5450a707c0f295232f830c5493 (patch)
tree56cdc03c07c941e702600aa697b9d008cf7bad44
parentdd0b1178bb8dd2f9ab63cdb8ac32099d8b75d661 (diff)
downloaddna-backup-b703ef5246014d5450a707c0f295232f830c5493.tar.gz
dna-backup-b703ef5246014d5450a707c0f295232f830c5493.zip
exp real size, fix nopack measurement, time ranges
-rw-r--r--docs/note-2021-10-08.md30
-rw-r--r--exp/Makefile41
-rwxr-xr-xexp/exp.sh21
3 files changed, 57 insertions, 35 deletions
diff --git a/docs/note-2021-10-08.md b/docs/note-2021-10-08.md
index fc26758..a718d53 100644
--- a/docs/note-2021-10-08.md
+++ b/docs/note-2021-10-08.md
@@ -4,17 +4,23 @@ Comparison with git basic
```
=============== SUMMARY ===============
dna_4k dna_8k diffs nopack
- 85,062,292 77,288,903 59,771,518 83,421,168
- 113,976,574 108,519,248 75,958,096 126,395,519
- 40,450,844 43,371,572 87,905,291 62,515,404
- 13,699,644 17,821,696 7,398,260 47,162,686
- 39,029,362 40,321,219 28,806,094 55,473,207
- 9,335,530 11,047,234 20,915,481 32,021,383
- 16,891,778 20,219,859 12,926,085 42,470,711
- 9,764,957 11,381,646 13,592,417 28,181,096
- 2,597 131,258 1,395 55,764
- 93,015 243,287 104,610 2,739,143
- 9,331 7,919 2,607 102,283
+ 85,062,292 77,288,903 59,771,518 86,227,304
+ 113,976,574 108,519,248 75,958,096 126,651,038
+ 40,450,844 43,371,572 87,905,291 59,283,496
+ 13,699,644 17,821,696 7,398,260 45,949,939
+ 39,029,362 40,321,219 28,806,094 55,346,500
+ 9,335,530 11,047,234 20,915,481 30,755,543
+ 16,891,778 20,219,859 12,926,085 40,974,309
+ 9,764,957 11,381,646 13,592,417 26,555,084
+ 2,597 131,258 1,395 60,217
+ 93,015 243,287 104,610 1,084,646
+ 9,331 7,919 2,607 102,640
+ 6,462,903 7,803,303 10,052,904 21,908,322
+ 278,450 144,214 510 8,245
+ 353 149,747 639 6,221
+ 282,278 4,322 368 7,231
+ 280,151 145,265 231 13,904
+ 2,892,390 3,189,502 9,910,136 3,078,458
================ TOTAL ================
- 328,315,924 330,353,841 307,381,854 480,538,364
+ 338,512,449 341,790,194 327,346,642 498,013,097
```
diff --git a/exp/Makefile b/exp/Makefile
index f49d2f1..b69b0bf 100644
--- a/exp/Makefile
+++ b/exp/Makefile
@@ -8,33 +8,38 @@ export LC_NUMERIC :=
export DNA_BACKUP ?= ../dna-backup
export DNA_PARAMS ?= dna_params
export MAX_VERSION ?= 5
-export COMMITS ?= commits
+export COMMITS ?= commits.daily
DNA_4K ?= dna_4k
DNA_8K ?= dna_8k
NOPACK ?= nopack
export GIT_NOPACK ?= $(abspath $(NOPACK))
+export REAL ?= real
export DIFFS ?= diffs
export REPO_PATH ?= repo
export GIT_PATH ?= $(abspath git)
GITC := git -C $(REPO_PATH)
+# Number of initial commits to skip. This option was added for the
+# Linux kernel repo, whose first commits are too far away date-wise.
+SKIP_COMMITS ?= 30
+
DNADIRS := $(DNA_4K) $(DNA_8K)
-DATADIRS := $(DNA_4K) $(DNA_8K) $(DIFFS) $(NOPACK)
+DATADIRS := $(DNA_4K) $(DNA_8K) $(DIFFS) $(NOPACK) $(REAL)
SIZEFILES := $(DATADIRS:%=%.size)
.PHONY: all exp
all exp: summary.csv $(SIZEFILES)
@echo "=============== SUMMARY ==============="
@head -n 1 $< | while IFS="," read -r $(DATADIRS); do \
- printf "$(DATADIRS:%=\%13s)\n" $(DATADIRS:%=$$%); \
+ printf "$(DATADIRS:%=\%14s)\n" $(DATADIRS:%=$$%); \
done
@tail -n +2 $< | while IFS="," read -r $(DATADIRS); do \
- printf "$(DATADIRS:%=\%'13d)\n" $(DATADIRS:%=$$%); \
+ printf "$(DATADIRS:%=\%'14d)\n" $(DATADIRS:%=$$%); \
done
@echo "================ TOTAL ================"
@for i in $(SIZEFILES); do cat $$i | paste -sd+ | bc; done | while read j; do \
- printf "%'13d " $$j; \
+ printf "%'14d " $$j; \
done
@echo
@@ -42,12 +47,23 @@ summary.csv: $(SIZEFILES)
echo $(DATADIRS) | tr ' ' ',' > $@
paste -d "," $^ >> $@
-$(DNADIRS:%=%.size) $(NOPACK).size: %.size: %.versions run
+$(DNADIRS:%=%.size) $(REAL).size: %.size: %.versions run
rm -rf $@
for i in $</*; do \
cut -f1 $$i | paste -sd+ | bc >> $@; \
done
+# Per-version growth of the nopack store: for each version file, sum the first
+# field of the lines it adds over the previous one. Rebuilt from scratch after `run`.
+$(NOPACK).size: $(NOPACK).versions run
+ prev=/dev/null; for i in $</*; do \
+ diff -u0 $$prev $$i \
+ | sed -e '1,2d' -e '/^-/d' -e 's/^+//' -e '/^@@/d' \
+ | cut -f1 \
+ | paste -sd+ | bc; \
+ prev=$$i; \
+ done > $@
+
%.size: % run
find $< -type f -exec du -ba {} + | sort -k2 | cut -f1 > $@
@@ -65,7 +81,7 @@ $(DNADIRS:%=%.size) $(NOPACK).size: %.size: %.versions run
| sort -k2 \
> $@
-run: $(COMMITS) $(DNA_BACKUP) $(DNA_PARAMS) | $(DATADIRS) $(NOPACK).versions
+run: $(COMMITS) $(DNA_BACKUP) $(DNA_PARAMS) | $(DATADIRS) $(NOPACK).versions $(REAL).versions
rm -rf $(DATADIRS:%=%/*)
./exp.sh
touch $@
@@ -74,15 +90,22 @@ $(DNA_PARAMS):
echo "$(DNA_4K) -c 4096" >> $@
echo "$(DNA_8K) -c 8192" >> $@
-$(DATADIRS) $(NOPACK).versions:
+$(DATADIRS) $(NOPACK).versions $(REAL).versions:
mkdir $@
$(DNA_BACKUP): .FORCE
@$(MAKE) -C $(@D) --no-print-directory $(@F)
-$(COMMITS): | repo git
+commits.monthly: commits.daily
+ sed -n '0~30p' $< > $@
+
+commits.weekly: commits.daily
+ sed -n '0~7p' $< > $@
+
+commits.daily: | repo git # one commit per author date, first $(SKIP_COMMITS) entries dropped
$(GITC) log --reverse --no-merges --pretty=tformat:"%H %as" \
| sort --unique --key=2 \
+ | tail -n +$$(( $(SKIP_COMMITS) + 1 )) \
> $@
repo git &:
diff --git a/exp/exp.sh b/exp/exp.sh
index 49f429b..02cea84 100755
--- a/exp/exp.sh
+++ b/exp/exp.sh
@@ -8,6 +8,7 @@
# - MAX_VERSION: the max number for versions for the experiment
# - COMMITS: the name of the file that contains the lists of versions
# - DIFFS: the path of the git diff dir
+# - REAL: the path of the real size dir
# - GIT_NOPACK: the path of the git nopack dir
log() {
@@ -26,14 +27,13 @@ rm $REPO_PATH/.git
$GITC init --separate-git-dir=$GIT_NOPACK
$GITC --git-dir=$GIT_NOPACK config gc.auto 0
set-git-dir $GIT_PATH
-nopack_prev=0
# "empty tree" commit
prev="4b825dc642cb6eb9a060e54bf8d69288fbee4904"
last=$(tail --lines=1 $COMMITS | cut -f1)
i=0
-cat $COMMITS | while read line
+head -n $MAX_VERSION $COMMITS | while read line
do
# Get hash
hash=$(echo "$line" | cut -f1)
@@ -43,6 +43,10 @@ do
$GITC checkout $hash 2> $OUT \
|| (log "error checking out"; cat $OUT; exit 1)
+ # Save real size for this version
+ log "save real size for this version"
+ du -b --summarize $REPO_PATH > $(printf "%s.versions/%05d" $REAL $i)
+
# Create git diff for this version
log "create git diff for this version"
diff=$(printf "%s/%05d.diff.gz" $DIFFS $i)
@@ -57,15 +61,8 @@ do
$GITC commit -m $hash &> $OUT \
|| (log "error commiting to nopack"; cat $OUT; exit 1)
ls $GIT_NOPACK/objects/pack
- nopack_curr=$(printf "%s.versions/%05d" $GIT_NOPACK $i)
find $GIT_NOPACK -type f -exec du -ba {} + \
- | grep -v /logs/ \
- | cut -f1 \
- | paste -sd+ \
- | xargs -i echo {} - $nopack_prev \
- | bc \
- > $nopack_curr
- let nopack_prev+=$(cat $nopack_curr)
+ > $(printf "%s.versions/%05d" $GIT_NOPACK $i)
set-git-dir $GIT_PATH
# Create dna backups for this version
@@ -109,10 +106,6 @@ do
prev=$hash
let i++
- if [[ $i == $MAX_VERSION ]]
- then
- break
- fi
done
# cleanup