diff options
-rw-r--r-- | docs/note-2021-10-08.md | 30 | ||||
-rw-r--r-- | exp/Makefile | 41 | ||||
-rwxr-xr-x | exp/exp.sh | 21 |
3 files changed, 57 insertions, 35 deletions
diff --git a/docs/note-2021-10-08.md b/docs/note-2021-10-08.md index fc26758..a718d53 100644 --- a/docs/note-2021-10-08.md +++ b/docs/note-2021-10-08.md @@ -4,17 +4,23 @@ Comparison with git basic ``` =============== SUMMARY =============== dna_4k dna_8k diffs nopack - 85,062,292 77,288,903 59,771,518 83,421,168 - 113,976,574 108,519,248 75,958,096 126,395,519 - 40,450,844 43,371,572 87,905,291 62,515,404 - 13,699,644 17,821,696 7,398,260 47,162,686 - 39,029,362 40,321,219 28,806,094 55,473,207 - 9,335,530 11,047,234 20,915,481 32,021,383 - 16,891,778 20,219,859 12,926,085 42,470,711 - 9,764,957 11,381,646 13,592,417 28,181,096 - 2,597 131,258 1,395 55,764 - 93,015 243,287 104,610 2,739,143 - 9,331 7,919 2,607 102,283 + 85,062,292 77,288,903 59,771,518 86,227,304 + 113,976,574 108,519,248 75,958,096 126,651,038 + 40,450,844 43,371,572 87,905,291 59,283,496 + 13,699,644 17,821,696 7,398,260 45,949,939 + 39,029,362 40,321,219 28,806,094 55,346,500 + 9,335,530 11,047,234 20,915,481 30,755,543 + 16,891,778 20,219,859 12,926,085 40,974,309 + 9,764,957 11,381,646 13,592,417 26,555,084 + 2,597 131,258 1,395 60,217 + 93,015 243,287 104,610 1,084,646 + 9,331 7,919 2,607 102,640 + 6,462,903 7,803,303 10,052,904 21,908,322 + 278,450 144,214 510 8,245 + 353 149,747 639 6,221 + 282,278 4,322 368 7,231 + 280,151 145,265 231 13,904 + 2,892,390 3,189,502 9,910,136 3,078,458 ================ TOTAL ================ - 328,315,924 330,353,841 307,381,854 480,538,364 + 338,512,449 341,790,194 327,346,642 498,013,097 ``` diff --git a/exp/Makefile b/exp/Makefile index f49d2f1..b69b0bf 100644 --- a/exp/Makefile +++ b/exp/Makefile @@ -8,33 +8,38 @@ export LC_NUMERIC := export DNA_BACKUP ?= ../dna-backup export DNA_PARAMS ?= dna_params export MAX_VERSION ?= 5 -export COMMITS ?= commits +export COMMITS ?= commits.daily DNA_4K ?= dna_4k DNA_8K ?= dna_8k NOPACK ?= nopack export GIT_NOPACK ?= $(abspath $(NOPACK)) +export REAL ?= real export DIFFS ?= diffs export REPO_PATH ?= repo export GIT_PATH ?= $(abspath git) GITC := git -C $(REPO_PATH) +# Number of initial commits to skip. This option has been added for the +# Linux kernel repo, as the first commits are too far away date wise. +SKIP_COMMITS ?= 30 + DNADIRS := $(DNA_4K) $(DNA_8K) -DATADIRS := $(DNA_4K) $(DNA_8K) $(DIFFS) $(NOPACK) +DATADIRS := $(DNA_4K) $(DNA_8K) $(DIFFS) $(NOPACK) $(REAL) SIZEFILES := $(DATADIRS:%=%.size) .PHONY: all exp all exp: summary.csv $(SIZEFILES) @echo "=============== SUMMARY ===============" @head -n 1 $< | while IFS="," read -r $(DATADIRS); do \ - printf "$(DATADIRS:%=\%13s)\n" $(DATADIRS:%=$$%); \ + printf "$(DATADIRS:%=\%14s)\n" $(DATADIRS:%=$$%); \ done @tail -n +2 $< | while IFS="," read -r $(DATADIRS); do \ - printf "$(DATADIRS:%=\%'13d)\n" $(DATADIRS:%=$$%); \ + printf "$(DATADIRS:%=\%'14d)\n" $(DATADIRS:%=$$%); \ done @echo "================ TOTAL ================" @for i in $(SIZEFILES); do cat $$i | paste -sd+ | bc; done | while read j; do \ - printf "%'13d " $$j; \ + printf "%'14d " $$j; \ done @echo @@ -42,12 +47,23 @@ summary.csv: $(SIZEFILES) echo $(DATADIRS) | tr ' ' ',' > $@ paste -d "," $^ >> $@ -$(DNADIRS:%=%.size) $(NOPACK).size: %.size: %.versions run +$(DNADIRS:%=%.size) $(REAL).size: %.size: %.versions run rm -rf $@ for i in $</*; do \ cut -f1 $$i | paste -sd+ | bc >> $@; \ done +$(NOPACK).size: $(NOPACK).versions + prev=/dev/null; for i in $</*; do \ + diff -u0 $$prev $$i \ + | sed -e '1,2d' -e '/^-/d' -e 's/^+//' -e '/^@@/d' \ + | cut -f1 \ + | paste -sd+ \ + | bc \ + >> $@; \ + prev=$$i; \ + done + %.size: % run find $< -type f -exec du -ba {} + | sort -k2 | cut -f1 > $@ @@ -65,7 +81,7 @@ $(DNADIRS:%=%.size) $(NOPACK).size: %.size: %.versions run | sort -k2 \ > $@ -run: $(COMMITS) $(DNA_BACKUP) $(DNA_PARAMS) | $(DATADIRS) $(NOPACK).versions +run: $(COMMITS) $(DNA_BACKUP) $(DNA_PARAMS) | $(DATADIRS) $(NOPACK).versions $(REAL).versions rm -rf $(DATADIRS:%=%/*) ./exp.sh touch $@ @@ -74,15 +90,22 @@ $(DNA_PARAMS): echo "$(DNA_4K) -c 4096" >> $@ echo "$(DNA_8K) -c 8192" >> $@ -$(DATADIRS) $(NOPACK).versions: +$(DATADIRS) $(NOPACK).versions $(REAL).versions: mkdir $@ $(DNA_BACKUP): .FORCE @$(MAKE) -C $(@D) --no-print-directory $(@F) -$(COMMITS): | repo git +commits.monthly: commits.daily + sed -n '0~30p' $< > $@ + +commits.weekly: commits.daily + sed -n '0~7p' $< > $@ + +commits.daily: | repo git $(GITC) log --reverse --no-merges --pretty=tformat:"%H %as" \ | sort --unique --key=2 \ + | tail -n +$(SKIP_COMMITS) \ > $@ repo git &: @@ -8,6 +8,7 @@ # - MAX_VERSION: the max number for versions for the experiment # - COMMITS: the name of the file that contains the lists of versions # - DIFFS: the path of the git diff dir +# - REAL: the path of the real size dir # - GIT_NOPACK: the path of the git nopack dir log() { @@ -26,14 +27,13 @@ rm $REPO_PATH/.git $GITC init --separate-git-dir=$GIT_NOPACK $GITC --git-dir=$GIT_NOPACK config gc.auto 0 set-git-dir $GIT_PATH -nopack_prev=0 # "empty tree" commit prev="4b825dc642cb6eb9a060e54bf8d69288fbee4904" last=$(tail --lines=1 $COMMITS | cut -f1) i=0 -cat $COMMITS | while read line +head -n $MAX_VERSION $COMMITS | while read line do # Get hash hash=$(echo "$line" | cut -f1) @@ -43,6 +43,10 @@ do $GITC checkout $hash 2> $OUT \ || (log "error checking out"; cat $OUT; exit 1) + # Save real size for this version + log "save real size for this version" + du -b --summarize $REPO_PATH > $(printf "%s.versions/%05d" $REAL $i) + # Create git diff for this version log "create git diff for this version" diff=$(printf "%s/%05d.diff.gz" $DIFFS $i) @@ -57,15 +61,8 @@ do $GITC commit -m $hash &> $OUT \ || (log "error commiting to nopack"; cat $OUT; exit 1) ls $GIT_NOPACK/objects/pack - nopack_curr=$(printf "%s.versions/%05d" $GIT_NOPACK $i) find $GIT_NOPACK -type f -exec du -ba {} + \ - | grep -v /logs/ \ - | cut -f1 \ - | paste -sd+ \ - | xargs -i echo {} - $nopack_prev \ - | bc \ - > $nopack_curr - let nopack_prev+=$(cat $nopack_curr) + > $(printf "%s.versions/%05d" $GIT_NOPACK $i) set-git-dir $GIT_PATH # Create dna backups for this version @@ -109,10 +106,6 @@ do prev=$hash let i++ - if [[ $i == $MAX_VERSION ]] - then - break - fi done # cleanup |