diff options
Diffstat (limited to 'exp')
-rw-r--r-- | exp/Makefile | 41 | ||||
-rwxr-xr-x | exp/exp.sh | 21 |
2 files changed, 39 insertions, 23 deletions
diff --git a/exp/Makefile b/exp/Makefile index f49d2f1..b69b0bf 100644 --- a/exp/Makefile +++ b/exp/Makefile @@ -8,33 +8,38 @@ export LC_NUMERIC := export DNA_BACKUP ?= ../dna-backup export DNA_PARAMS ?= dna_params export MAX_VERSION ?= 5 -export COMMITS ?= commits +export COMMITS ?= commits.daily DNA_4K ?= dna_4k DNA_8K ?= dna_8k NOPACK ?= nopack export GIT_NOPACK ?= $(abspath $(NOPACK)) +export REAL ?= real export DIFFS ?= diffs export REPO_PATH ?= repo export GIT_PATH ?= $(abspath git) GITC := git -C $(REPO_PATH) +# Number of initial commits to skip. This option has been added for the +# Linux kernel repo, as the first commits are too far away date wise. +SKIP_COMMITS ?= 30 + DNADIRS := $(DNA_4K) $(DNA_8K) -DATADIRS := $(DNA_4K) $(DNA_8K) $(DIFFS) $(NOPACK) +DATADIRS := $(DNA_4K) $(DNA_8K) $(DIFFS) $(NOPACK) $(REAL) SIZEFILES := $(DATADIRS:%=%.size) .PHONY: all exp all exp: summary.csv $(SIZEFILES) @echo "=============== SUMMARY ===============" @head -n 1 $< | while IFS="," read -r $(DATADIRS); do \ - printf "$(DATADIRS:%=\%13s)\n" $(DATADIRS:%=$$%); \ + printf "$(DATADIRS:%=\%14s)\n" $(DATADIRS:%=$$%); \ done @tail -n +2 $< | while IFS="," read -r $(DATADIRS); do \ - printf "$(DATADIRS:%=\%'13d)\n" $(DATADIRS:%=$$%); \ + printf "$(DATADIRS:%=\%'14d)\n" $(DATADIRS:%=$$%); \ done @echo "================ TOTAL ================" @for i in $(SIZEFILES); do cat $$i | paste -sd+ | bc; done | while read j; do \ - printf "%'13d " $$j; \ + printf "%'14d " $$j; \ done @echo @@ -42,12 +47,23 @@ summary.csv: $(SIZEFILES) echo $(DATADIRS) | tr ' ' ',' > $@ paste -d "," $^ >> $@ -$(DNADIRS:%=%.size) $(NOPACK).size: %.size: %.versions run +$(DNADIRS:%=%.size) $(REAL).size: %.size: %.versions run rm -rf $@ for i in $</*; do \ cut -f1 $$i | paste -sd+ | bc >> $@; \ done +$(NOPACK).size: $(NOPACK).versions + prev=/dev/null; for i in $</*; do \ + diff -u0 $$prev $$i \ + | sed -e '1,2d' -e '/^-/d' -e 's/^+//' -e '/^@@/d' \ + | cut -f1 \ + | paste -sd+ \ + | bc \ + >> $@; \ + prev=$$i; \ + done + %.size: % run find $< -type f -exec du -ba {} + | sort -k2 | cut -f1 > $@ @@ -65,7 +81,7 @@ $(DNADIRS:%=%.size) $(NOPACK).size: %.size: %.versions run | sort -k2 \ > $@ -run: $(COMMITS) $(DNA_BACKUP) $(DNA_PARAMS) | $(DATADIRS) $(NOPACK).versions +run: $(COMMITS) $(DNA_BACKUP) $(DNA_PARAMS) | $(DATADIRS) $(NOPACK).versions $(REAL).versions rm -rf $(DATADIRS:%=%/*) ./exp.sh touch $@ @@ -74,15 +90,22 @@ $(DNA_PARAMS): echo "$(DNA_4K) -c 4096" >> $@ echo "$(DNA_8K) -c 8192" >> $@ -$(DATADIRS) $(NOPACK).versions: +$(DATADIRS) $(NOPACK).versions $(REAL).versions: mkdir $@ $(DNA_BACKUP): .FORCE @$(MAKE) -C $(@D) --no-print-directory $(@F) -$(COMMITS): | repo git +commits.monthly: commits.daily + sed -n '0~30p' $< > $@ + +commits.weekly: commits.daily + sed -n '0~7p' $< > $@ + +commits.daily: | repo git $(GITC) log --reverse --no-merges --pretty=tformat:"%H %as" \ | sort --unique --key=2 \ + | tail -n +$(SKIP_COMMITS) \ > $@ repo git &: @@ -8,6 +8,7 @@ # - MAX_VERSION: the max number for versions for the experiment # - COMMITS: the name of the file that contains the lists of versions # - DIFFS: the path of the git diff dir +# - REAL: the path of the real size dir # - GIT_NOPACK: the path of the git nopack dir log() { @@ -26,14 +27,13 @@ rm $REPO_PATH/.git $GITC init --separate-git-dir=$GIT_NOPACK $GITC --git-dir=$GIT_NOPACK config gc.auto 0 set-git-dir $GIT_PATH -nopack_prev=0 # "empty tree" commit prev="4b825dc642cb6eb9a060e54bf8d69288fbee4904" last=$(tail --lines=1 $COMMITS | cut -f1) i=0 -cat $COMMITS | while read line +head -n $MAX_VERSION $COMMITS | while read line do # Get hash hash=$(echo "$line" | cut -f1) @@ -43,6 +43,10 @@ do $GITC checkout $hash 2> $OUT \ || (log "error checking out"; cat $OUT; exit 1) + # Save real size for this version + log "save real size for this version" + du -b --summarize $REPO_PATH > $(printf "%s.versions/%05d" $REAL $i) + # Create git diff for this version log "create git diff for this version" diff=$(printf "%s/%05d.diff.gz" $DIFFS $i) @@ -57,15 +61,8 @@ do $GITC commit -m $hash &> $OUT \ || (log "error commiting to nopack"; cat $OUT; exit 1) ls $GIT_NOPACK/objects/pack - nopack_curr=$(printf "%s.versions/%05d" $GIT_NOPACK $i) find $GIT_NOPACK -type f -exec du -ba {} + \ - | grep -v /logs/ \ - | cut -f1 \ - | paste -sd+ \ - | xargs -i echo {} - $nopack_prev \ - | bc \ - > $nopack_curr - let nopack_prev+=$(cat $nopack_curr) + > $(printf "%s.versions/%05d" $GIT_NOPACK $i) set-git-dir $GIT_PATH # Create dna backups for this version @@ -109,10 +106,6 @@ do prev=$hash let i++ - if [[ $i == $MAX_VERSION ]] - then - break - fi done # cleanup |