aboutsummaryrefslogtreecommitdiff
path: root/exp
diff options
context:
space:
mode:
Diffstat (limited to 'exp')
-rw-r--r--exp/Makefile41
-rwxr-xr-xexp/exp.sh21
2 files changed, 39 insertions, 23 deletions
diff --git a/exp/Makefile b/exp/Makefile
index f49d2f1..b69b0bf 100644
--- a/exp/Makefile
+++ b/exp/Makefile
@@ -8,33 +8,38 @@ export LC_NUMERIC :=
export DNA_BACKUP ?= ../dna-backup
export DNA_PARAMS ?= dna_params
export MAX_VERSION ?= 5
-export COMMITS ?= commits
+export COMMITS ?= commits.daily
DNA_4K ?= dna_4k
DNA_8K ?= dna_8k
NOPACK ?= nopack
export GIT_NOPACK ?= $(abspath $(NOPACK))
+export REAL ?= real
export DIFFS ?= diffs
export REPO_PATH ?= repo
export GIT_PATH ?= $(abspath git)
GITC := git -C $(REPO_PATH)
+# Number of initial commits to skip. This option has been added for the
+# Linux kernel repo, as the first commits are too far away date-wise.
+SKIP_COMMITS ?= 30
+
DNADIRS := $(DNA_4K) $(DNA_8K)
-DATADIRS := $(DNA_4K) $(DNA_8K) $(DIFFS) $(NOPACK)
+DATADIRS := $(DNA_4K) $(DNA_8K) $(DIFFS) $(NOPACK) $(REAL)
SIZEFILES := $(DATADIRS:%=%.size)
.PHONY: all exp
all exp: summary.csv $(SIZEFILES)
@echo "=============== SUMMARY ==============="
@head -n 1 $< | while IFS="," read -r $(DATADIRS); do \
- printf "$(DATADIRS:%=\%13s)\n" $(DATADIRS:%=$$%); \
+ printf "$(DATADIRS:%=\%14s)\n" $(DATADIRS:%=$$%); \
done
@tail -n +2 $< | while IFS="," read -r $(DATADIRS); do \
- printf "$(DATADIRS:%=\%'13d)\n" $(DATADIRS:%=$$%); \
+ printf "$(DATADIRS:%=\%'14d)\n" $(DATADIRS:%=$$%); \
done
@echo "================ TOTAL ================"
@for i in $(SIZEFILES); do cat $$i | paste -sd+ | bc; done | while read j; do \
- printf "%'13d " $$j; \
+ printf "%'14d " $$j; \
done
@echo
@@ -42,12 +47,23 @@ summary.csv: $(SIZEFILES)
echo $(DATADIRS) | tr ' ' ',' > $@
paste -d "," $^ >> $@
-$(DNADIRS:%=%.size) $(NOPACK).size: %.size: %.versions run
+$(DNADIRS:%=%.size) $(REAL).size: %.size: %.versions run
rm -rf $@
for i in $</*; do \
cut -f1 $$i | paste -sd+ | bc >> $@; \
done
+# Size added by each version of the nopack git dir: every listing found in
+# $(NOPACK).versions is diffed against the previous one (the first against
+# /dev/null), and the first column of the lines that are new in it is summed
+# into one row of $@.
+# Depends on `run` like the other *.size rules so the listings exist before
+# they are read, and starts from an empty $@ so that a rebuild does not
+# append to stale rows.
+$(NOPACK).size: $(NOPACK).versions run
+	rm -f $@
+	prev=/dev/null; for i in $</*; do \
+		diff -u0 $$prev $$i \
+		| sed -e '1,2d' -e '/^-/d' -e 's/^+//' -e '/^@@/d' \
+		| cut -f1 \
+		| paste -sd+ \
+		| bc \
+		>> $@; \
+		prev=$$i; \
+	done
+
%.size: % run
find $< -type f -exec du -ba {} + | sort -k2 | cut -f1 > $@
@@ -65,7 +81,7 @@ $(DNADIRS:%=%.size) $(NOPACK).size: %.size: %.versions run
| sort -k2 \
> $@
-run: $(COMMITS) $(DNA_BACKUP) $(DNA_PARAMS) | $(DATADIRS) $(NOPACK).versions
+run: $(COMMITS) $(DNA_BACKUP) $(DNA_PARAMS) | $(DATADIRS) $(NOPACK).versions $(REAL).versions
rm -rf $(DATADIRS:%=%/*)
./exp.sh
touch $@
@@ -74,15 +90,22 @@ $(DNA_PARAMS):
echo "$(DNA_4K) -c 4096" >> $@
echo "$(DNA_8K) -c 8192" >> $@
-$(DATADIRS) $(NOPACK).versions:
+$(DATADIRS) $(NOPACK).versions $(REAL).versions:
mkdir $@
$(DNA_BACKUP): .FORCE
@$(MAKE) -C $(@D) --no-print-directory $(@F)
-$(COMMITS): | repo git
+# Monthly sample: keep every 30th line of the daily commit list
+# (GNU sed first~step address; all other lines are deleted).
+commits.monthly: commits.daily
+	sed '0~30!d' $< > $@
+
+# Weekly sample: keep every 7th line of the daily commit list
+# (GNU sed first~step address; all other lines are deleted).
+commits.weekly: commits.daily
+	sed '0~7!d' $< > $@
+
+# List "<hash> <author date>" lines for the non-merge commits of the repo,
+# keeping a single commit per distinct date (sort --unique on field 2), and
+# drop the first $(SKIP_COMMITS) entries of that list.
+# `tail -n +K` starts printing AT line K, so K must be SKIP_COMMITS + 1 to
+# skip exactly SKIP_COMMITS lines (an empty or 0 value keeps everything).
+commits.daily: | repo git
+	$(GITC) log --reverse --no-merges --pretty=tformat:"%H %as" \
+	| sort --unique --key=2 \
+	| tail -n +$$(($(SKIP_COMMITS) + 1)) \
+	> $@
repo git &:
diff --git a/exp/exp.sh b/exp/exp.sh
index 49f429b..02cea84 100755
--- a/exp/exp.sh
+++ b/exp/exp.sh
@@ -8,6 +8,7 @@
# - MAX_VERSION: the max number for versions for the experiment
# - COMMITS: the name of the file that contains the lists of versions
# - DIFFS: the path of the git diff dir
+# - REAL: the path of the real size dir
# - GIT_NOPACK: the path of the git nopack dir
log() {
@@ -26,14 +27,13 @@ rm $REPO_PATH/.git
$GITC init --separate-git-dir=$GIT_NOPACK
$GITC --git-dir=$GIT_NOPACK config gc.auto 0
set-git-dir $GIT_PATH
-nopack_prev=0
# "empty tree" commit
prev="4b825dc642cb6eb9a060e54bf8d69288fbee4904"
last=$(tail --lines=1 $COMMITS | cut -f1)
i=0
-cat $COMMITS | while read line
+head -n $MAX_VERSION $COMMITS | while read line
do
# Get hash
hash=$(echo "$line" | cut -f1)
@@ -43,6 +43,10 @@ do
$GITC checkout $hash 2> $OUT \
|| (log "error checking out"; cat $OUT; exit 1)
+ # Save real size for this version
+ log "save real size for this version"
+ du -b --summarize $REPO_PATH > $(printf "%s.versions/%05d" $REAL $i)
+
# Create git diff for this version
log "create git diff for this version"
diff=$(printf "%s/%05d.diff.gz" $DIFFS $i)
@@ -57,15 +61,8 @@ do
$GITC commit -m $hash &> $OUT \
|| (log "error commiting to nopack"; cat $OUT; exit 1)
ls $GIT_NOPACK/objects/pack
- nopack_curr=$(printf "%s.versions/%05d" $GIT_NOPACK $i)
find $GIT_NOPACK -type f -exec du -ba {} + \
- | grep -v /logs/ \
- | cut -f1 \
- | paste -sd+ \
- | xargs -i echo {} - $nopack_prev \
- | bc \
- > $nopack_curr
- let nopack_prev+=$(cat $nopack_curr)
+ > $(printf "%s.versions/%05d" $GIT_NOPACK $i)
set-git-dir $GIT_PATH
# Create dna backups for this version
@@ -109,10 +106,6 @@ do
prev=$hash
let i++
- if [[ $i == $MAX_VERSION ]]
- then
- break
- fi
done
# cleanup