exp/Makefile


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128

# Repository cloned by the `repo git` rule.
REPO_URL  := https://club1.fr/~nicolas/git/dna-backup/

# Run every recipe with bash instead of the default /bin/sh.
SHELL := /bin/bash

# Reset LC_NUMERIC (exported empty) so that the thousands separator
# printed by printf's %'d conversions works as expected.
export LC_NUMERIC  :=

# User-tunable settings. `?=` keeps a value already supplied through the
# environment or the command line. Exported variables are also placed in
# the environment of every recipe command (presumably for ./exp.sh, which
# is not visible from this file -- TODO confirm).
#   DNA_BACKUP   binary refreshed through a sub-make run in its directory
#   DNA_PARAMS   generated file, one "<dir><TAB>-c <chunk size>" line per
#                dna directory
#   MAX_VERSION  not referenced by any rule visible in this file
#   COMMITS      commit list required by `run` (commits.daily,
#                commits.weekly and commits.monthly can be generated)
#   DNA_4K, DNA_8K, NOPACK, REAL, DIFFS
#                names of the data directories that get measured
#   GIT_NOPACK   absolute path of the NOPACK directory
export DNA_BACKUP  ?= ../dna-backup
export DNA_PARAMS  ?= dna_params
export MAX_VERSION ?= 5
export COMMITS     ?= commits.daily
DNA_4K             ?= dna_4k
DNA_8K             ?= dna_8k
NOPACK             ?= nopack
export GIT_NOPACK  ?= $(abspath $(NOPACK))
export REAL        ?= real
export DIFFS       ?= diffs

# Work tree of the clone and its separate git directory (absolute path).
export REPO_PATH   ?= repo
export GIT_PATH    ?= $(abspath git)
# Shorthand for running git inside the work tree; expanded once, here.
GITC      := git -C $(REPO_PATH)

# Controls how many of the oldest entries the commits.daily rule drops
# from the head of the commit list. Introduced for the Linux kernel
# repository, whose first commits are too far apart date-wise.
SKIP_COMMITS ?= 30

# The two dna-backup configurations described in $(DNA_PARAMS).
DNADIRS   := $(DNA_4K) $(DNA_8K)
# Every measured directory, in the column order of summary.csv.
DATADIRS  := $(DNADIRS) $(DIFFS) $(NOPACK) $(REAL)
# One "<dir>.size" file per measured directory.
SIZEFILES := $(addsuffix .size,$(DATADIRS))

# Default goal. Builds summary.csv and every size file, then prints:
#   1. the CSV header, each name right-aligned on 14 columns;
#   2. every following CSV row as integers with thousands separators;
#   3. one grand total per size file (sum of all its lines, via bc).
# The data directory names double as shell variable names for `read`,
# so they are expected to be valid shell identifiers.
.PHONY: all exp
all exp: summary.csv $(SIZEFILES)
	@echo "=============== SUMMARY ==============="
	@head -n 1 $< | while IFS="," read -r $(DATADIRS); do \
		printf "$(foreach d,$(DATADIRS),%14s)\n" $(addprefix $$,$(DATADIRS)); \
	done
	@tail -n +2 $< | while IFS="," read -r $(DATADIRS); do \
		printf "$(foreach d,$(DATADIRS),%'14d)\n" $(addprefix $$,$(DATADIRS)); \
	done
	@echo "================ TOTAL ================"
	@for f in $(SIZEFILES); do paste -sd+ $$f | bc; done | while read total; do \
		printf "%'14d " $$total; \
	done
	@echo

# CSV report: a header made of the data directory names, followed by the
# size files pasted side by side (line N of each file forms row N).
summary.csv: $(SIZEFILES)
	{ echo $(DATADIRS) | tr ' ' ',' && paste -d "," $^; } > $@

# Size files of the dna directories and of the real data: one output line
# per file found in <dir>.versions, holding the sum (computed by bc) of
# the first tab-separated column of that file. The target is removed
# first because the loop appends to it.
$(addsuffix .size,$(DNADIRS)) $(REAL).size: %.size: %.versions run
	rm -rf $@
	for version in $</*; do \
		cut -f1 $$version | paste -sd+ | bc >> $@; \
	done

# Incremental size of $(NOPACK), one output line per file found in
# $(NOPACK).versions. Each listing is diffed against the previous one
# (the first against /dev/null); only the lines added by the newer
# listing are kept and their first (size) column is summed by bc.
#
# The target is removed first: the loop appends, so without this a
# rebuild would stack new rows on top of stale ones (the sibling .size
# rule already does the same). `run` is a prerequisite, as for the other
# size files, so the listings are never read before the experiment has
# finished (the directory is presumably filled by ./exp.sh -- TODO
# confirm).
$(NOPACK).size: $(NOPACK).versions run
	rm -rf $@
	prev=/dev/null; for i in $</*; do \
		diff -u0 $$prev $$i \
		| sed -e '1,2d' -e '/^-/d' -e 's/^+//' -e '/^@@/d' \
		| cut -f1 \
		| paste -sd+ \
		| bc \
		>> $@; \
		prev=$$i; \
	done

# Generic size file for a data directory without a dedicated rule: the
# byte count reported by `du -ba` for every regular file, one per line,
# ordered by file path.
%.size: % run
	find $< -type f -exec du -ba {} + \
	| sort -k2 \
	| cut -f1 \
	> $@

# Split <dir>.results into one listing per entry of <dir>/: the file
# <dir>.versions/<name> receives the result lines that mention
# <dir>/<name>, where <name> is the second '/'-separated path component.
# NOTE(review): grep reads the path as an unanchored regular expression,
# so an entry whose name is a prefix of another one would also collect
# the other's lines -- confirm entry names have a fixed width.
%.versions: %.results
	rm -rf $@ && mkdir -p $@
	for entry in $*/*; do \
		name=$$(echo $$entry | cut -d / -f2); \
		grep $$entry $< > $@/$$name ; \
	done

# Raw listing of a data directory: "<bytes><TAB><path>" for every regular
# file, minus any line containing "hashes", ordered by path.
%.results: % run
	find $< -type f -exec du -ba {} + | \
		grep -v hashes | \
		sort -k2 > $@

# Stamp file marking a completed experiment. The data directories are
# order-only prerequisites: they must exist, but their timestamps never
# trigger a re-run. Their content is wiped before ./exp.sh is started.
run: $(COMMITS) $(DNA_BACKUP) $(DNA_PARAMS) \
   | $(DATADIRS) $(NOPACK).versions $(REAL).versions
	rm -rf $(addsuffix /*,$(DATADIRS))
	./exp.sh
	touch $@

# Parameter file: one "<directory><TAB>-c <chunk size>" line per dna
# directory. Written with a single truncating redirect so that re-running
# the recipe (e.g. `make -B`) never duplicates lines and a failure cannot
# leave a half-written file behind.
$(DNA_PARAMS):
	printf '%s\t-c %s\n' "$(DNA_4K)" 4096 "$(DNA_8K)" 8192 > $@

# Create the data directories and the two .versions directories that are
# not derived from a .results file (this explicit rule takes precedence
# over the %.versions pattern rule for them). `-p` keeps the recipe
# harmless when the directory already exists (e.g. `make -B`) and allows
# nested paths.
$(DATADIRS) $(NOPACK).versions $(REAL).versions:
	mkdir -p $@

# Always delegate to the Makefile living next to the binary (hence the
# .FORCE prerequisite); that sub-make decides whether a rebuild is needed.
$(DNA_BACKUP): .FORCE
	@$(MAKE) --no-print-directory -C $(dir $@) $(notdir $@)

# Keep every 30th line of the daily list (lines 30, 60, 90, ...).
commits.monthly: commits.daily
	awk 'NR % 30 == 0' $< > $@

# Keep every 7th line of the daily list (lines 7, 14, 21, ...).
commits.weekly: commits.daily
	awk 'NR % 7 == 0' $< > $@

# List of "<hash><TAB><author date, YYYY-MM-DD>" lines, at most one per
# date (sort --unique on the date field), oldest first, without merge
# commits and without the first $(SKIP_COMMITS) entries.
# `tail -n +N` starts printing AT line N, so N must be SKIP_COMMITS + 1
# for exactly SKIP_COMMITS lines to be dropped.
# The clone is an order-only prerequisite: it has to exist, but its
# timestamp never causes the list to be regenerated.
commits.daily: | repo git
	$(GITC) log --reverse --no-merges --pretty=tformat:"%H	%as" \
	| sort --unique --key=2 \
	| tail -n +$$(( $(SKIP_COMMITS) + 1 )) \
	> $@

# Grouped target: a single clone creates both the work tree and the
# separate git directory. The detached-HEAD advice is then switched off
# to remove the warning about detached head state.
# NOTE(review): the target names are the literals `repo` and `git`, while
# the recipe writes to $(REPO_PATH) and $(GIT_PATH); they only coincide
# with the default values of those variables.
repo git &:
	git clone --separate-git-dir=$(GIT_PATH) $(REPO_URL) $(REPO_PATH)
	git -C $(REPO_PATH) config advice.detachedHead false

# Remove everything, including the clone and every generated commit list.
# All three commits.* files are deleted, not only $(COMMITS): they are
# derived from the clone being removed, and commits.daily has no
# timestamp-checked prerequisite, so a survivor would never be refreshed.
.PHONY: clean
clean: mostlyclean
	rm -rf $(REPO_PATH) $(GIT_PATH)
	rm -f $(COMMITS) commits.daily commits.weekly commits.monthly

# Remove the experiment data (directories, listings, stamp and parameter
# file) but keep the clone and the commit lists.
# Declared phony so that a file named `mostlyclean` cannot disable it.
.PHONY: mostlyclean
mostlyclean: resultsclean
	rm -rf $(DATADIRS) $(DATADIRS:%=%.versions)
	rm -f run $(DATADIRS:%=%.results) $(DNA_PARAMS)

# Remove only the computed reports (CSV summary and size files).
# Declared phony so that a file named `resultsclean` cannot disable it.
.PHONY: resultsclean
resultsclean:
	rm -f summary.csv $(SIZEFILES)

# Target with an empty recipe and no matching file: listing it as a
# prerequisite makes the dependent rule's recipe run on every invocation.
.FORCE: ;