From c4aaf908abd8127c43088201f34dd2c921286a23 Mon Sep 17 00:00:00 2001
From: n-peugnet
Date: Fri, 1 Oct 2021 19:43:56 +0200
Subject: compare against git diff gzipped

this is not looking good...
---
NOTE(review): this patch was recovered from a whitespace-collapsed cgit page.
Line breaks, recipe tabs and blank lines are reconstructed so that they agree
with the hunk headers and the diffstat. Points to confirm against commit
c4aaf908 before applying:
  * the body of the `backup-size` loop was lost in extraction; the line
    `cat $$i | cut -f1 | paste -sd+ | bc >> $@; \` is a best guess;
  * runs of spaces inside lines (e.g. `:=` alignment) were collapsed and are
    not restored, so context lines may need `git apply --ignore-whitespace`;
  * `REPO_PATH := repo` is assumed to be hunk-header context rather than the
    first context line; both readings satisfy the -4,23/+4,50 counts;
  * the subject/body split of the commit message is assumed.

 exp/Makefile | 59 +++++++++++++++++++++++++++++++++++++++++++++--------------
 exp/exp.sh   | 32 +++++++++++++++++++++++++-------
 2 files changed, 70 insertions(+), 21 deletions(-)

(limited to 'exp')

diff --git a/exp/Makefile b/exp/Makefile
index e7314bf..16d4b65 100644
--- a/exp/Makefile
+++ b/exp/Makefile
@@ -4,23 +4,50 @@ REPO_PATH := repo
 GIT_PATH := git
 GITC := git -C $(REPO_PATH)
 
-TEMP := temp
+MAX_VERSION ?= 3
+
+DATADIRS := backup diffs
+SIZEFILES := $(DATADIRS:%=%-size)
 
 .PHONY: all exp
-all exp: versions results
-	@echo "total size : `head --lines=1 results | cut -f1`"
-	@echo "version count : `wc -l versions`"
-	@echo "repo results :\n`cat results`"
+all exp: summary.csv $(SIZEFILES)
+	@echo "============== SUMMARY =============="
+	@cat $< | tr ',' '\t'
+	@echo "============== TOTAL =============="
+	@for i in $(SIZEFILES); do cat $$i | paste -sd+ | bc; done | tr '\n' '\t'
+	@echo
+
+summary.csv: $(SIZEFILES)
+	echo $(DATADIRS) | tr ' ' ',' > $@
+	paste -d "," $^ >> $@
 
-versions: results
-	grep [0-9]$$ $< | cut -f1 > $@
+backup-size: versions
+	rm -rf $@
+	for i in $</*; do \
+		cat $$i | cut -f1 | paste -sd+ | bc >> $@; \
+	done
 
-results: | $(TEMP)
-	du -bad 2 $(TEMP) | sort -k2 > $@
+diffs-size: diffs
+	find $< -type f -exec du -ba {} + | cut -f1 > $@
 
-$(TEMP): commits ../dna-backup
+versions: results backup
 	rm -rf $@
-	./exp.sh $< $(REPO_PATH) $@
+	mkdir -p $@
+	for i in backup/*; do \
+		v=`echo $$i | cut -d / -f2`; \
+		grep $$i $< > $@/$$v ; \
+	done
+
+results: backup
+	find $< -type f -exec du -ba {} + \
+		| grep -v hashes \
+		| sort -k2 \
+		> $@
+
+$(DATADIRS): commits ../dna-backup
+	rm -rf $(DATADIRS)
+	./exp.sh $< $(REPO_PATH) $(MAX_VERSION) $(DATADIRS)
+	touch $(DATADIRS)
 
 ../dna-backup: .FORCE
 	@$(MAKE) -C .. --no-print-directory dna-backup
@@ -36,8 +63,12 @@ repo git:
 	$(GITC) config advice.detachedHead false
 
 .PHONY: clean
-clean:
-	rm -rf $(REPO_PATH) $(GIT_PATH) $(TEMP)
-	rm -f commits results versions
+clean: mostlyclean
+	rm -rf $(REPO_PATH) $(GIT_PATH)
+	rm commits
+
+mostlyclean:
+	rm -rf $(DATADIRS) versions
+	rm -f results summary.csv $(SIZEFILES)
 
 .FORCE: ;
diff --git a/exp/exp.sh b/exp/exp.sh
index 8c861a3..0213b2f 100755
--- a/exp/exp.sh
+++ b/exp/exp.sh
@@ -1,12 +1,30 @@
 #!/bin/bash
 
-commits_file=$1
-repo_path=$2
-temp=$3
+commits="$1"
+repo="$2"
+max_count="$3"
+backup="$4"
+diffs="$5"
 
-cat $commits_file | while read i
+mkdir -p $backup $diffs
+
+# "empty tree" commit
+prev="4b825dc642cb6eb9a060e54bf8d69288fbee4904"
+last=$(tail --lines=1 "$commits" | cut -f1)
+
+i=0
+cat "$commits" | while read line
 do
-	hash=$(echo "$i" | cut -f1)
-	git -C $repo_path checkout $hash
-	../dna-backup commit -v 2 $repo_path $temp
+	hash=$(echo "$line" | cut -f1)
+	git -C "$repo" checkout "$hash"
+	git -C "$repo" diff --minimal --binary --unified=0 "$prev" | gzip > "$diffs/$i.diff.gz"
+	../dna-backup commit -v 2 "$repo" "$backup"
+	prev="$hash"
+	let i++
+	if [[ $i == $max_count ]]
+	then
+		break
+	fi
 done
+
+git -C "$repo" checkout "$last"
-- 
cgit v1.2.3