From b3fee746c053990c88c0e067b1a654acbeb1cd1f Mon Sep 17 00:00:00 2001 From: n-peugnet Date: Fri, 8 Oct 2021 15:35:44 +0200 Subject: exp: repo 4k, better summary format, fix diff sizes --- docs/note-2021-10-07.md | 298 +++++++++++++++++++++++++----------------------- exp/Makefile | 46 +++++--- exp/exp.sh | 40 +++++-- 3 files changed, 218 insertions(+), 166 deletions(-) diff --git a/docs/note-2021-10-07.md b/docs/note-2021-10-07.md index 476964e..2d49241 100644 --- a/docs/note-2021-10-07.md +++ b/docs/note-2021-10-07.md @@ -5,151 +5,167 @@ Linux kernel ------------ ``` -11:53:07.681 check out 224426f168aa4af3dcb628e6edaa824d32d60e6f -11:53:15.471 create diff for this version -11:53:30.163 create backup for this version -11:54:34.346 restore from diffs -11:54:38.244 check restore from diffs -11:54:38.880 restore from backup -11:54:43.776 check restore from backup -11:54:44.397 check out dbe79bbe9dcb22cb3651c46f18943477141ca452 -11:55:01.762 create diff for this version -11:56:19.545 create backup for this version -11:58:13.244 check out ceeee1fb2897651b434547eb26d93e6d2ff5a1a5 -11:58:27.085 create diff for this version -12:00:14.362 create backup for this version -12:01:09.252 check out f35723ec48ca60f2f3493ea40d63a9bc5b585c28 -12:01:22.871 create diff for this version -12:01:26.590 create backup for this version -12:02:08.992 check out d2cb1a95c5fa4d1691c90a4f530955b4ea3cfa24 -12:02:22.320 create diff for this version -12:02:34.623 create backup for this version -12:03:28.304 restore from diffs -12:03:52.677 check restore from diffs -12:03:53.166 restore from backup -12:03:57.937 check restore from backup -12:03:58.427 check out 03d782524e2d0511317769521c8d5daadbab8482 -12:04:12.099 create diff for this version -12:04:19.500 create backup for this version -12:05:01.339 check out 1cbf4c563c0eaaf11c552a88b374e213181c6ddd -12:05:13.805 create diff for this version -12:05:18.433 create backup for this version -12:06:13.891 check out da28438cae9a271c5c232177f81dfb243de9b7fa -12:06:26.596 create diff for this version -12:06:32.695 create backup for this version -12:07:10.455 check out d4b9ba7bf6f38cff55b5d95a0db7dd91311ce20a -12:07:23.631 create diff for this version -12:07:23.663 create backup for this version -12:07:43.166 restore from diffs -12:08:15.771 check restore from diffs -12:08:16.253 restore from backup -12:08:21.572 check restore from backup -12:08:22.035 check out 367636772f094fd840d2d79e75257bcfaa28e70f -12:08:32.149 create diff for this version -12:08:32.270 create backup for this version -12:08:55.599 check out f50f3ac51983025405a71b70b033cc6bcb0d1fc1 -12:09:05.597 create diff for this version -12:09:05.623 create backup for this version -12:09:28.613 check out b59a9504cb93db7fae31e60760725d48652a1fc3 -12:09:40.484 create diff for this version -12:09:43.885 create backup for this version -12:10:19.089 check out 69903d6500c73af8329a5fba7153b0d50748981c -12:10:19.245 create diff for this version -12:10:19.266 create backup for this version -12:10:35.016 restore from diffs -12:11:07.791 check restore from diffs -12:11:08.286 restore from backup -12:11:14.264 check restore from backup -12:11:14.752 check out c264852726dde251a0c09ec22f61a9be8b0db68b -12:11:14.835 create diff for this version -12:11:14.855 create backup for this version -12:11:42.260 check out 0964ce24d091a1d3dc7f667e1b107ab77d4325e6 -12:11:42.340 create diff for this version -12:11:42.361 create backup for this version -12:12:09.532 check out b188ffe876382ecc009ceb4fe033fd6ec7ba4ede -12:12:09.614 create diff for this version -12:12:09.635 create backup for this version -12:12:25.965 check out 14f8351a313f364afbc565f1ddcd43f8cfdccf52 -12:12:37.653 create diff for this version -12:12:41.413 create backup for this version -12:13:06.960 restore from diffs -12:13:41.277 check restore from diffs -12:13:41.740 restore from backup -12:13:47.783 check restore from backup -12:13:48.247 check out 07a4e2da7dd3c9345f84b2552872f9d38c257451 -12:13:58.535 create diff for this version -12:13:58.860 create backup for this version -12:14:23.049 check out 3834c3f227725e2395840aed82342bda4ee9d379 -12:14:33.557 create diff for this version -12:14:33.582 create backup for this version -12:14:58.535 check out 9383292f179e1907e7e7ade539ac8fd3b65c1e97 -12:15:10.629 create diff for this version -12:15:14.131 create backup for this version -12:15:46.082 clean up repo -============== SUMMARY ============== -backup diffs -77288903 12926085 -108519248 104610 -43371572 10323162 -17821696 10052904 -40321219 1395 -11047234 20915481 -20219859 639 -11381646 13592417 -131258 368 -243287 231 -7919 238438 -7803303 510 -144214 59771518 -149747 9910136 -4322 87905291 -145265 75958096 -3189502 28806094 -233130 7398260 -6718 2607 -3721760 812 -============== TOTAL ============== -345751802 337909054 +12:45:43.014 check out 224426f168aa4af3dcb628e6edaa824d32d60e6f +12:45:46.792 create git diff for this version +12:45:57.891 create 4k dna backup for this version +12:46:55.577 create 8k dna backup for this version +12:47:42.649 restore from git diffs +12:47:46.303 check restore from diffs +12:47:46.900 restore from 4k dna backup +12:47:53.096 check restore from backup +12:47:53.687 restore from 8k dna backup +12:47:58.141 check restore from backup +12:47:58.730 check out dbe79bbe9dcb22cb3651c46f18943477141ca452 +12:48:12.722 create git diff for this version +12:49:24.854 create 4k dna backup for this version +12:51:16.256 create 8k dna backup for this version +12:52:33.649 check out ceeee1fb2897651b434547eb26d93e6d2ff5a1a5 +12:52:45.479 create git diff for this version +12:54:28.628 create 4k dna backup for this version +12:55:26.836 create 8k dna backup for this version +12:56:10.546 check out f35723ec48ca60f2f3493ea40d63a9bc5b585c28 +12:56:22.180 create git diff for this version +12:56:25.403 create 4k dna backup for this version +12:57:14.309 create 8k dna backup for this version +12:57:48.758 check out d2cb1a95c5fa4d1691c90a4f530955b4ea3cfa24 +12:58:00.454 create git diff for this version +12:58:12.064 create 4k dna backup for this version +12:59:01.275 create 8k dna backup for this version +12:59:44.119 restore from git diffs +13:00:08.435 check restore from diffs +13:00:08.904 restore from 4k dna backup +13:00:15.364 check restore from backup +13:00:15.829 restore from 8k dna backup +13:00:20.637 check restore from backup +13:00:21.104 check out 03d782524e2d0511317769521c8d5daadbab8482 +13:00:33.352 create git diff for this version +13:00:40.479 create 4k dna backup for this version +13:01:14.669 create 8k dna backup for this version +13:01:51.043 check out 1cbf4c563c0eaaf11c552a88b374e213181c6ddd +13:02:02.684 create git diff for this version +13:02:07.721 create 4k dna backup for this version +13:02:49.135 create 8k dna backup for this version +13:03:34.611 check out da28438cae9a271c5c232177f81dfb243de9b7fa +13:03:46.148 create git diff for this version +13:03:50.982 create 4k dna backup for this version +13:04:22.855 create 8k dna backup for this version +13:04:54.493 check out d4b9ba7bf6f38cff55b5d95a0db7dd91311ce20a +13:05:04.586 create git diff for this version +13:05:04.610 create 4k dna backup for this version +13:05:30.353 create 8k dna backup for this version +13:05:43.662 restore from git diffs +13:06:15.035 check restore from diffs +13:06:15.501 restore from 4k dna backup +13:06:21.935 check restore from backup +13:06:22.377 restore from 8k dna backup +13:06:27.647 check restore from backup +13:06:28.102 check out 367636772f094fd840d2d79e75257bcfaa28e70f +13:06:38.159 create git diff for this version +13:06:38.290 create 4k dna backup for this version +13:07:03.723 create 8k dna backup for this version +13:07:26.853 check out f50f3ac51983025405a71b70b033cc6bcb0d1fc1 +13:07:36.852 create git diff for this version +13:07:36.879 create 4k dna backup for this version +13:08:02.827 create 8k dna backup for this version +13:08:25.778 check out b59a9504cb93db7fae31e60760725d48652a1fc3 +13:08:37.006 create git diff for this version +13:08:39.960 create 4k dna backup for this version +13:09:15.111 create 8k dna backup for this version +13:09:47.135 check out 69903d6500c73af8329a5fba7153b0d50748981c +13:09:47.223 create git diff for this version +13:09:47.245 create 4k dna backup for this version +13:10:05.115 create 8k dna backup for this version +13:10:20.921 restore from git diffs +13:10:53.907 check restore from diffs +13:10:54.403 restore from 4k dna backup +13:11:01.699 check restore from backup +13:11:02.177 restore from 8k dna backup +13:11:08.205 check restore from backup +13:11:08.689 check out c264852726dde251a0c09ec22f61a9be8b0db68b +13:11:08.765 create git diff for this version +13:11:08.787 create 4k dna backup for this version +13:11:26.510 create 8k dna backup for this version +13:11:52.903 check out 0964ce24d091a1d3dc7f667e1b107ab77d4325e6 +13:11:52.980 create git diff for this version +13:11:53.001 create 4k dna backup for this version +13:12:25.061 create 8k dna backup for this version +13:12:52.359 check out b188ffe876382ecc009ceb4fe033fd6ec7ba4ede +13:12:52.435 create git diff for this version +13:12:52.456 create 4k dna backup for this version +13:13:10.666 create 8k dna backup for this version +13:13:26.860 check out 14f8351a313f364afbc565f1ddcd43f8cfdccf52 +13:13:37.828 create git diff for this version +13:13:42.115 create 4k dna backup for this version +13:14:12.267 create 8k dna backup for this version +13:14:37.398 restore from git diffs +13:15:12.183 check restore from diffs +13:15:12.647 restore from 4k dna backup +13:15:19.905 check restore from backup +13:15:20.357 restore from 8k dna backup +13:15:26.486 check restore from backup +13:15:26.935 clean up repo +=============== SUMMARY =============== + dna_4k dna_8k diffs + 85,062,292 77,288,903 59,771,518 + 113,976,574 108,519,248 75,958,096 + 40,450,844 43,371,572 87,905,291 + 13,699,644 17,821,696 7,398,260 + 39,029,362 40,321,219 28,806,094 + 9,335,530 11,047,234 20,915,481 + 16,891,778 20,219,859 12,926,085 + 9,764,957 11,381,646 13,592,417 + 2,597 131,258 1,395 + 93,015 243,287 104,610 + 9,331 7,919 2,607 + 6,462,903 7,803,303 10,052,904 + 278,450 144,214 510 + 353 149,747 639 + 282,278 4,322 368 + 280,151 145,265 231 + 2,892,390 3,189,502 9,910,136 +================ TOTAL ================ + 338,512,449 341,790,194 327,346,642 ``` DNA-backup ---------- ``` -============== SUMMARY ============== -backup diffs -19280 1451 -3138 638 -2746 4737 -2919 7321 -5430 2665 -6093 1496 -2888 11129 -7577 6004 -5824 2418 -1177 2815 -8396 7551 -8479 5804 -9323 2449 -7234 3397 -7926 2143 -12121 2416 -12273 13301 -12909 3274 -29000 1764 -18634 2162 -24159 9681 -20158 4631 -24669 10147 -25806 4983 -18169 11488 -20882 867 -8063 1768 -4053 4373 -14906 9698 -15514 869 -10193 4188 -6140 23257 -============== TOTAL ============== -376079 170885 +=============== SUMMARY =============== + dna_4k dna_8k diffs + 22,321 19,280 10,147 + 2,375 3,138 1,768 + 2,118 2,746 867 + 3,012 2,919 869 + 5,209 5,430 4,373 + 5,927 6,093 2,449 + 1,937 2,888 1,451 + 6,903 7,577 13,301 + 4,127 5,824 2,815 + 3,444 1,177 638 + 7,559 8,396 4,188 + 7,308 8,479 2,665 + 8,827 9,323 4,631 + 9,169 7,234 3,397 + 9,724 7,926 3,274 + 7,207 12,121 2,162 + 11,242 12,273 4,983 + 15,136 12,909 9,681 + 26,562 29,000 23,257 + 15,777 18,634 7,321 + 22,006 24,159 7,551 + 10,859 20,158 6,004 + 25,471 24,669 9,698 + 14,953 25,806 11,129 + 15,672 18,169 4,737 + 25,211 20,882 11,488 + 4,420 8,063 1,496 + 4,680 4,053 2,143 + 12,723 14,906 5,804 + 9,330 15,514 2,416 + 5,028 10,193 2,418 + 8,519 6,140 1,764 + 18,690 20,560 22,786 +================ TOTAL ================ + 353,446 396,639 193,671 ``` diff --git a/exp/Makefile b/exp/Makefile index aaf377c..8fb4ce0 100644 --- a/exp/Makefile +++ b/exp/Makefile @@ -1,48 +1,62 @@ REPO_URL := https://club1.fr/~nicolas/git/dna-backup/ +SHELL := /bin/bash + +# Reset LC_NUMERIC to make printf thousands separator work as expected +export LC_NUMERIC := + export DNA_BACKUP ?= ../dna-backup export MAX_VERSION ?= 5 export COMMITS ?= commits -export BACKUP ?= backup +export DNA_4K ?= dna_4k +export DNA_8K ?= dna_8k export DIFFS ?= diffs export REPO_PATH ?= repo GIT_PATH := git GITC := git -C $(REPO_PATH) -DATADIRS := $(BACKUP) $(DIFFS) -SIZEFILES := $(DATADIRS:%=%-size) +DNADIRS := $(DNA_4K) $(DNA_8K) +DATADIRS := $(DNA_4K) $(DNA_8K) $(DIFFS) +SIZEFILES := $(DATADIRS:%=%.size) .PHONY: all exp all exp: summary.csv $(SIZEFILES) - @echo "============== SUMMARY ==============" - @cat $< | tr ',' '\t' - @echo "============== TOTAL ==============" - @for i in $(SIZEFILES); do cat $$i | paste -sd+ | bc; done | tr '\n' '\t' + @echo "=============== SUMMARY ===============" + @head -n 1 $< | while IFS="," read -r $(DATADIRS); do \ + printf "$(DATADIRS:%=\%13s)\n" $(DATADIRS:%=$$%); \ + done + @tail -n +2 $< | while IFS="," read -r $(DATADIRS); do \ + printf "$(DATADIRS:%=\%'13d)\n" $(DATADIRS:%=$$%); \ + done + @echo "================ TOTAL ================" + @for i in $(SIZEFILES); do cat $$i | paste -sd+ | bc; done | while read j; do \ + printf "%'13d " $$j; \ + done @echo summary.csv: $(SIZEFILES) echo $(DATADIRS) | tr ' ' ',' > $@ paste -d "," $^ >> $@ -backup-size: versions +$(DNADIRS:%=%.size): %.size: %.versions rm -rf $@ for i in $> $@; \ done -diffs-size: diffs run - find $< -type f -exec du -ba {} + | cut -f1 > $@ +%.size: % run + find $< -type f -exec du -ba {} + | sort -k2 | cut -f1 > $@ -versions: results +%.versions: %.results rm -rf $@ mkdir -p $@ - for i in backup/*; do \ + for i in $*/*; do \ v=`echo $$i | cut -d / -f2`; \ grep $$i $< > $@/$$v ; \ done -results: backup run +%.results: % run find $< -type f -exec du -ba {} + \ | grep -v hashes \ | sort -k2 \ @@ -53,7 +67,7 @@ run: $(COMMITS) $(DNA_BACKUP) | $(DATADIRS) ./exp.sh touch $@ -backup diffs: +$(DATADIRS): mkdir $@ $(DNA_BACKUP): .FORCE @@ -75,9 +89,9 @@ clean: mostlyclean rm -f $(COMMITS) mostlyclean: resultsclean - rm -rf $(DATADIRS) versions run + rm -rf $(DATADIRS) $(DNADIRS:%=%.versions) run resultsclean: - rm -f results summary.csv $(SIZEFILES) + rm -f summary.csv $(SIZEFILES) .FORCE: ; diff --git a/exp/exp.sh b/exp/exp.sh index f7a603c..8a7af40 100755 --- a/exp/exp.sh +++ b/exp/exp.sh @@ -5,7 +5,8 @@ # - REPO_PATH: the path of the repo the experiment is based on # - MAX_VERSION: the max number for versions for the experiment # - COMMITS: the name of the file that contains the lists of versions -# - BACKUP: the path fo the dna-backup dir +# - DNA_4K: the path fo the dna-backup dir with 4K chunksize +# - DNA_8K: the path fo the dna-backup dir with 8K chunksize # - DIFFS: the path of the git diff dir log() { @@ -22,27 +23,38 @@ last=$(tail --lines=1 $COMMITS | cut -f1) i=0 cat $COMMITS | while read line do + # Get hash hash=$(echo "$line" | cut -f1) + # Check out repo log "check out $hash" $GITC checkout $hash 2> $OUT \ || (log "error checking out"; cat $OUT; exit 1) - log "create diff for this version" + # Create git diff for this version + log "create git diff for this version" + diff=$(printf "%s/%05d.diff.gz" $DIFFS $i) $GITC diff --minimal --binary --unified=0 -l0 $prev \ | gzip \ - > "$DIFFS/$i.diff.gz" + > $diff - log "create backup for this version" - $DNA_BACKUP commit -v 2 $REPO_PATH $BACKUP + # Create 4k dna backup for this version + log "create 4k dna backup for this version" + $DNA_BACKUP commit -v 2 -c 4096 $REPO_PATH $DNA_4K + + # Create 8k dna backup for this version + log "create 8k dna backup for this version" + $DNA_BACKUP commit -v 2 $REPO_PATH $DNA_8K if [[ $(( $i % 4 )) == 0 ]] then - log "restore from diffs" + # Check restore from git diffs + log "restore from git diffs" TEMP=$(mktemp -d) for n in $(seq 0 $i) do - cat "$DIFFS/$n.diff.gz" \ + diff=$(printf "%s/%05d.diff.gz" $DIFFS $n) + cat $diff \ | gzip --decompress \ | git -C $TEMP apply --binary --unidiff-zero --whitespace=nowarn - done @@ -52,9 +64,19 @@ do || log "git patchs restore do not match source" rm -rf $TEMP - log "restore from backup" + # Check restore from 4k dna backup + log "restore from 4k dna backup" + TEMP=$(mktemp -d) + $DNA_BACKUP restore -v 2 -c 4096 $DNA_4K $TEMP + log "check restore from backup" + diff --brief --recursive $REPO_PATH $TEMP \ + || log "dna backup restore do not match source" + rm -rf $TEMP + + # Check restore from 8k dna backup + log "restore from 8k dna backup" TEMP=$(mktemp -d) - $DNA_BACKUP restore -v 2 $BACKUP $TEMP + $DNA_BACKUP restore -v 2 $DNA_8K $TEMP log "check restore from backup" diff --brief --recursive $REPO_PATH $TEMP \ || log "dna backup restore do not match source" -- cgit v1.2.3