diff options
author | n-peugnet <n.peugnet@free.fr> | 2021-10-11 17:34:40 +0200 |
---|---|---|
committer | n-peugnet <n.peugnet@free.fr> | 2021-10-11 17:34:40 +0200 |
commit | c5b862bdd24eaae03b45ccc0bffe6b41cbe92d85 (patch) | |
tree | 7a0ba1611b3965c94380048989eeffc6d3afcbb6 | |
parent | b703ef5246014d5450a707c0f295232f830c5493 (diff) | |
download | dna-backup-c5b862bdd24eaae03b45ccc0bffe6b41cbe92d85.tar.gz dna-backup-c5b862bdd24eaae03b45ccc0bffe6b41cbe92d85.zip |
only count objects dir for nopack
and start writing up the details
-rw-r--r-- | README.md | 196 | ||||
-rw-r--r-- | docs/note-2021-10-11.md | 81 | ||||
-rw-r--r-- | exp/Makefile | 6 |
3 files changed, 242 insertions, 41 deletions
@@ -43,6 +43,7 @@ repo/ └── recipe ``` + Pour un repo d'une taille totale de 401 Mio : ``` @@ -69,7 +70,7 @@ On imagine le _DNA-Drive_ comme un segment de _pools_ : ``` +---+---+---+---+---+---------+---+---+---+ - | 0 | 1 | 2 | 3 | 4 |--> <--| 61| 62| 63| + | 0 | 1 | 2 | 3 | 4 |--> <--| 93| 94| 95| +---+---+---+---+---+---------+---+---+---+ |versions| chunks | | metadata | (recipe+files) @@ -142,49 +143,166 @@ des _chunks_ de ce fichier. ### Évaluation de performances Le dossier `exp` contient les scripts permettant de reproduire les expériences. -Les scripts ne sont prévus pour fonctionner que sur des systèmes Unix. +Les scripts ne sont prévus pour fonctionner que sur Linux. + +On utilise le dépôt git du kernel Linux comme base de données de test. Il s'agit +en effet d'une bonne simulation de modification de dossiers, car l'historique +contient toutes les modifications qui ont été apportées petit à petit à +l'ensemble des fichiers. + +#### Bases de comparaison + +Pour évaluer les performances du système dna-backup, trois autres systèmes de +stockage versionnés ont été choisis comme base de comparaison : + +- **git diffs** +- **git objects** +- **taille réelle** + +##### Git diffs + + + +##### Git objects + + +##### Taille réelle + + +<table> +<tr> +<th>Feature\Système</th> +<th>dna-backup</th> +<th>git diffs</th> +<th>git objects</th> +<th>taille réelle</th> +</tr> +<tr> +<th>Déduplication</th> +<td>Niveau chunk</td> +<td>Niveau fichier (lors du renommage)</td> +<td>Niveau fichier</td> +<td>Aucune</td> +</tr> +<tr> +<th>Encodage delta</th> +<td>Niveau chunk</td> +<td>D'une version à l'autre</td> +<td>Aucun</td> +<td>Aucun</td> +</tr> +<tr> +<th>Compression</th> +<td>Niveau chunk (pour le moment indépendamment)</td> +<td>Niveau version</td> +<td>Niveau fichier</td> +<td>Aucune</td> +</tr> +<tr> +<th>Restauration de la dernière version</th> +<td> +Lecture des métadonnées puis des chunks de cette version +(répartis dans différents pools) +</td> +<td>Lecture de la 
totalité du DNA-Drive</td> +<td> +Lecture récursive des différents objets composant la backup +(répartis dans différents pools) +</td> +<td>Lecture de la zone correspondant à la dernière version</td> +</tr> +</table> + +#### Nombre d'octets par version + +##### Légende + +- `dna_4k` : le système dna-backup avec des blocs de 4 Kio. +- `dna_8k` : le système dna-backup avec des blocs de 8 Kio. +- `diffs` : des diffs git minimales gzippées. +- `nopack` : le dossier `objects` de git, contenant l'ensemble des données + des fichiers et dossiers d'un dépôt. +- `real` : le poids réel de chaque version et donc l'espace nécessaire à + stocker l'ensemble des versions de manière non dédupliquée. + +##### Résultats + +Daily commit: + +``` +=============================== SUMMARY =============================== + dna_4k dna_8k diffs nopack real + 66,403,731 60,453,345 47,304,239 63,594,887 202,628,603 + 21,779,553 24,378,156 9,902,241 43,580,981 214,273,512 + 339,032 230,797 9,905,079 35,489 202,628,471 + 650,649 778,139 252,846 4,717,317 202,453,713 + 174,127 91,398 253,535 3,432 202,628,093 + 561 485 552 45,292 202,628,344 + 2,987,849 2,736,896 9,911,248 72,885 214,276,336 + 4,768 4,696 1,175 26,967 214,278,164 + 347,636 247,112 9,912,728 76,073 202,636,078 + 7,641 10,974 3,307 56,203 202,639,214 + 2,914,343 2,716,445 9,911,148 41,606 214,283,553 + 347,449 257,531 9,910,361 139,913 202,641,133 + 2,923,844 2,746,697 9,912,341 69,243 214,285,297 + 1,330 7,476 214 9,792 214,285,426 + 404,258 242,004 9,914,620 13,702 202,641,398 + 415,937 359,261 266,905 130,934 202,455,083 + 207,470 291,050 142,110 1,976,823 202,272,761 +================================ TOTAL ================================ + 99,910,178 95,552,462 127,504,649 114,591,539 3,513,935,179 +``` -#### Légende +Weekly commit: -`backups` : le système dna-backup -`diffs` : des diffs git minimales gzippées +``` +=============================== SUMMARY =============================== + dna_4k dna_8k diffs nopack real + 
70,192,809 63,852,374 49,917,523 67,132,003 214,292,720 + 31,567 28,668 8,822 90,423 214,301,810 + 27,389 31,485 10,920 99,194 214,299,953 + 18,135,507 20,861,135 9,918,650 40,011,293 202,624,903 + 907,939 1,209,389 285,459 4,920,733 202,470,023 + 113,871 152,351 293,731 137,519 202,618,267 + 294,810 367,701 272,263 2,304,224 203,092,308 + 2,112,921 2,540,859 1,148,513 9,636,016 201,476,675 + 252,068 288,241 609,369 857,331 202,282,568 + 782,812 981,296 697,995 2,758,951 201,420,809 + 136,493 161,325 398,494 727,346 202,065,360 + 62,677 80,290 134,441 458,130 202,251,722 + 162,061 196,716 365,229 230,404 202,465,009 + 7,665 9,678 10,625 77,034 202,457,471 + 71,731 80,298 152,999 187,241 202,615,704 + 307,109 222,474 241,092 12,081 203,083,912 + 305,795 228,540 35,494 740,246 203,113,279 +================================ TOTAL ================================ + 93,905,224 91,292,820 64,501,619 130,380,169 3,476,932,493 +``` -### Nombre d'octets par version +Monthly commits: ``` -============== SUMMARY ============== -backup diffs -19552 1451 -3365 638 -3557 4737 -3779 7321 -6208 2665 -6826 1496 -3655 11129 -9953 6004 -7825 2815 -3009 7551 -9052 5804 -9415 2449 -10386 3397 -7536 2143 -8521 13301 -12488 3274 -13314 2162 -13476 9681 -30227 4631 -18880 10147 -27057 4983 -23634 11488 -26857 867 -30352 1768 -20086 4373 -23713 9698 -12258 869 -5255 4188 -16356 23257 -============== TOTAL ============== -386592 164287 +=============================== SUMMARY =============================== + dna_4k dna_8k diffs nopack real + 66,344,139 60,414,327 47,255,410 63,531,013 202,455,244 + 268,382 293,432 71,579 2,114,221 202,438,437 + 288,294 288,397 137,081 2,625,834 202,477,165 + 2,617,048 2,989,196 1,106,365 11,273,622 203,355,330 + 4,219,402 5,065,795 1,485,211 14,062,635 206,087,365 + 6,925,148 8,177,404 3,102,478 20,489,609 209,450,906 + 1,931,351 2,314,294 771,998 6,811,409 209,646,120 + 9,775,191 11,577,926 3,335,990 26,532,154 213,287,798 + 7,783,071 9,101,660 2,505,353 
20,687,252 216,420,188 + 9,445,609 10,977,253 3,479,709 25,758,937 217,852,953 + 701,911 905,423 164,682 4,517,360 217,851,223 + 14,385,992 16,467,969 4,380,280 32,949,448 222,875,080 + 3,389,340 4,347,527 817,894 14,054,849 223,352,903 + 13,307,722 15,446,179 4,060,874 32,889,854 225,760,003 + 3,219,293 3,895,349 1,301,487 10,953,334 225,577,911 + 1,876,709 2,451,988 390,110 9,171,030 225,848,365 + 12,995,018 15,561,939 4,204,779 32,837,755 227,575,213 +================================ TOTAL ================================ + 159,473,620 170,276,058 78,571,280 331,260,316 3,652,312,204 ``` <!-- LTeX: language=en --> diff --git a/docs/note-2021-10-11.md b/docs/note-2021-10-11.md new file mode 100644 index 0000000..e313c33 --- /dev/null +++ b/docs/note-2021-10-11.md @@ -0,0 +1,81 @@ +Real size & correct nopack measurement +====================================== + + +Daily commit: + +``` +=============================== SUMMARY =============================== + dna_4k dna_8k diffs nopack real + 66,403,731 60,453,345 47,304,239 63,594,887 202,628,603 + 21,779,553 24,378,156 9,902,241 43,580,981 214,273,512 + 339,032 230,797 9,905,079 35,489 202,628,471 + 650,649 778,139 252,846 4,717,317 202,453,713 + 174,127 91,398 253,535 3,432 202,628,093 + 561 485 552 45,292 202,628,344 + 2,987,849 2,736,896 9,911,248 72,885 214,276,336 + 4,768 4,696 1,175 26,967 214,278,164 + 347,636 247,112 9,912,728 76,073 202,636,078 + 7,641 10,974 3,307 56,203 202,639,214 + 2,914,343 2,716,445 9,911,148 41,606 214,283,553 + 347,449 257,531 9,910,361 139,913 202,641,133 + 2,923,844 2,746,697 9,912,341 69,243 214,285,297 + 1,330 7,476 214 9,792 214,285,426 + 404,258 242,004 9,914,620 13,702 202,641,398 + 415,937 359,261 266,905 130,934 202,455,083 + 207,470 291,050 142,110 1,976,823 202,272,761 +================================ TOTAL ================================ + 99,910,178 95,552,462 127,504,649 114,591,539 3,513,935,179 +``` + +Weekly commit: + +``` +=============================== 
SUMMARY =============================== + dna_4k dna_8k diffs nopack real + 70,192,809 63,852,374 49,917,523 67,132,003 214,292,720 + 31,567 28,668 8,822 90,423 214,301,810 + 27,389 31,485 10,920 99,194 214,299,953 + 18,135,507 20,861,135 9,918,650 40,011,293 202,624,903 + 907,939 1,209,389 285,459 4,920,733 202,470,023 + 113,871 152,351 293,731 137,519 202,618,267 + 294,810 367,701 272,263 2,304,224 203,092,308 + 2,112,921 2,540,859 1,148,513 9,636,016 201,476,675 + 252,068 288,241 609,369 857,331 202,282,568 + 782,812 981,296 697,995 2,758,951 201,420,809 + 136,493 161,325 398,494 727,346 202,065,360 + 62,677 80,290 134,441 458,130 202,251,722 + 162,061 196,716 365,229 230,404 202,465,009 + 7,665 9,678 10,625 77,034 202,457,471 + 71,731 80,298 152,999 187,241 202,615,704 + 307,109 222,474 241,092 12,081 203,083,912 + 305,795 228,540 35,494 740,246 203,113,279 +================================ TOTAL ================================ + 93,905,224 91,292,820 64,501,619 130,380,169 3,476,932,493 +``` + +Monthly commits: + +``` +=============================== SUMMARY =============================== + dna_4k dna_8k diffs nopack real + 66,344,139 60,414,327 47,255,410 63,531,013 202,455,244 + 268,382 293,432 71,579 2,114,221 202,438,437 + 288,294 288,397 137,081 2,625,834 202,477,165 + 2,617,048 2,989,196 1,106,365 11,273,622 203,355,330 + 4,219,402 5,065,795 1,485,211 14,062,635 206,087,365 + 6,925,148 8,177,404 3,102,478 20,489,609 209,450,906 + 1,931,351 2,314,294 771,998 6,811,409 209,646,120 + 9,775,191 11,577,926 3,335,990 26,532,154 213,287,798 + 7,783,071 9,101,660 2,505,353 20,687,252 216,420,188 + 9,445,609 10,977,253 3,479,709 25,758,937 217,852,953 + 701,911 905,423 164,682 4,517,360 217,851,223 + 14,385,992 16,467,969 4,380,280 32,949,448 222,875,080 + 3,389,340 4,347,527 817,894 14,054,849 223,352,903 + 13,307,722 15,446,179 4,060,874 32,889,854 225,760,003 + 3,219,293 3,895,349 1,301,487 10,953,334 225,577,911 + 1,876,709 2,451,988 390,110 9,171,030 
225,848,365 + 12,995,018 15,561,939 4,204,779 32,837,755 227,575,213 +================================ TOTAL ================================ + 159,473,620 170,276,058 78,571,280 331,260,316 3,652,312,204 +``` diff --git a/exp/Makefile b/exp/Makefile index b69b0bf..555ae67 100644 --- a/exp/Makefile +++ b/exp/Makefile @@ -53,10 +53,12 @@ $(DNADIRS:%=%.size) $(REAL).size: %.size: %.versions run cut -f1 $$i | paste -sd+ | bc >> $@; \ done -$(NOPACK).size: $(NOPACK).versions +$(NOPACK).size: $(NOPACK).versions run + rm -rf $@ prev=/dev/null; for i in $</*; do \ diff -u0 $$prev $$i \ | sed -e '1,2d' -e '/^-/d' -e 's/^+//' -e '/^@@/d' \ + | grep /objects/ \ | cut -f1 \ | paste -sd+ \ | bc \ @@ -82,7 +84,7 @@ $(NOPACK).size: $(NOPACK).versions > $@ run: $(COMMITS) $(DNA_BACKUP) $(DNA_PARAMS) | $(DATADIRS) $(NOPACK).versions $(REAL).versions - rm -rf $(DATADIRS:%=%/*) + rm -rf $(DATADIRS:%=%/*) $(DATADIRS:%=%.versions/*) ./exp.sh touch $@ |