t0410: make test description clearer
[git/gitster.git] / builtin / pack-redundant.c
blobd2c1c4e5ec14711a54b8483106e9ed05fbe960bf
1 /*
3 * Copyright 2005, Lukas Sandstrom <lukass@etek.chalmers.se>
5 * This file is licensed under the GPL v2.
7 */
8 #define USE_THE_REPOSITORY_VARIABLE
10 #include "builtin.h"
11 #include "gettext.h"
12 #include "hex.h"
14 #include "packfile.h"
15 #include "object-store-ll.h"
16 #include "strbuf.h"
18 #define BLKSIZE 512
20 static const char pack_redundant_usage[] =
21 "git pack-redundant [--verbose] [--alt-odb] (--all | <pack-filename>...)";
23 static int load_all_packs, verbose, alt_odb;
25 struct llist_item {
26 struct llist_item *next;
27 struct object_id oid;
29 static struct llist {
30 struct llist_item *front;
31 struct llist_item *back;
32 size_t size;
33 } *all_objects; /* all objects which must be present in local packfiles */
35 static struct pack_list {
36 struct pack_list *next;
37 struct packed_git *pack;
38 struct llist *unique_objects;
39 struct llist *remaining_objects;
40 size_t all_objects_size;
41 } *local_packs = NULL, *altodb_packs = NULL;
43 static struct llist_item *free_nodes;
45 static inline void llist_item_put(struct llist_item *item)
47 item->next = free_nodes;
48 free_nodes = item;
51 static inline struct llist_item *llist_item_get(void)
53 struct llist_item *new_item;
54 if ( free_nodes ) {
55 new_item = free_nodes;
56 free_nodes = free_nodes->next;
57 } else {
58 int i = 1;
59 ALLOC_ARRAY(new_item, BLKSIZE);
60 for (; i < BLKSIZE; i++)
61 llist_item_put(&new_item[i]);
63 return new_item;
66 static inline void llist_init(struct llist **list)
68 *list = xmalloc(sizeof(struct llist));
69 (*list)->front = (*list)->back = NULL;
70 (*list)->size = 0;
73 static void llist_free(struct llist *list)
75 for (struct llist_item *i = list->front, *next; i; i = next) {
76 next = i->next;
77 llist_item_put(i);
79 free(list);
82 static struct llist * llist_copy(struct llist *list)
84 struct llist *ret;
85 struct llist_item *new_item, *old_item, *prev;
87 llist_init(&ret);
89 if ((ret->size = list->size) == 0)
90 return ret;
92 new_item = ret->front = llist_item_get();
93 new_item->oid = list->front->oid;
95 old_item = list->front->next;
96 while (old_item) {
97 prev = new_item;
98 new_item = llist_item_get();
99 prev->next = new_item;
100 new_item->oid = old_item->oid;
101 old_item = old_item->next;
103 new_item->next = NULL;
104 ret->back = new_item;
106 return ret;
109 static inline struct llist_item *llist_insert(struct llist *list,
110 struct llist_item *after,
111 const unsigned char *oid)
113 struct llist_item *new_item = llist_item_get();
114 oidread(&new_item->oid, oid, the_repository->hash_algo);
115 new_item->next = NULL;
117 if (after) {
118 new_item->next = after->next;
119 after->next = new_item;
120 if (after == list->back)
121 list->back = new_item;
122 } else {/* insert in front */
123 if (list->size == 0)
124 list->back = new_item;
125 else
126 new_item->next = list->front;
127 list->front = new_item;
129 list->size++;
130 return new_item;
133 static inline struct llist_item *llist_insert_back(struct llist *list,
134 const unsigned char *oid)
136 return llist_insert(list, list->back, oid);
139 static inline struct llist_item *llist_insert_sorted_unique(struct llist *list,
140 const struct object_id *oid, struct llist_item *hint)
142 struct llist_item *prev = NULL, *l;
144 l = (hint == NULL) ? list->front : hint;
145 while (l) {
146 int cmp = oidcmp(&l->oid, oid);
147 if (cmp > 0) { /* we insert before this entry */
148 return llist_insert(list, prev, oid->hash);
150 if (!cmp) { /* already exists */
151 return l;
153 prev = l;
154 l = l->next;
156 /* insert at the end */
157 return llist_insert_back(list, oid->hash);
160 /* returns a pointer to an item in front of sha1 */
161 static inline struct llist_item * llist_sorted_remove(struct llist *list, const unsigned char *oid, struct llist_item *hint)
163 struct llist_item *prev, *l;
165 redo_from_start:
166 l = (hint == NULL) ? list->front : hint;
167 prev = NULL;
168 while (l) {
169 const int cmp = hashcmp(l->oid.hash, oid, the_repository->hash_algo);
170 if (cmp > 0) /* not in list, since sorted */
171 return prev;
172 if (!cmp) { /* found */
173 if (!prev) {
174 if (hint != NULL && hint != list->front) {
175 /* we don't know the previous element */
176 hint = NULL;
177 goto redo_from_start;
179 list->front = l->next;
180 } else
181 prev->next = l->next;
182 if (l == list->back)
183 list->back = prev;
184 llist_item_put(l);
185 list->size--;
186 return prev;
188 prev = l;
189 l = l->next;
191 return prev;
194 /* computes A\B */
195 static void llist_sorted_difference_inplace(struct llist *A,
196 struct llist *B)
198 struct llist_item *hint, *b;
200 hint = NULL;
201 b = B->front;
203 while (b) {
204 hint = llist_sorted_remove(A, b->oid.hash, hint);
205 b = b->next;
209 static inline struct pack_list * pack_list_insert(struct pack_list **pl,
210 struct pack_list *entry)
212 struct pack_list *p = xmalloc(sizeof(struct pack_list));
213 memcpy(p, entry, sizeof(struct pack_list));
214 p->next = *pl;
215 *pl = p;
216 return p;
219 static void pack_list_free(struct pack_list *pl)
221 for (struct pack_list *next; pl; pl = next) {
222 next = pl->next;
223 free(pl);
227 static inline size_t pack_list_size(struct pack_list *pl)
229 size_t ret = 0;
230 while (pl) {
231 ret++;
232 pl = pl->next;
234 return ret;
237 static struct pack_list * pack_list_difference(const struct pack_list *A,
238 const struct pack_list *B)
240 struct pack_list *ret;
241 const struct pack_list *pl;
243 if (!A)
244 return NULL;
246 pl = B;
247 while (pl != NULL) {
248 if (A->pack == pl->pack)
249 return pack_list_difference(A->next, B);
250 pl = pl->next;
252 ret = xmalloc(sizeof(struct pack_list));
253 memcpy(ret, A, sizeof(struct pack_list));
254 ret->next = pack_list_difference(A->next, B);
255 return ret;
258 static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
260 size_t p1_off = 0, p2_off = 0, p1_step, p2_step;
261 const unsigned char *p1_base, *p2_base;
262 struct llist_item *p1_hint = NULL, *p2_hint = NULL;
263 const unsigned int hashsz = the_hash_algo->rawsz;
265 if (!p1->unique_objects)
266 p1->unique_objects = llist_copy(p1->remaining_objects);
267 if (!p2->unique_objects)
268 p2->unique_objects = llist_copy(p2->remaining_objects);
270 p1_base = p1->pack->index_data;
271 p2_base = p2->pack->index_data;
272 p1_base += 256 * 4 + ((p1->pack->index_version < 2) ? 4 : 8);
273 p2_base += 256 * 4 + ((p2->pack->index_version < 2) ? 4 : 8);
274 p1_step = hashsz + ((p1->pack->index_version < 2) ? 4 : 0);
275 p2_step = hashsz + ((p2->pack->index_version < 2) ? 4 : 0);
277 while (p1_off < p1->pack->num_objects * p1_step &&
278 p2_off < p2->pack->num_objects * p2_step)
280 const int cmp = hashcmp(p1_base + p1_off, p2_base + p2_off,
281 the_repository->hash_algo);
282 /* cmp ~ p1 - p2 */
283 if (cmp == 0) {
284 p1_hint = llist_sorted_remove(p1->unique_objects,
285 p1_base + p1_off,
286 p1_hint);
287 p2_hint = llist_sorted_remove(p2->unique_objects,
288 p1_base + p1_off,
289 p2_hint);
290 p1_off += p1_step;
291 p2_off += p2_step;
292 continue;
294 if (cmp < 0) { /* p1 has the object, p2 doesn't */
295 p1_off += p1_step;
296 } else { /* p2 has the object, p1 doesn't */
297 p2_off += p2_step;
302 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
304 size_t ret = 0;
305 size_t p1_off = 0, p2_off = 0, p1_step, p2_step;
306 const unsigned char *p1_base, *p2_base;
307 const unsigned int hashsz = the_hash_algo->rawsz;
309 p1_base = p1->index_data;
310 p2_base = p2->index_data;
311 p1_base += 256 * 4 + ((p1->index_version < 2) ? 4 : 8);
312 p2_base += 256 * 4 + ((p2->index_version < 2) ? 4 : 8);
313 p1_step = hashsz + ((p1->index_version < 2) ? 4 : 0);
314 p2_step = hashsz + ((p2->index_version < 2) ? 4 : 0);
316 while (p1_off < p1->num_objects * p1_step &&
317 p2_off < p2->num_objects * p2_step)
319 int cmp = hashcmp(p1_base + p1_off, p2_base + p2_off,
320 the_repository->hash_algo);
321 /* cmp ~ p1 - p2 */
322 if (cmp == 0) {
323 ret++;
324 p1_off += p1_step;
325 p2_off += p2_step;
326 continue;
328 if (cmp < 0) { /* p1 has the object, p2 doesn't */
329 p1_off += p1_step;
330 } else { /* p2 has the object, p1 doesn't */
331 p2_off += p2_step;
334 return ret;
337 /* another O(n^2) function ... */
338 static size_t get_pack_redundancy(struct pack_list *pl)
340 struct pack_list *subset;
341 size_t ret = 0;
343 if (!pl)
344 return 0;
346 while ((subset = pl->next)) {
347 while (subset) {
348 ret += sizeof_union(pl->pack, subset->pack);
349 subset = subset->next;
351 pl = pl->next;
353 return ret;
356 static inline off_t pack_set_bytecount(struct pack_list *pl)
358 off_t ret = 0;
359 while (pl) {
360 ret += pl->pack->pack_size;
361 ret += pl->pack->index_size;
362 pl = pl->next;
364 return ret;
367 static int cmp_remaining_objects(const void *a, const void *b)
369 struct pack_list *pl_a = *((struct pack_list **)a);
370 struct pack_list *pl_b = *((struct pack_list **)b);
372 if (pl_a->remaining_objects->size == pl_b->remaining_objects->size) {
373 /* have the same remaining_objects, big pack first */
374 if (pl_a->all_objects_size == pl_b->all_objects_size)
375 return 0;
376 else if (pl_a->all_objects_size < pl_b->all_objects_size)
377 return 1;
378 else
379 return -1;
380 } else if (pl_a->remaining_objects->size < pl_b->remaining_objects->size) {
381 /* sort by remaining objects, more objects first */
382 return 1;
383 } else {
384 return -1;
388 /* Sort pack_list, greater size of remaining_objects first */
389 static void sort_pack_list(struct pack_list **pl)
391 struct pack_list **ary, *p;
392 int i;
393 size_t n = pack_list_size(*pl);
395 if (n < 2)
396 return;
398 /* prepare an array of packed_list for easier sorting */
399 CALLOC_ARRAY(ary, n);
400 for (n = 0, p = *pl; p; p = p->next)
401 ary[n++] = p;
403 QSORT(ary, n, cmp_remaining_objects);
405 /* link them back again */
406 for (i = 0; i < n - 1; i++)
407 ary[i]->next = ary[i + 1];
408 ary[n - 1]->next = NULL;
409 *pl = ary[0];
411 free(ary);
415 static void minimize(struct pack_list **min)
417 struct pack_list *pl, *unique = NULL, *non_unique = NULL;
418 struct llist *missing, *unique_pack_objects;
420 pl = local_packs;
421 while (pl) {
422 if (pl->unique_objects->size)
423 pack_list_insert(&unique, pl);
424 else
425 pack_list_insert(&non_unique, pl);
426 pl = pl->next;
428 /* find out which objects are missing from the set of unique packs */
429 missing = llist_copy(all_objects);
430 pl = unique;
431 while (pl) {
432 llist_sorted_difference_inplace(missing, pl->remaining_objects);
433 pl = pl->next;
436 *min = unique;
438 /* return if there are no objects missing from the unique set */
439 if (missing->size == 0) {
440 llist_free(missing);
441 pack_list_free(non_unique);
442 return;
445 unique_pack_objects = llist_copy(all_objects);
446 llist_sorted_difference_inplace(unique_pack_objects, missing);
448 /* remove unique pack objects from the non_unique packs */
449 pl = non_unique;
450 while (pl) {
451 llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects);
452 pl = pl->next;
455 while (non_unique) {
456 struct pack_list *next;
458 /* sort the non_unique packs, greater size of remaining_objects first */
459 sort_pack_list(&non_unique);
460 if (non_unique->remaining_objects->size == 0)
461 break;
463 pack_list_insert(min, non_unique);
465 for (pl = non_unique->next; pl && pl->remaining_objects->size > 0; pl = pl->next)
466 llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects);
468 next = non_unique->next;
469 free(non_unique);
470 non_unique = next;
473 pack_list_free(non_unique);
474 llist_free(unique_pack_objects);
475 llist_free(missing);
478 static void load_all_objects(void)
480 struct pack_list *pl = local_packs;
481 struct llist_item *hint, *l;
483 llist_init(&all_objects);
485 while (pl) {
486 hint = NULL;
487 l = pl->remaining_objects->front;
488 while (l) {
489 hint = llist_insert_sorted_unique(all_objects,
490 &l->oid, hint);
491 l = l->next;
493 pl = pl->next;
495 /* remove objects present in remote packs */
496 pl = altodb_packs;
497 while (pl) {
498 llist_sorted_difference_inplace(all_objects, pl->remaining_objects);
499 pl = pl->next;
503 /* this scales like O(n^2) */
504 static void cmp_local_packs(void)
506 struct pack_list *subset, *pl = local_packs;
508 /* only one packfile */
509 if (!pl->next) {
510 llist_init(&pl->unique_objects);
511 return;
514 while ((subset = pl)) {
515 while ((subset = subset->next))
516 cmp_two_packs(pl, subset);
517 pl = pl->next;
521 static void scan_alt_odb_packs(void)
523 struct pack_list *local, *alt;
525 alt = altodb_packs;
526 while (alt) {
527 local = local_packs;
528 while (local) {
529 llist_sorted_difference_inplace(local->remaining_objects,
530 alt->remaining_objects);
531 local = local->next;
533 alt = alt->next;
537 static struct pack_list * add_pack(struct packed_git *p)
539 struct pack_list l;
540 size_t off = 0, step;
541 const unsigned char *base;
543 if (!p->pack_local && !(alt_odb || verbose))
544 return NULL;
546 l.pack = p;
547 llist_init(&l.remaining_objects);
549 if (open_pack_index(p))
550 return NULL;
552 base = p->index_data;
553 base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
554 step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0);
555 while (off < p->num_objects * step) {
556 llist_insert_back(l.remaining_objects, base + off);
557 off += step;
559 l.all_objects_size = l.remaining_objects->size;
560 l.unique_objects = NULL;
561 if (p->pack_local)
562 return pack_list_insert(&local_packs, &l);
563 else
564 return pack_list_insert(&altodb_packs, &l);
567 static struct pack_list * add_pack_file(const char *filename)
569 struct packed_git *p = get_all_packs(the_repository);
571 if (strlen(filename) < 40)
572 die("Bad pack filename: %s", filename);
574 while (p) {
575 if (strstr(p->pack_name, filename))
576 return add_pack(p);
577 p = p->next;
579 die("Filename %s not found in packed_git", filename);
582 static void load_all(void)
584 struct packed_git *p = get_all_packs(the_repository);
586 while (p) {
587 add_pack(p);
588 p = p->next;
592 int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, struct repository *repo UNUSED) {
593 int i; int i_still_use_this = 0; struct pack_list *min = NULL, *red, *pl;
594 struct llist *ignore;
595 struct strbuf idx_name = STRBUF_INIT;
596 char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
598 if (argc == 2 && !strcmp(argv[1], "-h"))
599 usage(pack_redundant_usage);
601 for (i = 1; i < argc; i++) {
602 const char *arg = argv[i];
603 if (!strcmp(arg, "--")) {
604 i++;
605 break;
607 if (!strcmp(arg, "--all")) {
608 load_all_packs = 1;
609 continue;
611 if (!strcmp(arg, "--verbose")) {
612 verbose = 1;
613 continue;
615 if (!strcmp(arg, "--alt-odb")) {
616 alt_odb = 1;
617 continue;
619 if (!strcmp(arg, "--i-still-use-this")) {
620 i_still_use_this = 1;
621 continue;
623 if (*arg == '-')
624 usage(pack_redundant_usage);
625 else
626 break;
629 if (!i_still_use_this) {
630 fputs(_("'git pack-redundant' is nominated for removal.\n"
631 "If you still use this command, please add an extra\n"
632 "option, '--i-still-use-this', on the command line\n"
633 "and let us know you still use it by sending an e-mail\n"
634 "to <git@vger.kernel.org>. Thanks.\n"), stderr);
635 die(_("refusing to run without --i-still-use-this"));
638 if (load_all_packs)
639 load_all();
640 else
641 while (*(argv + i) != NULL)
642 add_pack_file(*(argv + i++));
644 if (!local_packs)
645 die("Zero packs found!");
647 load_all_objects();
649 if (alt_odb)
650 scan_alt_odb_packs();
652 /* ignore objects given on stdin */
653 llist_init(&ignore);
654 if (!isatty(0)) {
655 struct object_id oid;
656 while (fgets(buf, sizeof(buf), stdin)) {
657 if (get_oid_hex(buf, &oid))
658 die("Bad object ID on stdin: %s", buf);
659 llist_insert_sorted_unique(ignore, &oid, NULL);
662 llist_sorted_difference_inplace(all_objects, ignore);
663 pl = local_packs;
664 while (pl) {
665 llist_sorted_difference_inplace(pl->remaining_objects, ignore);
666 pl = pl->next;
669 cmp_local_packs();
671 minimize(&min);
673 if (verbose) {
674 fprintf(stderr, "There are %lu packs available in alt-odbs.\n",
675 (unsigned long)pack_list_size(altodb_packs));
676 fprintf(stderr, "The smallest (bytewise) set of packs is:\n");
677 pl = min;
678 while (pl) {
679 fprintf(stderr, "\t%s\n", pl->pack->pack_name);
680 pl = pl->next;
682 fprintf(stderr, "containing %lu duplicate objects "
683 "with a total size of %lukb.\n",
684 (unsigned long)get_pack_redundancy(min),
685 (unsigned long)pack_set_bytecount(min)/1024);
686 fprintf(stderr, "A total of %lu unique objects were considered.\n",
687 (unsigned long)all_objects->size);
688 fprintf(stderr, "Redundant packs (with indexes):\n");
690 pl = red = pack_list_difference(local_packs, min);
691 while (pl) {
692 printf("%s\n%s\n",
693 odb_pack_name(&idx_name, pl->pack->hash, "idx"),
694 pl->pack->pack_name);
695 pl = pl->next;
697 if (verbose)
698 fprintf(stderr, "%luMB of redundant packs in total.\n",
699 (unsigned long)pack_set_bytecount(red)/(1024*1024));
701 pack_list_free(red);
702 pack_list_free(min);
703 llist_free(ignore);
704 strbuf_release(&idx_name);
705 return 0;