2005-11-09 02:22:40 +01:00
|
|
|
/*
|
|
|
|
*
|
|
|
|
* Copyright 2005, Lukas Sandstrom <lukass@etek.chalmers.se>
|
|
|
|
*
|
|
|
|
* This file is licensed under the GPL v2.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
Fix sparse warnings
Fix warnings from 'make check'.
- These files don't include 'builtin.h' causing sparse to complain that
cmd_* isn't declared:
builtin/clone.c:364, builtin/fetch-pack.c:797,
builtin/fmt-merge-msg.c:34, builtin/hash-object.c:78,
builtin/merge-index.c:69, builtin/merge-recursive.c:22
builtin/merge-tree.c:341, builtin/mktag.c:156, builtin/notes.c:426
builtin/notes.c:822, builtin/pack-redundant.c:596,
builtin/pack-refs.c:10, builtin/patch-id.c:60, builtin/patch-id.c:149,
builtin/remote.c:1512, builtin/remote-ext.c:240,
builtin/remote-fd.c:53, builtin/reset.c:236, builtin/send-pack.c:384,
builtin/unpack-file.c:25, builtin/var.c:75
- These files have symbols which should be marked static since they're
only file scope:
submodule.c:12, diff.c:631, replace_object.c:92, submodule.c:13,
submodule.c:14, trace.c:78, transport.c:195, transport-helper.c:79,
unpack-trees.c:19, url.c:3, url.c:18, url.c:104, url.c:117, url.c:123,
url.c:129, url.c:136, thread-utils.c:21, thread-utils.c:48
- These files redeclare symbols to be different types:
builtin/index-pack.c:210, parse-options.c:564, parse-options.c:571,
usage.c:49, usage.c:58, usage.c:63, usage.c:72
- These files use a literal integer 0 when they really should use a NULL
pointer:
daemon.c:663, fast-import.c:2942, imap-send.c:1072, notes-merge.c:362
While we're in the area, clean up some unused #includes in builtin files
(mostly exec_cmd.h).
Signed-off-by: Stephen Boyd <bebarino@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-22 08:51:05 +01:00
|
|
|
#include "builtin.h"
|
2023-03-21 07:25:54 +01:00
|
|
|
#include "gettext.h"
|
2023-02-24 01:09:27 +01:00
|
|
|
#include "hex.h"
|
2018-03-23 18:20:59 +01:00
|
|
|
#include "repository.h"
|
2017-08-19 00:20:16 +02:00
|
|
|
#include "packfile.h"
|
2018-03-23 18:20:59 +01:00
|
|
|
#include "object-store.h"
|
2005-11-09 02:22:40 +01:00
|
|
|
|
2005-12-20 21:05:54 +01:00
|
|
|
/* Number of llist_item nodes allocated in one batch when the free list is empty. */
#define BLKSIZE 512
|
|
|
|
|
2005-11-10 00:16:13 +01:00
|
|
|
/* One-line synopsis of the command's arguments. */
static const char pack_redundant_usage[] =
	"git pack-redundant [--verbose] [--alt-odb] (--all | <pack-filename>...)";
|
2005-11-09 02:22:40 +01:00
|
|
|
|
2006-08-15 19:23:48 +02:00
|
|
|
/*
 * Option flags. NOTE(review): presumably set from --all, --verbose and
 * --alt-odb respectively; the option parsing is not visible here.
 */
static int load_all_packs, verbose, alt_odb;
|
2005-11-09 02:22:40 +01:00
|
|
|
|
|
|
|
struct llist_item {
|
|
|
|
struct llist_item *next;
|
2021-04-26 03:02:54 +02:00
|
|
|
struct object_id oid;
|
2005-11-09 02:22:40 +01:00
|
|
|
};
|
2005-11-21 01:52:52 +01:00
|
|
|
/*
 * Singly linked list of object ids with cached tail pointer and length.
 * The sorted insert/remove helpers keep the nodes in ascending id order.
 */
static struct llist {
	struct llist_item *front;	/* first node, NULL when empty */
	struct llist_item *back;	/* last node, NULL when empty */
	size_t size;			/* number of nodes */
} *all_objects; /* all objects which must be present in local packfiles */
|
2005-11-09 02:22:40 +01:00
|
|
|
|
2005-11-21 01:52:52 +01:00
|
|
|
/* Singly linked list of packs together with per-pack object bookkeeping. */
static struct pack_list {
	struct pack_list *next;		/* following entry, NULL at the end */
	struct packed_git *pack;	/* the pack this entry describes */
	/*
	 * Created lazily by cmp_two_packs() as a copy of remaining_objects,
	 * from which objects shared with the compared pack are removed.
	 */
	struct llist *unique_objects;
	/* primary sort key in cmp_remaining_objects() (by list size) */
	struct llist *remaining_objects;
	/* tie-breaker in cmp_remaining_objects(): larger value sorts first */
	size_t all_objects_size;
} *local_packs = NULL, *altodb_packs = NULL;
|
2005-11-09 02:22:40 +01:00
|
|
|
|
2006-08-15 19:23:48 +02:00
|
|
|
/* Head of the free list of recycled nodes (see llist_item_put/llist_item_get). */
static struct llist_item *free_nodes;
|
2005-11-22 15:56:35 +01:00
|
|
|
|
2005-12-20 21:05:54 +01:00
|
|
|
static inline void llist_item_put(struct llist_item *item)
|
|
|
|
{
|
|
|
|
item->next = free_nodes;
|
|
|
|
free_nodes = item;
|
|
|
|
}
|
|
|
|
|
2006-02-26 16:13:46 +01:00
|
|
|
static inline struct llist_item *llist_item_get(void)
|
2005-11-22 15:56:35 +01:00
|
|
|
{
|
2018-02-14 19:59:33 +01:00
|
|
|
struct llist_item *new_item;
|
2005-11-22 15:56:35 +01:00
|
|
|
if ( free_nodes ) {
|
2018-02-14 19:59:33 +01:00
|
|
|
new_item = free_nodes;
|
2005-11-22 15:56:35 +01:00
|
|
|
free_nodes = free_nodes->next;
|
2005-12-20 21:05:54 +01:00
|
|
|
} else {
|
|
|
|
int i = 1;
|
2018-02-14 19:59:33 +01:00
|
|
|
ALLOC_ARRAY(new_item, BLKSIZE);
|
2009-09-01 07:35:10 +02:00
|
|
|
for (; i < BLKSIZE; i++)
|
2018-02-14 19:59:33 +01:00
|
|
|
llist_item_put(&new_item[i]);
|
2005-12-20 21:05:54 +01:00
|
|
|
}
|
2018-02-14 19:59:33 +01:00
|
|
|
return new_item;
|
2005-11-22 15:56:35 +01:00
|
|
|
}
|
|
|
|
|
2005-11-21 01:52:52 +01:00
|
|
|
static inline void llist_init(struct llist **list)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
|
|
|
*list = xmalloc(sizeof(struct llist));
|
|
|
|
(*list)->front = (*list)->back = NULL;
|
|
|
|
(*list)->size = 0;
|
|
|
|
}
|
|
|
|
|
2005-11-21 01:52:52 +01:00
|
|
|
static struct llist * llist_copy(struct llist *list)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
|
|
|
struct llist *ret;
|
2018-02-14 19:59:33 +01:00
|
|
|
struct llist_item *new_item, *old_item, *prev;
|
2007-06-07 09:04:01 +02:00
|
|
|
|
2005-11-09 02:22:40 +01:00
|
|
|
llist_init(&ret);
|
|
|
|
|
|
|
|
if ((ret->size = list->size) == 0)
|
|
|
|
return ret;
|
|
|
|
|
2018-02-14 19:59:33 +01:00
|
|
|
new_item = ret->front = llist_item_get();
|
2018-05-02 02:25:45 +02:00
|
|
|
new_item->oid = list->front->oid;
|
2005-11-09 02:22:40 +01:00
|
|
|
|
2018-02-14 19:59:33 +01:00
|
|
|
old_item = list->front->next;
|
|
|
|
while (old_item) {
|
|
|
|
prev = new_item;
|
|
|
|
new_item = llist_item_get();
|
|
|
|
prev->next = new_item;
|
2018-05-02 02:25:45 +02:00
|
|
|
new_item->oid = old_item->oid;
|
2018-02-14 19:59:33 +01:00
|
|
|
old_item = old_item->next;
|
2005-11-09 02:22:40 +01:00
|
|
|
}
|
2018-02-14 19:59:33 +01:00
|
|
|
new_item->next = NULL;
|
|
|
|
ret->back = new_item;
|
2007-06-07 09:04:01 +02:00
|
|
|
|
2005-11-09 02:22:40 +01:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2007-03-16 21:42:50 +01:00
|
|
|
static inline struct llist_item *llist_insert(struct llist *list,
|
|
|
|
struct llist_item *after,
|
2021-04-26 03:02:54 +02:00
|
|
|
const unsigned char *oid)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
2018-02-14 19:59:33 +01:00
|
|
|
struct llist_item *new_item = llist_item_get();
|
2021-04-26 03:02:54 +02:00
|
|
|
oidread(&new_item->oid, oid);
|
2018-02-14 19:59:33 +01:00
|
|
|
new_item->next = NULL;
|
2005-11-09 02:22:40 +01:00
|
|
|
|
2022-05-02 18:50:37 +02:00
|
|
|
if (after) {
|
2018-02-14 19:59:33 +01:00
|
|
|
new_item->next = after->next;
|
|
|
|
after->next = new_item;
|
2005-11-09 02:22:40 +01:00
|
|
|
if (after == list->back)
|
2018-02-14 19:59:33 +01:00
|
|
|
list->back = new_item;
|
2005-11-09 02:22:40 +01:00
|
|
|
} else {/* insert in front */
|
|
|
|
if (list->size == 0)
|
2018-02-14 19:59:33 +01:00
|
|
|
list->back = new_item;
|
2005-11-09 02:22:40 +01:00
|
|
|
else
|
2018-02-14 19:59:33 +01:00
|
|
|
new_item->next = list->front;
|
|
|
|
list->front = new_item;
|
2005-11-09 02:22:40 +01:00
|
|
|
}
|
|
|
|
list->size++;
|
2018-02-14 19:59:33 +01:00
|
|
|
return new_item;
|
2005-11-09 02:22:40 +01:00
|
|
|
}
|
|
|
|
|
2007-03-16 21:42:50 +01:00
|
|
|
static inline struct llist_item *llist_insert_back(struct llist *list,
|
2021-04-26 03:02:54 +02:00
|
|
|
const unsigned char *oid)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
2018-05-02 02:25:45 +02:00
|
|
|
return llist_insert(list, list->back, oid);
|
2005-11-09 02:22:40 +01:00
|
|
|
}
|
|
|
|
|
2007-03-16 21:42:50 +01:00
|
|
|
static inline struct llist_item *llist_insert_sorted_unique(struct llist *list,
|
2018-05-02 02:25:45 +02:00
|
|
|
const struct object_id *oid, struct llist_item *hint)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
|
|
|
struct llist_item *prev = NULL, *l;
|
|
|
|
|
|
|
|
l = (hint == NULL) ? list->front : hint;
|
|
|
|
while (l) {
|
2021-04-26 03:02:54 +02:00
|
|
|
int cmp = oidcmp(&l->oid, oid);
|
2005-11-09 02:22:40 +01:00
|
|
|
if (cmp > 0) { /* we insert before this entry */
|
2021-04-26 03:02:54 +02:00
|
|
|
return llist_insert(list, prev, oid->hash);
|
2005-11-09 02:22:40 +01:00
|
|
|
}
|
2009-09-01 07:35:10 +02:00
|
|
|
if (!cmp) { /* already exists */
|
2005-11-09 02:22:40 +01:00
|
|
|
return l;
|
|
|
|
}
|
|
|
|
prev = l;
|
|
|
|
l = l->next;
|
|
|
|
}
|
|
|
|
/* insert at the end */
|
2021-04-26 03:02:54 +02:00
|
|
|
return llist_insert_back(list, oid->hash);
|
2005-11-09 02:22:40 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* returns a pointer to an item in front of sha1 */
|
2021-04-26 03:02:54 +02:00
|
|
|
static inline struct llist_item * llist_sorted_remove(struct llist *list, const unsigned char *oid, struct llist_item *hint)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
|
|
|
struct llist_item *prev, *l;
|
|
|
|
|
|
|
|
redo_from_start:
|
|
|
|
l = (hint == NULL) ? list->front : hint;
|
|
|
|
prev = NULL;
|
|
|
|
while (l) {
|
2021-04-26 03:02:54 +02:00
|
|
|
const int cmp = hashcmp(l->oid.hash, oid);
|
2005-11-09 02:22:40 +01:00
|
|
|
if (cmp > 0) /* not in list, since sorted */
|
|
|
|
return prev;
|
2009-09-01 07:35:10 +02:00
|
|
|
if (!cmp) { /* found */
|
2022-05-02 18:50:37 +02:00
|
|
|
if (!prev) {
|
2005-11-09 02:22:40 +01:00
|
|
|
if (hint != NULL && hint != list->front) {
|
|
|
|
/* we don't know the previous element */
|
|
|
|
hint = NULL;
|
|
|
|
goto redo_from_start;
|
|
|
|
}
|
|
|
|
list->front = l->next;
|
|
|
|
} else
|
|
|
|
prev->next = l->next;
|
|
|
|
if (l == list->back)
|
|
|
|
list->back = prev;
|
2005-11-22 15:56:35 +01:00
|
|
|
llist_item_put(l);
|
2005-11-09 02:22:40 +01:00
|
|
|
list->size--;
|
|
|
|
return prev;
|
|
|
|
}
|
|
|
|
prev = l;
|
|
|
|
l = l->next;
|
|
|
|
}
|
|
|
|
return prev;
|
|
|
|
}
|
|
|
|
|
2005-11-15 22:24:02 +01:00
|
|
|
/* computes A\B */
|
2005-11-21 01:52:52 +01:00
|
|
|
static void llist_sorted_difference_inplace(struct llist *A,
|
2005-11-15 22:24:02 +01:00
|
|
|
struct llist *B)
|
|
|
|
{
|
|
|
|
struct llist_item *hint, *b;
|
|
|
|
|
|
|
|
hint = NULL;
|
|
|
|
b = B->front;
|
|
|
|
|
|
|
|
while (b) {
|
2021-04-26 03:02:54 +02:00
|
|
|
hint = llist_sorted_remove(A, b->oid.hash, hint);
|
2005-11-15 22:24:02 +01:00
|
|
|
b = b->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-11-21 01:52:52 +01:00
|
|
|
static inline struct pack_list * pack_list_insert(struct pack_list **pl,
|
2005-11-09 02:22:40 +01:00
|
|
|
struct pack_list *entry)
|
|
|
|
{
|
|
|
|
struct pack_list *p = xmalloc(sizeof(struct pack_list));
|
|
|
|
memcpy(p, entry, sizeof(struct pack_list));
|
|
|
|
p->next = *pl;
|
|
|
|
*pl = p;
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
2005-11-21 01:52:52 +01:00
|
|
|
static inline size_t pack_list_size(struct pack_list *pl)
|
2005-11-11 01:25:04 +01:00
|
|
|
{
|
|
|
|
size_t ret = 0;
|
2009-09-01 07:35:10 +02:00
|
|
|
while (pl) {
|
2005-11-11 01:25:04 +01:00
|
|
|
ret++;
|
|
|
|
pl = pl->next;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2005-11-22 15:59:22 +01:00
|
|
|
static struct pack_list * pack_list_difference(const struct pack_list *A,
|
|
|
|
const struct pack_list *B)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
2005-11-22 15:59:22 +01:00
|
|
|
struct pack_list *ret;
|
|
|
|
const struct pack_list *pl;
|
2005-11-09 02:22:40 +01:00
|
|
|
|
2022-05-02 18:50:37 +02:00
|
|
|
if (!A)
|
2005-11-09 02:22:40 +01:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
pl = B;
|
|
|
|
while (pl != NULL) {
|
|
|
|
if (A->pack == pl->pack)
|
|
|
|
return pack_list_difference(A->next, B);
|
|
|
|
pl = pl->next;
|
|
|
|
}
|
|
|
|
ret = xmalloc(sizeof(struct pack_list));
|
|
|
|
memcpy(ret, A, sizeof(struct pack_list));
|
|
|
|
ret->next = pack_list_difference(A->next, B);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2005-11-21 01:52:52 +01:00
|
|
|
static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
2020-11-13 06:07:01 +01:00
|
|
|
size_t p1_off = 0, p2_off = 0, p1_step, p2_step;
|
2007-03-16 21:42:50 +01:00
|
|
|
const unsigned char *p1_base, *p2_base;
|
2005-11-09 02:22:40 +01:00
|
|
|
struct llist_item *p1_hint = NULL, *p2_hint = NULL;
|
2018-05-02 02:25:38 +02:00
|
|
|
const unsigned int hashsz = the_hash_algo->rawsz;
|
2006-06-18 17:18:09 +02:00
|
|
|
|
2019-02-02 14:30:13 +01:00
|
|
|
if (!p1->unique_objects)
|
2019-02-02 14:30:16 +01:00
|
|
|
p1->unique_objects = llist_copy(p1->remaining_objects);
|
2019-02-02 14:30:13 +01:00
|
|
|
if (!p2->unique_objects)
|
2019-02-02 14:30:16 +01:00
|
|
|
p2->unique_objects = llist_copy(p2->remaining_objects);
|
2019-02-02 14:30:13 +01:00
|
|
|
|
2007-03-16 21:42:50 +01:00
|
|
|
p1_base = p1->pack->index_data;
|
|
|
|
p2_base = p2->pack->index_data;
|
2007-04-09 07:06:37 +02:00
|
|
|
p1_base += 256 * 4 + ((p1->pack->index_version < 2) ? 4 : 8);
|
|
|
|
p2_base += 256 * 4 + ((p2->pack->index_version < 2) ? 4 : 8);
|
2018-05-02 02:25:38 +02:00
|
|
|
p1_step = hashsz + ((p1->pack->index_version < 2) ? 4 : 0);
|
|
|
|
p2_step = hashsz + ((p2->pack->index_version < 2) ? 4 : 0);
|
2005-11-09 02:22:40 +01:00
|
|
|
|
2007-04-09 07:06:37 +02:00
|
|
|
while (p1_off < p1->pack->num_objects * p1_step &&
|
|
|
|
p2_off < p2->pack->num_objects * p2_step)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
2019-02-03 00:16:45 +01:00
|
|
|
const int cmp = hashcmp(p1_base + p1_off, p2_base + p2_off);
|
2005-11-09 02:22:40 +01:00
|
|
|
/* cmp ~ p1 - p2 */
|
|
|
|
if (cmp == 0) {
|
|
|
|
p1_hint = llist_sorted_remove(p1->unique_objects,
|
2021-04-26 03:02:54 +02:00
|
|
|
p1_base + p1_off,
|
2018-05-02 02:25:45 +02:00
|
|
|
p1_hint);
|
2005-11-09 02:22:40 +01:00
|
|
|
p2_hint = llist_sorted_remove(p2->unique_objects,
|
2021-04-26 03:02:54 +02:00
|
|
|
p1_base + p1_off,
|
2018-05-02 02:25:45 +02:00
|
|
|
p2_hint);
|
2007-04-09 07:06:37 +02:00
|
|
|
p1_off += p1_step;
|
|
|
|
p2_off += p2_step;
|
2005-11-09 02:22:40 +01:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (cmp < 0) { /* p1 has the object, p2 doesn't */
|
2007-04-09 07:06:37 +02:00
|
|
|
p1_off += p1_step;
|
2005-11-09 02:22:40 +01:00
|
|
|
} else { /* p2 has the object, p1 doesn't */
|
2007-04-09 07:06:37 +02:00
|
|
|
p2_off += p2_step;
|
2005-11-09 02:22:40 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-11-21 01:52:52 +01:00
|
|
|
static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
|
|
|
size_t ret = 0;
|
2020-11-13 06:07:01 +01:00
|
|
|
size_t p1_off = 0, p2_off = 0, p1_step, p2_step;
|
2007-03-16 21:42:50 +01:00
|
|
|
const unsigned char *p1_base, *p2_base;
|
2018-05-02 02:25:38 +02:00
|
|
|
const unsigned int hashsz = the_hash_algo->rawsz;
|
2005-11-09 02:22:40 +01:00
|
|
|
|
2007-03-16 21:42:50 +01:00
|
|
|
p1_base = p1->index_data;
|
|
|
|
p2_base = p2->index_data;
|
2007-04-09 07:06:37 +02:00
|
|
|
p1_base += 256 * 4 + ((p1->index_version < 2) ? 4 : 8);
|
|
|
|
p2_base += 256 * 4 + ((p2->index_version < 2) ? 4 : 8);
|
2018-05-02 02:25:38 +02:00
|
|
|
p1_step = hashsz + ((p1->index_version < 2) ? 4 : 0);
|
|
|
|
p2_step = hashsz + ((p2->index_version < 2) ? 4 : 0);
|
2005-11-09 02:22:40 +01:00
|
|
|
|
2007-04-09 07:06:37 +02:00
|
|
|
while (p1_off < p1->num_objects * p1_step &&
|
|
|
|
p2_off < p2->num_objects * p2_step)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
2006-08-17 20:54:57 +02:00
|
|
|
int cmp = hashcmp(p1_base + p1_off, p2_base + p2_off);
|
2005-11-09 02:22:40 +01:00
|
|
|
/* cmp ~ p1 - p2 */
|
|
|
|
if (cmp == 0) {
|
|
|
|
ret++;
|
2007-04-09 07:06:37 +02:00
|
|
|
p1_off += p1_step;
|
|
|
|
p2_off += p2_step;
|
2005-11-09 02:22:40 +01:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (cmp < 0) { /* p1 has the object, p2 doesn't */
|
2007-04-09 07:06:37 +02:00
|
|
|
p1_off += p1_step;
|
2005-11-09 02:22:40 +01:00
|
|
|
} else { /* p2 has the object, p1 doesn't */
|
2007-04-09 07:06:37 +02:00
|
|
|
p2_off += p2_step;
|
2005-11-09 02:22:40 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* another O(n^2) function ... */
|
2005-11-21 01:52:52 +01:00
|
|
|
static size_t get_pack_redundancy(struct pack_list *pl)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
|
|
|
struct pack_list *subset;
|
2005-11-21 01:52:52 +01:00
|
|
|
size_t ret = 0;
|
2005-11-11 01:25:04 +01:00
|
|
|
|
2022-05-02 18:50:37 +02:00
|
|
|
if (!pl)
|
2005-11-11 01:25:04 +01:00
|
|
|
return 0;
|
|
|
|
|
2005-11-09 02:22:40 +01:00
|
|
|
while ((subset = pl->next)) {
|
2009-09-01 07:35:10 +02:00
|
|
|
while (subset) {
|
2005-11-09 02:22:40 +01:00
|
|
|
ret += sizeof_union(pl->pack, subset->pack);
|
|
|
|
subset = subset->next;
|
|
|
|
}
|
|
|
|
pl = pl->next;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2007-03-07 02:44:30 +01:00
|
|
|
static inline off_t pack_set_bytecount(struct pack_list *pl)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
2007-03-07 02:44:30 +01:00
|
|
|
off_t ret = 0;
|
2005-11-09 02:22:40 +01:00
|
|
|
while (pl) {
|
|
|
|
ret += pl->pack->pack_size;
|
|
|
|
ret += pl->pack->index_size;
|
|
|
|
pl = pl->next;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2019-02-02 14:30:17 +01:00
|
|
|
static int cmp_remaining_objects(const void *a, const void *b)
|
pack-redundant: new algorithm to find min packs
When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.
The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.
#!/bin/sh
repo="$(pwd)/test.git"
work="$(pwd)/test"
i=1
max=199
if test -d "$repo" || test -d "$work"; then
echo >&2 "ERROR: '$repo' or '$work' already exist"
exit 1
fi
git init -q --bare "$repo"
git --git-dir="$repo" config gc.auto 0
git --git-dir="$repo" config transfer.unpackLimit 0
git clone -q "$repo" "$work" 2>/dev/null
while :; do
cd "$work"
echo "loop $i: $(date +%s)" >$i
git add $i
git commit -q -sm "loop $i"
git push -q origin HEAD:master
printf "\rCreate pack %4d/%d\t" $i $max
if test $i -ge $max; then break; fi
cd "$repo"
git repack -q
if test $(($i % 2)) -eq 0; then
git repack -aq
pack=$(ls -t $repo/objects/pack/*.pack | head -1)
touch "${pack%.pack}.keep"
fi
i=$((i+1))
done
printf "\ndone\n"
To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:
1. Get the unique and non_uniqe packs, add the unique packs to the
`min` list.
2. Remove the objects of unique packs from non_unique packs, then each
object left in the non_unique packs will have at least two copies.
3. Sort the non_unique packs by the objects' size, more objects first,
and add the first non_unique pack to `min` list.
4. Drop the duplicated objects from other packs in the ordered
non_unique pack list, and repeat step 3.
Some test cases will fail on Mac OS X. Mark them and will resolve in
later commit.
Original PR and discussions: https://github.com/jiangxin/git/pull/25
Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-02 14:30:15 +01:00
|
|
|
{
|
|
|
|
struct pack_list *pl_a = *((struct pack_list **)a);
|
|
|
|
struct pack_list *pl_b = *((struct pack_list **)b);
|
|
|
|
|
2019-02-02 14:30:17 +01:00
|
|
|
if (pl_a->remaining_objects->size == pl_b->remaining_objects->size) {
|
|
|
|
/* have the same remaining_objects, big pack first */
|
|
|
|
if (pl_a->all_objects_size == pl_b->all_objects_size)
|
|
|
|
return 0;
|
|
|
|
else if (pl_a->all_objects_size < pl_b->all_objects_size)
|
|
|
|
return 1;
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
} else if (pl_a->remaining_objects->size < pl_b->remaining_objects->size) {
|
|
|
|
/* sort by remaining objects, more objects first */
|
pack-redundant: new algorithm to find min packs
When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.
The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.
#!/bin/sh
repo="$(pwd)/test.git"
work="$(pwd)/test"
i=1
max=199
if test -d "$repo" || test -d "$work"; then
echo >&2 "ERROR: '$repo' or '$work' already exist"
exit 1
fi
git init -q --bare "$repo"
git --git-dir="$repo" config gc.auto 0
git --git-dir="$repo" config transfer.unpackLimit 0
git clone -q "$repo" "$work" 2>/dev/null
while :; do
cd "$work"
echo "loop $i: $(date +%s)" >$i
git add $i
git commit -q -sm "loop $i"
git push -q origin HEAD:master
printf "\rCreate pack %4d/%d\t" $i $max
if test $i -ge $max; then break; fi
cd "$repo"
git repack -q
if test $(($i % 2)) -eq 0; then
git repack -aq
pack=$(ls -t $repo/objects/pack/*.pack | head -1)
touch "${pack%.pack}.keep"
fi
i=$((i+1))
done
printf "\ndone\n"
To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:
1. Get the unique and non_uniqe packs, add the unique packs to the
`min` list.
2. Remove the objects of unique packs from non_unique packs, then each
object left in the non_unique packs will have at least two copies.
3. Sort the non_unique packs by the objects' size, more objects first,
and add the first non_unique pack to `min` list.
4. Drop the duplicated objects from other packs in the ordered
non_unique pack list, and repeat step 3.
Some test cases will fail on Mac OS X. Mark them and will resolve in
later commit.
Original PR and discussions: https://github.com/jiangxin/git/pull/25
Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-02 14:30:15 +01:00
|
|
|
return 1;
|
2019-02-02 14:30:17 +01:00
|
|
|
} else {
|
pack-redundant: new algorithm to find min packs
When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.
The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.
#!/bin/sh
repo="$(pwd)/test.git"
work="$(pwd)/test"
i=1
max=199
if test -d "$repo" || test -d "$work"; then
echo >&2 "ERROR: '$repo' or '$work' already exist"
exit 1
fi
git init -q --bare "$repo"
git --git-dir="$repo" config gc.auto 0
git --git-dir="$repo" config transfer.unpackLimit 0
git clone -q "$repo" "$work" 2>/dev/null
while :; do
cd "$work"
echo "loop $i: $(date +%s)" >$i
git add $i
git commit -q -sm "loop $i"
git push -q origin HEAD:master
printf "\rCreate pack %4d/%d\t" $i $max
if test $i -ge $max; then break; fi
cd "$repo"
git repack -q
if test $(($i % 2)) -eq 0; then
git repack -aq
pack=$(ls -t $repo/objects/pack/*.pack | head -1)
touch "${pack%.pack}.keep"
fi
i=$((i+1))
done
printf "\ndone\n"
To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:
1. Get the unique and non_uniqe packs, add the unique packs to the
`min` list.
2. Remove the objects of unique packs from non_unique packs, then each
object left in the non_unique packs will have at least two copies.
3. Sort the non_unique packs by the objects' size, more objects first,
and add the first non_unique pack to `min` list.
4. Drop the duplicated objects from other packs in the ordered
non_unique pack list, and repeat step 3.
Some test cases will fail on Mac OS X. Mark them and will resolve in
later commit.
Original PR and discussions: https://github.com/jiangxin/git/pull/25
Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-02 14:30:15 +01:00
|
|
|
return -1;
|
2019-02-02 14:30:17 +01:00
|
|
|
}
|
pack-redundant: new algorithm to find min packs
When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.
The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.
#!/bin/sh
repo="$(pwd)/test.git"
work="$(pwd)/test"
i=1
max=199
if test -d "$repo" || test -d "$work"; then
echo >&2 "ERROR: '$repo' or '$work' already exist"
exit 1
fi
git init -q --bare "$repo"
git --git-dir="$repo" config gc.auto 0
git --git-dir="$repo" config transfer.unpackLimit 0
git clone -q "$repo" "$work" 2>/dev/null
while :; do
cd "$work"
echo "loop $i: $(date +%s)" >$i
git add $i
git commit -q -sm "loop $i"
git push -q origin HEAD:master
printf "\rCreate pack %4d/%d\t" $i $max
if test $i -ge $max; then break; fi
cd "$repo"
git repack -q
if test $(($i % 2)) -eq 0; then
git repack -aq
pack=$(ls -t $repo/objects/pack/*.pack | head -1)
touch "${pack%.pack}.keep"
fi
i=$((i+1))
done
printf "\ndone\n"
To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:
1. Get the unique and non_uniqe packs, add the unique packs to the
`min` list.
2. Remove the objects of unique packs from non_unique packs, then each
object left in the non_unique packs will have at least two copies.
3. Sort the non_unique packs by the objects' size, more objects first,
and add the first non_unique pack to `min` list.
4. Drop the duplicated objects from other packs in the ordered
non_unique pack list, and repeat step 3.
Some test cases will fail on Mac OS X. Mark them and will resolve in
later commit.
Original PR and discussions: https://github.com/jiangxin/git/pull/25
Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-02 14:30:15 +01:00
|
|
|
}
|
|
|
|
|
2019-02-02 14:30:16 +01:00
|
|
|
/* Sort pack_list, greater size of remaining_objects first */
|
pack-redundant: new algorithm to find min packs
When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.
The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.
#!/bin/sh
repo="$(pwd)/test.git"
work="$(pwd)/test"
i=1
max=199
if test -d "$repo" || test -d "$work"; then
echo >&2 "ERROR: '$repo' or '$work' already exist"
exit 1
fi
git init -q --bare "$repo"
git --git-dir="$repo" config gc.auto 0
git --git-dir="$repo" config transfer.unpackLimit 0
git clone -q "$repo" "$work" 2>/dev/null
while :; do
cd "$work"
echo "loop $i: $(date +%s)" >$i
git add $i
git commit -q -sm "loop $i"
git push -q origin HEAD:master
printf "\rCreate pack %4d/%d\t" $i $max
if test $i -ge $max; then break; fi
cd "$repo"
git repack -q
if test $(($i % 2)) -eq 0; then
git repack -aq
pack=$(ls -t $repo/objects/pack/*.pack | head -1)
touch "${pack%.pack}.keep"
fi
i=$((i+1))
done
printf "\ndone\n"
To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:
1. Get the unique and non_uniqe packs, add the unique packs to the
`min` list.
2. Remove the objects of unique packs from non_unique packs, then each
object left in the non_unique packs will have at least two copies.
3. Sort the non_unique packs by the objects' size, more objects first,
and add the first non_unique pack to `min` list.
4. Drop the duplicated objects from other packs in the ordered
non_unique pack list, and repeat step 3.
Some test cases will fail on Mac OS X. Mark them and will resolve in
later commit.
Original PR and discussions: https://github.com/jiangxin/git/pull/25
Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-02 14:30:15 +01:00
|
|
|
static void sort_pack_list(struct pack_list **pl)
|
|
|
|
{
|
|
|
|
struct pack_list **ary, *p;
|
|
|
|
int i;
|
|
|
|
size_t n = pack_list_size(*pl);
|
|
|
|
|
|
|
|
if (n < 2)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* prepare an array of packed_list for easier sorting */
|
2021-03-13 17:17:22 +01:00
|
|
|
CALLOC_ARRAY(ary, n);
|
pack-redundant: new algorithm to find min packs
When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.
The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.
#!/bin/sh
repo="$(pwd)/test.git"
work="$(pwd)/test"
i=1
max=199
if test -d "$repo" || test -d "$work"; then
echo >&2 "ERROR: '$repo' or '$work' already exist"
exit 1
fi
git init -q --bare "$repo"
git --git-dir="$repo" config gc.auto 0
git --git-dir="$repo" config transfer.unpackLimit 0
git clone -q "$repo" "$work" 2>/dev/null
while :; do
cd "$work"
echo "loop $i: $(date +%s)" >$i
git add $i
git commit -q -sm "loop $i"
git push -q origin HEAD:master
printf "\rCreate pack %4d/%d\t" $i $max
if test $i -ge $max; then break; fi
cd "$repo"
git repack -q
if test $(($i % 2)) -eq 0; then
git repack -aq
pack=$(ls -t $repo/objects/pack/*.pack | head -1)
touch "${pack%.pack}.keep"
fi
i=$((i+1))
done
printf "\ndone\n"
To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:
1. Get the unique and non_uniqe packs, add the unique packs to the
`min` list.
2. Remove the objects of unique packs from non_unique packs, then each
object left in the non_unique packs will have at least two copies.
3. Sort the non_unique packs by the objects' size, more objects first,
and add the first non_unique pack to `min` list.
4. Drop the duplicated objects from other packs in the ordered
non_unique pack list, and repeat step 3.
Some test cases will fail on Mac OS X. Mark them and will resolve in
later commit.
Original PR and discussions: https://github.com/jiangxin/git/pull/25
Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-02 14:30:15 +01:00
|
|
|
for (n = 0, p = *pl; p; p = p->next)
|
|
|
|
ary[n++] = p;
|
|
|
|
|
2019-02-02 14:30:17 +01:00
|
|
|
QSORT(ary, n, cmp_remaining_objects);
|
pack-redundant: new algorithm to find min packs
When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.
The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.
#!/bin/sh
repo="$(pwd)/test.git"
work="$(pwd)/test"
i=1
max=199
if test -d "$repo" || test -d "$work"; then
echo >&2 "ERROR: '$repo' or '$work' already exist"
exit 1
fi
git init -q --bare "$repo"
git --git-dir="$repo" config gc.auto 0
git --git-dir="$repo" config transfer.unpackLimit 0
git clone -q "$repo" "$work" 2>/dev/null
while :; do
cd "$work"
echo "loop $i: $(date +%s)" >$i
git add $i
git commit -q -sm "loop $i"
git push -q origin HEAD:master
printf "\rCreate pack %4d/%d\t" $i $max
if test $i -ge $max; then break; fi
cd "$repo"
git repack -q
if test $(($i % 2)) -eq 0; then
git repack -aq
pack=$(ls -t $repo/objects/pack/*.pack | head -1)
touch "${pack%.pack}.keep"
fi
i=$((i+1))
done
printf "\ndone\n"
To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:
1. Get the unique and non_uniqe packs, add the unique packs to the
`min` list.
2. Remove the objects of unique packs from non_unique packs, then each
object left in the non_unique packs will have at least two copies.
3. Sort the non_unique packs by the objects' size, more objects first,
and add the first non_unique pack to `min` list.
4. Drop the duplicated objects from other packs in the ordered
non_unique pack list, and repeat step 3.
Some test cases will fail on Mac OS X. Mark them and will resolve in
later commit.
Original PR and discussions: https://github.com/jiangxin/git/pull/25
Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-02 14:30:15 +01:00
|
|
|
|
|
|
|
/* link them back again */
|
|
|
|
for (i = 0; i < n - 1; i++)
|
|
|
|
ary[i]->next = ary[i + 1];
|
|
|
|
ary[n - 1]->next = NULL;
|
|
|
|
*pl = ary[0];
|
|
|
|
|
|
|
|
free(ary);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-11-21 01:52:52 +01:00
|
|
|
static void minimize(struct pack_list **min)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
pack-redundant: new algorithm to find min packs
When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.
The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.
#!/bin/sh
repo="$(pwd)/test.git"
work="$(pwd)/test"
i=1
max=199
if test -d "$repo" || test -d "$work"; then
echo >&2 "ERROR: '$repo' or '$work' already exist"
exit 1
fi
git init -q --bare "$repo"
git --git-dir="$repo" config gc.auto 0
git --git-dir="$repo" config transfer.unpackLimit 0
git clone -q "$repo" "$work" 2>/dev/null
while :; do
cd "$work"
echo "loop $i: $(date +%s)" >$i
git add $i
git commit -q -sm "loop $i"
git push -q origin HEAD:master
printf "\rCreate pack %4d/%d\t" $i $max
if test $i -ge $max; then break; fi
cd "$repo"
git repack -q
if test $(($i % 2)) -eq 0; then
git repack -aq
pack=$(ls -t $repo/objects/pack/*.pack | head -1)
touch "${pack%.pack}.keep"
fi
i=$((i+1))
done
printf "\ndone\n"
To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:
1. Get the unique and non_uniqe packs, add the unique packs to the
`min` list.
2. Remove the objects of unique packs from non_unique packs, then each
object left in the non_unique packs will have at least two copies.
3. Sort the non_unique packs by the objects' size, more objects first,
and add the first non_unique pack to `min` list.
4. Drop the duplicated objects from other packs in the ordered
non_unique pack list, and repeat step 3.
Some test cases will fail on Mac OS X. Mark them and will resolve in
later commit.
Original PR and discussions: https://github.com/jiangxin/git/pull/25
Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-02 14:30:15 +01:00
|
|
|
struct pack_list *pl, *unique = NULL, *non_unique = NULL;
|
|
|
|
struct llist *missing, *unique_pack_objects;
|
2005-11-09 02:22:40 +01:00
|
|
|
|
2005-11-11 01:25:04 +01:00
|
|
|
pl = local_packs;
|
2005-11-09 02:22:40 +01:00
|
|
|
while (pl) {
|
2009-09-01 07:35:10 +02:00
|
|
|
if (pl->unique_objects->size)
|
2005-11-09 02:22:40 +01:00
|
|
|
pack_list_insert(&unique, pl);
|
|
|
|
else
|
|
|
|
pack_list_insert(&non_unique, pl);
|
|
|
|
pl = pl->next;
|
|
|
|
}
|
|
|
|
/* find out which objects are missing from the set of unique packs */
|
|
|
|
missing = llist_copy(all_objects);
|
|
|
|
pl = unique;
|
|
|
|
while (pl) {
|
2019-02-02 14:30:16 +01:00
|
|
|
llist_sorted_difference_inplace(missing, pl->remaining_objects);
|
2005-11-09 02:22:40 +01:00
|
|
|
pl = pl->next;
|
|
|
|
}
|
|
|
|
|
pack-redundant: new algorithm to find min packs
When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.
The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.
#!/bin/sh
repo="$(pwd)/test.git"
work="$(pwd)/test"
i=1
max=199
if test -d "$repo" || test -d "$work"; then
echo >&2 "ERROR: '$repo' or '$work' already exist"
exit 1
fi
git init -q --bare "$repo"
git --git-dir="$repo" config gc.auto 0
git --git-dir="$repo" config transfer.unpackLimit 0
git clone -q "$repo" "$work" 2>/dev/null
while :; do
cd "$work"
echo "loop $i: $(date +%s)" >$i
git add $i
git commit -q -sm "loop $i"
git push -q origin HEAD:master
printf "\rCreate pack %4d/%d\t" $i $max
if test $i -ge $max; then break; fi
cd "$repo"
git repack -q
if test $(($i % 2)) -eq 0; then
git repack -aq
pack=$(ls -t $repo/objects/pack/*.pack | head -1)
touch "${pack%.pack}.keep"
fi
i=$((i+1))
done
printf "\ndone\n"
To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:
1. Get the unique and non_uniqe packs, add the unique packs to the
`min` list.
2. Remove the objects of unique packs from non_unique packs, then each
object left in the non_unique packs will have at least two copies.
3. Sort the non_unique packs by the objects' size, more objects first,
and add the first non_unique pack to `min` list.
4. Drop the duplicated objects from other packs in the ordered
non_unique pack list, and repeat step 3.
Some test cases will fail on Mac OS X. Mark them and will resolve in
later commit.
Original PR and discussions: https://github.com/jiangxin/git/pull/25
Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-02 14:30:15 +01:00
|
|
|
*min = unique;
|
|
|
|
|
2005-11-11 01:25:04 +01:00
|
|
|
/* return if there are no objects missing from the unique set */
|
2005-11-09 02:22:40 +01:00
|
|
|
if (missing->size == 0) {
|
2017-05-04 15:56:54 +02:00
|
|
|
free(missing);
|
2005-11-09 02:22:40 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
pack-redundant: new algorithm to find min packs
When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.
The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.
#!/bin/sh
repo="$(pwd)/test.git"
work="$(pwd)/test"
i=1
max=199
if test -d "$repo" || test -d "$work"; then
echo >&2 "ERROR: '$repo' or '$work' already exist"
exit 1
fi
git init -q --bare "$repo"
git --git-dir="$repo" config gc.auto 0
git --git-dir="$repo" config transfer.unpackLimit 0
git clone -q "$repo" "$work" 2>/dev/null
while :; do
cd "$work"
echo "loop $i: $(date +%s)" >$i
git add $i
git commit -q -sm "loop $i"
git push -q origin HEAD:master
printf "\rCreate pack %4d/%d\t" $i $max
if test $i -ge $max; then break; fi
cd "$repo"
git repack -q
if test $(($i % 2)) -eq 0; then
git repack -aq
pack=$(ls -t $repo/objects/pack/*.pack | head -1)
touch "${pack%.pack}.keep"
fi
i=$((i+1))
done
printf "\ndone\n"
To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:
1. Get the unique and non_uniqe packs, add the unique packs to the
`min` list.
2. Remove the objects of unique packs from non_unique packs, then each
object left in the non_unique packs will have at least two copies.
3. Sort the non_unique packs by the objects' size, more objects first,
and add the first non_unique pack to `min` list.
4. Drop the duplicated objects from other packs in the ordered
non_unique pack list, and repeat step 3.
Some test cases will fail on Mac OS X. Mark them and will resolve in
later commit.
Original PR and discussions: https://github.com/jiangxin/git/pull/25
Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-02 14:30:15 +01:00
|
|
|
unique_pack_objects = llist_copy(all_objects);
|
|
|
|
llist_sorted_difference_inplace(unique_pack_objects, missing);
|
|
|
|
|
|
|
|
/* remove unique pack objects from the non_unique packs */
|
|
|
|
pl = non_unique;
|
2009-09-01 07:35:10 +02:00
|
|
|
while (pl) {
|
2019-02-02 14:30:16 +01:00
|
|
|
llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects);
|
2005-11-09 02:22:40 +01:00
|
|
|
pl = pl->next;
|
|
|
|
}
|
pack-redundant: new algorithm to find min packs
When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.
The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.
#!/bin/sh
repo="$(pwd)/test.git"
work="$(pwd)/test"
i=1
max=199
if test -d "$repo" || test -d "$work"; then
echo >&2 "ERROR: '$repo' or '$work' already exist"
exit 1
fi
git init -q --bare "$repo"
git --git-dir="$repo" config gc.auto 0
git --git-dir="$repo" config transfer.unpackLimit 0
git clone -q "$repo" "$work" 2>/dev/null
while :; do
cd "$work"
echo "loop $i: $(date +%s)" >$i
git add $i
git commit -q -sm "loop $i"
git push -q origin HEAD:master
printf "\rCreate pack %4d/%d\t" $i $max
if test $i -ge $max; then break; fi
cd "$repo"
git repack -q
if test $(($i % 2)) -eq 0; then
git repack -aq
pack=$(ls -t $repo/objects/pack/*.pack | head -1)
touch "${pack%.pack}.keep"
fi
i=$((i+1))
done
printf "\ndone\n"
To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:
1. Get the unique and non_uniqe packs, add the unique packs to the
`min` list.
2. Remove the objects of unique packs from non_unique packs, then each
object left in the non_unique packs will have at least two copies.
3. Sort the non_unique packs by the objects' size, more objects first,
and add the first non_unique pack to `min` list.
4. Drop the duplicated objects from other packs in the ordered
non_unique pack list, and repeat step 3.
Some test cases will fail on Mac OS X. Mark them and will resolve in
later commit.
Original PR and discussions: https://github.com/jiangxin/git/pull/25
Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-02 14:30:15 +01:00
|
|
|
|
|
|
|
while (non_unique) {
|
2019-02-02 14:30:16 +01:00
|
|
|
/* sort the non_unique packs, greater size of remaining_objects first */
|
pack-redundant: new algorithm to find min packs
When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.
The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.
#!/bin/sh
repo="$(pwd)/test.git"
work="$(pwd)/test"
i=1
max=199
if test -d "$repo" || test -d "$work"; then
echo >&2 "ERROR: '$repo' or '$work' already exist"
exit 1
fi
git init -q --bare "$repo"
git --git-dir="$repo" config gc.auto 0
git --git-dir="$repo" config transfer.unpackLimit 0
git clone -q "$repo" "$work" 2>/dev/null
while :; do
cd "$work"
echo "loop $i: $(date +%s)" >$i
git add $i
git commit -q -sm "loop $i"
git push -q origin HEAD:master
printf "\rCreate pack %4d/%d\t" $i $max
if test $i -ge $max; then break; fi
cd "$repo"
git repack -q
if test $(($i % 2)) -eq 0; then
git repack -aq
pack=$(ls -t $repo/objects/pack/*.pack | head -1)
touch "${pack%.pack}.keep"
fi
i=$((i+1))
done
printf "\ndone\n"
To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:
1. Get the unique and non_uniqe packs, add the unique packs to the
`min` list.
2. Remove the objects of unique packs from non_unique packs, then each
object left in the non_unique packs will have at least two copies.
3. Sort the non_unique packs by the objects' size, more objects first,
and add the first non_unique pack to `min` list.
4. Drop the duplicated objects from other packs in the ordered
non_unique pack list, and repeat step 3.
Some test cases will fail on Mac OS X. Mark them and will resolve in
later commit.
Original PR and discussions: https://github.com/jiangxin/git/pull/25
Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-02 14:30:15 +01:00
|
|
|
sort_pack_list(&non_unique);
|
2019-02-02 14:30:16 +01:00
|
|
|
if (non_unique->remaining_objects->size == 0)
|
pack-redundant: new algorithm to find min packs
When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.
The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.
#!/bin/sh
repo="$(pwd)/test.git"
work="$(pwd)/test"
i=1
max=199
if test -d "$repo" || test -d "$work"; then
echo >&2 "ERROR: '$repo' or '$work' already exist"
exit 1
fi
git init -q --bare "$repo"
git --git-dir="$repo" config gc.auto 0
git --git-dir="$repo" config transfer.unpackLimit 0
git clone -q "$repo" "$work" 2>/dev/null
while :; do
cd "$work"
echo "loop $i: $(date +%s)" >$i
git add $i
git commit -q -sm "loop $i"
git push -q origin HEAD:master
printf "\rCreate pack %4d/%d\t" $i $max
if test $i -ge $max; then break; fi
cd "$repo"
git repack -q
if test $(($i % 2)) -eq 0; then
git repack -aq
pack=$(ls -t $repo/objects/pack/*.pack | head -1)
touch "${pack%.pack}.keep"
fi
i=$((i+1))
done
printf "\ndone\n"
To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:
1. Get the unique and non_uniqe packs, add the unique packs to the
`min` list.
2. Remove the objects of unique packs from non_unique packs, then each
object left in the non_unique packs will have at least two copies.
3. Sort the non_unique packs by the objects' size, more objects first,
and add the first non_unique pack to `min` list.
4. Drop the duplicated objects from other packs in the ordered
non_unique pack list, and repeat step 3.
Some test cases will fail on Mac OS X. Mark them and will resolve in
later commit.
Original PR and discussions: https://github.com/jiangxin/git/pull/25
Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-02 14:30:15 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
pack_list_insert(min, non_unique);
|
|
|
|
|
2019-02-02 14:30:16 +01:00
|
|
|
for (pl = non_unique->next; pl && pl->remaining_objects->size > 0; pl = pl->next)
|
|
|
|
llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects);
|
pack-redundant: new algorithm to find min packs
When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.
The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.
#!/bin/sh
repo="$(pwd)/test.git"
work="$(pwd)/test"
i=1
max=199
if test -d "$repo" || test -d "$work"; then
echo >&2 "ERROR: '$repo' or '$work' already exist"
exit 1
fi
git init -q --bare "$repo"
git --git-dir="$repo" config gc.auto 0
git --git-dir="$repo" config transfer.unpackLimit 0
git clone -q "$repo" "$work" 2>/dev/null
while :; do
cd "$work"
echo "loop $i: $(date +%s)" >$i
git add $i
git commit -q -sm "loop $i"
git push -q origin HEAD:master
printf "\rCreate pack %4d/%d\t" $i $max
if test $i -ge $max; then break; fi
cd "$repo"
git repack -q
if test $(($i % 2)) -eq 0; then
git repack -aq
pack=$(ls -t $repo/objects/pack/*.pack | head -1)
touch "${pack%.pack}.keep"
fi
i=$((i+1))
done
printf "\ndone\n"
To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:
1. Get the unique and non_uniqe packs, add the unique packs to the
`min` list.
2. Remove the objects of unique packs from non_unique packs, then each
object left in the non_unique packs will have at least two copies.
3. Sort the non_unique packs by the objects' size, more objects first,
and add the first non_unique pack to `min` list.
4. Drop the duplicated objects from other packs in the ordered
non_unique pack list, and repeat step 3.
Some test cases will fail on Mac OS X. Mark them and will resolve in
later commit.
Original PR and discussions: https://github.com/jiangxin/git/pull/25
Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-02 14:30:15 +01:00
|
|
|
|
|
|
|
non_unique = non_unique->next;
|
|
|
|
}
|
2005-11-09 02:22:40 +01:00
|
|
|
}
|
|
|
|
|
2005-11-21 01:52:52 +01:00
|
|
|
static void load_all_objects(void)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
2005-11-11 01:25:04 +01:00
|
|
|
struct pack_list *pl = local_packs;
|
2005-11-09 02:22:40 +01:00
|
|
|
struct llist_item *hint, *l;
|
|
|
|
|
|
|
|
llist_init(&all_objects);
|
|
|
|
|
|
|
|
while (pl) {
|
|
|
|
hint = NULL;
|
2019-02-02 14:30:16 +01:00
|
|
|
l = pl->remaining_objects->front;
|
2005-11-09 02:22:40 +01:00
|
|
|
while (l) {
|
|
|
|
hint = llist_insert_sorted_unique(all_objects,
|
2021-04-26 03:02:54 +02:00
|
|
|
&l->oid, hint);
|
2005-11-09 02:22:40 +01:00
|
|
|
l = l->next;
|
|
|
|
}
|
|
|
|
pl = pl->next;
|
|
|
|
}
|
2005-11-11 01:25:04 +01:00
|
|
|
/* remove objects present in remote packs */
|
|
|
|
pl = altodb_packs;
|
|
|
|
while (pl) {
|
2019-02-02 14:30:16 +01:00
|
|
|
llist_sorted_difference_inplace(all_objects, pl->remaining_objects);
|
2005-11-11 01:25:04 +01:00
|
|
|
pl = pl->next;
|
|
|
|
}
|
2005-11-09 02:22:40 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* this scales like O(n^2) */
|
2005-11-21 01:52:52 +01:00
|
|
|
static void cmp_local_packs(void)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
2005-11-11 01:25:04 +01:00
|
|
|
struct pack_list *subset, *pl = local_packs;
|
2005-11-09 02:22:40 +01:00
|
|
|
|
2020-12-17 02:57:09 +01:00
|
|
|
/* only one packfile */
|
|
|
|
if (!pl->next) {
|
|
|
|
llist_init(&pl->unique_objects);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2005-11-11 01:25:04 +01:00
|
|
|
while ((subset = pl)) {
|
2009-09-01 07:35:10 +02:00
|
|
|
while ((subset = subset->next))
|
2005-11-11 01:25:04 +01:00
|
|
|
cmp_two_packs(pl, subset);
|
|
|
|
pl = pl->next;
|
|
|
|
}
|
2005-11-18 23:00:55 +01:00
|
|
|
}
|
2005-11-11 01:25:04 +01:00
|
|
|
|
2005-11-21 01:52:52 +01:00
|
|
|
static void scan_alt_odb_packs(void)
|
2005-11-18 23:00:55 +01:00
|
|
|
{
|
|
|
|
struct pack_list *local, *alt;
|
|
|
|
|
|
|
|
alt = altodb_packs;
|
|
|
|
while (alt) {
|
|
|
|
local = local_packs;
|
|
|
|
while (local) {
|
2019-02-02 14:30:16 +01:00
|
|
|
llist_sorted_difference_inplace(local->remaining_objects,
|
|
|
|
alt->remaining_objects);
|
2005-11-18 23:00:55 +01:00
|
|
|
local = local->next;
|
2005-11-11 01:25:04 +01:00
|
|
|
}
|
2005-11-18 23:00:55 +01:00
|
|
|
alt = alt->next;
|
2005-11-09 02:22:40 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-11-21 01:52:52 +01:00
|
|
|
static struct pack_list * add_pack(struct packed_git *p)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
|
|
|
struct pack_list l;
|
2020-11-13 06:07:01 +01:00
|
|
|
size_t off = 0, step;
|
2007-03-16 21:42:50 +01:00
|
|
|
const unsigned char *base;
|
2005-11-09 02:22:40 +01:00
|
|
|
|
2005-11-18 23:00:55 +01:00
|
|
|
if (!p->pack_local && !(alt_odb || verbose))
|
|
|
|
return NULL;
|
|
|
|
|
2005-11-09 02:22:40 +01:00
|
|
|
l.pack = p;
|
2019-02-02 14:30:16 +01:00
|
|
|
llist_init(&l.remaining_objects);
|
2005-11-09 02:22:40 +01:00
|
|
|
|
2007-05-30 08:12:28 +02:00
|
|
|
if (open_pack_index(p))
|
2007-05-26 07:24:19 +02:00
|
|
|
return NULL;
|
|
|
|
|
2007-03-16 21:42:50 +01:00
|
|
|
base = p->index_data;
|
2007-04-09 07:06:37 +02:00
|
|
|
base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
|
2018-05-02 02:25:38 +02:00
|
|
|
step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0);
|
2007-04-09 07:06:37 +02:00
|
|
|
while (off < p->num_objects * step) {
|
2021-04-26 03:02:54 +02:00
|
|
|
llist_insert_back(l.remaining_objects, base + off);
|
2007-04-09 07:06:37 +02:00
|
|
|
off += step;
|
2005-11-09 02:22:40 +01:00
|
|
|
}
|
2019-02-02 14:30:17 +01:00
|
|
|
l.all_objects_size = l.remaining_objects->size;
|
2019-02-02 14:30:13 +01:00
|
|
|
l.unique_objects = NULL;
|
2005-11-11 01:25:04 +01:00
|
|
|
if (p->pack_local)
|
|
|
|
return pack_list_insert(&local_packs, &l);
|
|
|
|
else
|
2005-11-18 23:00:55 +01:00
|
|
|
return pack_list_insert(&altodb_packs, &l);
|
2005-11-09 02:22:40 +01:00
|
|
|
}
|
|
|
|
|
2010-01-22 16:42:14 +01:00
|
|
|
static struct pack_list * add_pack_file(const char *filename)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
2018-08-20 18:52:04 +02:00
|
|
|
struct packed_git *p = get_all_packs(the_repository);
|
2005-11-09 02:22:40 +01:00
|
|
|
|
|
|
|
if (strlen(filename) < 40)
|
2009-01-04 19:38:41 +01:00
|
|
|
die("Bad pack filename: %s", filename);
|
2005-11-09 02:22:40 +01:00
|
|
|
|
|
|
|
while (p) {
|
|
|
|
if (strstr(p->pack_name, filename))
|
|
|
|
return add_pack(p);
|
|
|
|
p = p->next;
|
|
|
|
}
|
2009-01-04 19:38:41 +01:00
|
|
|
die("Filename %s not found in packed_git", filename);
|
2005-11-09 02:22:40 +01:00
|
|
|
}
|
|
|
|
|
2005-11-21 01:52:52 +01:00
|
|
|
static void load_all(void)
|
2005-11-09 02:22:40 +01:00
|
|
|
{
|
2018-08-20 18:52:04 +02:00
|
|
|
struct packed_git *p = get_all_packs(the_repository);
|
2005-11-09 02:22:40 +01:00
|
|
|
|
|
|
|
while (p) {
|
2005-11-11 01:25:04 +01:00
|
|
|
add_pack(p);
|
2005-11-09 02:22:40 +01:00
|
|
|
p = p->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
builtins: mark unused prefix parameters
All builtins receive a "prefix" parameter, but it is only useful if they
need to adjust filenames given by the user on the command line. For
builtins that do not even call parse_options(), they often don't look at
the prefix at all, and -Wunused-parameter complains.
Let's annotate those to silence the compiler warning. I gave a quick
scan of each of these cases, and it seems like they don't have anything
they _should_ be using the prefix for (i.e., there is no hidden bug that
we are missing). The only questionable cases I saw were:
- in git-unpack-file, we create a tempfile which will always be at the
root of the repository, even if the command is run from a subdir.
Arguably this should be created in the subdir from which we're run
(as we report the path only as a relative name). However, nobody has
complained, and I'm hesitant to change something that is deep
plumbing going back to April 2005 (though I think within our
scripts, the sole caller in git-merge-one-file would be OK, as it
moves to the toplevel itself).
- in fetch-pack, local-filesystem remotes are taken as relative to the
project root, not the current directory. So:
git init server.git
[...put stuff in server.git...]
git init client.git
cd client.git
mkdir subdir
cd subdir
git fetch-pack ../../server.git ...
won't work, as we quietly move to the top of the repository before
interpreting the path (so "../server.git" would work). This is
weird, but again, nobody has complained and this is how it has
always worked. And this is how "git fetch" works, too. Plus it
raises questions about how a configured remote like:
git config remote.origin.url ../server.git
should behave. I can certainly come up with a reasonable set of
behavior, but it may not be worth stirring up complications in a
plumbing tool.
So I've left the behavior untouched in both of those cases. If anybody
really wants to revisit them, it's easy enough to drop the UNUSED
marker. This commit is just about removing them as obstacles to turning
on -Wunused-parameter all the time.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-28 22:56:55 +02:00
|
|
|
/*
 * Entry point of "git pack-redundant".
 *
 * Recognized options: --all, --verbose, --alt-odb, --i-still-use-this and
 * "--" to end option parsing; any other argument starting with '-' prints
 * the usage. Without --all, the remaining arguments are pack file names.
 * The command refuses to run unless --i-still-use-this is given.
 *
 * When stdin is not a terminal, each line read from it is parsed as a hex
 * object ID to be ignored. The index name and pack name of every local
 * pack that is not part of the computed minimal set are printed to stdout;
 * with --verbose, statistics go to stderr.
 *
 * Returns 0; error conditions terminate through die() or usage().
 */
int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED)
{
	int i;
	int i_still_use_this = 0;
	struct pack_list *min = NULL, *red, *pl;
	struct llist *ignore;
	struct object_id oid;
	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */

	if (argc == 2 && !strcmp(argv[1], "-h"))
		usage(pack_redundant_usage);

	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];
		if (!strcmp(arg, "--")) {
			i++;
			break;
		}
		if (!strcmp(arg, "--all")) {
			load_all_packs = 1;
			continue;
		}
		if (!strcmp(arg, "--verbose")) {
			verbose = 1;
			continue;
		}
		if (!strcmp(arg, "--alt-odb")) {
			alt_odb = 1;
			continue;
		}
		if (!strcmp(arg, "--i-still-use-this")) {
			i_still_use_this = 1;
			continue;
		}
		if (*arg == '-')
			usage(pack_redundant_usage);
		else
			break;
	}

	if (!i_still_use_this) {
		fputs(_("'git pack-redundant' is nominated for removal.\n"
			"If you still use this command, please add an extra\n"
			"option, '--i-still-use-this', on the command line\n"
			"and let us know you still use it by sending an e-mail\n"
			"to <git@vger.kernel.org>. Thanks.\n"), stderr);
		die(_("refusing to run without --i-still-use-this"));
	}

	if (load_all_packs)
		load_all();
	else
		while (*(argv + i) != NULL)
			add_pack_file(*(argv + i++));

	if (!local_packs)
		die("Zero packs found!");

	load_all_objects();

	if (alt_odb)
		scan_alt_odb_packs();

	/* ignore objects given on stdin */
	llist_init(&ignore);
	if (!isatty(0)) {
		while (fgets(buf, sizeof(buf), stdin)) {
			/*
			 * Parse into a stack buffer: list items carry their
			 * own struct object_id, so a per-line heap allocation
			 * would never be released.
			 */
			if (get_oid_hex(buf, &oid))
				die("Bad object ID on stdin: %s", buf);
			llist_insert_sorted_unique(ignore, &oid, NULL);
		}
	}
	llist_sorted_difference_inplace(all_objects, ignore);
	for (pl = local_packs; pl; pl = pl->next)
		llist_sorted_difference_inplace(pl->remaining_objects, ignore);

	cmp_local_packs();

	minimize(&min);

	if (verbose) {
		fprintf(stderr, "There are %lu packs available in alt-odbs.\n",
			(unsigned long)pack_list_size(altodb_packs));
		fprintf(stderr, "The smallest (bytewise) set of packs is:\n");
		for (pl = min; pl; pl = pl->next)
			fprintf(stderr, "\t%s\n", pl->pack->pack_name);
		fprintf(stderr, "containing %lu duplicate objects "
				"with a total size of %lukb.\n",
			(unsigned long)get_pack_redundancy(min),
			(unsigned long)pack_set_bytecount(min)/1024);
		fprintf(stderr, "A total of %lu unique objects were considered.\n",
			(unsigned long)all_objects->size);
		fprintf(stderr, "Redundant packs (with indexes):\n");
	}

	/* every local pack that did not make it into the minimal set */
	red = pack_list_difference(local_packs, min);
	for (pl = red; pl; pl = pl->next)
		printf("%s\n%s\n",
		       sha1_pack_index_name(pl->pack->hash),
		       pl->pack->pack_name);
	if (verbose)
		fprintf(stderr, "%luMB of redundant packs in total.\n",
			(unsigned long)pack_set_bytecount(red)/(1024*1024));

	return 0;
}
|