From 1d6b38cc76c348e2477506ca9759fc241e3d0d46 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 22 Feb 2006 22:10:24 -0800 Subject: [PATCH 1/4] pack-objects: use full pathname to help hashing with "thin" pack. This uses the same hashing algorithm to the "preferred base tree" objects and the incoming pathnames, to group the same files from different revs together, while spreading files with the same basename in different directories. Signed-off-by: Junio C Hamano --- pack-objects.c | 60 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/pack-objects.c b/pack-objects.c index af3bdf5d35..3a16b7e4ce 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -439,9 +439,37 @@ static void rehash_objects(void) } } -static int add_object_entry(const unsigned char *sha1, const char *name, int exclude) +struct name_path { + struct name_path *up; + const char *elem; + int len; +}; + +static unsigned name_hash(struct name_path *path, const char *name) +{ + struct name_path *p = path; + const char *n = name + strlen(name); + unsigned hash = 0; + + if (n != name && n[-1] == '\n') + n--; + while (name <= --n) { + unsigned char c = *n; + hash = hash * 11 + c; + } + for (p = path; p; p = p->up) { + hash = hash * 11 + '/'; + n = p->elem + p->len; + while (p->elem <= --n) { + unsigned char c = *n; + hash = hash * 11 + c; + } + } + return hash; +} + +static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclude) { - unsigned int hash = 0; unsigned int idx = nr_objects; struct object_entry *entry; struct packed_git *p; @@ -467,13 +495,6 @@ static int add_object_entry(const unsigned char *sha1, const char *name, int exc if ((entry = locate_object_entry(sha1)) != NULL) goto already_added; - while (*name) { - unsigned char c = *name++; - if (isspace(c)) - continue; - hash = hash * 11 + c; - } - if (idx >= nr_alloc) { unsigned int needed = (idx + 1024) * 3 / 2; objects = xrealloc(objects, needed * sizeof(*entry)); @@ -507,12 +528,12 @@ static int add_object_entry(const unsigned char *sha1, const char *name, int exc return status; } -static void add_pbase_tree(struct tree_desc *tree) +static void add_pbase_tree(struct tree_desc *tree, struct name_path *up) { while (tree->size) { const unsigned char *sha1; const char *name; - unsigned mode; + unsigned mode, hash; unsigned long size; char type[20]; @@ -523,16 +544,22 @@ static void add_pbase_tree(struct tree_desc *tree) if (sha1_object_info(sha1, type, &size)) continue; - if (!add_object_entry(sha1, name, 1)) + hash = name_hash(up, name); + if (!add_object_entry(sha1, hash, 1)) continue; if (!strcmp(type, "tree")) { struct tree_desc sub; void *elem; + struct name_path me; + elem = read_sha1_file(sha1, type, &sub.size); sub.buf = elem; if (sub.buf) { - add_pbase_tree(&sub); + me.up = up; + me.elem = name; + me.len = strlen(name); + add_pbase_tree(&sub, &me); free(elem); } } @@ -543,12 +570,13 @@ static void add_preferred_base(unsigned char *sha1) { struct tree_desc tree; void *elem; + elem = read_object_with_reference(sha1, "tree", &tree.size, NULL); tree.buf = elem; if (!tree.buf) return; - if (add_object_entry(sha1, "", 1)) - add_pbase_tree(&tree); + if (add_object_entry(sha1, name_hash(NULL, ""), 1)) + add_pbase_tree(&tree, NULL); free(elem); } @@ -1031,7 +1059,7 @@ int main(int argc, char **argv) } if (get_sha1_hex(line, sha1)) die("expected sha1, got garbage:\n %s", line); - add_object_entry(sha1, line+40, 0); + add_object_entry(sha1, name_hash(NULL, line+41), 0); } if (progress) fprintf(stderr, "Done counting %d objects.\n", nr_objects); From b76f6b627802d0a3c8bbf66fba0c090dbe56d509 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 23 Feb 2006 23:04:52 -0800 Subject: [PATCH 2/4] pack-objects: allow "thin" packs to exceed depth limits When creating a new pack to be used in .git/objects/pack/ directory, we carefully count the depth of deltified objects to be reused, so that the generated pack does not to exceed the specified depth limit for runtime efficiency. However, when we are generating a thin pack that does not contain base objects, such a pack can only be used during network transfer that is expanded on the other end upon reception, so being careful and artificially cutting the delta chain does not buy us anything except increased bandwidth requirement. This patch disables the delta chain depth limit check when reusing an existing delta. Signed-off-by: Junio C Hamano --- pack-objects.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/pack-objects.c b/pack-objects.c index 3a16b7e4ce..2320bcf310 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -663,10 +663,23 @@ static void get_object_details(void) prepare_pack_ix(); for (i = 0, entry = objects; i < nr_objects; i++, entry++) check_object(entry); - for (i = 0, entry = objects; i < nr_objects; i++, entry++) - if (!entry->delta && entry->delta_child) - entry->delta_limit = - check_delta_limit(entry, 1); + + if (nr_objects == nr_result) { + /* + * Depth of objects that depend on the entry -- this + * is subtracted from depth-max to break too deep + * delta chain because of delta data reusing. + * However, we loosen this restriction when we know we + * are creating a thin pack -- it will have to be + * expanded on the other end anyway, so do not + * artificially cut the delta chain and let it go as + * deep as it wants. + */ + for (i = 0, entry = objects; i < nr_objects; i++, entry++) + if (!entry->delta && entry->delta_child) + entry->delta_limit = + check_delta_limit(entry, 1); + } } typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *); From e646de0d14bac20ef6e156c1742b9e62fb0b9020 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 22 Feb 2006 22:10:24 -0800 Subject: [PATCH 3/4] rev-list --objects: use full pathname to help hashing. This helps to group the same files from different revs together, while spreading files with the same basename in different directories, to help pack-object. Signed-off-by: Junio C Hamano --- rev-list.c | 69 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/rev-list.c b/rev-list.c index b5de0759f9..dda6fcaa96 100644 --- a/rev-list.c +++ b/rev-list.c @@ -63,6 +63,36 @@ static int no_merges = 0; static const char **paths = NULL; static int remove_empty_trees = 0; +struct name_path { + struct name_path *up; + int elem_len; + const char *elem; +}; + +static char *path_name(struct name_path *path, const char *name) +{ + struct name_path *p; + char *n, *m; + int nlen = strlen(name); + int len = nlen + 1; + + for (p = path; p; p = p->up) { + if (p->elem_len) + len += p->elem_len + 1; + } + n = xmalloc(len); + m = n + len - (nlen + 1); + strcpy(m, name); + for (p = path; p; p = p->up) { + if (p->elem_len) { + m -= p->elem_len + 1; + memcpy(m, p->elem, p->elem_len); + m[p->elem_len] = '/'; + } + } + return n; +} + static void show_commit(struct commit *commit) { commit->object.flags |= SHOWN; @@ -174,17 +204,23 @@ static int process_commit(struct commit * commit) return CONTINUE; } -static struct object_list **add_object(struct object *obj, struct object_list **p, const char *name) +static struct object_list **add_object(struct object *obj, + struct object_list **p, + struct name_path *path, + const char *name) { struct object_list *entry = xmalloc(sizeof(*entry)); entry->item = obj; entry->next = *p; - entry->name = name; + entry->name = path_name(path, name); *p = entry; return &entry->next; } -static struct object_list **process_blob(struct blob *blob, struct object_list **p, const char *name) +static struct object_list **process_blob(struct blob *blob, + struct object_list **p, + struct name_path *path, + const char *name) { struct object *obj = &blob->object; @@ -193,13 +229,17 @@ static struct object_list **process_blob(struct blob *blob, struct object_list * if (obj->flags & (UNINTERESTING | SEEN)) return p; obj->flags |= SEEN; - return add_object(obj, p, name); + return add_object(obj, p, path, name); } -static struct object_list **process_tree(struct tree *tree, struct object_list **p, const char *name) +static struct object_list **process_tree(struct tree *tree, + struct object_list **p, + struct name_path *path, + const char *name) { struct object *obj = &tree->object; struct tree_entry_list *entry; + struct name_path me; if (!tree_objects) return p; @@ -208,15 +248,18 @@ static struct object_list **process_tree(struct tree *tree, struct object_list * if (parse_tree(tree) < 0) die("bad tree object %s", sha1_to_hex(obj->sha1)); obj->flags |= SEEN; - p = add_object(obj, p, name); + p = add_object(obj, p, path, name); + me.up = path; + me.elem = name; + me.elem_len = strlen(name); entry = tree->entries; tree->entries = NULL; while (entry) { struct tree_entry_list *next = entry->next; if (entry->directory) - p = process_tree(entry->item.tree, p, entry->name); + p = process_tree(entry->item.tree, p, &me, entry->name); else - p = process_blob(entry->item.blob, p, entry->name); + p = process_blob(entry->item.blob, p, &me, entry->name); free(entry); entry = next; } @@ -231,7 +274,7 @@ static void show_commit_list(struct commit_list *list) while (list) { struct commit *commit = pop_most_recent_commit(&list, SEEN); - p = process_tree(commit->tree, p, ""); + p = process_tree(commit->tree, p, NULL, ""); if (process_commit(commit) == STOP) break; } @@ -242,15 +285,15 @@ static void show_commit_list(struct commit_list *list) continue; if (obj->type == tag_type) { obj->flags |= SEEN; - p = add_object(obj, p, name); + p = add_object(obj, p, NULL, name); continue; } if (obj->type == tree_type) { - p = process_tree((struct tree *)obj, p, name); + p = process_tree((struct tree *)obj, p, NULL, name); continue; } if (obj->type == blob_type) { - p = process_blob((struct blob *)obj, p, name); + p = process_blob((struct blob *)obj, p, NULL, name); continue; } die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name); @@ -674,7 +717,7 @@ static struct commit_list *limit_list(struct commit_list *list) static void add_pending_object(struct object *obj, const char *name) { - add_object(obj, &pending_objects, name); + add_object(obj, &pending_objects, NULL, name); } static struct commit *get_commit_reference(const char *name, const unsigned char *sha1, unsigned int flags) From eeef7135fed9b8784627c4c96e125241c06c65e1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 23 Feb 2006 23:27:49 -0800 Subject: [PATCH 4/4] pack-objects: hash basename and direname a bit differently. ...so that "Makefile"s from different revs are sorted together, separate from "t/Makefile"s, but close enough. Signed-off-by: Junio C Hamano --- pack-objects.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/pack-objects.c b/pack-objects.c index 2320bcf310..095bcb8286 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -445,18 +445,29 @@ struct name_path { int len; }; +#define DIRBITS 12 + static unsigned name_hash(struct name_path *path, const char *name) { struct name_path *p = path; const char *n = name + strlen(name); - unsigned hash = 0; + unsigned hash = 0, name_hash = 0, name_done = 0; if (n != name && n[-1] == '\n') n--; while (name <= --n) { unsigned char c = *n; + if (c == '/' && !name_done) { + name_hash = hash; + name_done = 1; + hash = 0; + } hash = hash * 11 + c; } + if (!name_done) { + name_hash = hash; + hash = 0; + } for (p = path; p; p = p->up) { hash = hash * 11 + '/'; n = p->elem + p->len; @@ -465,6 +476,26 @@ static unsigned name_hash(struct name_path *path, const char *name) hash = hash * 11 + c; } } + /* + * Make sure "Makefile" and "t/Makefile" are hashed separately + * but close enough. + */ + hash = (name_hash<up) { + fputc('/', stderr); + n = p->elem + p->len; + while (p->elem <= --n) + fputc(*n, stderr); + } + fprintf(stderr, "\t%08x\n", hash); + } return hash; }