From 75a95490474ab6e991cbbbd10d980498a9109648 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 17 Mar 2013 04:22:36 -0400 Subject: [PATCH 1/4] avoid segfaults on parse_object failure Many call-sites of parse_object assume that they will get a non-NULL return value; this is not the case if we encounter an error while parsing the object. This patch adds a wrapper function around parse_object that handles dying automatically, and uses it anywhere we immediately try to access the return value as a non-NULL pointer (i.e., anywhere that we would currently segfault). This wrapper may also be useful in other places. The most obvious one is code like: o = parse_object(sha1); if (!o) die(...); However, these should not be mechanically converted to parse_object_or_die, as the die message is sometimes customized. Later patches can address these sites on a case-by-case basis. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- bundle.c | 6 +++--- object.c | 10 ++++++++++ object.h | 13 ++++++++++++- pack-refs.c | 2 +- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/bundle.c b/bundle.c index 8d12816b9d..26ceebdb4b 100644 --- a/bundle.c +++ b/bundle.c @@ -279,12 +279,12 @@ int create_bundle(struct bundle_header *header, const char *path, if (buf.len > 0 && buf.buf[0] == '-') { write_or_die(bundle_fd, buf.buf, buf.len); if (!get_sha1_hex(buf.buf + 1, sha1)) { - struct object *object = parse_object(sha1); + struct object *object = parse_object_or_die(sha1, buf.buf); object->flags |= UNINTERESTING; add_pending_object(&revs, object, xstrdup(buf.buf)); } } else if (!get_sha1_hex(buf.buf, sha1)) { - struct object *object = parse_object(sha1); + struct object *object = parse_object_or_die(sha1, buf.buf); object->flags |= SHOWN; } } @@ -361,7 +361,7 @@ int create_bundle(struct bundle_header *header, const char *path, * end up triggering "empty bundle" * error. */ - obj = parse_object(sha1); + obj = parse_object_or_die(sha1, e->name); obj->flags |= SHOWN; add_pending_object(&revs, obj, e->name); } diff --git a/object.c b/object.c index 4af3451bf8..20703f52ed 100644 --- a/object.c +++ b/object.c @@ -185,6 +185,16 @@ struct object *parse_object_buffer(const unsigned char *sha1, enum object_type t return obj; } +struct object *parse_object_or_die(const unsigned char *sha1, + const char *name) +{ + struct object *o = parse_object(sha1); + if (o) + return o; + + die(_("unable to parse object: %s"), name ? name : sha1_to_hex(sha1)); +} + struct object *parse_object(const unsigned char *sha1) { unsigned long size; diff --git a/object.h b/object.h index 6a97b6ba1a..97d384b80a 100644 --- a/object.h +++ b/object.h @@ -54,9 +54,20 @@ struct object *lookup_object(const unsigned char *sha1); extern void *create_object(const unsigned char *sha1, int type, void *obj); -/** Returns the object, having parsed it to find out what it is. **/ +/* + * Returns the object, having parsed it to find out what it is. + * + * Returns NULL if the object is missing or corrupt. + */ struct object *parse_object(const unsigned char *sha1); +/* + * Like parse_object, but will die() instead of returning NULL. If the + * "name" parameter is not NULL, it is included in the error message + * (otherwise, the sha1 hex is given). + */ +struct object *parse_object_or_die(const unsigned char *sha1, const char *name); + /* Given the result of read_sha1_file(), returns the object after * parsing it. eaten_p indicates if the object has a borrowed copy * of buffer and the caller should not free() it. diff --git a/pack-refs.c b/pack-refs.c index f09a054228..6a689f35cb 100644 --- a/pack-refs.c +++ b/pack-refs.c @@ -40,7 +40,7 @@ static int handle_one_ref(const char *path, const unsigned char *sha1, fprintf(cb->refs_file, "%s %s\n", sha1_to_hex(sha1), path); if (is_tag_ref) { - struct object *o = parse_object(sha1); + struct object *o = parse_object_or_die(sha1, path); if (o->type == OBJ_TAG) { o = deref_tag(o, path, 0); if (o) From f7892d181752187513f10b13f2272fa46c9c8422 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 17 Mar 2013 04:23:31 -0400 Subject: [PATCH 2/4] use parse_object_or_die instead of die("bad object") Some call-sites do: o = parse_object(sha1); if (!o) die("bad object %s", some_name); We can now handle that as a one-liner, and get more consistent output. In the third case of this patch, it looks like we are losing information, as the existing message also outputs the sha1 hex; however, parse_object will already have written a more specific complaint about the sha1, so there is no point in repeating it here. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 4 +--- builtin/prune.c | 4 +--- reachable.c | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 0654e0b0f6..08ea5fdec8 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -898,9 +898,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) unsigned char sha1[20]; /* Is it a rev? */ if (!get_sha1(arg, sha1)) { - struct object *object = parse_object(sha1); - if (!object) - die(_("bad object %s"), arg); + struct object *object = parse_object_or_die(sha1, arg); add_object_array(object, arg, &list); continue; } diff --git a/builtin/prune.c b/builtin/prune.c index 6cb99443c1..67ca3d5699 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -149,9 +149,7 @@ int cmd_prune(int argc, const char **argv, const char *prefix) const char *name = *argv++; if (!get_sha1(name, sha1)) { - struct object *object = parse_object(sha1); - if (!object) - die("bad object: %s", name); + struct object *object = parse_object_or_die(sha1, name); add_pending_object(&revs, object, ""); } else diff --git a/reachable.c b/reachable.c index bf7970661f..e7e6a1e342 100644 --- a/reachable.c +++ b/reachable.c @@ -152,11 +152,9 @@ static int add_one_reflog_ent(unsigned char *osha1, unsigned char *nsha1, static int add_one_ref(const char *path, const unsigned char *sha1, int flag, void *cb_data) { - struct object *object = parse_object(sha1); + struct object *object = parse_object_or_die(sha1, path); struct rev_info *revs = (struct rev_info *)cb_data; - if (!object) - die("bad object ref: %s:%s", path, sha1_to_hex(sha1)); add_pending_object(revs, object, ""); return 0; From 03a8eddfd1bd7cdce0b2361753691f53c00e5ba6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 17 Mar 2013 04:23:46 -0400 Subject: [PATCH 3/4] pack-refs: write peeled entry for non-tags When we pack an annotated tag ref, we write not only the sha1 of the tag object along with the ref, but also the sha1 obtained by peeling the tag. This lets readers of the pack-refs file know the peeled value without having to actually load the object, speeding up upload-pack's ref advertisement. The writer marks a packed-refs file with peeled refs using the "peeled" trait at the top of the file. When the reader sees this trait, it knows that each ref is either followed by its peeled value, or it is not an annotated tag. However, there is a mismatch between the assumptions of the reader and writer. The writer will only peel refs under refs/tags, but the reader does not know this; it will assume a ref without a peeled value must not be a tag object. Thus an annotated tag object placed outside of the refs/tags hierarchy will not have its peeled value printed by upload-pack. The simplest way to fix this is to start writing peel values for all refs. This matches what the reader expects for both new and old versions of git. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-refs.c | 16 ++++++++-------- t/t3211-peel-ref.sh | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 8 deletions(-) create mode 100755 t/t3211-peel-ref.sh diff --git a/pack-refs.c b/pack-refs.c index 6a689f35cb..ebde785055 100644 --- a/pack-refs.c +++ b/pack-refs.c @@ -27,6 +27,7 @@ static int handle_one_ref(const char *path, const unsigned char *sha1, int flags, void *cb_data) { struct pack_refs_cb_data *cb = cb_data; + struct object *o; int is_tag_ref; /* Do not pack the symbolic refs */ @@ -39,14 +40,13 @@ static int handle_one_ref(const char *path, const unsigned char *sha1, return 0; fprintf(cb->refs_file, "%s %s\n", sha1_to_hex(sha1), path); - if (is_tag_ref) { - struct object *o = parse_object_or_die(sha1, path); - if (o->type == OBJ_TAG) { - o = deref_tag(o, path, 0); - if (o) - fprintf(cb->refs_file, "^%s\n", - sha1_to_hex(o->sha1)); - } + + o = parse_object_or_die(sha1, path); + if (o->type == OBJ_TAG) { + o = deref_tag(o, path, 0); + if (o) + fprintf(cb->refs_file, "^%s\n", + sha1_to_hex(o->sha1)); } if ((cb->flags & PACK_REFS_PRUNE) && !do_not_prune(flags)) { diff --git a/t/t3211-peel-ref.sh b/t/t3211-peel-ref.sh new file mode 100755 index 0000000000..85f09be9f4 --- /dev/null +++ b/t/t3211-peel-ref.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +test_description='tests for the peel_ref optimization of packed-refs' +. ./test-lib.sh + +test_expect_success 'create annotated tag in refs/tags' ' + test_commit base && + git tag -m annotated foo +' + +test_expect_success 'create annotated tag outside of refs/tags' ' + git update-ref refs/outside/foo refs/tags/foo +' + +# This matches show-ref's output +print_ref() { + echo "$(git rev-parse "$1") $1" +} + +test_expect_success 'set up expected show-ref output' ' + { + print_ref "refs/heads/master" && + print_ref "refs/outside/foo" && + print_ref "refs/outside/foo^{}" && + print_ref "refs/tags/base" && + print_ref "refs/tags/foo" && + print_ref "refs/tags/foo^{}" + } >expect +' + +test_expect_success 'refs are peeled outside of refs/tags (loose)' ' + git show-ref -d >actual && + test_cmp expect actual +' + +test_expect_success 'refs are peeled outside of refs/tags (packed)' ' + git pack-refs --all && + git show-ref -d >actual && + test_cmp expect actual +' + +test_done From c29c46fa2e21e608ce2e603649af5bf38e7969c2 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 18 Mar 2013 07:37:32 -0400 Subject: [PATCH 4/4] pack-refs: add fully-peeled trait Older versions of pack-refs did not write peel lines for refs outside of refs/tags. This meant that on reading the pack-refs file, we might set the REF_KNOWS_PEELED flag for such a ref, even though we do not know anything about its peeled value. The previous commit updated the writer to always peel, no matter what the ref is. That means that packed-refs files written by newer versions of git are fine to be read by both old and new versions of git. However, we still have the problem of reading packed-refs files written by older versions of git, or by other implementations which have not yet learned the same trick. The simplest fix would be to always unset the REF_KNOWS_PEELED flag for refs outside of refs/tags that do not have a peel line (if it has a peel line, we know it is valid, but we cannot assume a missing peel line means anything). But that loses an important optimization, as upload-pack should not need to load the object pointed to by refs/heads/foo to determine that it is not a tag. Instead, we add a "fully-peeled" trait to the packed-refs file. If it is set, we know that we can trust a missing peel line to mean that a ref cannot be peeled. Otherwise, we fall back to assuming nothing. [commit message and tests by Jeff King ] Signed-off-by: Michael Haggerty Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-refs.c | 2 +- refs.c | 49 ++++++++++++++++++++++++++++++++++++++++----- t/t3211-peel-ref.sh | 22 ++++++++++++++++++++ 3 files changed, 67 insertions(+), 6 deletions(-) diff --git a/pack-refs.c b/pack-refs.c index ebde785055..4461f71a37 100644 --- a/pack-refs.c +++ b/pack-refs.c @@ -128,7 +128,7 @@ int pack_refs(unsigned int flags) die_errno("unable to create ref-pack file structure"); /* perhaps other traits later as well */ - fprintf(cbdata.refs_file, "# pack-refs with: peeled \n"); + fprintf(cbdata.refs_file, "# pack-refs with: peeled fully-peeled \n"); for_each_ref(handle_one_ref, &cbdata); if (ferror(cbdata.refs_file)) diff --git a/refs.c b/refs.c index da74a2b29a..f5d954c27e 100644 --- a/refs.c +++ b/refs.c @@ -804,11 +804,38 @@ static const char *parse_ref_line(char *line, unsigned char *sha1) return line; } +/* + * Read f, which is a packed-refs file, into dir. + * + * A comment line of the form "# pack-refs with: " may contain zero or + * more traits. We interpret the traits as follows: + * + * No traits: + * + * Probably no references are peeled. But if the file contains a + * peeled value for a reference, we will use it. + * + * peeled: + * + * References under "refs/tags/", if they *can* be peeled, *are* + * peeled in this file. References outside of "refs/tags/" are + * probably not peeled even if they could have been, but if we find + * a peeled value for such a reference we will use it. + * + * fully-peeled: + * + * All references in the file that can be peeled are peeled. + * Inversely (and this is more important), any references in the + * file for which no peeled value is recorded is not peelable. This + * trait should typically be written alongside "peeled" for + * compatibility with older clients, but we do not require it + * (i.e., "peeled" is a no-op if "fully-peeled" is set). + */ static void read_packed_refs(FILE *f, struct ref_dir *dir) { struct ref_entry *last = NULL; char refline[PATH_MAX]; - int flag = REF_ISPACKED; + enum { PEELED_NONE, PEELED_TAGS, PEELED_FULLY } peeled = PEELED_NONE; while (fgets(refline, sizeof(refline), f)) { unsigned char sha1[20]; @@ -817,15 +844,20 @@ static void read_packed_refs(FILE *f, struct ref_dir *dir) if (!strncmp(refline, header, sizeof(header)-1)) { const char *traits = refline + sizeof(header) - 1; - if (strstr(traits, " peeled ")) - flag |= REF_KNOWS_PEELED; + if (strstr(traits, " fully-peeled ")) + peeled = PEELED_FULLY; + else if (strstr(traits, " peeled ")) + peeled = PEELED_TAGS; /* perhaps other traits later as well */ continue; } refname = parse_ref_line(refline, sha1); if (refname) { - last = create_ref_entry(refname, sha1, flag, 1); + last = create_ref_entry(refname, sha1, REF_ISPACKED, 1); + if (peeled == PEELED_FULLY || + (peeled == PEELED_TAGS && !prefixcmp(refname, "refs/tags/"))) + last->flag |= REF_KNOWS_PEELED; add_ref(dir, last); continue; } @@ -833,8 +865,15 @@ static void read_packed_refs(FILE *f, struct ref_dir *dir) refline[0] == '^' && strlen(refline) == 42 && refline[41] == '\n' && - !get_sha1_hex(refline + 1, sha1)) + !get_sha1_hex(refline + 1, sha1)) { hashcpy(last->u.value.peeled, sha1); + /* + * Regardless of what the file header said, + * we definitely know the value of *this* + * reference: + */ + last->flag |= REF_KNOWS_PEELED; + } } } diff --git a/t/t3211-peel-ref.sh b/t/t3211-peel-ref.sh index 85f09be9f4..d4d7792eae 100755 --- a/t/t3211-peel-ref.sh +++ b/t/t3211-peel-ref.sh @@ -39,4 +39,26 @@ test_expect_success 'refs are peeled outside of refs/tags (packed)' ' test_cmp expect actual ' +test_expect_success 'create old-style pack-refs without fully-peeled' ' + # Git no longer writes without fully-peeled, so we just write our own + # from scratch; we could also munge the existing file to remove the + # fully-peeled bits, but that seems even more prone to failure, + # especially if the format ever changes again. At least this way we + # know we are emulating exactly what an older git would have written. + { + echo "# pack-refs with: peeled " && + print_ref "refs/heads/master" && + print_ref "refs/outside/foo" && + print_ref "refs/tags/base" && + print_ref "refs/tags/foo" && + echo "^$(git rev-parse "refs/tags/foo^{}")" + } >tmp && + mv tmp .git/packed-refs +' + +test_expect_success 'refs are peeled outside of refs/tags (old packed)' ' + git show-ref -d >actual && + test_cmp expect actual +' + test_done