From 46f034483eb9f88cf79cba8b76f085464287fc58 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sun, 3 May 2015 19:59:59 +0530 Subject: [PATCH 1/4] sha1_file: support reading from a loose object of unknown type Update sha1_loose_object_info() to optionally allow it to read from a loose object file of unknown/bogus type; as the function usually returns the type of the object it read in the form of enum for known types, add an optional "typename" field to receive the name of the type in textual form and a flag to indicate the reading of a loose object file of unknown/bogus type. Add parse_sha1_header_extended() which acts as a wrapper around parse_sha1_header() allowing more information to be obtained. Add unpack_sha1_header_to_strbuf() to unpack sha1 headers of unknown/corrupt objects which have a unknown sha1 header size to a strbuf structure. This was written by Junio C Hamano but tested by me. Helped-by: Junio C Hamano Helped-by: Eric Sunshine Helped-by: Ramsay Jones Hepled-by: Jeff King Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- cache.h | 2 + sha1_file.c | 128 +++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 108 insertions(+), 22 deletions(-) diff --git a/cache.h b/cache.h index 3d3244ba64..38419c3a17 100644 --- a/cache.h +++ b/cache.h @@ -838,6 +838,7 @@ extern int is_ntfs_dotgit(const char *name); /* object replacement */ #define LOOKUP_REPLACE_OBJECT 1 +#define LOOKUP_UNKNOWN_OBJECT 2 extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag); static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size) { @@ -1304,6 +1305,7 @@ struct object_info { unsigned long *sizep; unsigned long *disk_sizep; unsigned char *delta_base_sha1; + struct strbuf *typename; /* Response */ enum { diff --git a/sha1_file.c b/sha1_file.c index 88f06bac92..37e6f2cca9 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1564,6 +1564,40 @@ int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long ma return git_inflate(stream, 0); } +static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map, + unsigned long mapsize, void *buffer, + unsigned long bufsiz, struct strbuf *header) +{ + int status; + + status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz); + + /* + * Check if entire header is unpacked in the first iteration. + */ + if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) + return 0; + + /* + * buffer[0..bufsiz] was not large enough. Copy the partial + * result out to header, and then append the result of further + * reading the stream. + */ + strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); + stream->next_out = buffer; + stream->avail_out = bufsiz; + + do { + status = git_inflate(stream, 0); + strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); + if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) + return 0; + stream->next_out = buffer; + stream->avail_out = bufsiz; + } while (status != Z_STREAM_END); + return -1; +} + static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1) { int bytes = strlen(buffer) + 1; @@ -1614,27 +1648,38 @@ static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long s * too permissive for what we want to check. So do an anal * object header parse by hand. */ -int parse_sha1_header(const char *hdr, unsigned long *sizep) +static int parse_sha1_header_extended(const char *hdr, struct object_info *oi, + unsigned int flags) { - char type[10]; - int i; + const char *type_buf = hdr; unsigned long size; + int type, type_len = 0; /* - * The type can be at most ten bytes (including the - * terminating '\0' that we add), and is followed by + * The type can be of any size but is followed by * a space. */ - i = 0; for (;;) { char c = *hdr++; if (c == ' ') break; - type[i++] = c; - if (i >= sizeof(type)) - return -1; + type_len++; } - type[i] = 0; + + type = type_from_string_gently(type_buf, type_len, 1); + if (oi->typename) + strbuf_add(oi->typename, type_buf, type_len); + /* + * Set type to 0 if its an unknown object and + * we're obtaining the type using '--allow-unkown-type' + * option. + */ + if ((flags & LOOKUP_UNKNOWN_OBJECT) && (type < 0)) + type = 0; + else if (type < 0) + die("invalid object type"); + if (oi->typep) + *oi->typep = type; /* * The length must follow immediately, and be in canonical @@ -1652,12 +1697,24 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep) size = size * 10 + c; } } - *sizep = size; + + if (oi->sizep) + *oi->sizep = size; /* * The length must be followed by a zero byte */ - return *hdr ? -1 : type_from_string(type); + return *hdr ? -1 : type; +} + +int parse_sha1_header(const char *hdr, unsigned long *sizep) +{ + struct object_info oi; + + oi.sizep = sizep; + oi.typename = NULL; + oi.typep = NULL; + return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT); } static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1) @@ -2524,13 +2581,15 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1, } static int sha1_loose_object_info(const unsigned char *sha1, - struct object_info *oi) + struct object_info *oi, + int flags) { - int status; - unsigned long mapsize, size; + int status = 0; + unsigned long mapsize; void *map; git_zstream stream; char hdr[32]; + struct strbuf hdrbuf = STRBUF_INIT; if (oi->delta_base_sha1) hashclr(oi->delta_base_sha1); @@ -2543,7 +2602,7 @@ static int sha1_loose_object_info(const unsigned char *sha1, * return value implicitly indicates whether the * object even exists. */ - if (!oi->typep && !oi->sizep) { + if (!oi->typep && !oi->typename && !oi->sizep) { struct stat st; if (stat_sha1_file(sha1, &st) < 0) return -1; @@ -2557,17 +2616,26 @@ static int sha1_loose_object_info(const unsigned char *sha1, return -1; if (oi->disk_sizep) *oi->disk_sizep = mapsize; - if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) + if ((flags & LOOKUP_UNKNOWN_OBJECT)) { + if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0) + status = error("unable to unpack %s header with --allow-unknown-type", + sha1_to_hex(sha1)); + } else if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) status = error("unable to unpack %s header", sha1_to_hex(sha1)); - else if ((status = parse_sha1_header(hdr, &size)) < 0) + if (status < 0) + ; /* Do nothing */ + else if (hdrbuf.len) { + if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0) + status = error("unable to parse %s header with --allow-unknown-type", + sha1_to_hex(sha1)); + } else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0) status = error("unable to parse %s header", sha1_to_hex(sha1)); - else if (oi->sizep) - *oi->sizep = size; git_inflate_end(&stream); munmap(map, mapsize); - if (oi->typep) + if (status && oi->typep) *oi->typep = status; + strbuf_release(&hdrbuf); return 0; } @@ -2576,6 +2644,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, struct cached_object *co; struct pack_entry e; int rtype; + enum object_type real_type; const unsigned char *real = lookup_replace_object_extended(sha1, flags); co = find_cached_object(real); @@ -2588,13 +2657,15 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, *(oi->disk_sizep) = 0; if (oi->delta_base_sha1) hashclr(oi->delta_base_sha1); + if (oi->typename) + strbuf_addstr(oi->typename, typename(co->type)); oi->whence = OI_CACHED; return 0; } if (!find_pack_entry(real, &e)) { /* Most likely it's a loose object. */ - if (!sha1_loose_object_info(real, oi)) { + if (!sha1_loose_object_info(real, oi, flags)) { oi->whence = OI_LOOSE; return 0; } @@ -2605,9 +2676,18 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, return -1; } + /* + * packed_object_info() does not follow the delta chain to + * find out the real type, unless it is given oi->typep. + */ + if (oi->typename && !oi->typep) + oi->typep = &real_type; + rtype = packed_object_info(e.p, e.offset, oi); if (rtype < 0) { mark_bad_packed_object(e.p, real); + if (oi->typep == &real_type) + oi->typep = NULL; return sha1_object_info_extended(real, oi, 0); } else if (in_delta_base_cache(e.p, e.offset)) { oi->whence = OI_DBCACHED; @@ -2618,6 +2698,10 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || rtype == OBJ_OFS_DELTA); } + if (oi->typename) + strbuf_addstr(oi->typename, typename(*oi->typep)); + if (oi->typep == &real_type) + oi->typep = NULL; return 0; } From b48158ac94cf725834b70b4a5ab7f2d152a741d4 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sun, 3 May 2015 20:00:00 +0530 Subject: [PATCH 2/4] cat-file: make the options mutually exclusive We only parse the options if 2 or 3 arguments are specified. Update 'struct option options[]' to use OPT_CMDMODE rather than OPT_SET_INT to allow only one mutually exclusive option and avoid the need for checking number of arguments. This was written by Junio C Hamano, tested by me. Helped-by: Junio C Hamano Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index df99df4db1..53b5376126 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -362,12 +362,12 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix) const struct option options[] = { OPT_GROUP(N_(" can be one of: blob, tree, commit, tag")), - OPT_SET_INT('t', NULL, &opt, N_("show object type"), 't'), - OPT_SET_INT('s', NULL, &opt, N_("show object size"), 's'), - OPT_SET_INT('e', NULL, &opt, + OPT_CMDMODE('t', NULL, &opt, N_("show object type"), 't'), + OPT_CMDMODE('s', NULL, &opt, N_("show object size"), 's'), + OPT_CMDMODE('e', NULL, &opt, N_("exit with zero when there's no error"), 'e'), - OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'), - OPT_SET_INT(0, "textconv", &opt, + OPT_CMDMODE('p', NULL, &opt, N_("pretty-print object's content"), 'p'), + OPT_CMDMODE(0, "textconv", &opt, N_("for blob objects, run textconv on object's content"), 'c'), { OPTION_CALLBACK, 0, "batch", &batch, "format", N_("show info and content of objects fed from the standard input"), @@ -380,9 +380,6 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix) git_config(git_cat_file_config, NULL); - if (argc != 3 && argc != 2) - usage_with_options(cat_file_usage, options); - argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0); if (opt) { From 39e4ae388047bd7f6f3a4ad86ba5eba523dd0668 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sun, 3 May 2015 20:00:01 +0530 Subject: [PATCH 3/4] cat-file: teach cat-file a '--allow-unknown-type' option 'git cat-file' throws an error while trying to print the type or size of a broken/corrupt object. This is because these objects are usually of unknown types. Teach git cat-file a '--allow-unknown-type' option where it prints the type or size of a broken/corrupt object without throwing an error. Modify '-t' and '-s' options to call sha1_object_info_extended() directly to support the '--allow-unknown-type' option. Add documentation for 'cat-file --allow-unknown-type'. Helped-by: Junio C Hamano Helped-by: Eric Sunshine Signed-off-by: Karthik Nayak cat-file: add documentation for '--allow-unknown-type' option. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- Documentation/git-cat-file.txt | 5 ++++- builtin/cat-file.c | 38 +++++++++++++++++++++++----------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt index f6a16f4300..499ae7b98a 100644 --- a/Documentation/git-cat-file.txt +++ b/Documentation/git-cat-file.txt @@ -9,7 +9,7 @@ git-cat-file - Provide content or type and size information for repository objec SYNOPSIS -------- [verse] -'git cat-file' (-t | -s | -e | -p | | --textconv ) +'git cat-file' (-t [--allow-unknown-type]| -s [--allow-unknown-type]| -e | -p | | --textconv ) 'git cat-file' (--batch | --batch-check) < DESCRIPTION @@ -69,6 +69,9 @@ OPTIONS not be combined with any other options or arguments. See the section `BATCH OUTPUT` below for details. +--allow-unknown-type:: + Allow -s or -t to query broken/corrupt objects of unknown type. + OUTPUT ------ If '-t' is specified, one of the . diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 53b5376126..ecb488822f 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -9,13 +9,20 @@ #include "userdiff.h" #include "streaming.h" -static int cat_one_file(int opt, const char *exp_type, const char *obj_name) +static int cat_one_file(int opt, const char *exp_type, const char *obj_name, + int unknown_type) { unsigned char sha1[20]; enum object_type type; char *buf; unsigned long size; struct object_context obj_context; + struct object_info oi = {NULL}; + struct strbuf sb = STRBUF_INIT; + unsigned flags = LOOKUP_REPLACE_OBJECT; + + if (unknown_type) + flags |= LOOKUP_UNKNOWN_OBJECT; if (get_sha1_with_context(obj_name, 0, sha1, &obj_context)) die("Not a valid object name %s", obj_name); @@ -23,20 +30,22 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) buf = NULL; switch (opt) { case 't': - type = sha1_object_info(sha1, NULL); - if (type > 0) { - printf("%s\n", typename(type)); + oi.typename = &sb; + if (sha1_object_info_extended(sha1, &oi, flags) < 0) + die("git cat-file: could not get object info"); + if (sb.len) { + printf("%s\n", sb.buf); + strbuf_release(&sb); return 0; } break; case 's': - type = sha1_object_info(sha1, &size); - if (type > 0) { - printf("%lu\n", size); - return 0; - } - break; + oi.sizep = &size; + if (sha1_object_info_extended(sha1, &oi, flags) < 0) + die("git cat-file: could not get object info"); + printf("%lu\n", size); + return 0; case 'e': return !has_sha1_file(sha1); @@ -323,7 +332,7 @@ static int batch_objects(struct batch_options *opt) } static const char * const cat_file_usage[] = { - N_("git cat-file (-t | -s | -e | -p | | --textconv) "), + N_("git cat-file (-t [--allow-unknown-type]|-s [--allow-unknown-type]|-e|-p||--textconv) "), N_("git cat-file (--batch | --batch-check) < "), NULL }; @@ -359,6 +368,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix) int opt = 0; const char *exp_type = NULL, *obj_name = NULL; struct batch_options batch = {0}; + int unknown_type = 0; const struct option options[] = { OPT_GROUP(N_(" can be one of: blob, tree, commit, tag")), @@ -369,6 +379,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix) OPT_CMDMODE('p', NULL, &opt, N_("pretty-print object's content"), 'p'), OPT_CMDMODE(0, "textconv", &opt, N_("for blob objects, run textconv on object's content"), 'c'), + OPT_BOOL( 0, "allow-unknown-type", &unknown_type, + N_("allow -s and -t to work with broken/corrupt objects")), { OPTION_CALLBACK, 0, "batch", &batch, "format", N_("show info and content of objects fed from the standard input"), PARSE_OPT_OPTARG, batch_option_callback }, @@ -402,5 +414,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix) if (batch.enabled) return batch_objects(&batch); - return cat_one_file(opt, exp_type, obj_name); + if (unknown_type && opt != 't' && opt != 's') + die("git cat-file --allow-unknown-type: use with -s or -t"); + return cat_one_file(opt, exp_type, obj_name, unknown_type); } From 3e370f9faf0a4e26d92ae09a26baa19330887903 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sun, 3 May 2015 20:00:02 +0530 Subject: [PATCH 4/4] t1006: add tests for git cat-file --allow-unknown-type Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- t/t1006-cat-file.sh | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index ab36b1eb72..4f225db9a7 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -47,6 +47,18 @@ $content" test_cmp expect actual ' + test_expect_success "Type of $type is correct using --allow-unknown-type" ' + echo $type >expect && + git cat-file -t --allow-unknown-type $sha1 >actual && + test_cmp expect actual + ' + + test_expect_success "Size of $type is correct using --allow-unknown-type" ' + echo $size >expect && + git cat-file -s --allow-unknown-type $sha1 >actual && + test_cmp expect actual + ' + test -z "$content" || test_expect_success "Content of $type is correct" ' maybe_remove_timestamp "$content" $no_ts >expect && @@ -296,4 +308,37 @@ test_expect_success '%(deltabase) reports packed delta bases' ' } ' +bogus_type="bogus" +bogus_content="bogus" +bogus_size=$(strlen "$bogus_content") +bogus_sha1=$(echo_without_newline "$bogus_content" | git hash-object -t $bogus_type --literally -w --stdin) + +test_expect_success "Type of broken object is correct" ' + echo $bogus_type >expect && + git cat-file -t --allow-unknown-type $bogus_sha1 >actual && + test_cmp expect actual +' + +test_expect_success "Size of broken object is correct" ' + echo $bogus_size >expect && + git cat-file -s --allow-unknown-type $bogus_sha1 >actual && + test_cmp expect actual +' +bogus_type="abcdefghijklmnopqrstuvwxyz1234679" +bogus_content="bogus" +bogus_size=$(strlen "$bogus_content") +bogus_sha1=$(echo_without_newline "$bogus_content" | git hash-object -t $bogus_type --literally -w --stdin) + +test_expect_success "Type of broken object is correct when type is large" ' + echo $bogus_type >expect && + git cat-file -t --allow-unknown-type $bogus_sha1 >actual && + test_cmp expect actual +' + +test_expect_success "Size of large broken object is correct when type is large" ' + echo $bogus_size >expect && + git cat-file -s --allow-unknown-type $bogus_sha1 >actual && + test_cmp expect actual +' + test_done