sha1_file: support reading from a loose object of unknown type

Update sha1_loose_object_info() to optionally allow it to read
from a loose object file of unknown/bogus type; as the function
usually returns the type of the object it read in the form of enum
for known types, add an optional "typename" field to receive the
name of the type in textual form and a flag to indicate the reading
of a loose object file of unknown/bogus type.

Add parse_sha1_header_extended() which acts as a wrapper around
parse_sha1_header() allowing more information to be obtained.

Add unpack_sha1_header_to_strbuf() to unpack sha1 headers of
unknown/corrupt objects which have an unknown sha1 header size to
a strbuf structure. This was written by Junio C Hamano but tested
by me.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Karthik Nayak 2015-05-03 19:59:59 +05:30 committed by Junio C Hamano
parent 2dfb2e07cb
commit 46f034483e
2 changed files with 108 additions and 22 deletions

View File

@ -838,6 +838,7 @@ extern int is_ntfs_dotgit(const char *name);
/* object replacement */ /* object replacement */
#define LOOKUP_REPLACE_OBJECT 1 #define LOOKUP_REPLACE_OBJECT 1
#define LOOKUP_UNKNOWN_OBJECT 2
extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag); extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag);
static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size) static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size)
{ {
@ -1304,6 +1305,7 @@ struct object_info {
unsigned long *sizep; unsigned long *sizep;
unsigned long *disk_sizep; unsigned long *disk_sizep;
unsigned char *delta_base_sha1; unsigned char *delta_base_sha1;
struct strbuf *typename;
/* Response */ /* Response */
enum { enum {

View File

@ -1564,6 +1564,40 @@ int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long ma
return git_inflate(stream, 0); return git_inflate(stream, 0);
} }
/*
 * Inflate the header of a loose object whose header length is not known
 * in advance (e.g. an object with an unknown or bogus type name).
 *
 * The first chunk is inflated into buffer[0..bufsiz) via
 * unpack_sha1_header(). If the terminating NUL of the header is already
 * in that chunk, "header" is left untouched and the caller finds the
 * header in "buffer". Otherwise the inflated bytes are accumulated in
 * "header", with "buffer" reused as scratch space, until a chunk that
 * contains a NUL byte has been appended.
 *
 * Returns 0 once the NUL terminator has been seen, and -1 if inflation
 * fails or the stream ends before any NUL byte is produced.
 */
static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map,
					unsigned long mapsize, void *buffer,
					unsigned long bufsiz, struct strbuf *header)
{
	int status;

	status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);
	/*
	 * zlib error codes are negative. Do not look at the output of a
	 * failed inflate, and do not keep inflating a broken stream.
	 */
	if (status < Z_OK)
		return -1;

	/*
	 * Check if entire header is unpacked in the first iteration.
	 */
	if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
		return 0;

	/*
	 * buffer[0..bufsiz] was not large enough. Copy the partial
	 * result out to header, and then append the result of further
	 * reading the stream.
	 */
	strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
	stream->next_out = buffer;
	stream->avail_out = bufsiz;

	do {
		status = git_inflate(stream, 0);
		strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
		if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
			return 0;
		stream->next_out = buffer;
		stream->avail_out = bufsiz;
		/*
		 * Continue only while inflate reports progress. Any other
		 * status (end of stream, or an error on corrupt or truncated
		 * input) would never yield a NUL, so looping until
		 * Z_STREAM_END could spin forever.
		 */
	} while (status == Z_OK);
	return -1;
}
static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1) static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
{ {
int bytes = strlen(buffer) + 1; int bytes = strlen(buffer) + 1;
@ -1614,27 +1648,38 @@ static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long s
* too permissive for what we want to check. So do an anal * too permissive for what we want to check. So do an anal
* object header parse by hand. * object header parse by hand.
*/ */
int parse_sha1_header(const char *hdr, unsigned long *sizep) static int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
unsigned int flags)
{ {
char type[10]; const char *type_buf = hdr;
int i;
unsigned long size; unsigned long size;
int type, type_len = 0;
/* /*
* The type can be at most ten bytes (including the * The type can be of any size but is followed by
* terminating '\0' that we add), and is followed by
* a space. * a space.
*/ */
i = 0;
for (;;) { for (;;) {
char c = *hdr++; char c = *hdr++;
if (c == ' ') if (c == ' ')
break; break;
type[i++] = c; type_len++;
if (i >= sizeof(type))
return -1;
} }
type[i] = 0;
type = type_from_string_gently(type_buf, type_len, 1);
if (oi->typename)
strbuf_add(oi->typename, type_buf, type_len);
/*
* Set type to 0 if it's an unknown object and
* we're obtaining the type using '--allow-unknown-type'
* option.
*/
if ((flags & LOOKUP_UNKNOWN_OBJECT) && (type < 0))
type = 0;
else if (type < 0)
die("invalid object type");
if (oi->typep)
*oi->typep = type;
/* /*
* The length must follow immediately, and be in canonical * The length must follow immediately, and be in canonical
@ -1652,12 +1697,24 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep)
size = size * 10 + c; size = size * 10 + c;
} }
} }
*sizep = size;
if (oi->sizep)
*oi->sizep = size;
/* /*
* The length must be followed by a zero byte * The length must be followed by a zero byte
*/ */
return *hdr ? -1 : type_from_string(type); return *hdr ? -1 : type;
}
int parse_sha1_header(const char *hdr, unsigned long *sizep)
{
struct object_info oi;
oi.sizep = sizep;
oi.typename = NULL;
oi.typep = NULL;
return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
} }
static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1) static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
@ -2524,13 +2581,15 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
} }
static int sha1_loose_object_info(const unsigned char *sha1, static int sha1_loose_object_info(const unsigned char *sha1,
struct object_info *oi) struct object_info *oi,
int flags)
{ {
int status; int status = 0;
unsigned long mapsize, size; unsigned long mapsize;
void *map; void *map;
git_zstream stream; git_zstream stream;
char hdr[32]; char hdr[32];
struct strbuf hdrbuf = STRBUF_INIT;
if (oi->delta_base_sha1) if (oi->delta_base_sha1)
hashclr(oi->delta_base_sha1); hashclr(oi->delta_base_sha1);
@ -2543,7 +2602,7 @@ static int sha1_loose_object_info(const unsigned char *sha1,
* return value implicitly indicates whether the * return value implicitly indicates whether the
* object even exists. * object even exists.
*/ */
if (!oi->typep && !oi->sizep) { if (!oi->typep && !oi->typename && !oi->sizep) {
struct stat st; struct stat st;
if (stat_sha1_file(sha1, &st) < 0) if (stat_sha1_file(sha1, &st) < 0)
return -1; return -1;
@ -2557,17 +2616,26 @@ static int sha1_loose_object_info(const unsigned char *sha1,
return -1; return -1;
if (oi->disk_sizep) if (oi->disk_sizep)
*oi->disk_sizep = mapsize; *oi->disk_sizep = mapsize;
if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) if ((flags & LOOKUP_UNKNOWN_OBJECT)) {
if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0)
status = error("unable to unpack %s header with --allow-unknown-type",
sha1_to_hex(sha1));
} else if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
status = error("unable to unpack %s header", status = error("unable to unpack %s header",
sha1_to_hex(sha1)); sha1_to_hex(sha1));
else if ((status = parse_sha1_header(hdr, &size)) < 0) if (status < 0)
; /* Do nothing */
else if (hdrbuf.len) {
if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
status = error("unable to parse %s header with --allow-unknown-type",
sha1_to_hex(sha1));
} else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
status = error("unable to parse %s header", sha1_to_hex(sha1)); status = error("unable to parse %s header", sha1_to_hex(sha1));
else if (oi->sizep)
*oi->sizep = size;
git_inflate_end(&stream); git_inflate_end(&stream);
munmap(map, mapsize); munmap(map, mapsize);
if (oi->typep) if (status && oi->typep)
*oi->typep = status; *oi->typep = status;
strbuf_release(&hdrbuf);
return 0; return 0;
} }
@ -2576,6 +2644,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
struct cached_object *co; struct cached_object *co;
struct pack_entry e; struct pack_entry e;
int rtype; int rtype;
enum object_type real_type;
const unsigned char *real = lookup_replace_object_extended(sha1, flags); const unsigned char *real = lookup_replace_object_extended(sha1, flags);
co = find_cached_object(real); co = find_cached_object(real);
@ -2588,13 +2657,15 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
*(oi->disk_sizep) = 0; *(oi->disk_sizep) = 0;
if (oi->delta_base_sha1) if (oi->delta_base_sha1)
hashclr(oi->delta_base_sha1); hashclr(oi->delta_base_sha1);
if (oi->typename)
strbuf_addstr(oi->typename, typename(co->type));
oi->whence = OI_CACHED; oi->whence = OI_CACHED;
return 0; return 0;
} }
if (!find_pack_entry(real, &e)) { if (!find_pack_entry(real, &e)) {
/* Most likely it's a loose object. */ /* Most likely it's a loose object. */
if (!sha1_loose_object_info(real, oi)) { if (!sha1_loose_object_info(real, oi, flags)) {
oi->whence = OI_LOOSE; oi->whence = OI_LOOSE;
return 0; return 0;
} }
@ -2605,9 +2676,18 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
return -1; return -1;
} }
/*
* packed_object_info() does not follow the delta chain to
* find out the real type, unless it is given oi->typep.
*/
if (oi->typename && !oi->typep)
oi->typep = &real_type;
rtype = packed_object_info(e.p, e.offset, oi); rtype = packed_object_info(e.p, e.offset, oi);
if (rtype < 0) { if (rtype < 0) {
mark_bad_packed_object(e.p, real); mark_bad_packed_object(e.p, real);
if (oi->typep == &real_type)
oi->typep = NULL;
return sha1_object_info_extended(real, oi, 0); return sha1_object_info_extended(real, oi, 0);
} else if (in_delta_base_cache(e.p, e.offset)) { } else if (in_delta_base_cache(e.p, e.offset)) {
oi->whence = OI_DBCACHED; oi->whence = OI_DBCACHED;
@ -2618,6 +2698,10 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
rtype == OBJ_OFS_DELTA); rtype == OBJ_OFS_DELTA);
} }
if (oi->typename)
strbuf_addstr(oi->typename, typename(*oi->typep));
if (oi->typep == &real_type)
oi->typep = NULL;
return 0; return 0;
} }