sha1_file: remove recursion in packed_object_info

packed_object_info() and packed_delta_info() were mutually recursive.
The former would handle ordinary types and defer deltas to the latter;
the latter would use the former to resolve the delta base.

This arrangement, however, leads to trouble with threaded index-pack
and long delta chains on platforms where thread stacks are small, as
happened on OS X (512kB thread stacks by default) with the chromium
repo.

The task of the two functions is not all that hard to describe without
any recursion, however.  It proceeds in three steps:

- determine the representation type and size, based on the outermost
  object (delta or not)

- follow through the delta chain, if any

- determine the object type from what is found at the end of the delta
  chain

The only complication stems from the error recovery.  If parsing fails
at any step, we want to mark that object (within the pack) as bad and
try getting the corresponding SHA1 from elsewhere.  If that also
fails, we want to repeat this process back up the delta chain until we
find a reasonable solution or conclude that there is no way to
reconstruct the object.  (This is conveniently checked by t5303.)

To achieve that within the pack, we keep track of the entire delta
chain in a stack.  When things go sour, we process that stack from the
top, marking entries as bad and attempting to re-resolve by sha1.  To
avoid excessive malloc(), the stack starts out with a small
stack-allocated array.  The choice of 64 is based on the default of
pack.depth, which is 50, in the hope that it covers "most" delta
chains without any need for malloc().

It's much harder to make the actual re-resolving by sha1 nonrecursive,
so we skip that.  If you can't afford *that* recursion, your
corruption problems are more serious than your stack size problems.

Reported-by: Stefan Zager <szager@google.com>
Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Thomas Rast 2013-03-25 19:07:39 +01:00 committed by Junio C Hamano
parent 239222f587
commit 790d96c023

View File

@ -1560,50 +1560,6 @@ static off_t get_delta_base(struct packed_git *p,
return base_offset; return base_offset;
} }
/* forward declaration for a mutually recursive function */
static int packed_object_info(struct packed_git *p, off_t offset,
unsigned long *sizep, int *rtype);
static int packed_delta_info(struct packed_git *p,
struct pack_window **w_curs,
off_t curpos,
enum object_type type,
off_t obj_offset,
unsigned long *sizep)
{
off_t base_offset;
base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
if (!base_offset)
return OBJ_BAD;
type = packed_object_info(p, base_offset, NULL, NULL);
if (type <= OBJ_NONE) {
struct revindex_entry *revidx;
const unsigned char *base_sha1;
revidx = find_pack_revindex(p, base_offset);
if (!revidx)
return OBJ_BAD;
base_sha1 = nth_packed_object_sha1(p, revidx->nr);
mark_bad_packed_object(p, base_sha1);
type = sha1_object_info(base_sha1, NULL);
if (type <= OBJ_NONE)
return OBJ_BAD;
}
/* We choose to only get the type of the base object and
* ignore potentially corrupt pack file that expects the delta
* based on a base with a wrong size. This saves tons of
* inflate() calls.
*/
if (sizep) {
*sizep = get_size_from_delta(p, w_curs, curpos);
if (*sizep == 0)
type = OBJ_BAD;
}
return type;
}
int unpack_object_header(struct packed_git *p, int unpack_object_header(struct packed_git *p,
struct pack_window **w_curs, struct pack_window **w_curs,
off_t *curpos, off_t *curpos,
@ -1630,6 +1586,25 @@ int unpack_object_header(struct packed_git *p,
return type; return type;
} }
static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
{
int type;
struct revindex_entry *revidx;
const unsigned char *sha1;
revidx = find_pack_revindex(p, obj_offset);
if (!revidx)
return OBJ_BAD;
sha1 = nth_packed_object_sha1(p, revidx->nr);
mark_bad_packed_object(p, sha1);
type = sha1_object_info(sha1, NULL);
if (type <= OBJ_NONE)
return OBJ_BAD;
return type;
}
#define POI_STACK_PREALLOC 64
static int packed_object_info(struct packed_git *p, off_t obj_offset, static int packed_object_info(struct packed_git *p, off_t obj_offset,
unsigned long *sizep, int *rtype) unsigned long *sizep, int *rtype)
{ {
@ -1637,31 +1612,89 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
unsigned long size; unsigned long size;
off_t curpos = obj_offset; off_t curpos = obj_offset;
enum object_type type; enum object_type type;
off_t small_poi_stack[POI_STACK_PREALLOC];
off_t *poi_stack = small_poi_stack;
int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
type = unpack_object_header(p, &w_curs, &curpos, &size); type = unpack_object_header(p, &w_curs, &curpos, &size);
if (rtype) if (rtype)
*rtype = type; /* representation type */ *rtype = type; /* representation type */
if (sizep) {
if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
off_t tmp_pos = curpos;
off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
type, obj_offset);
if (!base_offset) {
type = OBJ_BAD;
goto out;
}
*sizep = get_size_from_delta(p, &w_curs, tmp_pos);
if (*sizep == 0) {
type = OBJ_BAD;
goto out;
}
} else {
*sizep = size;
}
}
while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
off_t base_offset;
/* Push the object we're going to leave behind */
if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
poi_stack_alloc = alloc_nr(poi_stack_nr);
poi_stack = xmalloc(sizeof(off_t)*poi_stack_alloc);
memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
} else {
ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
}
poi_stack[poi_stack_nr++] = obj_offset;
/* If parsing the base offset fails, just unwind */
base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
if (!base_offset)
goto unwind;
curpos = obj_offset = base_offset;
type = unpack_object_header(p, &w_curs, &curpos, &size);
if (type <= OBJ_NONE) {
/* If getting the base itself fails, we first
* retry the base, otherwise unwind */
type = retry_bad_packed_offset(p, base_offset);
if (type > OBJ_NONE)
goto out;
goto unwind;
}
}
switch (type) { switch (type) {
case OBJ_OFS_DELTA: case OBJ_BAD:
case OBJ_REF_DELTA:
type = packed_delta_info(p, &w_curs, curpos,
type, obj_offset, sizep);
break;
case OBJ_COMMIT: case OBJ_COMMIT:
case OBJ_TREE: case OBJ_TREE:
case OBJ_BLOB: case OBJ_BLOB:
case OBJ_TAG: case OBJ_TAG:
if (sizep)
*sizep = size;
break; break;
default: default:
error("unknown object type %i at offset %"PRIuMAX" in %s", error("unknown object type %i at offset %"PRIuMAX" in %s",
type, (uintmax_t)obj_offset, p->pack_name); type, (uintmax_t)obj_offset, p->pack_name);
type = OBJ_BAD; type = OBJ_BAD;
} }
out:
if (poi_stack != small_poi_stack)
free(poi_stack);
unuse_pack(&w_curs); unuse_pack(&w_curs);
return type; return type;
unwind:
while (poi_stack_nr) {
obj_offset = poi_stack[--poi_stack_nr];
type = retry_bad_packed_offset(p, obj_offset);
if (type > OBJ_NONE)
goto out;
}
type = OBJ_BAD;
goto out;
} }
static void *unpack_compressed_entry(struct packed_git *p, static void *unpack_compressed_entry(struct packed_git *p,