Merge branch 'jk/detect-truncated-zlib-input' into maint

A regression introduced in the Git 2.12 era made "git fsck" fall into an
infinite loop while processing truncated loose objects.

* jk/detect-truncated-zlib-input:
  cat-file: handle streaming failures consistently
  check_stream_sha1(): handle input underflow
  t1450: check large blob in trailing-garbage test
This commit is contained in:
Junio C Hamano 2018-11-21 22:57:52 +09:00
commit d75c41b2ae
3 changed files with 35 additions and 7 deletions

View File

@ -50,6 +50,13 @@ static int filter_object(const char *path, unsigned mode,
return 0; return 0;
} }
/*
 * Write the blob named by `oid` to file descriptor 1 via
 * stream_blob_to_fd(). Any non-zero result from the streaming call is
 * reported through die() with the object's hex name; otherwise 0 is
 * returned, so callers can use the result directly as their own return
 * value.
 */
static int stream_blob(const struct object_id *oid)
{
	int stream_status = stream_blob_to_fd(1, oid, NULL, 0);

	if (stream_status != 0)
		die("unable to stream %s to stdout", oid_to_hex(oid));

	return 0;
}
static int cat_one_file(int opt, const char *exp_type, const char *obj_name, static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
int unknown_type) int unknown_type)
{ {
@ -131,7 +138,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
} }
if (type == OBJ_BLOB) if (type == OBJ_BLOB)
return stream_blob_to_fd(1, &oid, NULL, 0); return stream_blob(&oid);
buf = read_object_file(&oid, &type, &size); buf = read_object_file(&oid, &type, &size);
if (!buf) if (!buf)
die("Cannot read object %s", obj_name); die("Cannot read object %s", obj_name);
@ -154,7 +161,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
oidcpy(&blob_oid, &oid); oidcpy(&blob_oid, &oid);
if (oid_object_info(the_repository, &blob_oid, NULL) == OBJ_BLOB) if (oid_object_info(the_repository, &blob_oid, NULL) == OBJ_BLOB)
return stream_blob_to_fd(1, &blob_oid, NULL, 0); return stream_blob(&blob_oid);
/* /*
* we attempted to dereference a tag to a blob * we attempted to dereference a tag to a blob
* and failed; there may be new dereference * and failed; there may be new dereference
@ -317,8 +324,9 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
BUG("invalid cmdmode: %c", opt->cmdmode); BUG("invalid cmdmode: %c", opt->cmdmode);
batch_write(opt, contents, size); batch_write(opt, contents, size);
free(contents); free(contents);
} else if (stream_blob_to_fd(1, oid, NULL, 0) < 0) } else {
die("unable to stream %s to stdout", oid_to_hex(oid)); stream_blob(oid);
}
} }
else { else {
enum object_type type; enum object_type type;

View File

@ -2191,7 +2191,8 @@ static int check_stream_sha1(git_zstream *stream,
* see the comment in unpack_sha1_rest for details. * see the comment in unpack_sha1_rest for details.
*/ */
while (total_read <= size && while (total_read <= size &&
(status == Z_OK || status == Z_BUF_ERROR)) { (status == Z_OK ||
(status == Z_BUF_ERROR && !stream->avail_out))) {
stream->next_out = buf; stream->next_out = buf;
stream->avail_out = sizeof(buf); stream->avail_out = sizeof(buf);
if (size - total_read < stream->avail_out) if (size - total_read < stream->avail_out)

View File

@ -673,16 +673,35 @@ test_expect_success 'fsck detects trailing loose garbage (commit)' '
test_i18ngrep "garbage.*$commit" out test_i18ngrep "garbage.*$commit" out
' '
test_expect_success 'fsck detects trailing loose garbage (blob)' ' test_expect_success 'fsck detects trailing loose garbage (large blob)' '
blob=$(echo trailing | git hash-object -w --stdin) && blob=$(echo trailing | git hash-object -w --stdin) &&
file=$(sha1_file $blob) && file=$(sha1_file $blob) &&
test_when_finished "remove_object $blob" && test_when_finished "remove_object $blob" &&
chmod +w "$file" && chmod +w "$file" &&
echo garbage >>"$file" && echo garbage >>"$file" &&
test_must_fail git fsck 2>out && test_must_fail git -c core.bigfilethreshold=5 fsck 2>out &&
test_i18ngrep "garbage.*$blob" out test_i18ngrep "garbage.*$blob" out
' '
# Cut a loose blob short in its data portion and check that fsck fails and
# reports the object as corrupt, both with the default settings and with a
# small core.bigfilethreshold.
test_expect_success 'fsck detects truncated loose object' '
	# make it big enough that we know we will truncate in the data
	# portion, not the header
	test-tool genrandom truncate 4096 >file &&
	blob=$(git hash-object -w file) &&
	file=$(sha1_file $blob) &&
	test_when_finished "remove_object $blob" &&
	test_copy_bytes 1024 <"$file" >tmp &&
	rm "$file" &&
	mv -f tmp "$file" &&

	# check both regular and streaming code paths; the pattern is
	# quoted so the shell cannot glob-expand or split it before grep
	# sees it
	test_must_fail git fsck 2>out &&
	test_i18ngrep "corrupt.*$blob" out &&
	test_must_fail git -c core.bigfilethreshold=128 fsck 2>out &&
	test_i18ngrep "corrupt.*$blob" out
'
# for each of type, we have one version which is referenced by another object # for each of type, we have one version which is referenced by another object
# (and so while unreachable, not dangling), and another variant which really is # (and so while unreachable, not dangling), and another variant which really is
# dangling. # dangling.