Merge branch 'jk/loose-object-fsck'

"git fsck" inspects loose objects more carefully now.

* jk/loose-object-fsck:
  fsck: detect trailing garbage in all object types
  fsck: parse loose object paths directly
  sha1_file: add read_loose_object() function
  t1450: test fsck of packed objects
  sha1_file: fix error message for alternate objects
  t1450: refactor loose-object removal
This commit is contained in:
Junio C Hamano 2017-01-31 13:14:57 -08:00
commit 42ace93e41
4 changed files with 284 additions and 41 deletions

View File

@ -362,18 +362,6 @@ static int fsck_obj(struct object *obj)
return 0;
}
/*
 * Look up and parse the object named "sha1", then run fsck_obj() on it.
 *
 * If parse_object() yields nothing, ERROR_OBJECT is recorded in
 * errors_found and the value of error() is returned; otherwise the object
 * is flagged HAS_OBJ and the result of fsck_obj() is returned.
 */
static int fsck_sha1(const unsigned char *sha1)
{
	struct object *parsed = parse_object(sha1);

	if (parsed) {
		parsed->flags |= HAS_OBJ;
		return fsck_obj(parsed);
	}

	errors_found |= ERROR_OBJECT;
	return error("%s: object corrupt or missing",
		     sha1_to_hex(sha1));
}
static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
unsigned long size, void *buffer, int *eaten)
{
@ -488,9 +476,41 @@ static void get_default_heads(void)
}
}
static struct object *parse_loose_object(const unsigned char *sha1,
const char *path)
{
struct object *obj;
void *contents;
enum object_type type;
unsigned long size;
int eaten;
if (read_loose_object(path, sha1, &type, &size, &contents) < 0)
return NULL;
if (!contents && type != OBJ_BLOB)
die("BUG: read_loose_object streamed a non-blob");
obj = parse_object_buffer(sha1, type, size, contents, &eaten);
if (!eaten)
free(contents);
return obj;
}
static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
{
if (fsck_sha1(sha1))
struct object *obj = parse_loose_object(sha1, path);
if (!obj) {
errors_found |= ERROR_OBJECT;
error("%s: object corrupt or missing: %s",
sha1_to_hex(sha1), path);
return 0; /* keep checking other objects */
}
obj->flags = HAS_OBJ;
if (fsck_obj(obj))
errors_found |= ERROR_OBJECT;
return 0;
}

13
cache.h
View File

@ -1142,6 +1142,19 @@ extern int finalize_object_file(const char *tmpfile, const char *filename);
extern int has_sha1_pack(const unsigned char *sha1);
/*
* Open the loose object at path, check its sha1, and return the contents,
* type, and size. If the object is a blob, then "contents" may return NULL,
* to allow streaming of large blobs.
*
* "contents" is reset to NULL before anything else is attempted. When a
* non-NULL buffer is handed back on success, the caller is expected to
* release it with free() once it is no longer needed.
*
* Returns 0 on success, negative on error (details may be written to stderr).
*/
int read_loose_object(const char *path,
const unsigned char *expected_sha1,
enum object_type *type,
unsigned long *size,
void **contents);
/*
* Return true iff we have an object named sha1, whether local or in
* an alternate object database, and whether packed or loose. This

View File

@ -1630,39 +1630,54 @@ int git_open_cloexec(const char *name, int flags)
return fd;
}
static int stat_sha1_file(const unsigned char *sha1, struct stat *st)
/*
* Find "sha1" as a loose object in the local repository or in an alternate.
* Returns 0 on success, negative on failure.
*
* The "path" out-parameter will give the path of the object we found (if any).
* Note that it may point to static storage and is only valid until another
* call to sha1_file_name(), etc.
*/
static int stat_sha1_file(const unsigned char *sha1, struct stat *st,
const char **path)
{
struct alternate_object_database *alt;
if (!lstat(sha1_file_name(sha1), st))
*path = sha1_file_name(sha1);
if (!lstat(*path, st))
return 0;
prepare_alt_odb();
errno = ENOENT;
for (alt = alt_odb_list; alt; alt = alt->next) {
const char *path = alt_sha1_path(alt, sha1);
if (!lstat(path, st))
*path = alt_sha1_path(alt, sha1);
if (!lstat(*path, st))
return 0;
}
return -1;
}
static int open_sha1_file(const unsigned char *sha1)
/*
* Like stat_sha1_file(), but actually open the object and return the
* descriptor. See the caveats on the "path" parameter above.
*/
static int open_sha1_file(const unsigned char *sha1, const char **path)
{
int fd;
struct alternate_object_database *alt;
int most_interesting_errno;
fd = git_open(sha1_file_name(sha1));
*path = sha1_file_name(sha1);
fd = git_open(*path);
if (fd >= 0)
return fd;
most_interesting_errno = errno;
prepare_alt_odb();
for (alt = alt_odb_list; alt; alt = alt->next) {
const char *path = alt_sha1_path(alt, sha1);
fd = git_open(path);
*path = alt_sha1_path(alt, sha1);
fd = git_open(*path);
if (fd >= 0)
return fd;
if (most_interesting_errno == ENOENT)
@ -1672,12 +1687,21 @@ static int open_sha1_file(const unsigned char *sha1)
return -1;
}
void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
/*
* Map the loose object at "path" if it is not NULL, or the path found by
* searching for a loose object named "sha1".
*/
static void *map_sha1_file_1(const char *path,
const unsigned char *sha1,
unsigned long *size)
{
void *map;
int fd;
fd = open_sha1_file(sha1);
if (path)
fd = git_open(path);
else
fd = open_sha1_file(sha1, &path);
map = NULL;
if (fd >= 0) {
struct stat st;
@ -1686,7 +1710,7 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
*size = xsize_t(st.st_size);
if (!*size) {
/* mmap() is forbidden on empty files */
error("object file %s is empty", sha1_file_name(sha1));
error("object file %s is empty", path);
return NULL;
}
map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
@ -1696,6 +1720,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
return map;
}
/*
 * Map the loose object named "sha1" and store the mapped length in "size".
 *
 * Passes a NULL path to map_sha1_file_1(), so the object file is located by
 * searching for "sha1" rather than by opening a caller-supplied path.
 * Returns whatever map_sha1_file_1() returns.
 */
void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
{
return map_sha1_file_1(NULL, sha1, size);
}
unsigned long unpack_object_header_buffer(const unsigned char *buf,
unsigned long len, enum object_type *type, unsigned long *sizep)
{
@ -2806,8 +2835,9 @@ static int sha1_loose_object_info(const unsigned char *sha1,
* object even exists.
*/
if (!oi->typep && !oi->typename && !oi->sizep) {
const char *path;
struct stat st;
if (stat_sha1_file(sha1, &st) < 0)
if (stat_sha1_file(sha1, &st, &path) < 0)
return -1;
if (oi->disk_sizep)
*oi->disk_sizep = st.st_size;
@ -3003,6 +3033,8 @@ void *read_sha1_file_extended(const unsigned char *sha1,
{
void *data;
const struct packed_git *p;
const char *path;
struct stat st;
const unsigned char *repl = lookup_replace_object_extended(sha1, flag);
errno = 0;
@ -3018,12 +3050,9 @@ void *read_sha1_file_extended(const unsigned char *sha1,
die("replacement %s not found for %s",
sha1_to_hex(repl), sha1_to_hex(sha1));
if (has_loose_object(repl)) {
const char *path = sha1_file_name(sha1);
if (!stat_sha1_file(repl, &st, &path))
die("loose object %s (stored in %s) is corrupt",
sha1_to_hex(repl), path);
}
if ((p = has_packed_and_bad(repl)) != NULL)
die("packed object %s (stored in %s) is corrupt",
@ -3793,3 +3822,122 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
}
return r ? r : pack_errors;
}
/*
 * Inflate the rest of a loose object in fixed-size chunks, hashing as we go,
 * and compare the resulting sha1 against "expected_sha1" without ever
 * holding the whole object in memory.
 *
 * "stream" must already have produced the bytes currently held in "hdr"
 * (stream->total_out of them); "size" is the content size announced by the
 * header. The stream is always ended before returning.
 *
 * Returns 0 if the object inflates cleanly, has no trailing input, and
 * hashes to "expected_sha1"; returns -1 (after reporting via error())
 * otherwise.
 */
static int check_stream_sha1(git_zstream *stream,
			     const char *hdr,
			     unsigned long size,
			     const char *path,
			     const unsigned char *expected_sha1)
{
	git_SHA_CTX ctx;
	unsigned char computed[GIT_SHA1_RAWSZ];
	unsigned char chunk[4096];
	unsigned long consumed;
	int zstatus = Z_OK;

	/* Everything inflated so far (header plus any content) is hashed. */
	git_SHA1_Init(&ctx);
	git_SHA1_Update(&ctx, hdr, stream->total_out);

	/*
	 * Bytes up to and including the header's NUL were already inflated
	 * into hdr but are not part of the object's content size.
	 */
	consumed = stream->total_out - strlen(hdr) - 1;

	for (;;) {
		unsigned long remaining;

		/*
		 * Keep going while consumed <= size (not just "<") so the
		 * final zlib packets are read; see the comment in
		 * unpack_sha1_rest for details.
		 */
		if (consumed > size)
			break;
		if (zstatus != Z_OK && zstatus != Z_BUF_ERROR)
			break;

		/* Never ask for more output than the header promised. */
		remaining = size - consumed;
		stream->next_out = chunk;
		stream->avail_out = remaining < sizeof(chunk) ?
				    remaining : sizeof(chunk);

		zstatus = git_inflate(stream, Z_FINISH);
		git_SHA1_Update(&ctx, chunk, stream->next_out - chunk);
		consumed += stream->next_out - chunk;
	}
	git_inflate_end(stream);

	if (zstatus != Z_STREAM_END) {
		error("corrupt loose object '%s'", sha1_to_hex(expected_sha1));
		return -1;
	}

	/* Unconsumed input after the end of the stream is trailing garbage. */
	if (stream->avail_in) {
		error("garbage at end of loose object '%s'",
		      sha1_to_hex(expected_sha1));
		return -1;
	}

	git_SHA1_Final(computed, &ctx);
	if (hashcmp(expected_sha1, computed)) {
		error("sha1 mismatch for %s (expected %s)", path,
		      sha1_to_hex(expected_sha1));
		return -1;
	}

	return 0;
}
/*
 * Read the loose object file at "path" and verify that it hashes to
 * "expected_sha1".
 *
 * On success, "*type" and "*size" describe the object. Blobs are verified
 * by streaming and leave "*contents" NULL; for every other type
 * "*contents" receives the inflated buffer, which the caller must free().
 *
 * On failure, "*contents" is NULL and a message has been reported via
 * error().
 *
 * Returns 0 on success, -1 on error.
 */
int read_loose_object(const char *path,
		      const unsigned char *expected_sha1,
		      enum object_type *type,
		      unsigned long *size,
		      void **contents)
{
	int ret = -1;
	void *map;
	unsigned long mapsize;
	git_zstream stream;
	char hdr[32];

	*contents = NULL;

	map = map_sha1_file_1(path, NULL, &mapsize);
	if (!map) {
		error_errno("unable to mmap %s", path);
		goto out;
	}

	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
		error("unable to unpack header of %s", path);
		/*
		 * Release the inflate state like the other error paths do.
		 * NOTE(review): assumes the stream is initialized even when
		 * header inflation fails; confirm against
		 * unpack_sha1_header().
		 */
		git_inflate_end(&stream);
		goto out;
	}

	*type = parse_sha1_header(hdr, size);
	if (*type < 0) {
		error("unable to parse header of %s", path);
		git_inflate_end(&stream);
		goto out;
	}

	if (*type == OBJ_BLOB) {
		/* Blobs are hashed chunk by chunk; the helper ends the stream. */
		if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
			goto out;
	} else {
		*contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1);
		if (!*contents) {
			error("unable to unpack contents of %s", path);
			git_inflate_end(&stream);
			goto out;
		}
		if (check_sha1_signature(expected_sha1, *contents,
					 *size, typename(*type))) {
			error("sha1 mismatch for %s (expected %s)", path,
			      sha1_to_hex(expected_sha1));
			free(*contents);
			/* Do not hand a dangling pointer back to the caller. */
			*contents = NULL;
			goto out;
		}
	}

	ret = 0; /* everything checks out */

out:
	if (map)
		munmap(map, mapsize);
	return ret;
}

View File

@ -43,13 +43,13 @@ test_expect_success 'HEAD is part of refs, valid objects appear valid' '
test_expect_success 'setup: helpers for corruption tests' '
sha1_file() {
echo "$*" | sed "s#..#.git/objects/&/#"
remainder=${1#??} &&
firsttwo=${1%$remainder} &&
echo ".git/objects/$firsttwo/$remainder"
} &&
remove_object() {
file=$(sha1_file "$*") &&
test -e "$file" &&
rm -f "$file"
rm "$(sha1_file "$1")"
}
'
@ -535,13 +535,6 @@ test_expect_success 'fsck --connectivity-only' '
)
'
# Resolve "$1" to an object name with "git rev-parse" and delete the loose
# object file for it, i.e. .git/objects/<first two hex digits>/<remainder>.
remove_loose_object () {
sha1="$(git rev-parse "$1")" &&
remainder=${sha1#??} &&
firsttwo=${sha1%$remainder} &&
rm .git/objects/$firsttwo/$remainder
}
test_expect_success 'fsck --name-objects' '
rm -rf name-objects &&
git init name-objects &&
@ -550,11 +543,80 @@ test_expect_success 'fsck --name-objects' '
test_commit julius caesar.t &&
test_commit augustus &&
test_commit caesar &&
remove_loose_object $(git rev-parse julius:caesar.t) &&
remove_object $(git rev-parse julius:caesar.t) &&
test_must_fail git fsck --name-objects >out &&
tree=$(git rev-parse --verify julius:) &&
grep "$tree (\(refs/heads/master\|HEAD\)@{[0-9]*}:" out
)
'
# Register alt.git as an alternate object store, plant an empty object file
# inside it, and check that the failing fsck output mentions alt.git.
test_expect_success 'alternate objects are correctly blamed' '
test_when_finished "rm -rf alt.git .git/objects/info/alternates" &&
git init --bare alt.git &&
echo "../../alt.git/objects" >.git/objects/info/alternates &&
mkdir alt.git/objects/12 &&
>alt.git/objects/12/34567890123456789012345678901234567890 &&
test_must_fail git fsck >out 2>&1 &&
grep alt.git out
'
# Write two altered copies of the HEAD commit (the first "<" replaced by
# text), pack them, and remove their loose copies so that only the packed
# versions remain. fsck must fail with a "bad name" error for each commit
# and must not print anything matching "corrupt".
test_expect_success 'fsck errors in packed objects' '
git cat-file commit HEAD >basis &&
sed "s/</one/" basis >one &&
sed "s/</foo/" basis >two &&
one=$(git hash-object -t commit -w one) &&
two=$(git hash-object -t commit -w two) &&
pack=$(
{
echo $one &&
echo $two
} | git pack-objects .git/objects/pack/pack
) &&
test_when_finished "rm -f .git/objects/pack/pack-$pack.*" &&
remove_object $one &&
remove_object $two &&
test_must_fail git fsck 2>out &&
grep "error in commit $one.* - bad name" out &&
grep "error in commit $two.* - bad name" out &&
! grep corrupt out
'
# In a fresh repository, repack without "-d" so the HEAD commit exists both
# loose and packed, overwrite the loose copy with junk, and check that fsck
# still fails.
test_expect_success 'fsck finds problems in duplicate loose objects' '
rm -rf broken-duplicate &&
git init broken-duplicate &&
(
cd broken-duplicate &&
test_commit duplicate &&
# no "-d" here, so we end up with duplicates
git repack &&
# now corrupt the loose copy
file=$(sha1_file "$(git rev-parse HEAD)") &&
rm "$file" &&
echo broken >"$file" &&
test_must_fail git fsck
)
'
# Create a new loose commit object (the HEAD commit text plus one extra
# line), append junk to its object file, and check that fsck fails with a
# "garbage" message that names the commit.
test_expect_success 'fsck detects trailing loose garbage (commit)' '
git cat-file commit HEAD >basis &&
echo bump-commit-sha1 >>basis &&
commit=$(git hash-object -w -t commit basis) &&
file=$(sha1_file $commit) &&
test_when_finished "remove_object $commit" &&
chmod +w "$file" &&
echo garbage >>"$file" &&
test_must_fail git fsck 2>out &&
test_i18ngrep "garbage.*$commit" out
'
# Same check as for commits, but for a blob: write a loose blob, append junk
# to its object file, and check that fsck fails with a "garbage" message
# that names the blob.
test_expect_success 'fsck detects trailing loose garbage (blob)' '
blob=$(echo trailing | git hash-object -w --stdin) &&
file=$(sha1_file $blob) &&
test_when_finished "remove_object $blob" &&
chmod +w "$file" &&
echo garbage >>"$file" &&
test_must_fail git fsck 2>out &&
test_i18ngrep "garbage.*$blob" out
'
test_done