Merge branch 'jk/loose-object-fsck'

"git fsck" inspects loose objects more carefully now.

* jk/loose-object-fsck:
  fsck: detect trailing garbage in all object types
  fsck: parse loose object paths directly
  sha1_file: add read_loose_object() function
  t1450: test fsck of packed objects
  sha1_file: fix error message for alternate objects
  t1450: refactor loose-object removal
This commit is contained in:
commit
42ace93e41
@ -362,18 +362,6 @@ static int fsck_obj(struct object *obj)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fsck_sha1(const unsigned char *sha1)
|
||||
{
|
||||
struct object *obj = parse_object(sha1);
|
||||
if (!obj) {
|
||||
errors_found |= ERROR_OBJECT;
|
||||
return error("%s: object corrupt or missing",
|
||||
sha1_to_hex(sha1));
|
||||
}
|
||||
obj->flags |= HAS_OBJ;
|
||||
return fsck_obj(obj);
|
||||
}
|
||||
|
||||
static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
|
||||
unsigned long size, void *buffer, int *eaten)
|
||||
{
|
||||
@ -488,9 +476,41 @@ static void get_default_heads(void)
|
||||
}
|
||||
}
|
||||
|
||||
static struct object *parse_loose_object(const unsigned char *sha1,
|
||||
const char *path)
|
||||
{
|
||||
struct object *obj;
|
||||
void *contents;
|
||||
enum object_type type;
|
||||
unsigned long size;
|
||||
int eaten;
|
||||
|
||||
if (read_loose_object(path, sha1, &type, &size, &contents) < 0)
|
||||
return NULL;
|
||||
|
||||
if (!contents && type != OBJ_BLOB)
|
||||
die("BUG: read_loose_object streamed a non-blob");
|
||||
|
||||
obj = parse_object_buffer(sha1, type, size, contents, &eaten);
|
||||
|
||||
if (!eaten)
|
||||
free(contents);
|
||||
return obj;
|
||||
}
|
||||
|
||||
static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
|
||||
{
|
||||
if (fsck_sha1(sha1))
|
||||
struct object *obj = parse_loose_object(sha1, path);
|
||||
|
||||
if (!obj) {
|
||||
errors_found |= ERROR_OBJECT;
|
||||
error("%s: object corrupt or missing: %s",
|
||||
sha1_to_hex(sha1), path);
|
||||
return 0; /* keep checking other objects */
|
||||
}
|
||||
|
||||
obj->flags = HAS_OBJ;
|
||||
if (fsck_obj(obj))
|
||||
errors_found |= ERROR_OBJECT;
|
||||
return 0;
|
||||
}
|
||||
|
13
cache.h
13
cache.h
@ -1142,6 +1142,19 @@ extern int finalize_object_file(const char *tmpfile, const char *filename);
|
||||
|
||||
extern int has_sha1_pack(const unsigned char *sha1);
|
||||
|
||||
/*
|
||||
* Open the loose object at path, check its sha1, and return the contents,
|
||||
* type, and size. If the object is a blob, then "contents" may return NULL,
|
||||
* to allow streaming of large blobs.
|
||||
*
|
||||
* Returns 0 on success, negative on error (details may be written to stderr).
|
||||
*/
|
||||
int read_loose_object(const char *path,
|
||||
const unsigned char *expected_sha1,
|
||||
enum object_type *type,
|
||||
unsigned long *size,
|
||||
void **contents);
|
||||
|
||||
/*
|
||||
* Return true iff we have an object named sha1, whether local or in
|
||||
* an alternate object database, and whether packed or loose. This
|
||||
|
180
sha1_file.c
180
sha1_file.c
@ -1630,39 +1630,54 @@ int git_open_cloexec(const char *name, int flags)
|
||||
return fd;
|
||||
}
|
||||
|
||||
static int stat_sha1_file(const unsigned char *sha1, struct stat *st)
|
||||
/*
|
||||
* Find "sha1" as a loose object in the local repository or in an alternate.
|
||||
* Returns 0 on success, negative on failure.
|
||||
*
|
||||
* The "path" out-parameter will give the path of the object we found (if any).
|
||||
* Note that it may point to static storage and is only valid until another
|
||||
* call to sha1_file_name(), etc.
|
||||
*/
|
||||
static int stat_sha1_file(const unsigned char *sha1, struct stat *st,
|
||||
const char **path)
|
||||
{
|
||||
struct alternate_object_database *alt;
|
||||
|
||||
if (!lstat(sha1_file_name(sha1), st))
|
||||
*path = sha1_file_name(sha1);
|
||||
if (!lstat(*path, st))
|
||||
return 0;
|
||||
|
||||
prepare_alt_odb();
|
||||
errno = ENOENT;
|
||||
for (alt = alt_odb_list; alt; alt = alt->next) {
|
||||
const char *path = alt_sha1_path(alt, sha1);
|
||||
if (!lstat(path, st))
|
||||
*path = alt_sha1_path(alt, sha1);
|
||||
if (!lstat(*path, st))
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int open_sha1_file(const unsigned char *sha1)
|
||||
/*
|
||||
* Like stat_sha1_file(), but actually open the object and return the
|
||||
* descriptor. See the caveats on the "path" parameter above.
|
||||
*/
|
||||
static int open_sha1_file(const unsigned char *sha1, const char **path)
|
||||
{
|
||||
int fd;
|
||||
struct alternate_object_database *alt;
|
||||
int most_interesting_errno;
|
||||
|
||||
fd = git_open(sha1_file_name(sha1));
|
||||
*path = sha1_file_name(sha1);
|
||||
fd = git_open(*path);
|
||||
if (fd >= 0)
|
||||
return fd;
|
||||
most_interesting_errno = errno;
|
||||
|
||||
prepare_alt_odb();
|
||||
for (alt = alt_odb_list; alt; alt = alt->next) {
|
||||
const char *path = alt_sha1_path(alt, sha1);
|
||||
fd = git_open(path);
|
||||
*path = alt_sha1_path(alt, sha1);
|
||||
fd = git_open(*path);
|
||||
if (fd >= 0)
|
||||
return fd;
|
||||
if (most_interesting_errno == ENOENT)
|
||||
@ -1672,12 +1687,21 @@ static int open_sha1_file(const unsigned char *sha1)
|
||||
return -1;
|
||||
}
|
||||
|
||||
void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
|
||||
/*
|
||||
* Map the loose object at "path" if it is not NULL, or the path found by
|
||||
* searching for a loose object named "sha1".
|
||||
*/
|
||||
static void *map_sha1_file_1(const char *path,
|
||||
const unsigned char *sha1,
|
||||
unsigned long *size)
|
||||
{
|
||||
void *map;
|
||||
int fd;
|
||||
|
||||
fd = open_sha1_file(sha1);
|
||||
if (path)
|
||||
fd = git_open(path);
|
||||
else
|
||||
fd = open_sha1_file(sha1, &path);
|
||||
map = NULL;
|
||||
if (fd >= 0) {
|
||||
struct stat st;
|
||||
@ -1686,7 +1710,7 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
|
||||
*size = xsize_t(st.st_size);
|
||||
if (!*size) {
|
||||
/* mmap() is forbidden on empty files */
|
||||
error("object file %s is empty", sha1_file_name(sha1));
|
||||
error("object file %s is empty", path);
|
||||
return NULL;
|
||||
}
|
||||
map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
@ -1696,6 +1720,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
|
||||
return map;
|
||||
}
|
||||
|
||||
/*
 * Map the loose object named "sha1", storing the mapped length in "*size".
 *
 * No explicit path is supplied (NULL), so map_sha1_file_1() locates the
 * file by searching for the object name.
 */
void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
{
	void *mapped = map_sha1_file_1(NULL, sha1, size);

	return mapped;
}
|
||||
|
||||
unsigned long unpack_object_header_buffer(const unsigned char *buf,
|
||||
unsigned long len, enum object_type *type, unsigned long *sizep)
|
||||
{
|
||||
@ -2806,8 +2835,9 @@ static int sha1_loose_object_info(const unsigned char *sha1,
|
||||
* object even exists.
|
||||
*/
|
||||
if (!oi->typep && !oi->typename && !oi->sizep) {
|
||||
const char *path;
|
||||
struct stat st;
|
||||
if (stat_sha1_file(sha1, &st) < 0)
|
||||
if (stat_sha1_file(sha1, &st, &path) < 0)
|
||||
return -1;
|
||||
if (oi->disk_sizep)
|
||||
*oi->disk_sizep = st.st_size;
|
||||
@ -3003,6 +3033,8 @@ void *read_sha1_file_extended(const unsigned char *sha1,
|
||||
{
|
||||
void *data;
|
||||
const struct packed_git *p;
|
||||
const char *path;
|
||||
struct stat st;
|
||||
const unsigned char *repl = lookup_replace_object_extended(sha1, flag);
|
||||
|
||||
errno = 0;
|
||||
@ -3018,12 +3050,9 @@ void *read_sha1_file_extended(const unsigned char *sha1,
|
||||
die("replacement %s not found for %s",
|
||||
sha1_to_hex(repl), sha1_to_hex(sha1));
|
||||
|
||||
if (has_loose_object(repl)) {
|
||||
const char *path = sha1_file_name(sha1);
|
||||
|
||||
if (!stat_sha1_file(repl, &st, &path))
|
||||
die("loose object %s (stored in %s) is corrupt",
|
||||
sha1_to_hex(repl), path);
|
||||
}
|
||||
|
||||
if ((p = has_packed_and_bad(repl)) != NULL)
|
||||
die("packed object %s (stored in %s) is corrupt",
|
||||
@ -3793,3 +3822,122 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
|
||||
}
|
||||
return r ? r : pack_errors;
|
||||
}
|
||||
|
||||
/*
 * Inflate the remainder of a loose object in fixed-size chunks, hashing the
 * output as it is produced, and compare the result with "expected_sha1".
 * The object contents are never held in memory as a whole.
 *
 * "stream" must already have inflated the object header into "hdr"; the
 * stream->total_out bytes produced so far are fed to the hash first.
 * "size" is the content size taken from that header. "path" is only used
 * in the mismatch message. The stream is always finished with
 * git_inflate_end() before returning.
 *
 * Returns 0 when the stream ends cleanly, no input is left over and the
 * hash matches; otherwise reports the problem with error() and returns -1.
 */
static int check_stream_sha1(git_zstream *stream,
			     const char *hdr,
			     unsigned long size,
			     const char *path,
			     const unsigned char *expected_sha1)
{
	git_SHA_CTX ctx;
	unsigned char actual_sha1[GIT_SHA1_RAWSZ];
	unsigned char chunk[4096];
	unsigned long consumed;
	int zstatus = Z_OK;

	git_SHA1_Init(&ctx);
	git_SHA1_Update(&ctx, hdr, stream->total_out);

	/*
	 * Part of what has been inflated so far is the header itself; the
	 * bytes up to and including its NUL are not object content.
	 */
	consumed = stream->total_out - strlen(hdr) - 1;

	/*
	 * This size comparison must be "<=" to read the final zlib packets;
	 * see the comment in unpack_sha1_rest for details.
	 */
	while (consumed <= size &&
	       (zstatus == Z_OK || zstatus == Z_BUF_ERROR)) {
		unsigned long room = size - consumed;

		/* Never offer more output space than the header promised. */
		stream->next_out = chunk;
		stream->avail_out = sizeof(chunk);
		if (room < stream->avail_out)
			stream->avail_out = room;

		zstatus = git_inflate(stream, Z_FINISH);

		git_SHA1_Update(&ctx, chunk, stream->next_out - chunk);
		consumed += stream->next_out - chunk;
	}
	git_inflate_end(stream);

	if (zstatus != Z_STREAM_END) {
		error("corrupt loose object '%s'", sha1_to_hex(expected_sha1));
		return -1;
	}

	/* Input remaining after the end of the zlib stream is garbage. */
	if (stream->avail_in) {
		error("garbage at end of loose object '%s'",
		      sha1_to_hex(expected_sha1));
		return -1;
	}

	git_SHA1_Final(actual_sha1, &ctx);
	if (!hashcmp(expected_sha1, actual_sha1))
		return 0;

	error("sha1 mismatch for %s (expected %s)", path,
	      sha1_to_hex(expected_sha1));
	return -1;
}
|
||||
|
||||
int read_loose_object(const char *path,
|
||||
const unsigned char *expected_sha1,
|
||||
enum object_type *type,
|
||||
unsigned long *size,
|
||||
void **contents)
|
||||
{
|
||||
int ret = -1;
|
||||
int fd = -1;
|
||||
void *map = NULL;
|
||||
unsigned long mapsize;
|
||||
git_zstream stream;
|
||||
char hdr[32];
|
||||
|
||||
*contents = NULL;
|
||||
|
||||
map = map_sha1_file_1(path, NULL, &mapsize);
|
||||
if (!map) {
|
||||
error_errno("unable to mmap %s", path);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
|
||||
error("unable to unpack header of %s", path);
|
||||
goto out;
|
||||
}
|
||||
|
||||
*type = parse_sha1_header(hdr, size);
|
||||
if (*type < 0) {
|
||||
error("unable to parse header of %s", path);
|
||||
git_inflate_end(&stream);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (*type == OBJ_BLOB) {
|
||||
if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
|
||||
goto out;
|
||||
} else {
|
||||
*contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1);
|
||||
if (!*contents) {
|
||||
error("unable to unpack contents of %s", path);
|
||||
git_inflate_end(&stream);
|
||||
goto out;
|
||||
}
|
||||
if (check_sha1_signature(expected_sha1, *contents,
|
||||
*size, typename(*type))) {
|
||||
error("sha1 mismatch for %s (expected %s)", path,
|
||||
sha1_to_hex(expected_sha1));
|
||||
free(*contents);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0; /* everything checks out */
|
||||
|
||||
out:
|
||||
if (map)
|
||||
munmap(map, mapsize);
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
return ret;
|
||||
}
|
||||
|
@ -43,13 +43,13 @@ test_expect_success 'HEAD is part of refs, valid objects appear valid' '
|
||||
|
||||
test_expect_success 'setup: helpers for corruption tests' '
	# Print the loose-object path for the object name given as $1:
	# the first two characters name the directory, the rest the file.
	sha1_file() {
		remainder=${1#??} &&
		firsttwo=${1%$remainder} &&
		echo ".git/objects/$firsttwo/$remainder"
	} &&

	# Delete the loose object file for the object name given as $1.
	# Plain "rm" is used so that a missing file makes the helper fail.
	remove_object() {
		rm "$(sha1_file "$1")"
	}
'
|
||||
|
||||
@ -535,13 +535,6 @@ test_expect_success 'fsck --connectivity-only' '
|
||||
)
|
||||
'
|
||||
|
||||
# Resolve "$1" with git rev-parse and delete the matching loose object
# file under .git/objects/. The shell variables sha1, remainder and
# firsttwo are set as a side effect.
remove_loose_object () {
	sha1=$(git rev-parse "$1") &&
	remainder=${sha1#??} &&
	firsttwo=${sha1%"$remainder"} &&
	rm ".git/objects/$firsttwo/$remainder"
}
|
||||
|
||||
test_expect_success 'fsck --name-objects' '
|
||||
rm -rf name-objects &&
|
||||
git init name-objects &&
|
||||
@ -550,11 +543,80 @@ test_expect_success 'fsck --name-objects' '
|
||||
test_commit julius caesar.t &&
|
||||
test_commit augustus &&
|
||||
test_commit caesar &&
|
||||
remove_loose_object $(git rev-parse julius:caesar.t) &&
|
||||
remove_object $(git rev-parse julius:caesar.t) &&
|
||||
test_must_fail git fsck --name-objects >out &&
|
||||
tree=$(git rev-parse --verify julius:) &&
|
||||
grep "$tree (\(refs/heads/master\|HEAD\)@{[0-9]*}:" out
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'alternate objects are correctly blamed' '
	test_when_finished "rm -rf alt.git .git/objects/info/alternates" &&
	git init --bare alt.git &&
	echo "../../alt.git/objects" >.git/objects/info/alternates &&
	mkdir alt.git/objects/12 &&
	# an empty file where a loose object is expected, in the alternate
	>alt.git/objects/12/34567890123456789012345678901234567890 &&
	test_must_fail git fsck >out 2>&1 &&
	# the complaint must mention the alternate
	grep alt.git out
'

test_expect_success 'fsck errors in packed objects' '
	# two commits that each have their ident line altered
	git cat-file commit HEAD >basis &&
	sed "s/</one/" basis >one &&
	sed "s/</foo/" basis >two &&
	one=$(git hash-object -t commit -w one) &&
	two=$(git hash-object -t commit -w two) &&
	pack=$(
		{
			echo $one &&
			echo $two
		} | git pack-objects .git/objects/pack/pack
	) &&
	test_when_finished "rm -f .git/objects/pack/pack-$pack.*" &&
	# keep only the packed copies
	remove_object $one &&
	remove_object $two &&
	test_must_fail git fsck 2>out &&
	grep "error in commit $one.* - bad name" out &&
	grep "error in commit $two.* - bad name" out &&
	! grep corrupt out
'

test_expect_success 'fsck finds problems in duplicate loose objects' '
	rm -rf broken-duplicate &&
	git init broken-duplicate &&
	(
		cd broken-duplicate &&
		test_commit duplicate &&
		# no "-d" here, so we end up with duplicates
		git repack &&
		# now corrupt the loose copy
		file=$(sha1_file "$(git rev-parse HEAD)") &&
		rm "$file" &&
		echo broken >"$file" &&
		test_must_fail git fsck
	)
'

test_expect_success 'fsck detects trailing loose garbage (commit)' '
	git cat-file commit HEAD >basis &&
	# extra line so that the new commit gets a different name than HEAD
	echo bump-commit-sha1 >>basis &&
	commit=$(git hash-object -w -t commit basis) &&
	file=$(sha1_file $commit) &&
	test_when_finished "remove_object $commit" &&
	chmod +w "$file" &&
	echo garbage >>"$file" &&
	test_must_fail git fsck 2>out &&
	test_i18ngrep "garbage.*$commit" out
'

test_expect_success 'fsck detects trailing loose garbage (blob)' '
	blob=$(echo trailing | git hash-object -w --stdin) &&
	file=$(sha1_file $blob) &&
	test_when_finished "remove_object $blob" &&
	chmod +w "$file" &&
	echo garbage >>"$file" &&
	test_must_fail git fsck 2>out &&
	test_i18ngrep "garbage.*$blob" out
'
|
||||
|
||||
test_done
|
||||
|
Loading…
Reference in New Issue
Block a user