Merge branch 'tb/midx-use-checksum'

When rebuilding the multi-pack index file reusing an existing one,
we used to blindly trust the existing file and ended up carrying
corrupted data into the updated file, which has been corrected.

* tb/midx-use-checksum:
  midx: report checksum mismatches during 'verify'
  midx: don't reuse corrupt MIDXs when writing
  commit-graph: rewrite to use checksum_valid()
  csum-file: introduce checksum_valid()
This commit is contained in:
Junio C Hamano 2021-07-16 17:42:46 -07:00
commit 3b57e72c0c
6 changed files with 52 additions and 18 deletions

View File

@ -2422,14 +2422,16 @@ static void graph_report(const char *fmt, ...)
#define GENERATION_ZERO_EXISTS 1
#define GENERATION_NUMBER_EXISTS 2
/*
 * Check the checksum stored at the tail of the commit-graph data in `g`
 * against the rest of that data. Returns non-zero when they agree.
 */
static int commit_graph_checksum_valid(struct commit_graph *g)
{
	const unsigned char *graph_bytes = g->data;
	size_t graph_len = g->data_len;

	return hashfile_checksum_valid(graph_bytes, graph_len);
}
int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
{
uint32_t i, cur_fanout_pos = 0;
struct object_id prev_oid, cur_oid;
unsigned char checksum[GIT_MAX_HEXSZ];
int generation_zero = 0;
struct hashfile *f;
int devnull;
struct progress *progress = NULL;
int local_error = 0;
@ -2442,11 +2444,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
if (verify_commit_graph_error)
return verify_commit_graph_error;
devnull = open("/dev/null", O_WRONLY);
f = hashfd(devnull, NULL);
hashwrite(f, g->data, g->data_len - g->hash_len);
finalize_hashfile(f, checksum, CSUM_CLOSE);
if (!hasheq(checksum, g->data + g->data_len - g->hash_len)) {
if (!commit_graph_checksum_valid(g)) {
graph_report(_("the commit-graph file has incorrect checksum and is likely corrupt"));
verify_commit_graph_error = VERIFY_COMMIT_GRAPH_ERROR_HASH;
}

View File

@ -217,3 +217,19 @@ uint32_t crc32_end(struct hashfile *f)
f->do_crc = 0;
return f->crc32;
}
/*
 * Check the trailing checksum of an in-memory hashfile image.
 *
 * The last the_hash_algo->rawsz bytes of `data` are taken as the stored
 * checksum and compared against a hash computed over every byte that
 * precedes them. Returns non-zero when the two match, and 0 when they
 * differ or when `total_len` is too small to hold a checksum at all.
 */
int hashfile_checksum_valid(const unsigned char *data, size_t total_len)
{
	const size_t trailer_len = the_hash_algo->rawsz;
	unsigned char computed[GIT_MAX_RAWSZ];
	git_hash_ctx hash_ctx;
	size_t payload_len;

	/* Reject short input before subtracting, so payload_len never wraps. */
	if (total_len < trailer_len)
		return 0; /* say "too short"? */
	payload_len = total_len - trailer_len;

	the_hash_algo->init_fn(&hash_ctx);
	the_hash_algo->update_fn(&hash_ctx, data, payload_len);
	the_hash_algo->final_fn(computed, &hash_ctx);

	/* The stored checksum starts right after the hashed payload. */
	return hasheq(computed, data + payload_len);
}

View File

@ -44,6 +44,9 @@ void hashflush(struct hashfile *f);
void crc32_begin(struct hashfile *);
uint32_t crc32_end(struct hashfile *);
/*
 * Verify checksum validity while reading: `data` points at `len` bytes whose
 * trailing bytes hold a checksum of everything before them. Returns non-zero
 * on success, and zero on a mismatch or when `len` is too short to contain a
 * checksum.
 */
int hashfile_checksum_valid(const unsigned char *data, size_t len);
/*
* Returns the total number of bytes fed to the hashfile so far (including ones
* that have not been written out to the descriptor yet).

13
midx.c
View File

@ -885,6 +885,11 @@ static void write_midx_reverse_index(char *midx_name, unsigned char *midx_hash,
static void clear_midx_files_ext(struct repository *r, const char *ext,
unsigned char *keep_hash);
/*
 * Check the checksum stored at the tail of the multi-pack-index data in `m`
 * against the rest of that data. Returns non-zero when they agree.
 */
static int midx_checksum_valid(struct multi_pack_index *m)
{
	const unsigned char *midx_bytes = m->data;
	size_t midx_len = m->data_len;

	return hashfile_checksum_valid(midx_bytes, midx_len);
}
static int write_midx_internal(const char *object_dir, struct multi_pack_index *m,
struct string_list *packs_to_drop,
const char *preferred_pack_name,
@ -911,6 +916,11 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
else
ctx.m = load_multi_pack_index(object_dir, 1);
if (ctx.m && !midx_checksum_valid(ctx.m)) {
warning(_("ignoring existing multi-pack-index; checksum mismatch"));
ctx.m = NULL;
}
ctx.nr = 0;
ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
ctx.info = NULL;
@ -1218,6 +1228,9 @@ int verify_midx_file(struct repository *r, const char *object_dir, unsigned flag
return result;
}
if (!midx_checksum_valid(m))
midx_report(_("incorrect checksum"));
if (flags & MIDX_PROGRESS)
progress = start_delayed_progress(_("Looking for referenced packfiles"),
m->num_packs);

View File

@ -164,22 +164,13 @@ static int verify_packfile(struct repository *r,
int verify_pack_index(struct packed_git *p)
{
size_t len;
const unsigned char *index_base;
git_hash_ctx ctx;
unsigned char hash[GIT_MAX_RAWSZ];
int err = 0;
if (open_pack_index(p))
return error("packfile %s index not opened", p->pack_name);
index_base = p->index_data;
len = p->index_size - the_hash_algo->rawsz;
/* Verify SHA1 sum of the index file */
the_hash_algo->init_fn(&ctx);
the_hash_algo->update_fn(&ctx, index_base, len);
the_hash_algo->final_fn(hash, &ctx);
if (!hasheq(hash, index_base + len))
if (!hashfile_checksum_valid(p->index_data, p->index_size))
err = error("Packfile index for %s hash mismatch",
p->pack_name);
return err;

View File

@ -410,6 +410,19 @@ test_expect_success 'git-fsck incorrect offset' '
"git -c core.multipackindex=true fsck"
'
# Corrupt a byte at $MIDX_BYTE_OFFSET (the helper is expected to see
# "incorrect object offset"), then rewrite the MIDX: the write must
# report a checksum mismatch on stderr, and the rewritten file must
# pass a subsequent verify.
test_expect_success 'corrupt MIDX is not reused' '
corrupt_midx_and_verify $MIDX_BYTE_OFFSET "\377" $objdir \
"incorrect object offset" &&
git multi-pack-index write 2>err &&
test_i18ngrep checksum.mismatch err &&
git multi-pack-index verify
'
# Overwrite the final byte of the multi-pack-index (file size minus one)
# with "\377" and expect the helper to see "incorrect checksum".
test_expect_success 'verify incorrect checksum' '
pos=$(($(wc -c <$objdir/pack/multi-pack-index) - 1)) &&
corrupt_midx_and_verify $pos "\377" $objdir "incorrect checksum"
'
test_expect_success 'repack progress off for redirected stderr' '
GIT_PROGRESS_DELAY=0 git multi-pack-index --object-dir=$objdir repack 2>err &&
test_line_count = 0 err