From c8a45eb66e0a1086a31af4d76cd0f5e228497229 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 25 Nov 2020 12:17:28 -0500 Subject: [PATCH 1/2] packfile.c: protect against disappearing indexes In 17c35c8969 (packfile: skip loading index if in multi-pack-index, 2018-07-12) we stopped loading the .idx file for packs that are contained within a multi-pack index. This saves us the effort of loading an .idx and doing some lightweight validity checks by way of 'packfile.c:load_idx()', but introduces a race between processes that need to load the index (e.g., to generate a reverse index) and processes that can delete the index. For example, running the following in your shell: $ git init repo && cd repo $ git commit --allow-empty -m 'base' $ git repack -ad && git multi-pack-index write followed by: $ rm -f .git/objects/pack/pack-*.idx $ git rev-parse HEAD | git cat-file --batch-check='%(objectsize:disk)' will result in a segfault prior to this patch. What's happening here is that we notice that the pack is in the multi-pack index, and so don't check that it still has a .idx. When we then try and load that index to generate a reverse index, we don't have it, so the call to 'find_pack_revindex()' in 'packfile.c:packed_object_info()' returns NULL, and then dereferencing it causes a segfault. Of course, we don't ever expect someone to remove the index file by hand, or to be in a state where we never wrote it to begin with (yet find that pack in the multi-pack-index). But, this can happen in a timing race with 'git repack -ad', which removes all existing packs after writing a new pack containing all of their objects. Avoid this by reverting the hunk of 17c35c8969 which stops loading the index when the pack is contained in a MIDX. This makes the latter half of 17c35c8969 useless, since we'll always have a non-NULL 'p->index_data', in which case that if statement isn't guarding anything. These two together effectively revert 17c35c8969, and avoid the race explained above. Co-authored-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- packfile.c | 19 ++----------------- t/t5319-multi-pack-index.sh | 24 ++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/packfile.c b/packfile.c index 355066de17..eb14f7c5f1 100644 --- a/packfile.c +++ b/packfile.c @@ -514,19 +514,8 @@ static int open_packed_git_1(struct packed_git *p) ssize_t read_result; const unsigned hashsz = the_hash_algo->rawsz; - if (!p->index_data) { - struct multi_pack_index *m; - const char *pack_name = pack_basename(p); - - for (m = the_repository->objects->multi_pack_index; - m; m = m->next) { - if (midx_contains_pack(m, pack_name)) - break; - } - - if (!m && open_pack_index(p)) - return error("packfile %s index unavailable", p->pack_name); - } + if (open_pack_index(p)) + return error("packfile %s index unavailable", p->pack_name); if (!pack_max_fds) { unsigned int max_fds = get_max_fd_limit(); @@ -577,10 +566,6 @@ static int open_packed_git_1(struct packed_git *p) " supported (try upgrading GIT to a newer version)", p->pack_name, ntohl(hdr.hdr_version)); - /* Skip index checking if in multi-pack-index */ - if (!p->index_data) - return 0; - /* Verify the pack matches its index. */ if (p->num_objects != ntohl(hdr.hdr_entries)) return error("packfile %s claims to have %"PRIu32" objects" diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index c72ca04399..98c7c9df15 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -117,7 +117,7 @@ test_expect_success 'write midx with one v2 pack' ' compare_results_with_midx "one v2 pack" -test_expect_success 'corrupt idx not opened' ' +test_expect_success 'corrupt idx reports errors' ' idx=$(test-tool read-midx $objdir | grep "\.idx\$") && mv $objdir/pack/$idx backup-$idx && test_when_finished "mv backup-\$idx \$objdir/pack/\$idx" && @@ -128,7 +128,7 @@ test_expect_success 'corrupt idx not opened' ' test_copy_bytes 1064 $objdir/pack/$idx && git -c core.multiPackIndex=true rev-list --objects --all 2>err && - test_must_be_empty err + grep "index unavailable" err ' test_expect_success 'add more objects' ' @@ -547,4 +547,24 @@ test_expect_success 'repack --batch-size=0 repacks everything' ' ) ' +test_expect_success 'load reverse index when missing .idx' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + git config core.multiPackIndex true && + + test_commit base && + git repack -ad && + git multi-pack-index write && + + git rev-parse HEAD >tip && + idx=$(ls .git/objects/pack/pack-*.idx) && + + mv $idx $idx.bak && + git cat-file --batch-check="%(objectsize:disk)" Date: Wed, 25 Nov 2020 12:17:33 -0500 Subject: [PATCH 2/2] midx.c: protect against disappearing packs When a packed object is stored in a multi-pack index, but that pack has racily gone away, the MIDX code simply calls die(), when it could be returning an error to the caller, which would in turn lead to re-scanning the pack directory. A pack can racily disappear, for example, due to a simultaneous 'git repack -ad', You can also reproduce this with two terminals, where one is running: git init while true; do git commit -q --allow-empty -m foo git repack -ad git multi-pack-index write done (in effect, constantly writing new MIDXs), and the other is running: obj=$(git rev-parse HEAD) while true; do echo $obj | git cat-file --batch-check='%(objectsize:disk)' || break done That will sometimes hit the error preparing packfile from multi-pack-index message, which this patch fixes. Right now, that path to discovering a missing pack looks something like 'find_pack_entry()' calling 'fill_midx_entry()' and eventually making its way to call 'nth_midxed_pack_entry()'. 'nth_midxed_pack_entry()' already checks 'is_pack_valid()' and propagates an error if the pack is invalid. So, this works if the pack has gone away between calling 'prepare_midx_pack()' and before calling 'is_pack_valid()', but not if it disappears before then. Catch the case where the pack has already disappeared before 'prepare_midx_pack()' by returning an error in that case, too. Co-authored-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 2 +- t/t5319-multi-pack-index.sh | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/midx.c b/midx.c index f29afc0d2d..62101c7608 100644 --- a/midx.c +++ b/midx.c @@ -285,7 +285,7 @@ static int nth_midxed_pack_entry(struct repository *r, pack_int_id = nth_midxed_pack_int_id(m, pos); if (prepare_midx_pack(r, m, pack_int_id)) - die(_("error preparing packfile from multi-pack-index")); + return 0; p = m->packs[pack_int_id]; /* diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 98c7c9df15..27c7eb906a 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -547,7 +547,7 @@ test_expect_success 'repack --batch-size=0 repacks everything' ' ) ' -test_expect_success 'load reverse index when missing .idx' ' +test_expect_success 'load reverse index when missing .idx, .pack' ' git init repo && test_when_finished "rm -fr repo" && ( @@ -560,9 +560,15 @@ test_expect_success 'load reverse index when missing .idx' ' git multi-pack-index write && git rev-parse HEAD >tip && + pack=$(ls .git/objects/pack/pack-*.pack) && idx=$(ls .git/objects/pack/pack-*.idx) && mv $idx $idx.bak && + git cat-file --batch-check="%(objectsize:disk)"