From a125a223347a8d43fffc1b7ec2bec93d88ec17b7 Mon Sep 17 00:00:00 2001 From: Thomas Gummerer Date: Sun, 7 Jan 2018 22:30:13 +0000 Subject: [PATCH 1/3] read-cache: fix reading the shared index for other repos read_index_from() takes a path argument for the location of the index file. For reading the shared index in split index mode however it just ignores that path argument, and reads it from the gitdir of the current repository. This works as long as an index in the_repository is read. Once that changes, such as when we read the index of a submodule, or of a different working tree than the current one, the gitdir of the_repository will no longer contain the appropriate shared index, and git will fail to read it. For example t3007-ls-files-recurse-submodules.sh was broken with GIT_TEST_SPLIT_INDEX set in 188dce131f ("ls-files: use repository object", 2017-06-22), and t7814-grep-recurse-submodules.sh was also broken in a similar manner, probably by introducing struct repository there, although I didn't track down the exact commit for that. be489d02d2 ("revision.c: --indexed-objects add objects from all worktrees", 2017-08-23) breaks with split index mode in a similar manner, not erroring out when it can't read the index, but instead carrying on with pruning, without taking the index of the worktree into account. Fix this by passing an additional gitdir parameter to read_index_from, to indicate where it should look for and read the shared index from. read_cache_from() defaults to using the gitdir of the_repository. As it is mostly a convenience macro, having to pass get_git_dir() for every call seems overkill, and if necessary users can have more control by using read_index_from(). Helped-by: Brandon Williams Signed-off-by: Thomas Gummerer Signed-off-by: Junio C Hamano --- cache-tree.c | 2 +- cache.h | 5 +++-- read-cache.c | 23 +++++++++++++---------- repository.c | 2 +- revision.c | 3 ++- 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index d3f7401278..e006215edc 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -608,7 +608,7 @@ int write_index_as_tree(unsigned char *sha1, struct index_state *index_state, co newfd = hold_lock_file_for_update(&lock_file, index_path, LOCK_DIE_ON_ERROR); - entries = read_index_from(index_state, index_path); + entries = read_index_from(index_state, index_path, get_git_dir()); if (entries < 0) { ret = WRITE_TREE_UNREADABLE_INDEX; goto out; diff --git a/cache.h b/cache.h index 6440e2bf21..bcf0fc55fd 100644 --- a/cache.h +++ b/cache.h @@ -364,7 +364,7 @@ extern void free_name_hash(struct index_state *istate); #define active_cache_tree (the_index.cache_tree) #define read_cache() read_index(&the_index) -#define read_cache_from(path) read_index_from(&the_index, (path)) +#define read_cache_from(path) read_index_from(&the_index, (path), (get_git_dir())) #define read_cache_preload(pathspec) read_index_preload(&the_index, (pathspec)) #define is_cache_unborn() is_index_unborn(&the_index) #define read_cache_unmerged() read_index_unmerged(&the_index) @@ -599,7 +599,8 @@ extern int read_index(struct index_state *); extern int read_index_preload(struct index_state *, const struct pathspec *pathspec); extern int do_read_index(struct index_state *istate, const char *path, int must_exist); /* for testting only! */ -extern int read_index_from(struct index_state *, const char *path); +extern int read_index_from(struct index_state *, const char *path, + const char *gitdir); extern int is_index_unborn(struct index_state *); extern int read_index_unmerged(struct index_state *); #define COMMIT_LOCK (1 << 0) diff --git a/read-cache.c b/read-cache.c index 65f4fe8375..1248a09de4 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1568,7 +1568,7 @@ int hold_locked_index(struct lock_file *lk, int lock_flags) int read_index(struct index_state *istate) { - return read_index_from(istate, get_index_file()); + return read_index_from(istate, get_index_file(), get_git_dir()); } static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *ondisk, @@ -1824,20 +1824,19 @@ unmap: * This way, shared index can be removed if they have not been used * for some time. */ -static void freshen_shared_index(char *base_sha1_hex, int warn) +static void freshen_shared_index(const char *shared_index, int warn) { - char *shared_index = git_pathdup("sharedindex.%s", base_sha1_hex); if (!check_and_freshen_file(shared_index, 1) && warn) warning("could not freshen shared index '%s'", shared_index); - free(shared_index); } -int read_index_from(struct index_state *istate, const char *path) +int read_index_from(struct index_state *istate, const char *path, + const char *gitdir) { struct split_index *split_index; int ret; char *base_sha1_hex; - const char *base_path; + char *base_path; /* istate->initialized covers both .git/index and .git/sharedindex.xxx */ if (istate->initialized) @@ -1857,16 +1856,17 @@ int read_index_from(struct index_state *istate, const char *path) split_index->base = xcalloc(1, sizeof(*split_index->base)); base_sha1_hex = sha1_to_hex(split_index->base_sha1); - base_path = git_path("sharedindex.%s", base_sha1_hex); + base_path = xstrfmt("%s/sharedindex.%s", gitdir, base_sha1_hex); ret = do_read_index(split_index->base, base_path, 1); if (hashcmp(split_index->base_sha1, split_index->base->sha1)) die("broken index, expect %s in %s, got %s", base_sha1_hex, base_path, sha1_to_hex(split_index->base->sha1)); - freshen_shared_index(base_sha1_hex, 0); + freshen_shared_index(base_path, 0); merge_base_index(istate); post_read_index_from(istate); + free(base_path); return ret; } @@ -2521,8 +2521,11 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock, ret = write_split_index(istate, lock, flags); /* Freshen the shared index only if the split-index was written */ - if (!ret && !new_shared_index) - freshen_shared_index(sha1_to_hex(si->base_sha1), 1); + if (!ret && !new_shared_index) { + const char *shared_index = git_path("sharedindex.%s", + sha1_to_hex(si->base_sha1)); + freshen_shared_index(shared_index, 1); + } return ret; } diff --git a/repository.c b/repository.c index bb2fae5446..161e2d712a 100644 --- a/repository.c +++ b/repository.c @@ -229,5 +229,5 @@ int repo_read_index(struct repository *repo) if (!repo->index) repo->index = xcalloc(1, sizeof(*repo->index)); - return read_index_from(repo->index, repo->index_file); + return read_index_from(repo->index, repo->index_file, repo->gitdir); } diff --git a/revision.c b/revision.c index d167223e69..d6a89c5af1 100644 --- a/revision.c +++ b/revision.c @@ -1343,7 +1343,8 @@ void add_index_objects_to_pending(struct rev_info *revs, unsigned int flags) continue; /* current index already taken care of */ if (read_index_from(&istate, - worktree_git_path(wt, "index")) > 0) + worktree_git_path(wt, "index"), + get_worktree_git_dir(wt)) > 0) do_add_index_objects_to_pending(revs, &istate); discard_index(&istate); } From 4bddd98311f249e9ed8a9b6379c7a097201612d1 Mon Sep 17 00:00:00 2001 From: Thomas Gummerer Date: Sun, 7 Jan 2018 22:30:14 +0000 Subject: [PATCH 2/3] split-index: don't write cache tree with null oid entries In a96d3cc3f6 ("cache-tree: reject entries with null sha1", 2017-04-21) we made sure that broken cache entries do not get propagated to new trees. Part of that was making sure not to re-use an existing cache tree that includes a null oid. It did so by dropping the cache tree in 'do_write_index()' if one of the entries contains a null oid. In split index mode however, there are two invocations to 'do_write_index()', one for the shared index and one for the split index. The cache tree is only written once, to the split index. As we only loop through the elements that are effectively being written by the current invocation, that may not include the entry with a null oid in the split index (when it is already written to the shared index), where we write the cache tree. Therefore in split index mode we may still end up writing the cache tree, even though there is an entry with a null oid in the index. Fix this by checking for null oids in prepare_to_write_split_index, where we loop the entries of the shared index as well as the entries for the split index. This fixes t7009 with GIT_TEST_SPLIT_INDEX. Also add a new test that's more specifically showing the problem. Signed-off-by: Thomas Gummerer Signed-off-by: Junio C Hamano --- cache.h | 3 ++- read-cache.c | 2 +- split-index.c | 2 ++ t/t1700-split-index.sh | 19 +++++++++++++++++++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/cache.h b/cache.h index bcf0fc55fd..add5f9f50a 100644 --- a/cache.h +++ b/cache.h @@ -340,7 +340,8 @@ struct index_state { struct split_index *split_index; struct cache_time timestamp; unsigned name_hash_initialized : 1, - initialized : 1; + initialized : 1, + drop_cache_tree : 1; struct hashmap name_hash; struct hashmap dir_hash; unsigned char sha1[20]; diff --git a/read-cache.c b/read-cache.c index 1248a09de4..5cd14e2f72 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2199,7 +2199,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, struct stat st; struct ondisk_cache_entry_extended ondisk; struct strbuf previous_name_buf = STRBUF_INIT, *previous_name; - int drop_cache_tree = 0; + int drop_cache_tree = istate->drop_cache_tree; for (i = removed = extended = 0; i < entries; i++) { if (cache[i]->ce_flags & CE_REMOVE) diff --git a/split-index.c b/split-index.c index 83e39ec8d7..284d04d67f 100644 --- a/split-index.c +++ b/split-index.c @@ -238,6 +238,8 @@ void prepare_to_write_split_index(struct index_state *istate) ALLOC_GROW(entries, nr_entries+1, nr_alloc); entries[nr_entries++] = ce; } + if (is_null_oid(&ce->oid)) + istate->drop_cache_tree = 1; } } diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh index 22f69a410b..322721dd64 100755 --- a/t/t1700-split-index.sh +++ b/t/t1700-split-index.sh @@ -400,4 +400,23 @@ done <<\EOF 0642 -rw-r---w- EOF +test_expect_success 'writing split index with null sha1 does not write cache tree' ' + git config core.splitIndex true && + git config splitIndex.maxPercentChange 0 && + git commit -m "commit" && + { + git ls-tree HEAD && + printf "160000 commit $_z40\\tbroken\\n" + } >broken-tree && + echo "add broken entry" >msg && + + tree=$(git mktree cache-tree.out || true) && + test_line_count = 0 cache-tree.out +' + test_done From ae59a4e44f359f74d0fede109c29bb53f726209b Mon Sep 17 00:00:00 2001 From: Thomas Gummerer Date: Sun, 7 Jan 2018 22:30:15 +0000 Subject: [PATCH 3/3] travis: run tests with GIT_TEST_SPLIT_INDEX Split index mode only has a few dedicated tests, but as the index is involved in nearly every git operation, this doesn't quite cover all the ways repositories with split index can break. To use split index mode throughout the test suite a GIT_TEST_SPLIT_INDEX environment variable can be set, which makes git split the index at random and thus excercises the functionality much more thoroughly. As this is not turned on by default, it is not executed nearly as often as the test suite is run, so occationally breakages slip through. Try to counteract that by running the test suite with GIT_TEST_SPLIT_INDEX mode turned on on travis. To avoid using too many cycles on travis only run split index mode in the linux-gcc target only. The Linux build was chosen over the Mac OS builds because it tends to be much faster to complete. The linux gcc build was chosen over the linux clang build because the linux clang build is the fastest build, so it can serve as an early indicator if something is broken and we want to avoid spending the extra cycles of running the test suite twice for that. Helped-by: Lars Schneider Helped-by: Junio C Hamano Signed-off-by: Thomas Gummerer Signed-off-by: Junio C Hamano --- ci/run-tests.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/run-tests.sh b/ci/run-tests.sh index f0c743de94..c7aee5b9ff 100755 --- a/ci/run-tests.sh +++ b/ci/run-tests.sh @@ -8,3 +8,7 @@ mkdir -p $HOME/travis-cache ln -s $HOME/travis-cache/.prove t/.prove make --quiet test +if test "$jobname" = "linux-gcc" +then + GIT_TEST_SPLIT_INDEX=YesPlease make --quiet test +fi