Merge branch 'ds/sparse-index-ignored-files'

In cone mode, the sparse-index code path learned to remove ignored
files (like build artifacts) outside the sparse cone, allowing the
entire directory outside the sparse cone to be removed, which is
especially useful when the sparse patterns change.

* ds/sparse-index-ignored-files:
  sparse-checkout: clear tracked sparse dirs
  sparse-index: add SPARSE_INDEX_MEMORY_ONLY flag
  attr: be careful about sparse directories
  sparse-checkout: create helper methods
  sparse-index: use WRITE_TREE_MISSING_OK
  sparse-index: silently return when cache tree fails
  unpack-trees: fix nested sparse-dir search
  sparse-index: silently return when not using cone-mode patterns
  t7519: rewrite sparse index test
This commit is contained in:
Junio C Hamano 2021-09-20 15:20:44 -07:00
commit dc89c34d9e
12 changed files with 312 additions and 62 deletions

View File

@ -210,6 +210,16 @@ case-insensitive check. This corrects for case mismatched filenames in the
'git sparse-checkout set' command to reflect the expected cone in the working
directory.
When changing the sparse-checkout patterns in cone mode, Git will inspect each
tracked directory that is not within the sparse-checkout cone to see if it
contains any untracked files. If all of those files are ignored due to the
`.gitignore` patterns, then the directory will be deleted. If any of the
untracked files within that directory is not ignored, then no deletions will
occur within that directory and a warning message will appear. If these files
are important, then reset your sparse-checkout definition so they are included,
use `git add` and `git commit` to store them, then remove any remaining files
manually to ensure Git can behave optimally.
SUBMODULES
----------

15
attr.c
View File

@ -14,6 +14,7 @@
#include "utf8.h"
#include "quote.h"
#include "thread-utils.h"
#include "dir.h"
const char git_attr__true[] = "(builtin)true";
const char git_attr__false[] = "\0(builtin)false";
@ -744,6 +745,20 @@ static struct attr_stack *read_attr_from_index(struct index_state *istate,
if (!istate)
return NULL;
/*
* The .gitattributes file only applies to files within its
* parent directory. In the case of cone-mode sparse-checkout,
* the .gitattributes file is sparse if and only if all paths
* within that directory are also sparse. Thus, don't load the
* .gitattributes file since it will not matter.
*
* In the case of a sparse index, it is critical that we don't go
* looking for a .gitattributes file, as doing so would cause the
* index to expand.
*/
if (!path_in_cone_mode_sparse_checkout(path, istate))
return NULL;
buf = read_blob_data_from_index(istate, path, NULL);
if (!buf)
return NULL;

View File

@ -190,8 +190,6 @@ static int refresh(int verbose, const struct pathspec *pathspec)
struct string_list only_match_skip_worktree = STRING_LIST_INIT_NODUP;
int flags = REFRESH_IGNORE_SKIP_WORKTREE |
(verbose ? REFRESH_IN_PORCELAIN : REFRESH_QUIET);
struct pattern_list pl = { 0 };
int sparse_checkout_enabled = !get_sparse_checkout_patterns(&pl);
seen = xcalloc(pathspec->nr, 1);
refresh_index(&the_index, flags, pathspec, seen,
@ -199,12 +197,9 @@ static int refresh(int verbose, const struct pathspec *pathspec)
for (i = 0; i < pathspec->nr; i++) {
if (!seen[i]) {
const char *path = pathspec->items[i].original;
int dtype = DT_REG;
if (matches_skip_worktree(pathspec, i, &skip_worktree_seen) ||
(sparse_checkout_enabled &&
!path_matches_pattern_list(path, strlen(path), NULL,
&dtype, &pl, &the_index))) {
!path_in_sparse_checkout(path, &the_index)) {
string_list_append(&only_match_skip_worktree,
pathspec->items[i].original);
} else {

View File

@ -100,6 +100,98 @@ static int sparse_checkout_list(int argc, const char **argv)
return 0;
}
/*
 * Best-effort removal of directories that the index represents as sparse
 * directory entries but that are still present in the working tree.
 *
 * Nothing is done unless `r` has both an index and a worktree and the
 * sparse-checkout patterns are cone-mode patterns. For each candidate
 * directory, fill_directory() is run with DIR_SHOW_IGNORED_TOO; if it
 * reports any entries in dir.nr the directory is kept and a warning is
 * printed, otherwise the directory is removed recursively. A failed
 * removal only produces a warning.
 *
 * If the index had to be converted to a sparse index (in memory only) for
 * this scan, it is expanded back to a full index before returning.
 */
static void clean_tracked_sparse_directories(struct repository *r)
{
	int i, was_full = 0;
	struct strbuf path = STRBUF_INIT;
	size_t pathlen;
	struct string_list_item *item;
	struct string_list sparse_dirs = STRING_LIST_INIT_DUP;

	/* Without an index and a worktree there is nothing to inspect. */
	if (!r || !r->index || !r->worktree)
		return;

	/*
	 * If we are not using cone mode patterns, then we cannot
	 * delete directories outside of the sparse cone.
	 */
	if (init_sparse_checkout_patterns(r->index) ||
	    !r->index->sparse_checkout_patterns->use_cone_patterns)
		return;

	/*
	 * Use the sparse index as a data structure to assist finding
	 * directories that are safe to delete. This conversion to a
	 * sparse index will not delete directories that contain
	 * conflicted entries or submodules.
	 */
	if (!r->index->sparse_index) {
		/*
		 * If something, such as a merge conflict or other concern,
		 * prevents us from converting to a sparse index, then do
		 * not try deleting files.
		 */
		if (convert_to_sparse(r->index, SPARSE_INDEX_MEMORY_ONLY))
			return;
		was_full = 1;
	}

	/* Build "<worktree>/" once; each directory name is appended to it. */
	strbuf_addstr(&path, r->worktree);
	strbuf_complete(&path, '/');
	pathlen = path.len;

	/*
	 * Collect directories that have gone out of scope but also
	 * exist on disk, so there is some work to be done. We need to
	 * store the entries in a list before exploring, since that might
	 * expand the sparse-index again.
	 */
	for (i = 0; i < r->index->cache_nr; i++) {
		struct cache_entry *ce = r->index->cache[i];

		if (S_ISSPARSEDIR(ce->ce_mode) &&
		    repo_file_exists(r, ce->name))
			string_list_append(&sparse_dirs, ce->name);
	}

	for_each_string_list_item(item, &sparse_dirs) {
		struct dir_struct dir = DIR_INIT;
		struct pathspec p = { 0 };
		struct strvec s = STRVEC_INIT;

		/* Reset to "<worktree>/" and append this directory's name. */
		strbuf_setlen(&path, pathlen);
		strbuf_addstr(&path, item->string);

		dir.flags |= DIR_SHOW_IGNORED_TOO;

		setup_standard_excludes(&dir);
		strvec_push(&s, path.buf);

		parse_pathspec(&p, PATHSPEC_GLOB, 0, NULL, s.v);
		fill_directory(&dir, r->index, &p);

		if (dir.nr) {
			warning(_("directory '%s' contains untracked files,"
				  " but is not in the sparse-checkout cone"),
				item->string);
		} else if (remove_dir_recursively(&path, 0)) {
			/*
			 * Removal is "best effort". If something blocks
			 * the deletion, then continue with a warning.
			 */
			warning(_("failed to remove directory '%s'"),
				item->string);
		}

		/*
		 * Release everything allocated for this iteration. The
		 * argument vector and the parsed pathspec are per-directory
		 * and would otherwise leak once per sparse directory.
		 */
		strvec_clear(&s);
		clear_pathspec(&p);
		dir_clear(&dir);
	}

	string_list_clear(&sparse_dirs, 0);
	strbuf_release(&path);

	/* Restore the in-memory index to the form the caller handed us. */
	if (was_full)
		ensure_full_index(r->index);
}
static int update_working_directory(struct pattern_list *pl)
{
enum update_sparsity_result result;
@ -141,6 +233,8 @@ static int update_working_directory(struct pattern_list *pl)
else
rollback_lock_file(&lock_file);
clean_tracked_sparse_directories(r);
r->index->sparse_checkout_patterns = NULL;
return result;
}

52
dir.c
View File

@ -1439,6 +1439,58 @@ done:
return result;
}
/*
 * Make sure istate->sparse_checkout_patterns is populated.
 *
 * Returns 1 when core_apply_sparse_checkout is off (nothing is loaded),
 * 0 when the pattern list was already present or has just been loaded,
 * and -1 when get_sparse_checkout_patterns() reports a failure; in that
 * last case the freshly allocated list is freed and the pointer is reset
 * to NULL.
 */
int init_sparse_checkout_patterns(struct index_state *istate)
{
	if (!core_apply_sparse_checkout)
		return 1;

	if (!istate->sparse_checkout_patterns) {
		/* First request for this index: allocate and load the list. */
		CALLOC_ARRAY(istate->sparse_checkout_patterns, 1);

		if (get_sparse_checkout_patterns(istate->sparse_checkout_patterns) < 0) {
			FREE_AND_NULL(istate->sparse_checkout_patterns);
			return -1;
		}
	}

	return 0;
}
/*
 * Shared implementation behind path_in_sparse_checkout() and
 * path_in_cone_mode_sparse_checkout().
 *
 * Returns 1 when `path` is considered inside the sparse-checkout
 * definition and 0 otherwise. When `require_cone_mode` is non-zero,
 * pattern lists that are not cone-mode patterns are treated as
 * "everything is included".
 */
static int path_in_sparse_checkout_1(const char *path,
				     struct index_state *istate,
				     int require_cone_mode)
{
	int dtype = DT_REG;
	const char *last_slash;
	const char *basename;

	/*
	 * A path is accepted by default when no patterns could be
	 * initialized for this index ...
	 */
	if (init_sparse_checkout_patterns(istate))
		return 1;

	/* ... or when the patterns are not of the kind the caller needs. */
	if (require_cone_mode &&
	    !istate->sparse_checkout_patterns->use_cone_patterns)
		return 1;

	/* The basename is whatever follows the final '/', if there is one. */
	last_slash = strrchr(path, '/');
	basename = last_slash ? last_slash + 1 : path;

	return path_matches_pattern_list(path, strlen(path), basename,
					 &dtype,
					 istate->sparse_checkout_patterns,
					 istate) > 0;
}
/*
 * Report whether `path` is inside the sparse-checkout definition of
 * `istate`, whatever kind of patterns are in use. Delegates to
 * path_in_sparse_checkout_1() without requiring cone mode.
 */
int path_in_sparse_checkout(const char *path,
			    struct index_state *istate)
{
	const int cone_mode_required = 0;

	return path_in_sparse_checkout_1(path, istate, cone_mode_required);
}
/*
 * Like path_in_sparse_checkout(), but only consults the patterns when
 * they are cone-mode patterns; with any other kind of pattern list the
 * path is reported as included. Delegates to path_in_sparse_checkout_1()
 * with cone mode required.
 */
int path_in_cone_mode_sparse_checkout(const char *path,
				      struct index_state *istate)
{
	const int cone_mode_required = 1;

	return path_in_sparse_checkout_1(path, istate, cone_mode_required);
}
static struct path_pattern *last_matching_pattern_from_lists(
struct dir_struct *dir, struct index_state *istate,
const char *pathname, int pathlen,

8
dir.h
View File

@ -394,6 +394,14 @@ enum pattern_match_result path_matches_pattern_list(const char *pathname,
const char *basename, int *dtype,
struct pattern_list *pl,
struct index_state *istate);
/*
 * Populate the index's sparse_checkout_patterns if it is not set yet.
 * Returns 1 if sparse-checkout is not enabled, 0 if the patterns are
 * available (already present or newly loaded), and -1 if loading them
 * failed.
 */
int init_sparse_checkout_patterns(struct index_state *state);
/*
 * Return non-zero if 'path' matches the sparse-checkout patterns of
 * 'istate'. Also returns non-zero when the patterns cannot be
 * initialized (for example because sparse-checkout is disabled).
 */
int path_in_sparse_checkout(const char *path,
struct index_state *istate);
/*
 * Same as path_in_sparse_checkout(), except that a pattern list which
 * does not use cone-mode patterns is treated as including every path.
 */
int path_in_cone_mode_sparse_checkout(const char *path,
struct index_state *istate);
struct dir_entry *dir_add_ignored(struct dir_struct *dir,
struct index_state *istate,
const char *pathname, int len);

View File

@ -3069,7 +3069,7 @@ static int do_write_locked_index(struct index_state *istate, struct lock_file *l
int ret;
int was_full = !istate->sparse_index;
ret = convert_to_sparse(istate);
ret = convert_to_sparse(istate, 0);
if (ret) {
warning(_("failed to convert to a sparse-index"));
@ -3182,7 +3182,7 @@ static int write_shared_index(struct index_state *istate,
int ret, was_full = !istate->sparse_index;
move_cache_to_base_index(istate);
convert_to_sparse(istate);
convert_to_sparse(istate, 0);
trace2_region_enter_printf("index", "shared/do_write_index",
the_repository, "%s", get_tempfile_path(*temp));

View File

@ -33,19 +33,14 @@ static int convert_to_sparse_rec(struct index_state *istate,
{
int i, can_convert = 1;
int start_converted = num_converted;
enum pattern_match_result match;
int dtype = DT_UNKNOWN;
struct strbuf child_path = STRBUF_INIT;
struct pattern_list *pl = istate->sparse_checkout_patterns;
/*
* Is the current path outside of the sparse cone?
* Then check if the region can be replaced by a sparse
* directory entry (everything is sparse and merged).
*/
match = path_matches_pattern_list(ct_path, ct_pathlen,
NULL, &dtype, pl, istate);
if (match != NOT_MATCHED)
if (path_in_sparse_checkout(ct_path, istate))
can_convert = 0;
for (i = start; can_convert && i < end; i++) {
@ -127,41 +122,51 @@ static int index_has_unmerged_entries(struct index_state *istate)
return 0;
}
int convert_to_sparse(struct index_state *istate)
int convert_to_sparse(struct index_state *istate, int flags)
{
int test_env;
if (istate->split_index || istate->sparse_index ||
if (istate->sparse_index || !istate->cache_nr ||
!core_apply_sparse_checkout || !core_sparse_checkout_cone)
return 0;
if (!istate->repo)
istate->repo = the_repository;
/*
* The GIT_TEST_SPARSE_INDEX environment variable triggers the
* index.sparse config variable to be on.
*/
test_env = git_env_bool("GIT_TEST_SPARSE_INDEX", -1);
if (test_env >= 0)
set_sparse_index_config(istate->repo, test_env);
if (!(flags & SPARSE_INDEX_MEMORY_ONLY)) {
/*
* The sparse index is not (yet) integrated with a split index.
*/
if (istate->split_index)
return 0;
/*
* The GIT_TEST_SPARSE_INDEX environment variable triggers the
* index.sparse config variable to be on.
*/
test_env = git_env_bool("GIT_TEST_SPARSE_INDEX", -1);
if (test_env >= 0)
set_sparse_index_config(istate->repo, test_env);
/*
* Only convert to sparse if index.sparse is set.
*/
prepare_repo_settings(istate->repo);
if (!istate->repo->settings.sparse_index)
return 0;
if (!istate->sparse_checkout_patterns) {
istate->sparse_checkout_patterns = xcalloc(1, sizeof(struct pattern_list));
if (get_sparse_checkout_patterns(istate->sparse_checkout_patterns) < 0)
/*
* Only convert to sparse if index.sparse is set.
*/
prepare_repo_settings(istate->repo);
if (!istate->repo->settings.sparse_index)
return 0;
}
if (!istate->sparse_checkout_patterns->use_cone_patterns) {
warning(_("attempting to use sparse-index without cone mode"));
return -1;
}
if (init_sparse_checkout_patterns(istate))
return 0;
/*
* We need cone-mode patterns to use sparse-index. If a user edits
* their sparse-checkout file manually, then we can detect during
* parsing that they are not actually using cone-mode patterns and
* hence we need to abort this conversion _without error_. Warnings
* already exist in the pattern parsing to inform the user of their
* bad patterns.
*/
if (!istate->sparse_checkout_patterns->use_cone_patterns)
return 0;
/*
* NEEDSWORK: If we have unmerged entries, then stay full.
@ -172,10 +177,15 @@ int convert_to_sparse(struct index_state *istate)
/* Clear and recompute the cache-tree */
cache_tree_free(&istate->cache_tree);
if (cache_tree_update(istate, 0)) {
warning(_("unable to update cache-tree, staying full"));
return -1;
}
/*
* Silently return if there is a problem with the cache tree update,
* which might just be due to a conflict state in some entry.
*
* This might create new tree objects, so be sure to use
* WRITE_TREE_MISSING_OK.
*/
if (cache_tree_update(istate, WRITE_TREE_MISSING_OK))
return 0;
remove_fsmonitor(istate);

View File

@ -2,7 +2,8 @@
#define SPARSE_INDEX_H__
struct index_state;
int convert_to_sparse(struct index_state *istate);
#define SPARSE_INDEX_MEMORY_ONLY (1 << 0)
int convert_to_sparse(struct index_state *istate, int flags);
/*
* Some places in the codebase expect to search for a specific path.

View File

@ -642,4 +642,63 @@ test_expect_success MINGW 'cone mode replaces backslashes with slashes' '
check_files repo/deep a deeper1
'
# Exercise the cleanup of out-of-cone directories in cone mode:
#  1. With the cone set to deep/deeper1, create files that match the
#     committed .gitignore (obj/ and *.o) both inside and outside the cone.
#  2. 'sparse-checkout reapply' must remove the out-of-cone directories
#     folder1 and deep/deeper2, while the top-level obj/ and file.o stay.
#  3. Switching the cone to deep/deeper2 must remove deep/deeper1.
#  4. With an untracked, non-ignored file in deep/deeper2, moving the cone
#     to folder1 must warn and leave every file in deep/deeper2 in place.
test_expect_success 'cone mode clears ignored subdirectories' '
rm repo/.git/info/sparse-checkout &&
git -C repo sparse-checkout init --cone &&
git -C repo sparse-checkout set deep/deeper1 &&
cat >repo/.gitignore <<-\EOF &&
obj/
*.o
EOF
git -C repo add .gitignore &&
git -C repo commit -m ".gitignore" &&
mkdir -p repo/obj repo/folder1/obj repo/deep/deeper2/obj &&
for file in folder1/obj/a obj/a folder1/file.o folder1.o \
deep/deeper2/obj/a deep/deeper2/file.o file.o
do
echo ignored >repo/$file || return 1
done &&
git -C repo status --porcelain=v2 >out &&
test_must_be_empty out &&
git -C repo sparse-checkout reapply &&
test_path_is_missing repo/folder1 &&
test_path_is_missing repo/deep/deeper2 &&
test_path_is_dir repo/obj &&
test_path_is_file repo/file.o &&
git -C repo status --porcelain=v2 >out &&
test_must_be_empty out &&
git -C repo sparse-checkout set deep/deeper2 &&
test_path_is_missing repo/deep/deeper1 &&
test_path_is_dir repo/deep/deeper2 &&
test_path_is_dir repo/obj &&
test_path_is_file repo/file.o &&
>repo/deep/deeper2/ignored.o &&
>repo/deep/deeper2/untracked &&
# When an untracked file is in the way, all untracked files
# (even ignored files) are preserved.
git -C repo sparse-checkout set folder1 2>err &&
grep "contains untracked files" err &&
test_path_is_file repo/deep/deeper2/ignored.o &&
test_path_is_file repo/deep/deeper2/untracked &&
# The rest of the cone matches expectation
test_path_is_missing repo/deep/deeper1 &&
test_path_is_dir repo/obj &&
test_path_is_file repo/file.o &&
git -C repo status --porcelain=v2 >out &&
echo "? deep/deeper2/untracked" >expect &&
test_cmp expect out
'
test_done

View File

@ -389,43 +389,47 @@ test_expect_success 'status succeeds after staging/unstaging' '
# If "!" is supplied, then we verify that we do not call ensure_full_index
# during a call to 'git status'. Otherwise, we verify that we _do_ call it.
check_sparse_index_behavior () {
git status --porcelain=v2 >expect &&
git sparse-checkout init --cone --sparse-index &&
git sparse-checkout set dir1 dir2 &&
git -C full status --porcelain=v2 >expect &&
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \
git status --porcelain=v2 >actual &&
git -C sparse status --porcelain=v2 >actual &&
test_region $1 index ensure_full_index trace2.txt &&
test_region fsm_hook query trace2.txt &&
test_cmp expect actual &&
rm trace2.txt &&
git sparse-checkout disable
rm trace2.txt
}
test_expect_success 'status succeeds with sparse index' '
git reset --hard &&
git clone . full &&
git clone --sparse . sparse &&
git -C sparse sparse-checkout init --cone --sparse-index &&
git -C sparse sparse-checkout set dir1 dir2 &&
test_config core.fsmonitor "$TEST_DIRECTORY/t7519/fsmonitor-all" &&
check_sparse_index_behavior ! &&
write_script .git/hooks/fsmonitor-test<<-\EOF &&
write_script .git/hooks/fsmonitor-test <<-\EOF &&
printf "last_update_token\0"
EOF
git config core.fsmonitor .git/hooks/fsmonitor-test &&
git -C full config core.fsmonitor ../.git/hooks/fsmonitor-test &&
git -C sparse config core.fsmonitor ../.git/hooks/fsmonitor-test &&
check_sparse_index_behavior ! &&
write_script .git/hooks/fsmonitor-test<<-\EOF &&
write_script .git/hooks/fsmonitor-test <<-\EOF &&
printf "last_update_token\0"
printf "dir1/modified\0"
EOF
check_sparse_index_behavior ! &&
cp -r dir1 dir1a &&
git add dir1a &&
git commit -m "add dir1a" &&
git -C sparse sparse-checkout add dir1a &&
for repo in full sparse
do
cp -r $repo/dir1 $repo/dir1a &&
git -C $repo add dir1a &&
git -C $repo commit -m "add dir1a" || return 1
done &&
git -C sparse sparse-checkout set dir1 dir2 &&
# This one modifies outside the sparse-checkout definition
# and hence we expect to expand the sparse-index.
write_script .git/hooks/fsmonitor-test<<-\EOF &&
write_script .git/hooks/fsmonitor-test <<-\EOF &&
printf "last_update_token\0"
printf "dir1a/modified\0"
EOF

View File

@ -1255,7 +1255,7 @@ static int sparse_dir_matches_path(const struct cache_entry *ce,
static struct cache_entry *find_cache_entry(struct traverse_info *info,
const struct name_entry *p)
{
struct cache_entry *ce;
const char *path;
int pos = find_cache_pos(info, p->path, p->pathlen);
struct unpack_trees_options *o = info->data;
@ -1281,9 +1281,11 @@ static struct cache_entry *find_cache_entry(struct traverse_info *info,
* paths (e.g. "subdir-").
*/
while (pos >= 0) {
ce = o->src_index->cache[pos];
struct cache_entry *ce = o->src_index->cache[pos];
if (strncmp(ce->name, p->path, p->pathlen))
if (!skip_prefix(ce->name, info->traverse_path, &path) ||
strncmp(path, p->path, p->pathlen) ||
path[p->pathlen] != '/')
return NULL;
if (S_ISSPARSEDIR(ce->ce_mode) &&