unpack-trees: unpack new trees as sparse directories

If 'unpack_single_entry()' is unpacking a new directory tree (that is, one
not already present in the index) into a sparse index, unpack the tree as a
sparse directory rather than traversing its contents and unpacking each file
individually. This helps keep the sparse index as collapsed as possible in
cases such as 'git reset --hard' restoring an outside-of-cone directory
removed with 'git rm -r --sparse'.

Without this patch, 'unpack_single_entry()' will only unpack a directory
into the index as a sparse directory (rather than traversing into it and
unpacking its files one-by-one) if an entry with the same name already
exists in the index. This patch allows sparse directory unpacking without a
matching index entry when the following conditions are met:

1. the directory's path is outside the sparse cone, and
2. there are no children of the directory in the index

If a directory meets these requirements (as determined by
'is_new_sparse_dir()'), 'unpack_single_entry()' unpacks the sparse directory
index entry and propagates the decision back up to 'unpack_callback()' to
prevent unnecessary tree traversal into the unpacked directory.

Reported-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Victoria Dye 2022-08-08 19:07:52 +00:00 committed by Junio C Hamano
parent 9553aa0f6c
commit b15207b8cf
2 changed files with 113 additions and 10 deletions

View File

@ -695,6 +695,23 @@ test_expect_success 'reset with wildcard pathspec' '
test_all_match git ls-files -s -- folder1
'
# Remove 'folder1' with 'git rm -r --sparse' in every test repo, restore it
# with 'git reset --hard', and check that all repos report the same status
# before and after the reset. Finally, verify that 'git ls-files --sparse'
# in the sparse-index repo lists 'folder1/' as a single directory entry
# rather than as individual files.
test_expect_success 'reset hard with removed sparse dir' '
init_repos &&
run_on_all git rm -r --sparse folder1 &&
test_all_match git status --porcelain=v2 &&
test_all_match git reset --hard &&
test_all_match git status --porcelain=v2 &&
cat >expect <<-\EOF &&
folder1/
EOF
git -C sparse-index ls-files --sparse folder1 >out &&
test_cmp expect out
'
test_expect_success 'update-index modify outside sparse definition' '
init_repos &&

View File

@ -1069,6 +1069,67 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info,
return ce;
}
/*
 * Determine whether the path specified by 'p' should be unpacked as a new
 * sparse directory in a sparse index. A new sparse directory 'A/':
 * - must be outside the sparse cone.
 * - must not already be in the index (i.e., no index entry with name 'A/'
 *   exists).
 * - must not have any child entries in the index (i.e., no index entry
 *   'A/<something>' exists).
 *
 * 'info' supplies the path of the directory currently being traversed and,
 * through 'info->data', the unpack options whose 'src_index' is inspected.
 * 'p' is the tree entry under consideration; the path that gets checked is
 * the traversal path followed by 'p->path' and a trailing '/'.
 *
 * If 'p' meets the above requirements, return 1; otherwise, return 0.
 */
static int entry_is_new_sparse_dir(const struct traverse_info *info,
				   const struct name_entry *p)
{
	int res = 0, pos;
	struct strbuf dirpath = STRBUF_INIT;
	struct unpack_trees_options *o = info->data;

	/* Only directories can become sparse directory entries. */
	if (!S_ISDIR(p->mode))
		return 0;

	/*
	 * Build the full directory path, including the trailing slash, so it
	 * can be compared against index entry names.
	 */
	strbuf_add(&dirpath, info->traverse_path, info->pathlen);
	strbuf_add(&dirpath, p->path, p->pathlen);
	strbuf_addch(&dirpath, '/');

	/*
	 * If the path is inside the sparse cone, it can't be a sparse directory.
	 */
	if (path_in_cone_mode_sparse_checkout(dirpath.buf, o->src_index))
		goto cleanup;

	pos = index_name_pos_sparse(o->src_index, dirpath.buf, dirpath.len);
	if (pos >= 0) {
		/* Path is already in the index, not a new sparse dir */
		goto cleanup;
	}

	/* Where would this sparse dir be inserted into the index? */
	pos = -pos - 1;
	if (pos >= o->src_index->cache_nr) {
		/*
		 * Sparse dir would be inserted at the end of the index, so we
		 * know it has no child entries.
		 */
		res = 1;
		goto cleanup;
	}

	/*
	 * If the dir has child entries in the index, the first would be at the
	 * position the sparse directory would be inserted. If the entry at this
	 * position is inside the dir (i.e., its name starts with the directory
	 * path), this is not a new sparse dir.
	 *
	 * strncmp() may return any non-zero value (including a negative one)
	 * on mismatch; normalize it so that the function returns exactly 1 or
	 * 0, as documented above.
	 */
	res = !!strncmp(o->src_index->cache[pos]->name, dirpath.buf, dirpath.len);

cleanup:
	strbuf_release(&dirpath);
	return res;
}
/*
* Note that traverse_by_cache_tree() duplicates some logic in this function
* without actually calling it. If you change the logic here you may need to
@ -1078,21 +1139,44 @@ static int unpack_single_entry(int n, unsigned long mask,
unsigned long dirmask,
struct cache_entry **src,
const struct name_entry *names,
const struct traverse_info *info)
const struct traverse_info *info,
int *is_new_sparse_dir)
{
int i;
struct unpack_trees_options *o = info->data;
unsigned long conflicts = info->df_conflicts | dirmask;
const struct name_entry *p = names;
if (mask == dirmask && !src[0])
return 0;
*is_new_sparse_dir = 0;
if (mask == dirmask && !src[0]) {
/*
* If we're not in a sparse index, we can't unpack a directory
* without recursing into it, so we return.
*/
if (!o->src_index->sparse_index)
return 0;
/* Find first entry with a real name (we could use "mask" too) */
while (!p->mode)
p++;
/*
* If the directory is completely missing from the index but
* would otherwise be a sparse directory, we should unpack it.
* If not, we'll return and continue recursively traversing the
* tree.
*/
*is_new_sparse_dir = entry_is_new_sparse_dir(info, p);
if (!*is_new_sparse_dir)
return 0;
}
/*
* When we have a sparse directory entry for src[0],
* then this isn't necessarily a directory-file conflict.
* When we are unpacking a sparse directory, then this isn't necessarily
* a directory-file conflict.
*/
if (mask == dirmask && src[0] &&
S_ISSPARSEDIR(src[0]->ce_mode))
if (mask == dirmask &&
(*is_new_sparse_dir || (src[0] && S_ISSPARSEDIR(src[0]->ce_mode))))
conflicts = 0;
/*
@ -1352,7 +1436,7 @@ static int unpack_sparse_callback(int n, unsigned long mask, unsigned long dirma
{
struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
struct unpack_trees_options *o = info->data;
int ret;
int ret, is_new_sparse_dir;
assert(o->merge);
@ -1376,7 +1460,7 @@ static int unpack_sparse_callback(int n, unsigned long mask, unsigned long dirma
* "index" tree (i.e., names[0]) and adjust 'names', 'n', 'mask', and
* 'dirmask' accordingly.
*/
ret = unpack_single_entry(n - 1, mask >> 1, dirmask >> 1, src, names + 1, info);
ret = unpack_single_entry(n - 1, mask >> 1, dirmask >> 1, src, names + 1, info, &is_new_sparse_dir);
if (src[0])
discard_cache_entry(src[0]);
@ -1394,6 +1478,7 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
struct unpack_trees_options *o = info->data;
const struct name_entry *p = names;
int is_new_sparse_dir;
/* Find first entry with a real name (we could use "mask" too) */
while (!p->mode)
@ -1440,7 +1525,7 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
}
}
if (unpack_single_entry(n, mask, dirmask, src, names, info) < 0)
if (unpack_single_entry(n, mask, dirmask, src, names, info, &is_new_sparse_dir))
return -1;
if (o->merge && src[0]) {
@ -1478,6 +1563,7 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
}
if (!is_sparse_directory_entry(src[0], names, info) &&
!is_new_sparse_dir &&
traverse_trees_recursive(n, dirmask, mask & ~dirmask,
names, info) < 0) {
return -1;