From 712610274211dc6cd7c83c5ccf88db5a75af8c25 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 15 Aug 2013 12:08:45 -0700 Subject: [PATCH 1/4] dir.c: use the cache_* macro to access the current index These codepaths always start from the_index and use index_* functions, but there is no reason to do so. Use the compatibility cache_* macro to access the current in-core index like everybody else. While at it, fix typo in the comment for a function to check if a path within a directory appears in the index. Signed-off-by: Junio C Hamano --- dir.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dir.c b/dir.c index a5926fbd1a..2f82cd193e 100644 --- a/dir.c +++ b/dir.c @@ -472,15 +472,14 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size) unsigned long sz; enum object_type type; void *data; - struct index_state *istate = &the_index; len = strlen(path); - pos = index_name_pos(istate, path, len); + pos = cache_name_pos(path, len); if (pos < 0) return NULL; - if (!ce_skip_worktree(istate->cache[pos])) + if (!ce_skip_worktree(active_cache[pos])) return NULL; - data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz); + data = read_sha1_file(active_cache[pos]->sha1, &type, &sz); if (!data || type != OBJ_BLOB) { free(data); return NULL; @@ -924,13 +923,13 @@ enum exist_status { }; /* - * Do not use the alphabetically stored index to look up + * Do not use the alphabetically sorted index to look up * the directory name; instead, use the case insensitive * name hash. */ static enum exist_status directory_exists_in_index_icase(const char *dirname, int len) { - struct cache_entry *ce = index_name_exists(&the_index, dirname, len + 1, ignore_case); + struct cache_entry *ce = cache_name_exists(dirname, len + 1, ignore_case); unsigned char endchar; if (!ce) From 2eac2a4cc4bdc8d77b31204bc20751cb56f0d575 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 15 Aug 2013 12:13:46 -0700 Subject: [PATCH 2/4] ls-files -k: a directory only can be killed if the index has a non-directory "ls-files -o" and "ls-files -k" both traverse the working tree down to find either all untracked paths or those that will be "killed" (removed from the working tree to make room) when the paths recorded in the index are checked out. It is necessary to traverse the working tree fully when enumerating all the "other" paths, but when we are only interested in "killed" paths, we can take advantage of the fact that paths that do not overlap with entries in the index can never be killed. The treat_one_path() helper function, which is called during the recursive traversal, is the ideal place to implement an optimization. When we are looking at a directory P in the working tree, there are three cases: (1) P exists in the index. Everything inside the directory P in the working tree needs to go when P is checked out from the index. (2) P does not exist in the index, but there is P/Q in the index. We know P will stay a directory when we check out the contents of the index, but we do not know yet if there is a directory P/Q in the working tree to be killed, so we need to recurse. (3) P does not exist in the index, and there is no P/Q in the index to require P to be a directory, either. Only in this case, we know that everything inside P will not be killed without recursing. Note that this helper is called by treat_leading_path() that decides if we need to traverse only subdirectories of a single common leading directory, which is essential for this optimization to be correct. This caller checks each level of the leading path component from shallower directory to deeper ones, and that is what allows us to only check if the path appears in the index. If the call to treat_one_path() weren't there, given a path P/Q/R, the real traversal may start from directory P/Q/R, even when the index records P as a regular file, and we would end up having to check if any leading subpath in P/Q/R, e.g. P, appears in the index. Signed-off-by: Junio C Hamano --- builtin/ls-files.c | 2 ++ dir.c | 29 +++++++++++++++++++++++++++-- dir.h | 3 ++- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 22020729cb..c7eb6f4873 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -213,6 +213,8 @@ static void show_files(struct dir_struct *dir) /* For cached/deleted files we don't need to even do the readdir */ if (show_others || show_killed) { + if (!show_others) + dir->flags |= DIR_COLLECT_KILLED_ONLY; fill_directory(dir, pathspec); if (show_others) show_other_files(dir); diff --git a/dir.c b/dir.c index 2f82cd193e..ff768f31af 100644 --- a/dir.c +++ b/dir.c @@ -1173,12 +1173,37 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, int dtype, struct dirent *de) { int exclude; + int has_path_in_index = !!cache_name_exists(path->buf, path->len, ignore_case); + if (dtype == DT_UNKNOWN) dtype = get_dtype(de, path->buf, path->len); /* Always exclude indexed files */ - if (dtype != DT_DIR && - cache_name_exists(path->buf, path->len, ignore_case)) + if (dtype != DT_DIR && has_path_in_index) + return path_none; + + /* + * When we are looking at a directory P in the working tree, + * there are three cases: + * + * (1) P exists in the index. Everything inside the directory P in + * the working tree needs to go when P is checked out from the + * index. + * + * (2) P does not exist in the index, but there is P/Q in the index. + * We know P will stay a directory when we check out the contents + * of the index, but we do not know yet if there is a directory + * P/Q in the working tree to be killed, so we need to recurse. + * + * (3) P does not exist in the index, and there is no P/Q in the index + * to require P to be a directory, either. Only in this case, we + * know that everything inside P will not be killed without + * recursing. + */ + if ((dir->flags & DIR_COLLECT_KILLED_ONLY) && + (dtype == DT_DIR) && + !has_path_in_index && + (directory_exists_in_index(path->buf, path->len) == index_nonexistent)) return path_none; exclude = is_excluded(dir, path->buf, &dtype); diff --git a/dir.h b/dir.h index 3d6b80c933..4677b861f0 100644 --- a/dir.h +++ b/dir.h @@ -80,7 +80,8 @@ struct dir_struct { DIR_HIDE_EMPTY_DIRECTORIES = 1<<2, DIR_NO_GITLINKS = 1<<3, DIR_COLLECT_IGNORED = 1<<4, - DIR_SHOW_IGNORED_TOO = 1<<5 + DIR_SHOW_IGNORED_TOO = 1<<5, + DIR_COLLECT_KILLED_ONLY = 1<<6 } flags; struct dir_entry **entries; struct dir_entry **ignored; From 3c56875176390eee7d81795294124ce90189d876 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 15 Aug 2013 13:51:09 -0700 Subject: [PATCH 3/4] t3010: update to demonstrate "ls-files -k" optimization pitfalls An earlier draft of the previous step used cache_name_exists() to check the directory we were looking at, which missed the second case described in its log message. Demonstrate why it is not sufficient. Signed-off-by: Junio C Hamano --- t/t3010-ls-files-killed-modified.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/t/t3010-ls-files-killed-modified.sh b/t/t3010-ls-files-killed-modified.sh index 95671c2053..6ea7ca8265 100755 --- a/t/t3010-ls-files-killed-modified.sh +++ b/t/t3010-ls-files-killed-modified.sh @@ -11,6 +11,7 @@ This test prepares the following in the cache: path1 - a symlink path2/file2 - a file in a directory path3/file3 - a file in a directory + pathx/ju - a file in a directory and the following on the filesystem: @@ -21,6 +22,7 @@ and the following on the filesystem: path4 - a file path5 - a symlink path6/file6 - a file in a directory + pathx/ju/nk - a file in a directory to be killed git ls-files -k should report that existing filesystem objects except path4, path5 and path6/file6 to be killed. @@ -44,16 +46,17 @@ then else date > path1 fi -mkdir path2 path3 +mkdir path2 path3 pathx date >path2/file2 date >path3/file3 +>pathx/ju : >path7 date >path8 : >path9 date >path10 test_expect_success \ 'git update-index --add to add various paths.' \ - "git update-index --add -- path0 path1 path?/file? path7 path8 path9 path10" + "git update-index --add -- path0 path1 path?/file? pathx/ju path7 path8 path9 path10" rm -fr path? ;# leave path10 alone date >path2 @@ -65,7 +68,7 @@ else date > path3 date > path5 fi -mkdir path0 path1 path6 +mkdir -p path0 path1 path6 pathx/ju date >path0/file0 date >path1/file1 date >path6/file6 @@ -73,6 +76,7 @@ date >path7 : >path8 : >path9 touch path10 +>pathx/ju/nk test_expect_success \ 'git ls-files -k to show killed files.' \ @@ -82,6 +86,7 @@ path0/file0 path1/file1 path2 path3 +pathx/ju/nk EOF test_expect_success \ @@ -98,6 +103,7 @@ path2/file2 path3/file3 path7 path8 +pathx/ju EOF test_expect_success \ From 680be044d98b3b703bc33d546a987c19b3779aeb Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Fri, 23 Aug 2013 16:26:59 -0700 Subject: [PATCH 4/4] dir.c::test_one_path(): work around directory_exists_in_index_icase() breakage directory_exists_in_index() takes pathname and its length, but its helper function directory_exists_in_index_icase() reads one byte beyond the end of the pathname and expects there to be a '/'. This needs to be fixed, as that one-byte-beyond-the-end location may not even be readable, possibly by not registering directories to name hashes with trailing slashes. In the meantime, update the new caller added recently to treat_one_path() to make sure that the path buffer it gives the function is one byte longer than the path it is asking the function about by appending a slash to it. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- dir.c | 18 +++++++++++++++--- t/t3010-ls-files-killed-modified.sh | 15 +++++++++------ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/dir.c b/dir.c index ff768f31af..1000dc2368 100644 --- a/dir.c +++ b/dir.c @@ -1202,9 +1202,21 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, */ if ((dir->flags & DIR_COLLECT_KILLED_ONLY) && (dtype == DT_DIR) && - !has_path_in_index && - (directory_exists_in_index(path->buf, path->len) == index_nonexistent)) - return path_none; + !has_path_in_index) { + /* + * NEEDSWORK: directory_exists_in_index_icase() + * assumes that one byte past the given path is + * readable and has '/', which needs to be fixed, but + * until then, work it around in the caller. + */ + strbuf_addch(path, '/'); + if (directory_exists_in_index(path->buf, path->len - 1) == + index_nonexistent) { + strbuf_setlen(path, path->len - 1); + return path_none; + } + strbuf_setlen(path, path->len - 1); + } exclude = is_excluded(dir, path->buf, &dtype); diff --git a/t/t3010-ls-files-killed-modified.sh b/t/t3010-ls-files-killed-modified.sh index 6ea7ca8265..ab1deae3b2 100755 --- a/t/t3010-ls-files-killed-modified.sh +++ b/t/t3010-ls-files-killed-modified.sh @@ -78,9 +78,6 @@ date >path7 touch path10 >pathx/ju/nk -test_expect_success \ - 'git ls-files -k to show killed files.' \ - 'git ls-files -k >.output' cat >.expected <.output && + test_cmp .expected .output +' + +test_expect_success 'git ls-files -k to show killed files (w/ icase)' ' + git -c core.ignorecase=true ls-files -k >.output && + test_cmp .expected .output +' test_expect_success \ 'git ls-files -m to show modified files.' \