From 560bb7a7a146fddad3394dc913f1469d477d26a9 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 15 Apr 2013 21:05:19 +0200 Subject: [PATCH 01/14] dir.c: git-status --ignored: don't drop ignored directories 'git-status --ignored' drops ignored directories if they contain untracked files in an untracked sub directory. Fix it by getting exact (recursive) excluded status in treat_directory. Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- dir.c | 9 +++++++++ t/t7061-wtstatus-ignore.sh | 27 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/dir.c b/dir.c index 91cfd99671..dc3a50baf6 100644 --- a/dir.c +++ b/dir.c @@ -1104,6 +1104,15 @@ static enum directory_treatment treat_directory(struct dir_struct *dir, /* This is the "show_other_directories" case */ + /* might be a sub directory in an excluded directory */ + if (!exclude) { + struct path_exclude_check check; + int dt = DT_DIR; + path_exclude_check_init(&check, dir); + exclude = is_path_excluded(&check, dirname, len, &dt); + path_exclude_check_clear(&check); + } + /* * We are looking for ignored files and our directory is not ignored, * check if it contains only ignored files diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh index 0da1214bcc..0f1034ed50 100755 --- a/t/t7061-wtstatus-ignore.sh +++ b/t/t7061-wtstatus-ignore.sh @@ -143,4 +143,31 @@ test_expect_success 'status ignored tracked directory and uncommitted file with test_cmp expected actual ' +cat >expected <<\EOF +?? .gitignore +?? actual +?? expected +!! tracked/ +EOF + +test_expect_success 'status ignored tracked directory with uncommitted file in untracked subdir with --ignore' ' + rm -rf tracked/uncommitted && + mkdir tracked/ignored && + : >tracked/ignored/uncommitted && + git status --porcelain --ignored >actual && + test_cmp expected actual +' + +cat >expected <<\EOF +?? .gitignore +?? actual +?? expected +!! 
tracked/ignored/uncommitted +EOF + +test_expect_success 'status ignored tracked directory with uncommitted file in untracked subdir with --ignore -u' ' + git status --porcelain --ignored -u >actual && + test_cmp expected actual +' + test_done From 289ff5598fc4947fe0e6cfeb6db652e64894151c Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 15 Apr 2013 21:06:30 +0200 Subject: [PATCH 02/14] dir.c: git-status --ignored: don't list files in ignored directories 'git-status --ignored' lists both the ignored directory and the ignored files if the files are in a tracked sub directory. When recursing into sub directories in read_directory_recursive, pass on the check_only parameter so that we don't accidentally add the files. Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- dir.c | 4 +--- t/t7061-wtstatus-ignore.sh | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/dir.c b/dir.c index dc3a50baf6..248cfea39f 100644 --- a/dir.c +++ b/dir.c @@ -1317,7 +1317,6 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, return path_ignored; case DT_DIR: strbuf_addch(path, '/'); - switch (treat_directory(dir, path->buf, path->len, exclude, simplify)) { case show_directory: break; @@ -1387,8 +1386,7 @@ static int read_directory_recursive(struct dir_struct *dir, switch (treat_path(dir, de, &path, baselen, simplify)) { case path_recurse: contents += read_directory_recursive(dir, path.buf, - path.len, 0, - simplify); + path.len, check_only, simplify); continue; case path_ignored: continue; diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh index 0f1034ed50..4ece1292b4 100755 --- a/t/t7061-wtstatus-ignore.sh +++ b/t/t7061-wtstatus-ignore.sh @@ -170,4 +170,31 @@ test_expect_success 'status ignored tracked directory with uncommitted file in u test_cmp expected actual ' +cat >expected <<\EOF +?? .gitignore +?? actual +?? expected +!! 
tracked/ +EOF + +test_expect_success 'status ignored tracked directory with uncommitted file in tracked subdir with --ignore' ' + : >tracked/ignored/committed && + git add -f tracked/ignored/committed && + git commit -m. && + git status --porcelain --ignored >actual && + test_cmp expected actual +' + +cat >expected <<\EOF +?? .gitignore +?? actual +?? expected +!! tracked/ignored/uncommitted +EOF + +test_expect_success 'status ignored tracked directory with uncommitted file in tracked subdir with --ignore -u' ' + git status --porcelain --ignored -u >actual && + test_cmp expected actual +' + test_done From 0104c9e7816e30701e4fdd9143889faacfa0eefa Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 15 Apr 2013 21:07:16 +0200 Subject: [PATCH 03/14] dir.c: git-status --ignored: don't list empty ignored directories 'git-status --ignored' lists ignored tracked directories without any ignored files if a tracked file happens to match an exclude pattern. Always exclude tracked files. Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- dir.c | 11 ++++------- t/t7061-wtstatus-ignore.sh | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/dir.c b/dir.c index 248cfea39f..4723cd537d 100644 --- a/dir.c +++ b/dir.c @@ -1153,16 +1153,13 @@ static int treat_file(struct dir_struct *dir, struct strbuf *path, int exclude, struct path_exclude_check check; int exclude_file = 0; + /* Always exclude indexed files */ + if (index_name_exists(&the_index, path->buf, path->len, ignore_case)) + return 1; + if (exclude) exclude_file = !(dir->flags & DIR_SHOW_IGNORED); else if (dir->flags & DIR_SHOW_IGNORED) { - /* Always exclude indexed files */ - struct cache_entry *ce = index_name_exists(&the_index, - path->buf, path->len, ignore_case); - - if (ce) - return 1; - path_exclude_check_init(&check, dir); if (!is_path_excluded(&check, path->buf, path->len, dtype)) diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh index 
4ece1292b4..28b7d957a5 100755 --- a/t/t7061-wtstatus-ignore.sh +++ b/t/t7061-wtstatus-ignore.sh @@ -118,6 +118,29 @@ test_expect_success 'status ignored tracked directory with --ignore -u' ' test_cmp expected actual ' +cat >expected <<\EOF +?? .gitignore +?? actual +?? expected +EOF + +test_expect_success 'status ignored tracked directory and ignored file with --ignore' ' + echo "committed" >>.gitignore && + git status --porcelain --ignored >actual && + test_cmp expected actual +' + +cat >expected <<\EOF +?? .gitignore +?? actual +?? expected +EOF + +test_expect_success 'status ignored tracked directory and ignored file with --ignore -u' ' + git status --porcelain --ignored -u >actual && + test_cmp expected actual +' + cat >expected <<\EOF ?? .gitignore ?? actual @@ -126,6 +149,7 @@ cat >expected <<\EOF EOF test_expect_success 'status ignored tracked directory and uncommitted file with --ignore' ' + echo "tracked" >.gitignore && : >tracked/uncommitted && git status --porcelain --ignored >actual && test_cmp expected actual From 184d2a8e964d721c20775026308fcf63d57c0b4d Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 15 Apr 2013 21:08:02 +0200 Subject: [PATCH 04/14] dir.c: git-ls-files --directories: don't hide empty directories 'git-ls-files --ignored --directories' hides empty directories even though --no-empty-directory was not specified. Treat the DIR_HIDE_EMPTY_DIRECTORIES flag independently from DIR_SHOW_IGNORED to make all git-ls-files options work as expected. 
Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- dir.c | 6 ++---- t/t3001-ls-files-others-exclude.sh | 23 +++++++++++++++++++++++ wt-status.c | 2 +- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/dir.c b/dir.c index 4723cd537d..15d7277a02 100644 --- a/dir.c +++ b/dir.c @@ -1120,15 +1120,13 @@ static enum directory_treatment treat_directory(struct dir_struct *dir, if ((dir->flags & DIR_SHOW_IGNORED) && !exclude) { int ignored; dir->flags &= ~DIR_SHOW_IGNORED; - dir->flags |= DIR_HIDE_EMPTY_DIRECTORIES; ignored = read_directory_recursive(dir, dirname, len, 1, simplify); - dir->flags &= ~DIR_HIDE_EMPTY_DIRECTORIES; dir->flags |= DIR_SHOW_IGNORED; return ignored ? ignore_directory : show_directory; } - if (!(dir->flags & DIR_SHOW_IGNORED) && - !(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) + + if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) return show_directory; if (!read_directory_recursive(dir, dirname, len, 1, simplify)) return ignore_directory; diff --git a/t/t3001-ls-files-others-exclude.sh b/t/t3001-ls-files-others-exclude.sh index efb7ebc91f..859da350ab 100755 --- a/t/t3001-ls-files-others-exclude.sh +++ b/t/t3001-ls-files-others-exclude.sh @@ -214,6 +214,29 @@ test_expect_success 'subdirectory ignore (l1)' ' test_cmp expect actual ' +test_expect_success 'show/hide empty ignored directory (setup)' ' + rm top/l1/l2/l1 && + rm top/l1/.gitignore +' + +test_expect_success 'show empty ignored directory with --directory' ' + ( + cd top && + git ls-files -o -i --exclude l1 --directory + ) >actual && + echo l1/ >expect && + test_cmp expect actual +' + +test_expect_success 'hide empty ignored directory with --no-empty-directory' ' + ( + cd top && + git ls-files -o -i --exclude l1 --directory --no-empty-directory + ) >actual && + >expect && + test_cmp expect actual +' + test_expect_success 'pattern matches prefix completely' ' : >expect && git ls-files -i -o --exclude "/three/a.3[abc]" >actual && diff --git a/wt-status.c b/wt-status.c index 
ec5f27c599..676b058e59 100644 --- a/wt-status.c +++ b/wt-status.c @@ -526,7 +526,7 @@ static void wt_status_collect_untracked(struct wt_status *s) dir.nr = 0; dir.flags = DIR_SHOW_IGNORED; if (s->show_untracked_files != SHOW_ALL_UNTRACKED_FILES) - dir.flags |= DIR_SHOW_OTHER_DIRECTORIES; + dir.flags |= DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES; fill_directory(&dir, s->pathspec); for (i = 0; i < dir.nr; i++) { struct dir_entry *ent = dir.entries[i]; From c94ab010266776c85b588473260ed048d1e654ff Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 15 Apr 2013 21:08:42 +0200 Subject: [PATCH 05/14] dir.c: git-status --ignored: don't list empty directories as ignored 'git-status --ignored' lists empty untracked directories as ignored, even though they don't have any ignored files. When checking if a directory is already listed as untracked (i.e. shouldn't be listed as ignored as well), don't assume that the directory has only ignored files if it doesn't have untracked files, as the directory may be empty. Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- dir.c | 5 +++-- t/t7061-wtstatus-ignore.sh | 28 +++++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/dir.c b/dir.c index 15d7277a02..fecb6da0ae 100644 --- a/dir.c +++ b/dir.c @@ -1115,7 +1115,7 @@ static enum directory_treatment treat_directory(struct dir_struct *dir, /* * We are looking for ignored files and our directory is not ignored, - * check if it contains only ignored files + * check if it contains untracked files (i.e. is listed as untracked) */ if ((dir->flags & DIR_SHOW_IGNORED) && !exclude) { int ignored; @@ -1123,7 +1123,8 @@ static enum directory_treatment treat_directory(struct dir_struct *dir, ignored = read_directory_recursive(dir, dirname, len, 1, simplify); dir->flags |= DIR_SHOW_IGNORED; - return ignored ? 
ignore_directory : show_directory; + if (ignored) + return ignore_directory; } if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh index 28b7d957a5..6171a49cf9 100755 --- a/t/t7061-wtstatus-ignore.sh +++ b/t/t7061-wtstatus-ignore.sh @@ -60,6 +60,31 @@ test_expect_success 'status ignored directory with --ignore -u' ' test_cmp expected actual ' +cat >expected <<\EOF +?? .gitignore +?? actual +?? expected +EOF + +test_expect_success 'status empty untracked directory with --ignore' ' + rm -rf ignored && + mkdir untracked-ignored && + mkdir untracked-ignored/test && + git status --porcelain --ignored >actual && + test_cmp expected actual +' + +cat >expected <<\EOF +?? .gitignore +?? actual +?? expected +EOF + +test_expect_success 'status empty untracked directory with --ignore -u' ' + git status --porcelain --ignored -u >actual && + test_cmp expected actual +' + cat >expected <<\EOF ?? .gitignore ?? actual @@ -68,9 +93,6 @@ cat >expected <<\EOF EOF test_expect_success 'status untracked directory with ignored files with --ignore' ' - rm -rf ignored && - mkdir untracked-ignored && - mkdir untracked-ignored/test && : >untracked-ignored/ignored && : >untracked-ignored/test/ignored && git status --porcelain --ignored >actual && From be8a84c526691667fc04a8241d93a3de1de298ab Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 15 Apr 2013 21:09:25 +0200 Subject: [PATCH 06/14] dir.c: make 'git-status --ignored' work within leading directories 'git-status --ignored path/' doesn't list ignored files and directories within 'path' if some component of 'path' is classified as untracked. Disable the DIR_SHOW_OTHER_DIRECTORIES flag while traversing leading directories. This prevents treat_leading_path() with DIR_SHOW_IGNORED flag from aborting at the top level untracked directory. 
As a side effect, this also eliminates a recursive directory scan per leading directory level, as treat_directory() can no longer call read_directory_recursive() when called from treat_leading_path(). Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- dir.c | 3 +++ t/t7061-wtstatus-ignore.sh | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/dir.c b/dir.c index fecb6da0ae..fd4aeae3e7 100644 --- a/dir.c +++ b/dir.c @@ -1447,12 +1447,14 @@ static int treat_leading_path(struct dir_struct *dir, struct strbuf sb = STRBUF_INIT; int baselen, rc = 0; const char *cp; + int old_flags = dir->flags; while (len && path[len - 1] == '/') len--; if (!len) return 1; baselen = 0; + dir->flags &= ~DIR_SHOW_OTHER_DIRECTORIES; while (1) { cp = path + baselen + !!baselen; cp = memchr(cp, '/', path + len - cp); @@ -1475,6 +1477,7 @@ static int treat_leading_path(struct dir_struct *dir, } } strbuf_release(&sb); + dir->flags = old_flags; return rc; } diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh index 6171a49cf9..4c6f145d97 100755 --- a/t/t7061-wtstatus-ignore.sh +++ b/t/t7061-wtstatus-ignore.sh @@ -32,6 +32,25 @@ test_expect_success 'status untracked directory with --ignored -u' ' git status --porcelain --ignored -u >actual && test_cmp expected actual ' +cat >expected <<\EOF +?? untracked/uncommitted +!! untracked/ignored +EOF + +test_expect_success 'status prefixed untracked directory with --ignored' ' + git status --porcelain --ignored untracked/ >actual && + test_cmp expected actual +' + +cat >expected <<\EOF +?? untracked/uncommitted +!! untracked/ignored +EOF + +test_expect_success 'status prefixed untracked sub-directory with --ignored -u' ' + git status --porcelain --ignored -u untracked/ >actual && + test_cmp expected actual +' cat >expected <<\EOF ?? 
.gitignore From 5bd8e2d894be3a27e9b32b062ff224cc2396b69c Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 15 Apr 2013 21:10:05 +0200 Subject: [PATCH 07/14] dir.c: git-clean -d -X: don't delete tracked directories The notion of "ignored tracked" directories introduced in 721ac4ed "dir.c: Make git-status --ignored more consistent" has a few unwanted side effects: - git-clean -d -X: deletes ignored tracked directories. git-clean should never delete tracked content. - git-ls-files --ignored --other --directory: lists ignored tracked directories instead of "other" directories. - git-status --ignored: lists ignored tracked directories while contained files may be listed as modified. Paths listed by git-status should be disjoint (except in long format where a path may be listed in both the staged and unstaged section). Additionally, the current behaviour violates documentation in gitignore(5) ("Specifies intentionally *untracked* files to ignore") and Documentation/ technical/api-directory-listing.txt ("DIR_SHOW_OTHER_DIRECTORIES: Include a directory that is *not tracked*."). In dir.c::treat_directory, remove the special handling of ignored tracked directories, so that the DIR_SHOW_OTHER_DIRECTORIES flag only affects "other" (i.e. untracked) directories. In dir.c::dir_add_name, check that added paths are untracked even if DIR_SHOW_IGNORED is set. 
Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- dir.c | 11 +++------- t/t3001-ls-files-others-exclude.sh | 26 +++++++++++++++++++++++ t/t7061-wtstatus-ignore.sh | 6 +++--- t/t7300-clean.sh | 34 ++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 11 deletions(-) diff --git a/dir.c b/dir.c index fd4aeae3e7..7a98e3ac8a 100644 --- a/dir.c +++ b/dir.c @@ -941,8 +941,7 @@ static struct dir_entry *dir_entry_new(const char *pathname, int len) static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len) { - if (!(dir->flags & DIR_SHOW_IGNORED) && - cache_name_exists(pathname, len, ignore_case)) + if (cache_name_exists(pathname, len, ignore_case)) return NULL; ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc); @@ -1044,9 +1043,8 @@ static enum exist_status directory_exists_in_index(const char *dirname, int len) * traversal routine. * * Case 1: If we *already* have entries in the index under that - * directory name, we recurse into the directory to see all the files, - * unless the directory is excluded and we want to show ignored - * directories + * directory name, we always recurse into the directory to see + * all the files. 
* * Case 2: If we *already* have that directory name as a gitlink, * we always continue to see it as a gitlink, regardless of whether @@ -1081,9 +1079,6 @@ static enum directory_treatment treat_directory(struct dir_struct *dir, /* The "len-1" is to strip the final '/' */ switch (directory_exists_in_index(dirname, len-1)) { case index_directory: - if ((dir->flags & DIR_SHOW_OTHER_DIRECTORIES) && exclude) - break; - return recurse_into_directory; case index_gitdir: diff --git a/t/t3001-ls-files-others-exclude.sh b/t/t3001-ls-files-others-exclude.sh index 859da350ab..ec4fae2f39 100755 --- a/t/t3001-ls-files-others-exclude.sh +++ b/t/t3001-ls-files-others-exclude.sh @@ -237,6 +237,32 @@ test_expect_success 'hide empty ignored directory with --no-empty-directory' ' test_cmp expect actual ' +test_expect_success 'show/hide empty ignored sub-directory (setup)' ' + > top/l1/tracked && + ( + cd top && + git add -f l1/tracked + ) +' + +test_expect_success 'show empty ignored sub-directory with --directory' ' + ( + cd top && + git ls-files -o -i --exclude l1 --directory + ) >actual && + echo l1/l2/ >expect && + test_cmp expect actual +' + +test_expect_success 'hide empty ignored sub-directory with --no-empty-directory' ' + ( + cd top && + git ls-files -o -i --exclude l1 --directory --no-empty-directory + ) >actual && + >expect && + test_cmp expect actual +' + test_expect_success 'pattern matches prefix completely' ' : >expect && git ls-files -i -o --exclude "/three/a.3[abc]" >actual && diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh index 4c6f145d97..460789b4d8 100755 --- a/t/t7061-wtstatus-ignore.sh +++ b/t/t7061-wtstatus-ignore.sh @@ -186,7 +186,7 @@ cat >expected <<\EOF ?? .gitignore ?? actual ?? expected -!! tracked/ +!! tracked/uncommitted EOF test_expect_success 'status ignored tracked directory and uncommitted file with --ignore' ' @@ -212,7 +212,7 @@ cat >expected <<\EOF ?? .gitignore ?? actual ?? expected -!! tracked/ +!! 
tracked/ignored/ EOF test_expect_success 'status ignored tracked directory with uncommitted file in untracked subdir with --ignore' ' @@ -239,7 +239,7 @@ cat >expected <<\EOF ?? .gitignore ?? actual ?? expected -!! tracked/ +!! tracked/ignored/uncommitted EOF test_expect_success 'status ignored tracked directory with uncommitted file in tracked subdir with --ignore' ' diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index ccfb54de7a..710be90489 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -298,6 +298,23 @@ test_expect_success 'git clean -d -x' ' ' +test_expect_success 'git clean -d -x with ignored tracked directory' ' + + mkdir -p build docs && + touch a.out src/part3.c docs/manual.txt obj.o build/lib.so && + git clean -d -x -e src && + test -f Makefile && + test -f README && + test -f src/part1.c && + test -f src/part2.c && + test ! -f a.out && + test -f src/part3.c && + test ! -d docs && + test ! -f obj.o && + test ! -d build + +' + test_expect_success 'git clean -X' ' mkdir -p build docs && @@ -332,6 +349,23 @@ test_expect_success 'git clean -d -X' ' ' +test_expect_success 'git clean -d -X with ignored tracked directory' ' + + mkdir -p build docs && + touch a.out src/part3.c docs/manual.txt obj.o build/lib.so && + git clean -d -X -e src && + test -f Makefile && + test -f README && + test -f src/part1.c && + test -f src/part2.c && + test -f a.out && + test ! -f src/part3.c && + test -f docs/manual.txt && + test ! -f obj.o && + test ! 
-d build + +' + test_expect_success 'clean.requireForce defaults to true' ' git config --unset clean.requireForce && From 46aa2f95d2fa79164fb0f5ad79bdd1d26fe689ea Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 15 Apr 2013 21:11:02 +0200 Subject: [PATCH 08/14] dir.c: factor out parts of last_exclude_matching for later reuse Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- dir.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/dir.c b/dir.c index 7a98e3ac8a..46d127ca20 100644 --- a/dir.c +++ b/dir.c @@ -795,25 +795,13 @@ int is_excluded_from_list(const char *pathname, return -1; /* undecided */ } -/* - * Loads the exclude lists for the directory containing pathname, then - * scans all exclude lists to determine whether pathname is excluded. - * Returns the exclude_list element which matched, or NULL for - * undecided. - */ -static struct exclude *last_exclude_matching(struct dir_struct *dir, - const char *pathname, - int *dtype_p) +static struct exclude *last_exclude_matching_from_lists(struct dir_struct *dir, + const char *pathname, int pathlen, const char *basename, + int *dtype_p) { - int pathlen = strlen(pathname); int i, j; struct exclude_list_group *group; struct exclude *exclude; - const char *basename = strrchr(pathname, '/'); - basename = (basename) ? basename+1 : pathname; - - prep_exclude(dir, pathname, basename-pathname); - for (i = EXC_CMDL; i <= EXC_FILE; i++) { group = &dir->exclude_list_group[i]; for (j = group->nr - 1; j >= 0; j--) { @@ -827,6 +815,26 @@ static struct exclude *last_exclude_matching(struct dir_struct *dir, return NULL; } +/* + * Loads the exclude lists for the directory containing pathname, then + * scans all exclude lists to determine whether pathname is excluded. + * Returns the exclude_list element which matched, or NULL for + * undecided. 
+ */ +static struct exclude *last_exclude_matching(struct dir_struct *dir, + const char *pathname, + int *dtype_p) +{ + int pathlen = strlen(pathname); + const char *basename = strrchr(pathname, '/'); + basename = (basename) ? basename+1 : pathname; + + prep_exclude(dir, pathname, basename-pathname); + + return last_exclude_matching_from_lists(dir, pathname, pathlen, + basename, dtype_p); +} + /* * Loads the exclude lists for the directory containing pathname, then * scans all exclude lists to determine whether pathname is excluded. From 6cd5c582dcf8e6b960079247ac9d0dbace856458 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 15 Apr 2013 21:11:37 +0200 Subject: [PATCH 09/14] dir.c: move prep_exclude Move prep_exclude in preparation for the next patch. Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- dir.c | 144 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/dir.c b/dir.c index 46d127ca20..7d87c3c52b 100644 --- a/dir.c +++ b/dir.c @@ -578,78 +578,6 @@ void add_excludes_from_file(struct dir_struct *dir, const char *fname) die("cannot use %s as an exclude file", fname); } -/* - * Loads the per-directory exclude list for the substring of base - * which has a char length of baselen. - */ -static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) -{ - struct exclude_list_group *group; - struct exclude_list *el; - struct exclude_stack *stk = NULL; - int current; - - if ((!dir->exclude_per_dir) || - (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX)) - return; /* too long a path -- ignore */ - - group = &dir->exclude_list_group[EXC_DIRS]; - - /* Pop the exclude lists from the EXCL_DIRS exclude_list_group - * which originate from directories not in the prefix of the - * path being checked. 
*/ - while ((stk = dir->exclude_stack) != NULL) { - if (stk->baselen <= baselen && - !strncmp(dir->basebuf, base, stk->baselen)) - break; - el = &group->el[dir->exclude_stack->exclude_ix]; - dir->exclude_stack = stk->prev; - free((char *)el->src); /* see strdup() below */ - clear_exclude_list(el); - free(stk); - group->nr--; - } - - /* Read from the parent directories and push them down. */ - current = stk ? stk->baselen : -1; - while (current < baselen) { - struct exclude_stack *stk = xcalloc(1, sizeof(*stk)); - const char *cp; - - if (current < 0) { - cp = base; - current = 0; - } - else { - cp = strchr(base + current + 1, '/'); - if (!cp) - die("oops in prep_exclude"); - cp++; - } - stk->prev = dir->exclude_stack; - stk->baselen = cp - base; - memcpy(dir->basebuf + current, base + current, - stk->baselen - current); - strcpy(dir->basebuf + stk->baselen, dir->exclude_per_dir); - /* - * dir->basebuf gets reused by the traversal, but we - * need fname to remain unchanged to ensure the src - * member of each struct exclude correctly - * back-references its source file. Other invocations - * of add_exclude_list provide stable strings, so we - * strdup() and free() here in the caller. - */ - el = add_exclude_list(dir, EXC_DIRS, strdup(dir->basebuf)); - stk->exclude_ix = group->nr - 1; - add_excludes_from_file_to_list(dir->basebuf, - dir->basebuf, stk->baselen, - el, 1); - dir->exclude_stack = stk; - current = stk->baselen; - } - dir->basebuf[baselen] = '\0'; -} - int match_basename(const char *basename, int basenamelen, const char *pattern, int prefix, int patternlen, int flags) @@ -815,6 +743,78 @@ static struct exclude *last_exclude_matching_from_lists(struct dir_struct *dir, return NULL; } +/* + * Loads the per-directory exclude list for the substring of base + * which has a char length of baselen. 
+ */ +static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) +{ + struct exclude_list_group *group; + struct exclude_list *el; + struct exclude_stack *stk = NULL; + int current; + + if ((!dir->exclude_per_dir) || + (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX)) + return; /* too long a path -- ignore */ + + group = &dir->exclude_list_group[EXC_DIRS]; + + /* Pop the exclude lists from the EXCL_DIRS exclude_list_group + * which originate from directories not in the prefix of the + * path being checked. */ + while ((stk = dir->exclude_stack) != NULL) { + if (stk->baselen <= baselen && + !strncmp(dir->basebuf, base, stk->baselen)) + break; + el = &group->el[dir->exclude_stack->exclude_ix]; + dir->exclude_stack = stk->prev; + free((char *)el->src); /* see strdup() below */ + clear_exclude_list(el); + free(stk); + group->nr--; + } + + /* Read from the parent directories and push them down. */ + current = stk ? stk->baselen : -1; + while (current < baselen) { + struct exclude_stack *stk = xcalloc(1, sizeof(*stk)); + const char *cp; + + if (current < 0) { + cp = base; + current = 0; + } + else { + cp = strchr(base + current + 1, '/'); + if (!cp) + die("oops in prep_exclude"); + cp++; + } + stk->prev = dir->exclude_stack; + stk->baselen = cp - base; + memcpy(dir->basebuf + current, base + current, + stk->baselen - current); + strcpy(dir->basebuf + stk->baselen, dir->exclude_per_dir); + /* + * dir->basebuf gets reused by the traversal, but we + * need fname to remain unchanged to ensure the src + * member of each struct exclude correctly + * back-references its source file. Other invocations + * of add_exclude_list provide stable strings, so we + * strdup() and free() here in the caller. 
+ */ + el = add_exclude_list(dir, EXC_DIRS, strdup(dir->basebuf)); + stk->exclude_ix = group->nr - 1; + add_excludes_from_file_to_list(dir->basebuf, + dir->basebuf, stk->baselen, + el, 1); + dir->exclude_stack = stk; + current = stk->baselen; + } + dir->basebuf[baselen] = '\0'; +} + /* * Loads the exclude lists for the directory containing pathname, then * scans all exclude lists to determine whether pathname is excluded. From 95c6f27164b58152efcfb5aaf6164030f10d9459 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 15 Apr 2013 21:12:14 +0200 Subject: [PATCH 10/14] dir.c: unify is_excluded and is_path_excluded APIs The is_excluded and is_path_excluded APIs are very similar, except for a few noteworthy differences: is_excluded doesn't handle ignored directories; results for paths within ignored directories are incorrect. This is probably based on the premise that recursive directory scans should stop at ignored directories, which is no longer true (in certain cases, read_directory_recursive currently calls is_excluded *and* is_path_excluded to get correct ignored state). is_excluded caches parsed .gitignore files of the last directory in struct dir_struct. If the directory changes, it finds a common parent directory and is very careful to drop only as much state as necessary. On the other hand, is_excluded will also read and parse .gitignore files in already ignored directories, which are completely irrelevant. is_path_excluded correctly handles ignored directories by checking if any component in the path is excluded. As it uses is_excluded internally, this unfortunately forces is_excluded to drop and re-read all .gitignore files, as there is no common parent directory for the root dir. is_path_excluded tracks state in a separate struct path_exclude_check, which is essentially a wrapper of dir_struct with two more fields. However, as is_path_excluded also modifies dir_struct, it is not possible to e.g. 
use multiple path_exclude_check structures with the same dir_struct in parallel. The additional structure just unnecessarily complicates the API. Teach is_excluded / prep_exclude about ignored directories: whenever entering a new directory, first check if the entire directory is excluded. Remember the excluded state in dir_struct. Don't traverse into already ignored directories (i.e. don't read irrelevant .gitignore files). Directories could also be excluded by exclude patterns specified on the command line or .git/info/exclude, so we cannot simply skip prep_exclude entirely if there's no .gitignore file name (dir_struct.exclude_per_dir). Move this check to just before actually reading the file. is_path_excluded is now equivalent to is_excluded, so we can simply redirect to it (the public API is cleaned up in the next patch). The performance impact of the additional ignored check per directory is hardly noticeable when reading directories recursively (e.g. 'git status'). However, performance of git commands using the is_path_excluded API (e.g. 'git ls-files --cached --ignored --exclude-standard') is greatly improved as this no longer re-reads .gitignore files on each call. 
Here's some performance data from the linux and WebKit repos (best of 10 runs on a Debian Linux on SSD, core.preloadIndex=true): | ls-files -ci | status | status --ignored | linux | WebKit | linux | WebKit | linux | WebKit -------+-------+--------+-------+--------+-------+--------- before | 0.506 | 6.539 | 0.212 | 1.555 | 0.323 | 2.541 after | 0.080 | 1.191 | 0.218 | 1.583 | 0.321 | 2.579 gain | 6.325 | 5.490 | 0.972 | 0.982 | 1.006 | 0.985 Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- dir.c | 107 +++++++++++++++++++++++----------------------------------- dir.h | 6 ++-- 2 files changed, 46 insertions(+), 67 deletions(-) diff --git a/dir.c b/dir.c index 7d87c3c52b..8ac3d5a973 100644 --- a/dir.c +++ b/dir.c @@ -754,10 +754,6 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) struct exclude_stack *stk = NULL; int current; - if ((!dir->exclude_per_dir) || - (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX)) - return; /* too long a path -- ignore */ - group = &dir->exclude_list_group[EXC_DIRS]; /* Pop the exclude lists from the EXCL_DIRS exclude_list_group @@ -769,12 +765,17 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) break; el = &group->el[dir->exclude_stack->exclude_ix]; dir->exclude_stack = stk->prev; + dir->exclude = NULL; free((char *)el->src); /* see strdup() below */ clear_exclude_list(el); free(stk); group->nr--; } + /* Skip traversing into sub directories if the parent is excluded */ + if (dir->exclude) + return; + /* Read from the parent directories and push them down. */ current = stk ? 
stk->baselen : -1; while (current < baselen) { @@ -793,22 +794,43 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) } stk->prev = dir->exclude_stack; stk->baselen = cp - base; + stk->exclude_ix = group->nr; + el = add_exclude_list(dir, EXC_DIRS, NULL); memcpy(dir->basebuf + current, base + current, stk->baselen - current); - strcpy(dir->basebuf + stk->baselen, dir->exclude_per_dir); - /* - * dir->basebuf gets reused by the traversal, but we - * need fname to remain unchanged to ensure the src - * member of each struct exclude correctly - * back-references its source file. Other invocations - * of add_exclude_list provide stable strings, so we - * strdup() and free() here in the caller. - */ - el = add_exclude_list(dir, EXC_DIRS, strdup(dir->basebuf)); - stk->exclude_ix = group->nr - 1; - add_excludes_from_file_to_list(dir->basebuf, - dir->basebuf, stk->baselen, - el, 1); + + /* Abort if the directory is excluded */ + if (stk->baselen) { + int dt = DT_DIR; + dir->basebuf[stk->baselen - 1] = 0; + dir->exclude = last_exclude_matching_from_lists(dir, + dir->basebuf, stk->baselen - 1, + dir->basebuf + current, &dt); + dir->basebuf[stk->baselen - 1] = '/'; + if (dir->exclude) { + dir->basebuf[stk->baselen] = 0; + dir->exclude_stack = stk; + return; + } + } + + /* Try to read per-directory file unless path is too long */ + if (dir->exclude_per_dir && + stk->baselen + strlen(dir->exclude_per_dir) < PATH_MAX) { + strcpy(dir->basebuf + stk->baselen, + dir->exclude_per_dir); + /* + * dir->basebuf gets reused by the traversal, but we + * need fname to remain unchanged to ensure the src + * member of each struct exclude correctly + * back-references its source file. Other invocations + * of add_exclude_list provide stable strings, so we + * strdup() and free() here in the caller. 
+ */ + el->src = strdup(dir->basebuf); + add_excludes_from_file_to_list(dir->basebuf, + dir->basebuf, stk->baselen, el, 1); + } dir->exclude_stack = stk; current = stk->baselen; } @@ -831,6 +853,9 @@ static struct exclude *last_exclude_matching(struct dir_struct *dir, prep_exclude(dir, pathname, basename-pathname); + if (dir->exclude) + return dir->exclude; + return last_exclude_matching_from_lists(dir, pathname, pathlen, basename, dtype_p); } @@ -853,13 +878,10 @@ void path_exclude_check_init(struct path_exclude_check *check, struct dir_struct *dir) { check->dir = dir; - check->exclude = NULL; - strbuf_init(&check->path, 256); } void path_exclude_check_clear(struct path_exclude_check *check) { - strbuf_release(&check->path); } /* @@ -875,49 +897,6 @@ struct exclude *last_exclude_matching_path(struct path_exclude_check *check, const char *name, int namelen, int *dtype) { - int i; - struct strbuf *path = &check->path; - struct exclude *exclude; - - /* - * we allow the caller to pass namelen as an optimization; it - * must match the length of the name, as we eventually call - * is_excluded() on the whole name string. - */ - if (namelen < 0) - namelen = strlen(name); - - /* - * If path is non-empty, and name is equal to path or a - * subdirectory of path, name should be excluded, because - * it's inside a directory which is already known to be - * excluded and was previously left in check->path. 
- */ - if (path->len && - path->len <= namelen && - !memcmp(name, path->buf, path->len) && - (!name[path->len] || name[path->len] == '/')) - return check->exclude; - - strbuf_setlen(path, 0); - for (i = 0; name[i]; i++) { - int ch = name[i]; - - if (ch == '/') { - int dt = DT_DIR; - exclude = last_exclude_matching(check->dir, - path->buf, &dt); - if (exclude) { - check->exclude = exclude; - return exclude; - } - } - strbuf_addch(path, ch); - } - - /* An entry in the index; cannot be a directory with subentries */ - strbuf_setlen(path, 0); - return last_exclude_matching(check->dir, name, dtype); } diff --git a/dir.h b/dir.h index c3eb4b520e..cd166d0c63 100644 --- a/dir.h +++ b/dir.h @@ -110,9 +110,11 @@ struct dir_struct { * * exclude_stack points to the top of the exclude_stack, and * basebuf contains the full path to the current - * (sub)directory in the traversal. + * (sub)directory in the traversal. Exclude points to the + * matching exclude struct if the directory is excluded. */ struct exclude_stack *exclude_stack; + struct exclude *exclude; char basebuf[PATH_MAX]; }; @@ -156,8 +158,6 @@ extern int match_pathname(const char *, int, */ struct path_exclude_check { struct dir_struct *dir; - struct exclude *exclude; - struct strbuf path; }; extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *); extern void path_exclude_check_clear(struct path_exclude_check *); From b07bc8c8c3c492d657a8bedf04ff939763ea8222 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 15 Apr 2013 21:12:57 +0200 Subject: [PATCH 11/14] dir.c: replace is_path_excluded with now equivalent is_excluded API Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- builtin/add.c | 5 +-- builtin/check-ignore.c | 6 +--- builtin/ls-files.c | 15 +++----- dir.c | 79 ++++-------------------------------------- dir.h | 16 ++------- unpack-trees.c | 10 +----- unpack-trees.h | 1 - 7 files changed, 16 insertions(+), 116 deletions(-) diff --git a/builtin/add.c 
b/builtin/add.c index ab1c9e8fb7..06f365d129 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -444,9 +444,7 @@ int cmd_add(int argc, const char **argv, const char *prefix) if (pathspec) { int i; - struct path_exclude_check check; - path_exclude_check_init(&check, &dir); if (!seen) seen = find_pathspecs_matching_against_index(pathspec); for (i = 0; pathspec[i]; i++) { @@ -454,7 +452,7 @@ int cmd_add(int argc, const char **argv, const char *prefix) && !file_exists(pathspec[i])) { if (ignore_missing) { int dtype = DT_UNKNOWN; - if (is_path_excluded(&check, pathspec[i], -1, &dtype)) + if (is_excluded(&dir, pathspec[i], &dtype)) dir_add_ignored(&dir, pathspec[i], strlen(pathspec[i])); } else die(_("pathspec '%s' did not match any files"), @@ -462,7 +460,6 @@ int cmd_add(int argc, const char **argv, const char *prefix) } } free(seen); - path_exclude_check_clear(&check); } plug_bulk_checkin(); diff --git a/builtin/check-ignore.c b/builtin/check-ignore.c index 0240f99b57..7388346ef2 100644 --- a/builtin/check-ignore.c +++ b/builtin/check-ignore.c @@ -59,7 +59,6 @@ static int check_ignore(const char *prefix, const char **pathspec) const char *path, *full_path; char *seen; int num_ignored = 0, dtype = DT_UNKNOWN, i; - struct path_exclude_check check; struct exclude *exclude; /* read_cache() is only necessary so we can watch out for submodules. 
*/ @@ -76,7 +75,6 @@ static int check_ignore(const char *prefix, const char **pathspec) return 0; } - path_exclude_check_init(&check, &dir); /* * look for pathspecs matching entries in the index, since these * should not be ignored, in order to be consistent with @@ -90,8 +88,7 @@ static int check_ignore(const char *prefix, const char **pathspec) full_path = check_path_for_gitlink(full_path); die_if_path_beyond_symlink(full_path, prefix); if (!seen[i]) { - exclude = last_exclude_matching_path(&check, full_path, - -1, &dtype); + exclude = last_exclude_matching(&dir, full_path, &dtype); if (exclude) { if (!quiet) output_exclude(path, exclude); @@ -101,7 +98,6 @@ static int check_ignore(const char *prefix, const char **pathspec) } free(seen); clear_directory(&dir); - path_exclude_check_clear(&check); return num_ignored; } diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 175e6e3e72..22020729cb 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -201,19 +201,15 @@ static void show_ru_info(void) } } -static int ce_excluded(struct path_exclude_check *check, struct cache_entry *ce) +static int ce_excluded(struct dir_struct *dir, struct cache_entry *ce) { int dtype = ce_to_dtype(ce); - return is_path_excluded(check, ce->name, ce_namelen(ce), &dtype); + return is_excluded(dir, ce->name, &dtype); } static void show_files(struct dir_struct *dir) { int i; - struct path_exclude_check check; - - if ((dir->flags & DIR_SHOW_IGNORED)) - path_exclude_check_init(&check, dir); /* For cached/deleted files we don't need to even do the readdir */ if (show_others || show_killed) { @@ -227,7 +223,7 @@ static void show_files(struct dir_struct *dir) for (i = 0; i < active_nr; i++) { struct cache_entry *ce = active_cache[i]; if ((dir->flags & DIR_SHOW_IGNORED) && - !ce_excluded(&check, ce)) + !ce_excluded(dir, ce)) continue; if (show_unmerged && !ce_stage(ce)) continue; @@ -243,7 +239,7 @@ static void show_files(struct dir_struct *dir) struct stat st; int err; if ((dir->flags 
& DIR_SHOW_IGNORED) && - !ce_excluded(&check, ce)) + !ce_excluded(dir, ce)) continue; if (ce->ce_flags & CE_UPDATE) continue; @@ -256,9 +252,6 @@ static void show_files(struct dir_struct *dir) show_ce_entry(tag_modified, ce); } } - - if ((dir->flags & DIR_SHOW_IGNORED)) - path_exclude_check_clear(&check); } /* diff --git a/dir.c b/dir.c index 8ac3d5a973..47397600af 100644 --- a/dir.c +++ b/dir.c @@ -843,7 +843,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) * Returns the exclude_list element which matched, or NULL for * undecided. */ -static struct exclude *last_exclude_matching(struct dir_struct *dir, +struct exclude *last_exclude_matching(struct dir_struct *dir, const char *pathname, int *dtype_p) { @@ -865,7 +865,7 @@ static struct exclude *last_exclude_matching(struct dir_struct *dir, * scans all exclude lists to determine whether pathname is excluded. * Returns 1 if true, otherwise 0. */ -static int is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) +int is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) { struct exclude *exclude = last_exclude_matching(dir, pathname, dtype_p); @@ -874,47 +874,6 @@ static int is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_ return 0; } -void path_exclude_check_init(struct path_exclude_check *check, - struct dir_struct *dir) -{ - check->dir = dir; -} - -void path_exclude_check_clear(struct path_exclude_check *check) -{ -} - -/* - * For each subdirectory in name, starting with the top-most, checks - * to see if that subdirectory is excluded, and if so, returns the - * corresponding exclude structure. Otherwise, checks whether name - * itself (which is presumably a file) is excluded. - * - * A path to a directory known to be excluded is left in check->path to - * optimize for repeated checks for files in the same excluded directory. 
- */ -struct exclude *last_exclude_matching_path(struct path_exclude_check *check, - const char *name, int namelen, - int *dtype) -{ - return last_exclude_matching(check->dir, name, dtype); -} - -/* - * Is this name excluded? This is for a caller like show_files() that - * do not honor directory hierarchy and iterate through paths that are - * possibly in an ignored directory. - */ -int is_path_excluded(struct path_exclude_check *check, - const char *name, int namelen, int *dtype) -{ - struct exclude *exclude = - last_exclude_matching_path(check, name, namelen, dtype); - if (exclude) - return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; - return 0; -} - static struct dir_entry *dir_entry_new(const char *pathname, int len) { struct dir_entry *ent; @@ -1086,15 +1045,6 @@ static enum directory_treatment treat_directory(struct dir_struct *dir, /* This is the "show_other_directories" case */ - /* might be a sub directory in an excluded directory */ - if (!exclude) { - struct path_exclude_check check; - int dt = DT_DIR; - path_exclude_check_init(&check, dir); - exclude = is_path_excluded(&check, dirname, len, &dt); - path_exclude_check_clear(&check); - } - /* * We are looking for ignored files and our directory is not ignored, * check if it contains untracked files (i.e. is listed as untracked) @@ -1129,27 +1079,13 @@ static enum directory_treatment treat_directory(struct dir_struct *dir, * * Return 1 for exclude, 0 for include. 
*/ -static int treat_file(struct dir_struct *dir, struct strbuf *path, int exclude, int *dtype) +static int treat_file(struct dir_struct *dir, struct strbuf *path, int exclude) { - struct path_exclude_check check; - int exclude_file = 0; - /* Always exclude indexed files */ if (index_name_exists(&the_index, path->buf, path->len, ignore_case)) return 1; - if (exclude) - exclude_file = !(dir->flags & DIR_SHOW_IGNORED); - else if (dir->flags & DIR_SHOW_IGNORED) { - path_exclude_check_init(&check, dir); - - if (!is_path_excluded(&check, path->buf, path->len, dtype)) - exclude_file = 1; - - path_exclude_check_clear(&check); - } - - return exclude_file; + return exclude == !(dir->flags & DIR_SHOW_IGNORED); } /* @@ -1306,12 +1242,9 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, break; case DT_REG: case DT_LNK: - switch (treat_file(dir, path, exclude, &dtype)) { - case 1: + if (treat_file(dir, path, exclude)) return path_ignored; - default: - break; - } + break; } return path_handled; } diff --git a/dir.h b/dir.h index cd166d0c63..bfe726e58e 100644 --- a/dir.h +++ b/dir.h @@ -151,20 +151,10 @@ extern int match_pathname(const char *, int, const char *, int, const char *, int, int, int); -/* - * The is_excluded() API is meant for callers that check each level of leading - * directory hierarchies with is_excluded() to avoid recursing into excluded - * directories. Callers that do not do so should use this API instead. 
- */ -struct path_exclude_check { - struct dir_struct *dir; -}; -extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *); -extern void path_exclude_check_clear(struct path_exclude_check *); -extern struct exclude *last_exclude_matching_path(struct path_exclude_check *, const char *, - int namelen, int *dtype); -extern int is_path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype); +extern struct exclude *last_exclude_matching(struct dir_struct *dir, + const char *name, int *dtype); +extern int is_excluded(struct dir_struct *dir, const char *name, int *dtype); extern struct exclude_list *add_exclude_list(struct dir_struct *dir, int group_type, const char *src); diff --git a/unpack-trees.c b/unpack-trees.c index 09e53df3b2..ede4299b83 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1026,10 +1026,6 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options o->el = ⪙ } - if (o->dir) { - o->path_exclude_check = xmalloc(sizeof(struct path_exclude_check)); - path_exclude_check_init(o->path_exclude_check, o->dir); - } memset(&o->result, 0, sizeof(o->result)); o->result.initialized = 1; o->result.timestamp.sec = o->src_index->timestamp.sec; @@ -1155,10 +1151,6 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options done: clear_exclude_list(&el); - if (o->path_exclude_check) { - path_exclude_check_clear(o->path_exclude_check); - free(o->path_exclude_check); - } return ret; return_failed: @@ -1375,7 +1367,7 @@ static int check_ok_to_remove(const char *name, int len, int dtype, return 0; if (o->dir && - is_path_excluded(o->path_exclude_check, name, -1, &dtype)) + is_excluded(o->dir, name, &dtype)) /* * ce->name is explicitly excluded, so it is Ok to * overwrite it. 
diff --git a/unpack-trees.h b/unpack-trees.h index ec74a9f19a..5e432f576e 100644 --- a/unpack-trees.h +++ b/unpack-trees.h @@ -52,7 +52,6 @@ struct unpack_trees_options { const char *prefix; int cache_bottom; struct dir_struct *dir; - struct path_exclude_check *path_exclude_check; struct pathspec *pathspec; merge_fn_t fn; const char *msgs[NB_UNPACK_TREES_ERROR_TYPES]; From 8aaf8d7728e8ac50cbf6bcad05b6e896d4e69e0b Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 15 Apr 2013 21:13:35 +0200 Subject: [PATCH 12/14] dir.c: git-status: avoid is_excluded checks for tracked files Checking if a file is in the index is much faster (hashtable lookup) than checking if the file is excluded (linear search over exclude patterns). Skip is_excluded checks for files: move the cache_name_exists check from treat_file to treat_one_path and return early if the file is tracked. This can safely be done as all other code paths also return path_ignored for tracked files, and dir_add_ignored skips tracked files as well. There's just one line left in treat_file, so move this to treat_one_path as well. Here's some performance data for git-status from the linux and WebKit repos (best of 10 runs on a Debian Linux on SSD, core.preloadIndex=true): | status | status --ignored | linux | WebKit | linux | WebKit -------+-------+--------+-------+--------- before | 0.218 | 1.583 | 0.321 | 2.579 after | 0.156 | 0.988 | 0.202 | 1.279 gain | 1.397 | 1.602 | 1.589 | 2.016 Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- dir.c | 38 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/dir.c b/dir.c index 47397600af..9fc032fe3d 100644 --- a/dir.c +++ b/dir.c @@ -1066,28 +1066,6 @@ static enum directory_treatment treat_directory(struct dir_struct *dir, return show_directory; } -/* - * Decide what to do when we find a file while traversing the - * filesystem. Mostly two cases: - * - * 1. 
We are looking for ignored files - * (a) File is ignored, include it - * (b) File is in ignored path, include it - * (c) File is not ignored, exclude it - * - * 2. Other scenarios, include the file if not excluded - * - * Return 1 for exclude, 0 for include. - */ -static int treat_file(struct dir_struct *dir, struct strbuf *path, int exclude) -{ - /* Always exclude indexed files */ - if (index_name_exists(&the_index, path->buf, path->len, ignore_case)) - return 1; - - return exclude == !(dir->flags & DIR_SHOW_IGNORED); -} - /* * This is an inexact early pruning of any recursive directory * reading - if the path cannot possibly be in the pathspec, @@ -1211,7 +1189,16 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, const struct path_simplify *simplify, int dtype, struct dirent *de) { - int exclude = is_excluded(dir, path->buf, &dtype); + int exclude; + if (dtype == DT_UNKNOWN) + dtype = get_dtype(de, path->buf, path->len); + + /* Always exclude indexed files */ + if (dtype != DT_DIR && + cache_name_exists(path->buf, path->len, ignore_case)) + return path_ignored; + + exclude = is_excluded(dir, path->buf, &dtype); if (exclude && (dir->flags & DIR_COLLECT_IGNORED) && exclude_matches_pathspec(path->buf, path->len, simplify)) dir_add_ignored(dir, path->buf, path->len); @@ -1223,9 +1210,6 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, if (exclude && !(dir->flags & DIR_SHOW_IGNORED)) return path_ignored; - if (dtype == DT_UNKNOWN) - dtype = get_dtype(de, path->buf, path->len); - switch (dtype) { default: return path_ignored; @@ -1242,7 +1226,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, break; case DT_REG: case DT_LNK: - if (treat_file(dir, path, exclude)) + if (exclude == !(dir->flags & DIR_SHOW_IGNORED)) return path_ignored; break; } From defd7c7b3717394ee05b454172bf7b1e747af6ae Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 15 Apr 2013 21:14:22 +0200 Subject: [PATCH 13/14] dir.c: git-status 
--ignored: don't scan the work tree three times 'git-status --ignored' recursively scans directories up to three times: 1. To collect untracked files. 2. To collect ignored files. 3. When collecting ignored files, to check that an untracked directory that potentially contains ignored files doesn't also contain untracked files (i.e. isn't already listed as untracked). Let's get rid of case 3 first. Currently, read_directory_recursive returns a boolean whether a directory contains the requested files or not (actually, it returns the number of files, but no caller actually needs that), and DIR_SHOW_IGNORED specifies what we're looking for. To be able to test for both untracked and ignored files in a single scan, we need to return a bit more info, and the result must be independent of the DIR_SHOW_IGNORED flag. Reuse the path_treatment enum as return value of read_directory_recursive. Split path_handled in two separate values path_excluded and path_untracked that don't change their meaning with the DIR_SHOW_IGNORED flag. We don't need an extra value path_untracked_and_excluded, as directories with both untracked and ignored files should be listed as untracked. Rename path_ignored to path_none for clarity (i.e. "don't treat that path" in contrast to "the path is ignored and should be treated according to DIR_SHOW_IGNORED"). Replace enum directory_treatment with path_treatment. That's just another enum with the same meaning, no need to translate back and forth. In treat_directory, get rid of the extra read_directory_recursive call and all the DIR_SHOW_IGNORED-specific code. In read_directory_recursive, decide whether to dir_add_name path_excluded or path_untracked paths based on the DIR_SHOW_IGNORED flag. The return value of read_directory_recursive is the maximum path_treatment of all files and sub-directories. In the check_only case, abort when we've reached the most significant value (path_untracked). 
Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- dir.c | 146 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 72 insertions(+), 74 deletions(-) diff --git a/dir.c b/dir.c index 9fc032fe3d..efd8c1234d 100644 --- a/dir.c +++ b/dir.c @@ -17,7 +17,21 @@ struct path_simplify { const char *path; }; -static int read_directory_recursive(struct dir_struct *dir, const char *path, int len, +/* + * Tells read_directory_recursive how a file or directory should be treated. + * Values are ordered by significance, e.g. if a directory contains both + * excluded and untracked files, it is listed as untracked because + * path_untracked > path_excluded. + */ +enum path_treatment { + path_none = 0, + path_recurse, + path_excluded, + path_untracked +}; + +static enum path_treatment read_directory_recursive(struct dir_struct *dir, + const char *path, int len, int check_only, const struct path_simplify *simplify); static int get_dtype(struct dirent *de, const char *path, int len); @@ -1002,35 +1016,26 @@ static enum exist_status directory_exists_in_index(const char *dirname, int len) * * (a) if "show_other_directories" is true, we show it as * just a directory, unless "hide_empty_directories" is - * also true and the directory is empty, in which case - * we just ignore it entirely. - * if we are looking for ignored directories, look if it - * contains only ignored files to decide if it must be shown as - * ignored or not. + * also true, in which case we need to check if it contains any + * untracked and / or ignored files. * (b) if it looks like a git directory, and we don't have * 'no_gitlinks' set we treat it as a gitlink, and show it * as a directory. * (c) otherwise, we recurse into it. 
*/ -enum directory_treatment { - show_directory, - ignore_directory, - recurse_into_directory -}; - -static enum directory_treatment treat_directory(struct dir_struct *dir, +static enum path_treatment treat_directory(struct dir_struct *dir, const char *dirname, int len, int exclude, const struct path_simplify *simplify) { /* The "len-1" is to strip the final '/' */ switch (directory_exists_in_index(dirname, len-1)) { case index_directory: - return recurse_into_directory; + return path_recurse; case index_gitdir: if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES) - return ignore_directory; - return show_directory; + return path_none; + return path_untracked; case index_nonexistent: if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES) @@ -1038,32 +1043,17 @@ static enum directory_treatment treat_directory(struct dir_struct *dir, if (!(dir->flags & DIR_NO_GITLINKS)) { unsigned char sha1[20]; if (resolve_gitlink_ref(dirname, "HEAD", sha1) == 0) - return show_directory; + return path_untracked; } - return recurse_into_directory; + return path_recurse; } /* This is the "show_other_directories" case */ - /* - * We are looking for ignored files and our directory is not ignored, - * check if it contains untracked files (i.e. is listed as untracked) - */ - if ((dir->flags & DIR_SHOW_IGNORED) && !exclude) { - int ignored; - dir->flags &= ~DIR_SHOW_IGNORED; - ignored = read_directory_recursive(dir, dirname, len, 1, simplify); - dir->flags |= DIR_SHOW_IGNORED; - - if (ignored) - return ignore_directory; - } - if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) - return show_directory; - if (!read_directory_recursive(dir, dirname, len, 1, simplify)) - return ignore_directory; - return show_directory; + return exclude ? 
path_excluded : path_untracked; + + return read_directory_recursive(dir, dirname, len, 1, simplify); } /* @@ -1178,12 +1168,6 @@ static int get_dtype(struct dirent *de, const char *path, int len) return dtype; } -enum path_treatment { - path_ignored, - path_handled, - path_recurse -}; - static enum path_treatment treat_one_path(struct dir_struct *dir, struct strbuf *path, const struct path_simplify *simplify, @@ -1196,7 +1180,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, /* Always exclude indexed files */ if (dtype != DT_DIR && cache_name_exists(path->buf, path->len, ignore_case)) - return path_ignored; + return path_none; exclude = is_excluded(dir, path->buf, &dtype); if (exclude && (dir->flags & DIR_COLLECT_IGNORED) @@ -1208,29 +1192,19 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, * ignored files, ignore it */ if (exclude && !(dir->flags & DIR_SHOW_IGNORED)) - return path_ignored; + return path_excluded; switch (dtype) { default: - return path_ignored; + return path_none; case DT_DIR: strbuf_addch(path, '/'); - switch (treat_directory(dir, path->buf, path->len, exclude, simplify)) { - case show_directory: - break; - case recurse_into_directory: - return path_recurse; - case ignore_directory: - return path_ignored; - } - break; + return treat_directory(dir, path->buf, path->len, exclude, + simplify); case DT_REG: case DT_LNK: - if (exclude == !(dir->flags & DIR_SHOW_IGNORED)) - return path_ignored; - break; + return exclude ? 
path_excluded : path_untracked; } - return path_handled; } static enum path_treatment treat_path(struct dir_struct *dir, @@ -1242,11 +1216,11 @@ static enum path_treatment treat_path(struct dir_struct *dir, int dtype; if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git")) - return path_ignored; + return path_none; strbuf_setlen(path, baselen); strbuf_addstr(path, de->d_name); if (simplify_away(path->buf, path->len, simplify)) - return path_ignored; + return path_none; dtype = DTYPE(de); return treat_one_path(dir, path, simplify, dtype, de); @@ -1260,14 +1234,16 @@ static enum path_treatment treat_path(struct dir_struct *dir, * * Also, we ignore the name ".git" (even if it is not a directory). * That likely will not change. + * + * Returns the most significant path_treatment value encountered in the scan. */ -static int read_directory_recursive(struct dir_struct *dir, +static enum path_treatment read_directory_recursive(struct dir_struct *dir, const char *base, int baselen, int check_only, const struct path_simplify *simplify) { DIR *fdir; - int contents = 0; + enum path_treatment state, subdir_state, dir_state = path_none; struct dirent *de; struct strbuf path = STRBUF_INIT; @@ -1278,26 +1254,48 @@ static int read_directory_recursive(struct dir_struct *dir, goto out; while ((de = readdir(fdir)) != NULL) { - switch (treat_path(dir, de, &path, baselen, simplify)) { - case path_recurse: - contents += read_directory_recursive(dir, path.buf, + /* check how the file or directory should be treated */ + state = treat_path(dir, de, &path, baselen, simplify); + if (state > dir_state) + dir_state = state; + + /* recurse into subdir if instructed by treat_path */ + if (state == path_recurse) { + subdir_state = read_directory_recursive(dir, path.buf, path.len, check_only, simplify); + if (subdir_state > dir_state) + dir_state = subdir_state; + } + + if (check_only) { + /* abort early if maximum state has been reached */ + if (dir_state == path_untracked) + break; + /* 
skip the dir_add_* part */ continue; - case path_ignored: - continue; - case path_handled: + } + + /* add the path to the appropriate result list */ + switch (state) { + case path_excluded: + if (dir->flags & DIR_SHOW_IGNORED) + dir_add_name(dir, path.buf, path.len); + break; + + case path_untracked: + if (!(dir->flags & DIR_SHOW_IGNORED)) + dir_add_name(dir, path.buf, path.len); + break; + + default: break; } - contents++; - if (check_only) - break; - dir_add_name(dir, path.buf, path.len); } closedir(fdir); out: strbuf_release(&path); - return contents; + return dir_state; } static int cmp_name(const void *p1, const void *p2) @@ -1368,7 +1366,7 @@ static int treat_leading_path(struct dir_struct *dir, if (simplify_away(sb.buf, sb.len, simplify)) break; if (treat_one_path(dir, &sb, simplify, - DT_DIR, NULL) == path_ignored) + DT_DIR, NULL) == path_none) break; /* do not recurse into it */ if (len <= baselen) { rc = 1; From 0aaf62b6e018484bad9cea47dc00644d57b7ad49 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Mon, 15 Apr 2013 21:15:03 +0200 Subject: [PATCH 14/14] dir.c: git-status --ignored: don't scan the work tree twice 'git-status --ignored' still scans the work tree twice to collect untracked and ignored files, respectively. fill_directory / read_directory already supports collecting untracked and ignored files in a single directory scan. However, the DIR_COLLECT_IGNORED flag to enable this has some git-add specific side-effects (e.g. it doesn't recurse into ignored directories, so listing ignored files with --untracked=all doesn't work). The DIR_SHOW_IGNORED flag doesn't list untracked files and returns ignored files in dir_struct.entries[] (instead of dir_struct.ignored[] as DIR_COLLECT_IGNORED). DIR_SHOW_IGNORED is used all throughout git. 
We don't want to break the existing API, so let's introduce a new flag DIR_SHOW_IGNORED_TOO that lists untracked as well as ignored files similar to DIR_COLLECT_IGNORED, but will recurse into sub-directories based on the other flags as DIR_SHOW_IGNORED does. In dir.c::read_directory_recursive, add ignored files to either dir_struct.entries[] or dir_struct.ignored[] based on the flags. Also move the DIR_COLLECT_IGNORED case here so that filling result lists is in a common place. In wt-status.c::wt_status_collect_untracked, use the new flag and read results from dir_struct.ignored[]. Remove the extra fill_directory call. builtin/check-ignore.c doesn't call fill_directory, so setting the git-add specific DIR_COLLECT_IGNORED flag has no effect here. Remove for clarity. Update API documentation to reflect the changes. Performance: with this patch, 'git-status --ignored' is typically as fast as 'git-status'. Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- .../technical/api-directory-listing.txt | 25 ++++++++++++++++--- builtin/check-ignore.c | 1 - dir.c | 10 +++++--- dir.h | 3 ++- wt-status.c | 24 ++++++++---------- 5 files changed, 41 insertions(+), 22 deletions(-) diff --git a/Documentation/technical/api-directory-listing.txt b/Documentation/technical/api-directory-listing.txt index 1f349b28ae..7f8e78d916 100644 --- a/Documentation/technical/api-directory-listing.txt +++ b/Documentation/technical/api-directory-listing.txt @@ -22,12 +22,23 @@ The notable options are: `flags`:: - A bit-field of options: + A bit-field of options (the `*IGNORED*` flags are mutually exclusive): `DIR_SHOW_IGNORED`::: - The traversal is for finding just ignored files, not unignored - files. + Return just ignored files in `entries[]`, not untracked files. + +`DIR_SHOW_IGNORED_TOO`::: + + Similar to `DIR_SHOW_IGNORED`, but return ignored files in `ignored[]` + in addition to untracked files in `entries[]`. + +`DIR_COLLECT_IGNORED`::: + + Special mode for git-add. 
Return ignored files in `ignored[]` and + untracked files in `entries[]`. Only returns ignored files that match + pathspec exactly (no wildcards). Does not recurse into ignored + directories. `DIR_SHOW_OTHER_DIRECTORIES`::: @@ -57,6 +68,14 @@ The result of the enumeration is left in these fields: Internal use; keeps track of allocation of `entries[]` array. +`ignored[]`:: + + An array of `struct dir_entry`, used for ignored paths with the + `DIR_SHOW_IGNORED_TOO` and `DIR_COLLECT_IGNORED` flags. + +`ignored_nr`:: + + The number of members in `ignored[]` array. Calling sequence ---------------- diff --git a/builtin/check-ignore.c b/builtin/check-ignore.c index 7388346ef2..854a88a056 100644 --- a/builtin/check-ignore.c +++ b/builtin/check-ignore.c @@ -66,7 +66,6 @@ static int check_ignore(const char *prefix, const char **pathspec) die(_("index file corrupt")); memset(&dir, 0, sizeof(dir)); - dir.flags |= DIR_COLLECT_IGNORED; setup_standard_excludes(&dir); if (!pathspec || !*pathspec) { diff --git a/dir.c b/dir.c index efd8c1234d..a5926fbd1a 100644 --- a/dir.c +++ b/dir.c @@ -1183,15 +1183,12 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, return path_none; exclude = is_excluded(dir, path->buf, &dtype); - if (exclude && (dir->flags & DIR_COLLECT_IGNORED) - && exclude_matches_pathspec(path->buf, path->len, simplify)) - dir_add_ignored(dir, path->buf, path->len); /* * Excluded? 
If we don't explicitly want to show * ignored files, ignore it */ - if (exclude && !(dir->flags & DIR_SHOW_IGNORED)) + if (exclude && !(dir->flags & (DIR_SHOW_IGNORED|DIR_SHOW_IGNORED_TOO))) return path_excluded; switch (dtype) { @@ -1280,6 +1277,11 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, case path_excluded: if (dir->flags & DIR_SHOW_IGNORED) dir_add_name(dir, path.buf, path.len); + else if ((dir->flags & DIR_SHOW_IGNORED_TOO) || + ((dir->flags & DIR_COLLECT_IGNORED) && + exclude_matches_pathspec(path.buf, path.len, + simplify))) + dir_add_ignored(dir, path.buf, path.len); break; case path_untracked: diff --git a/dir.h b/dir.h index bfe726e58e..3d6b80c933 100644 --- a/dir.h +++ b/dir.h @@ -79,7 +79,8 @@ struct dir_struct { DIR_SHOW_OTHER_DIRECTORIES = 1<<1, DIR_HIDE_EMPTY_DIRECTORIES = 1<<2, DIR_NO_GITLINKS = 1<<3, - DIR_COLLECT_IGNORED = 1<<4 + DIR_COLLECT_IGNORED = 1<<4, + DIR_SHOW_IGNORED_TOO = 1<<5 } flags; struct dir_entry **entries; struct dir_entry **ignored; diff --git a/wt-status.c b/wt-status.c index 676b058e59..bf84a86ee3 100644 --- a/wt-status.c +++ b/wt-status.c @@ -511,9 +511,12 @@ static void wt_status_collect_untracked(struct wt_status *s) if (s->show_untracked_files != SHOW_ALL_UNTRACKED_FILES) dir.flags |= DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES; + if (s->show_ignored_files) + dir.flags |= DIR_SHOW_IGNORED_TOO; setup_standard_excludes(&dir); fill_directory(&dir, s->pathspec); + for (i = 0; i < dir.nr; i++) { struct dir_entry *ent = dir.entries[i]; if (cache_name_is_other(ent->name, ent->len) && @@ -522,22 +525,17 @@ static void wt_status_collect_untracked(struct wt_status *s) free(ent); } - if (s->show_ignored_files) { - dir.nr = 0; - dir.flags = DIR_SHOW_IGNORED; - if (s->show_untracked_files != SHOW_ALL_UNTRACKED_FILES) - dir.flags |= DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES; - fill_directory(&dir, s->pathspec); - for (i = 0; i < dir.nr; i++) { - struct dir_entry *ent = 
dir.entries[i]; - if (cache_name_is_other(ent->name, ent->len) && - match_pathspec(s->pathspec, ent->name, ent->len, 0, NULL)) - string_list_insert(&s->ignored, ent->name); - free(ent); - } + for (i = 0; i < dir.ignored_nr; i++) { + struct dir_entry *ent = dir.ignored[i]; + if (cache_name_is_other(ent->name, ent->len) && + match_pathspec(s->pathspec, ent->name, ent->len, 0, NULL)) + string_list_insert(&s->ignored, ent->name); + free(ent); } free(dir.entries); + free(dir.ignored); + clear_directory(&dir); if (advice_status_u_option) { struct timeval t_end;