From b3487ccc0bffc28e134333b164d0d84fdd15d9f4 Mon Sep 17 00:00:00 2001 From: Samuel Lijin Date: Thu, 18 May 2017 04:21:49 -0400 Subject: [PATCH 1/6] t7300: clean -d should skip dirs with ignored files If git sees a directory which contains only untracked and ignored files, clean -d should not remove that directory. It was recently discovered that this is *not* true of git clean -d, and it's possible that this has never worked correctly; this test and its accompanying patch series aims to fix that. Signed-off-by: Samuel Lijin Signed-off-by: Junio C Hamano --- t/t7300-clean.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index b89fd2a6ad..3a2d709c29 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -653,4 +653,20 @@ test_expect_success 'git clean -d respects pathspecs (pathspec is prefix of dir) test_path_is_dir foobar ' +test_expect_failure 'git clean -d skips untracked dirs containing ignored files' ' + echo /foo/bar >.gitignore && + echo ignoreme >>.gitignore && + rm -rf foo && + mkdir -p foo/a/aa/aaa foo/b/bb/bbb && + touch foo/bar foo/baz foo/a/aa/ignoreme foo/b/ignoreme foo/b/bb/1 foo/b/bb/2 && + git clean -df && + test_path_is_dir foo && + test_path_is_file foo/bar && + test_path_is_missing foo/baz && + test_path_is_file foo/a/aa/ignoreme && + test_path_is_missing foo/a/aa/aaa && + test_path_is_file foo/b/ignoreme && + test_path_is_missing foo/b/bb +' + test_done From 0a81d4a55917514091727696a09ac649d03b57ff Mon Sep 17 00:00:00 2001 From: Samuel Lijin Date: Thu, 18 May 2017 04:21:50 -0400 Subject: [PATCH 2/6] t7061: status --ignored should search untracked dirs Per eb8c5b87, `status --ignored` by design does not list ignored files if they are in a directory which contains only ignored and untracked files (which is itself considered to be untracked) without `-uall`. This does not make sense for `--ignored`, which claims to "Show ignored files as well." Thus we revisit eb8c5b87 and decide that for such directories, `status --ignored` will list the directory as untracked *and* list all ignored files within said directory even without `-uall`. Signed-off-by: Samuel Lijin Signed-off-by: Junio C Hamano --- t/t7061-wtstatus-ignore.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh index cdc0747bf0..15e7592b6b 100755 --- a/t/t7061-wtstatus-ignore.sh +++ b/t/t7061-wtstatus-ignore.sh @@ -9,9 +9,10 @@ cat >expected <<\EOF ?? actual ?? expected ?? untracked/ +!! untracked/ignored EOF -test_expect_success 'status untracked directory with --ignored' ' +test_expect_failure 'status untracked directory with --ignored' ' echo "ignored" >.gitignore && mkdir untracked && : >untracked/ignored && @@ -20,7 +21,7 @@ test_expect_success 'status untracked directory with --ignored' ' test_cmp expected actual ' -test_expect_success 'same with gitignore starting with BOM' ' +test_expect_failure 'same with gitignore starting with BOM' ' printf "\357\273\277ignored\n" >.gitignore && mkdir -p untracked && : >untracked/ignored && From df5bcdf83aeb94718602ebc8c0f597166bb493f1 Mon Sep 17 00:00:00 2001 From: Samuel Lijin Date: Thu, 18 May 2017 04:21:51 -0400 Subject: [PATCH 3/6] dir: recurse into untracked dirs for ignored files We consider directories containing only untracked and ignored files to be themselves untracked, which in the usual case means we don't have to search these directories. This is problematic when we want to collect ignored files with DIR_SHOW_IGNORED_TOO, though, so we teach read_directory_recursive() to recurse into untracked directories to find the ignored files they contain when DIR_SHOW_IGNORED_TOO is set. This has the side effect of also collecting all untracked files in untracked directories as well. Signed-off-by: Samuel Lijin Signed-off-by: Junio C Hamano --- dir.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dir.c b/dir.c index aeeb5ce104..6b4eeef584 100644 --- a/dir.c +++ b/dir.c @@ -1747,7 +1747,10 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, dir_state = state; /* recurse into subdir if instructed by treat_path */ - if (state == path_recurse) { + if ((state == path_recurse) || + ((state == path_untracked) && + (dir->flags & DIR_SHOW_IGNORED_TOO) && + (get_dtype(cdir.de, path.buf, path.len) == DT_DIR))) { struct untracked_cache_dir *ud; ud = lookup_untracked(dir->untracked, untracked, path.buf + baselen, From fb898888491b83c9a3396fb559032ca78807a0c0 Mon Sep 17 00:00:00 2001 From: Samuel Lijin Date: Thu, 18 May 2017 04:21:52 -0400 Subject: [PATCH 4/6] dir: hide untracked contents of untracked dirs When we taught read_directory_recursive() to recurse into untracked directories in search of ignored files given DIR_SHOW_IGNORED_TOO, that had the side effect of teaching it to collect the untracked contents of untracked directories. It doesn't always make sense to return these, though (we do need them for `clean -d`), so we introduce a flag (DIR_KEEP_UNTRACKED_CONTENTS) to control whether or not read_directory() strips dir->entries of the untracked contents of untracked dirs. We also introduce check_contains() to check if one dir_entry corresponds to a path which contains the path corresponding to another dir_entry. This also fixes known breakages in t7061, since status --ignored now searches untracked directories for ignored files. Signed-off-by: Samuel Lijin Signed-off-by: Junio C Hamano --- .../technical/api-directory-listing.txt | 6 ++++ dir.c | 31 +++++++++++++++++++ dir.h | 3 +- t/t7061-wtstatus-ignore.sh | 4 +-- 4 files changed, 41 insertions(+), 3 deletions(-) diff --git a/Documentation/technical/api-directory-listing.txt b/Documentation/technical/api-directory-listing.txt index 7f8e78d916..6c77b4920c 100644 --- a/Documentation/technical/api-directory-listing.txt +++ b/Documentation/technical/api-directory-listing.txt @@ -33,6 +33,12 @@ The notable options are: Similar to `DIR_SHOW_IGNORED`, but return ignored files in `ignored[]` in addition to untracked files in `entries[]`. +`DIR_KEEP_UNTRACKED_CONTENTS`::: + + Only has meaning if `DIR_SHOW_IGNORED_TOO` is also set; if this is set, the + untracked contents of untracked directories are also returned in + `entries[]`. + `DIR_COLLECT_IGNORED`::: Special mode for git-add. Return ignored files in `ignored[]` and diff --git a/dir.c b/dir.c index 6b4eeef584..61561ea91d 100644 --- a/dir.c +++ b/dir.c @@ -1813,6 +1813,14 @@ static int cmp_name(const void *p1, const void *p2) return name_compare(e1->name, e1->len, e2->name, e2->len); } +/* check if *out lexically strictly contains *in */ +static int check_contains(const struct dir_entry *out, const struct dir_entry *in) +{ + return (out->len < in->len) && + (out->name[out->len - 1] == '/') && + !memcmp(out->name, in->name, out->len); +} + static int treat_leading_path(struct dir_struct *dir, const char *path, int len, const struct pathspec *pathspec) @@ -2028,6 +2036,29 @@ int read_directory(struct dir_struct *dir, const char *path, read_directory_recursive(dir, path, len, untracked, 0, pathspec); QSORT(dir->entries, dir->nr, cmp_name); QSORT(dir->ignored, dir->ignored_nr, cmp_name); + + /* + * If DIR_SHOW_IGNORED_TOO is set, read_directory_recursive() will + * also pick up untracked contents of untracked dirs; by default + * we discard these, but given DIR_KEEP_UNTRACKED_CONTENTS we do not. + */ + if ((dir->flags & DIR_SHOW_IGNORED_TOO) && + !(dir->flags & DIR_KEEP_UNTRACKED_CONTENTS)) { + int i, j; + + /* remove from dir->entries untracked contents of untracked dirs */ + for (i = j = 0; j < dir->nr; j++) { + if (i && check_contains(dir->entries[i - 1], dir->entries[j])) { + free(dir->entries[j]); + dir->entries[j] = NULL; + } else { + dir->entries[i++] = dir->entries[j]; + } + } + + dir->nr = i; + } + if (dir->untracked) { static struct trace_key trace_untracked_stats = TRACE_KEY_INIT(UNTRACKED_STATS); trace_printf_key(&trace_untracked_stats, diff --git a/dir.h b/dir.h index bf23a470af..650e54bdf6 100644 --- a/dir.h +++ b/dir.h @@ -151,7 +151,8 @@ struct dir_struct { DIR_NO_GITLINKS = 1<<3, DIR_COLLECT_IGNORED = 1<<4, DIR_SHOW_IGNORED_TOO = 1<<5, - DIR_COLLECT_KILLED_ONLY = 1<<6 + DIR_COLLECT_KILLED_ONLY = 1<<6, + DIR_KEEP_UNTRACKED_CONTENTS = 1<<7 } flags; struct dir_entry **entries; struct dir_entry **ignored; diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh index 15e7592b6b..fc6013ba3c 100755 --- a/t/t7061-wtstatus-ignore.sh +++ b/t/t7061-wtstatus-ignore.sh @@ -12,7 +12,7 @@ cat >expected <<\EOF !! untracked/ignored EOF -test_expect_failure 'status untracked directory with --ignored' ' +test_expect_success 'status untracked directory with --ignored' ' echo "ignored" >.gitignore && mkdir untracked && : >untracked/ignored && @@ -21,7 +21,7 @@ test_expect_failure 'status untracked directory with --ignored' ' test_cmp expected actual ' -test_expect_failure 'same with gitignore starting with BOM' ' +test_expect_success 'same with gitignore starting with BOM' ' printf "\357\273\277ignored\n" >.gitignore && mkdir -p untracked && : >untracked/ignored && From bbf504a9957e8a2a262619641ffa30348d71a76f Mon Sep 17 00:00:00 2001 From: Samuel Lijin Date: Thu, 18 May 2017 04:21:53 -0400 Subject: [PATCH 5/6] dir: expose cmp_name() and check_contains() We want to use cmp_name() and check_contains() (which both compare `struct dir_entry`s, the former in terms of the sort order, the latter in terms of whether one lexically contains another) outside of dir.c, so we have to (1) change their linkage and (2) rename them as appropriate for the global namespace. The second is achieved by renaming cmp_name() to cmp_dir_entry() and check_contains() to check_dir_entry_contains(). Signed-off-by: Samuel Lijin Signed-off-by: Junio C Hamano --- dir.c | 11 ++++++----- dir.h | 3 +++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/dir.c b/dir.c index 61561ea91d..31c6e1dac0 100644 --- a/dir.c +++ b/dir.c @@ -1805,7 +1805,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, return dir_state; } -static int cmp_name(const void *p1, const void *p2) +int cmp_dir_entry(const void *p1, const void *p2) { const struct dir_entry *e1 = *(const struct dir_entry **)p1; const struct dir_entry *e2 = *(const struct dir_entry **)p2; @@ -1814,7 +1814,7 @@ static int cmp_name(const void *p1, const void *p2) } /* check if *out lexically strictly contains *in */ -static int check_contains(const struct dir_entry *out, const struct dir_entry *in) +int check_dir_entry_contains(const struct dir_entry *out, const struct dir_entry *in) { return (out->len < in->len) && (out->name[out->len - 1] == '/') && @@ -2034,8 +2034,8 @@ int read_directory(struct dir_struct *dir, const char *path, dir->untracked = NULL; if (!len || treat_leading_path(dir, path, len, pathspec)) read_directory_recursive(dir, path, len, untracked, 0, pathspec); - QSORT(dir->entries, dir->nr, cmp_name); - QSORT(dir->ignored, dir->ignored_nr, cmp_name); + QSORT(dir->entries, dir->nr, cmp_dir_entry); + QSORT(dir->ignored, dir->ignored_nr, cmp_dir_entry); /* * If DIR_SHOW_IGNORED_TOO is set, read_directory_recursive() will @@ -2048,7 +2048,8 @@ int read_directory(struct dir_struct *dir, const char *path, /* remove from dir->entries untracked contents of untracked dirs */ for (i = j = 0; j < dir->nr; j++) { - if (i && check_contains(dir->entries[i - 1], dir->entries[j])) { + if (i && + check_dir_entry_contains(dir->entries[i - 1], dir->entries[j])) { free(dir->entries[j]); dir->entries[j] = NULL; } else { diff --git a/dir.h b/dir.h index 650e54bdf6..edb5fda586 100644 --- a/dir.h +++ b/dir.h @@ -327,6 +327,9 @@ static inline int dir_path_match(const struct dir_entry *ent, has_trailing_dir); } +int cmp_dir_entry(const void *p1, const void *p2); +int check_dir_entry_contains(const struct dir_entry *out, const struct dir_entry *in); + void untracked_cache_invalidate_path(struct index_state *, const char *); void untracked_cache_remove_from_index(struct index_state *, const char *); void untracked_cache_add_to_index(struct index_state *, const char *); From 6b1db43109ab3d4c92e61874cd149779c66016db Mon Sep 17 00:00:00 2001 From: Samuel Lijin Date: Tue, 23 May 2017 06:09:37 -0400 Subject: [PATCH 6/6] clean: teach clean -d to preserve ignored paths There is an implicit assumption that a directory containing only untracked and ignored paths should itself be considered untracked. This makes sense in use cases where we're asking if a directory should be added to the git database, but not when we're asking if a directory can be safely removed from the working tree; as a result, clean -d would assume that an "untracked" directory containing ignored paths could be deleted, even though doing so would also remove the ignored paths. To get around this, we teach clean -d to collect ignored paths and skip an untracked directory if it contained an ignored path, instead just removing the untracked contents thereof. To achieve this, cmd_clean() has to collect all untracked contents of untracked directories, in addition to all ignored paths, to determine which untracked dirs must be skipped (because they contain ignored paths) and which ones should *not* be skipped. For this purpose, correct_untracked_entries() is introduced to prune a given dir_struct of untracked entries containing ignored paths and those untracked entries encompassed by the untracked entries which are not pruned away. A memory leak is also fixed in cmd_clean(). This also fixes the known breakage in t7300, since clean -d now skips untracked directories containing ignored paths. Signed-off-by: Samuel Lijin Signed-off-by: Junio C Hamano --- builtin/clean.c | 42 ++++++++++++++++++++++++++++++++++++++++++ t/t7300-clean.sh | 2 +- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/builtin/clean.c b/builtin/clean.c index d6bc3aaaea..7272033187 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -851,6 +851,38 @@ static void interactive_main_loop(void) } } +static void correct_untracked_entries(struct dir_struct *dir) +{ + int src, dst, ign; + + for (src = dst = ign = 0; src < dir->nr; src++) { + /* skip paths in ignored[] that cannot be inside entries[src] */ + while (ign < dir->ignored_nr && + 0 <= cmp_dir_entry(&dir->entries[src], &dir->ignored[ign])) + ign++; + + if (ign < dir->ignored_nr && + check_dir_entry_contains(dir->entries[src], dir->ignored[ign])) { + /* entries[src] contains an ignored path, so we drop it */ + free(dir->entries[src]); + } else { + struct dir_entry *ent = dir->entries[src++]; + + /* entries[src] does not contain an ignored path, so we keep it */ + dir->entries[dst++] = ent; + + /* then discard paths in entries[] contained inside entries[src] */ + while (src < dir->nr && + check_dir_entry_contains(ent, dir->entries[src])) + free(dir->entries[src++]); + + /* compensate for the outer loop's loop control */ + src--; + } + } + dir->nr = dst; +} + int cmd_clean(int argc, const char **argv, const char *prefix) { int i, res; @@ -910,6 +942,9 @@ int cmd_clean(int argc, const char **argv, const char *prefix) dir.flags |= DIR_SHOW_OTHER_DIRECTORIES; + if (remove_directories) + dir.flags |= DIR_SHOW_IGNORED_TOO | DIR_KEEP_UNTRACKED_CONTENTS; + if (read_cache() < 0) die(_("index file corrupt")); @@ -925,6 +960,7 @@ int cmd_clean(int argc, const char **argv, const char *prefix) prefix, argv); fill_directory(&dir, &pathspec); + correct_untracked_entries(&dir); for (i = 0; i < dir.nr; i++) { struct dir_entry *ent = dir.entries[i]; @@ -952,6 +988,12 @@ int cmd_clean(int argc, const char **argv, const char *prefix) string_list_append(&del_list, rel); } + for (i = 0; i < dir.nr; i++) + free(dir.entries[i]); + + for (i = 0; i < dir.ignored_nr; i++) + free(dir.ignored[i]); + if (interactive && del_list.nr > 0) interactive_main_loop(); diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index 3a2d709c29..7b36954d63 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -653,7 +653,7 @@ test_expect_success 'git clean -d respects pathspecs (pathspec is prefix of dir) test_path_is_dir foobar ' -test_expect_failure 'git clean -d skips untracked dirs containing ignored files' ' +test_expect_success 'git clean -d skips untracked dirs containing ignored files' ' echo /foo/bar >.gitignore && echo ignoreme >>.gitignore && rm -rf foo &&