From bd2f371d34b495ebbc4689604cfc34a825c47d2b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 26 Mar 2013 10:28:07 -0700 Subject: [PATCH 1/6] attr.c::path_matches(): the basename is part of the pathname The function takes two strings (pathname and basename) as if they are independent strings, but in reality, the latter is always pointing into a substring in the former. Clarify this relationship by expressing the latter as an offset into the former. Signed-off-by: Junio C Hamano --- attr.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/attr.c b/attr.c index ab2aab28de..4cfe0ee0fc 100644 --- a/attr.c +++ b/attr.c @@ -655,7 +655,7 @@ static void prepare_attr_stack(const char *path, int dirlen) } static int path_matches(const char *pathname, int pathlen, - const char *basename, + int basename_offset, const struct pattern *pat, const char *base, int baselen) { @@ -667,8 +667,8 @@ static int path_matches(const char *pathname, int pathlen, return 0; if (pat->flags & EXC_FLAG_NODIR) { - return match_basename(basename, - pathlen - (basename - pathname), + return match_basename(pathname + basename_offset, + pathlen - basename_offset, pattern, prefix, pat->patternlen, pat->flags); } @@ -701,7 +701,7 @@ static int fill_one(const char *what, struct match_attr *a, int rem) return rem; } -static int fill(const char *path, int pathlen, const char *basename, +static int fill(const char *path, int pathlen, int basename_offset, struct attr_stack *stk, int rem) { int i; @@ -711,7 +711,7 @@ static int fill(const char *path, int pathlen, const char *basename, struct match_attr *a = stk->attrs[i]; if (a->is_macro) continue; - if (path_matches(path, pathlen, basename, + if (path_matches(path, pathlen, basename_offset, &a->u.pat, base, stk->originlen)) rem = fill_one("fill", a, rem); } @@ -750,7 +750,8 @@ static void collect_all_attrs(const char *path) { struct attr_stack *stk; int i, pathlen, rem, dirlen; - const char *basename, *cp, *last_slash = NULL; + const char *cp, *last_slash = NULL; + int basename_offset; for (cp = path; *cp; cp++) { if (*cp == '/' && cp[1]) @@ -758,10 +759,10 @@ static void collect_all_attrs(const char *path) } pathlen = cp - path; if (last_slash) { - basename = last_slash + 1; + basename_offset = last_slash + 1 - path; dirlen = last_slash - path; } else { - basename = path; + basename_offset = 0; dirlen = 0; } @@ -771,7 +772,7 @@ static void collect_all_attrs(const char *path) rem = attr_nr; for (stk = attr_stack; 0 < rem && stk; stk = stk->prev) - rem = fill(path, pathlen, basename, stk, rem); + rem = fill(path, pathlen, basename_offset, stk, rem); } int git_check_attr(const char *path, int num, struct git_attr_check *check) From dc09e9ec43c09cdf803cc4f39f1fcb8ebcf80eb1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 28 Mar 2013 17:49:13 -0400 Subject: [PATCH 2/6] attr.c::path_matches(): special case paths that end with a slash The function is given a string that ends with a slash to signal that the path is a directory to make sure that a pattern that ends with a slash (i.e. MUSTBEDIR) can tell directories and non-directories apart. However, the pattern itself (pat->pattern and pat->patternlen) that came from such a MUSTBEDIR pattern is represented as a string that ends with a slash, but patternlen does not count that trailing slash. A MUSTBEDIR pattern "element/" is represented as a counted string <"element/", 7> and this must match match pathname "element/". Because match_basename() and match_pathname() want to see pathname "element" to match against the pattern <"element/", 7>, reduce the length of the path to exclude the trailing slash when calling these functions. Signed-off-by: Junio C Hamano Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- attr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/attr.c b/attr.c index 4cfe0ee0fc..4d620bc52a 100644 --- a/attr.c +++ b/attr.c @@ -661,18 +661,18 @@ static int path_matches(const char *pathname, int pathlen, { const char *pattern = pat->pattern; int prefix = pat->nowildcardlen; + int isdir = (pathlen && pathname[pathlen - 1] == '/'); - if ((pat->flags & EXC_FLAG_MUSTBEDIR) && - ((!pathlen) || (pathname[pathlen-1] != '/'))) + if ((pat->flags & EXC_FLAG_MUSTBEDIR) && !isdir) return 0; if (pat->flags & EXC_FLAG_NODIR) { return match_basename(pathname + basename_offset, - pathlen - basename_offset, + pathlen - basename_offset - isdir, pattern, prefix, pat->patternlen, pat->flags); } - return match_pathname(pathname, pathlen, + return match_pathname(pathname, pathlen - isdir, base, baselen, pattern, prefix, pat->patternlen, pat->flags); } From 0b6e56dfe6c7f75c2a02cc0cf8731c9e62b6d3d1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 28 Mar 2013 17:47:28 -0400 Subject: [PATCH 3/6] dir.c::match_basename(): pay attention to the length of string parameters The function takes two counted strings ( and ) as parameters, together with prefix (the length of the prefix in pattern that is to be matched literally without globbing against the basename) and EXC_* flags that tells it how to match the pattern against the basename. However, it did not pay attention to the length of these counted strings. Update them to do the following: * When the entire pattern is to be matched literally, the pattern matches the basename only when the lengths of them are the same, and they match up to that length. * When the pattern is "*" followed by a string to be matched literally, make sure that the basenamelen is equal or longer than the "literal" part of the pattern, and the tail of the basename string matches that literal part. * Otherwise, use the new fnmatch_icase_mem helper to make sure we only lookmake sure we use only look at the counted part of the strings. Because these counted strings are full strings most of the time, we check for termination to avoid unnecessary allocation. Signed-off-by: Junio C Hamano Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- dir.c | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/dir.c b/dir.c index 5a83aa7897..0db79f9cf6 100644 --- a/dir.c +++ b/dir.c @@ -34,6 +34,33 @@ int fnmatch_icase(const char *pattern, const char *string, int flags) return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0)); } +static int fnmatch_icase_mem(const char *pattern, int patternlen, + const char *string, int stringlen, + int flags) +{ + int match_status; + struct strbuf pat_buf = STRBUF_INIT; + struct strbuf str_buf = STRBUF_INIT; + const char *use_pat = pattern; + const char *use_str = string; + + if (pattern[patternlen]) { + strbuf_add(&pat_buf, pattern, patternlen); + use_pat = pat_buf.buf; + } + if (string[stringlen]) { + strbuf_add(&str_buf, string, stringlen); + use_str = str_buf.buf; + } + + match_status = fnmatch_icase(use_pat, use_str, flags); + + strbuf_release(&pat_buf); + strbuf_release(&str_buf); + + return match_status; +} + static size_t common_prefix_len(const char **pathspec) { const char *n, *first; @@ -537,15 +564,20 @@ int match_basename(const char *basename, int basenamelen, int flags) { if (prefix == patternlen) { - if (!strcmp_icase(pattern, basename)) + if (patternlen == basenamelen && + !strncmp_icase(pattern, basename, basenamelen)) return 1; } else if (flags & EXC_FLAG_ENDSWITH) { + /* "*literal" matching against "fooliteral" */ if (patternlen - 1 <= basenamelen && - !strcmp_icase(pattern + 1, - basename + basenamelen - patternlen + 1)) + !strncmp_icase(pattern + 1, + basename + basenamelen - (patternlen - 1), + patternlen - 1)) return 1; } else { - if (fnmatch_icase(pattern, basename, 0) == 0) + if (fnmatch_icase_mem(pattern, patternlen, + basename, basenamelen, + 0) == 0) return 1; } return 0; From 982ac87316a1cf5126888157bdcbfa32268ebe47 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 28 Mar 2013 17:47:47 -0400 Subject: [PATCH 4/6] dir.c::match_pathname(): adjust patternlen when shifting pattern If we receive a pattern that starts with "/", we shift it forward to avoid looking at the "/" part. Since the prefix and patternlen parameters are counts of what is in the pattern, we must decrement them as we increment the pointer. We remembered to handle prefix, but not patternlen. This didn't cause any bugs, though, because the patternlen parameter is not actually used. Since it will be used in future patches, let's correct this oversight. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- dir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/dir.c b/dir.c index 0db79f9cf6..1757a3f4e0 100644 --- a/dir.c +++ b/dir.c @@ -597,6 +597,7 @@ int match_pathname(const char *pathname, int pathlen, */ if (*pattern == '/') { pattern++; + patternlen--; prefix--; } From ab3aebc15c13d083013591777688cecbcb703fb2 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 28 Mar 2013 17:48:21 -0400 Subject: [PATCH 5/6] dir.c::match_pathname(): pay attention to the length of string parameters This function takes two counted strings: a pair and a pair. But we end up feeding the result to fnmatch, which expects NUL-terminated strings. We can fix this by calling the fnmatch_icase_mem function, which handles re-allocating into a NUL-terminated string if necessary. While we're at it, we can avoid even calling fnmatch in some cases. In addition to patternlen, we get "prefix", the size of the pattern that contains no wildcard characters. We do a straight match of the prefix part first, and then use fnmatch to cover the rest. But if there are no wildcards in the pattern at all, we do not even need to call fnmatch; we would simply be comparing two empty strings. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- dir.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dir.c b/dir.c index 1757a3f4e0..8fea94e185 100644 --- a/dir.c +++ b/dir.c @@ -624,11 +624,22 @@ int match_pathname(const char *pathname, int pathlen, if (strncmp_icase(pattern, name, prefix)) return 0; pattern += prefix; + patternlen -= prefix; name += prefix; namelen -= prefix; + + /* + * If the whole pattern did not have a wildcard, + * then our prefix match is all we need; we + * do not need to call fnmatch at all. + */ + if (!patternlen && !namelen) + return 1; } - return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0; + return fnmatch_icase_mem(pattern, patternlen, + name, namelen, + FNM_PATHNAME) == 0; } /* Scan the list and let the last match determine the fate. From efa5f82540ac0ea75ac185324a78fa230befa050 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 28 Mar 2013 17:50:04 -0400 Subject: [PATCH 6/6] t: check that a pattern without trailing slash matches a directory Prior to v1.8.1.1, with: git init echo content >foo && mkdir subdir && echo content >subdir/bar && echo "subdir export-ignore" >.gitattributes git add . && git commit -m one && git archive HEAD | tar tf - the resulting archive would contain only "foo" and ".gitattributes", not subdir. This was broken with a recent change that intended to allow "subdir/ export-ignore" to also exclude the directory, but instead ended up _requiring_ the trailing slash by mistake. A pattern "subdir" should match any path "subdir", whether it is a directory or a non-directory. A pattern "subdir/" insists that a path "subdir" must be a directory for it to match. This patch adds test not just for this simple case, but also for deeper cross-directory cases, as well as cases with wildcards. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t5002-archive-attr-pattern.sh | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/t/t5002-archive-attr-pattern.sh b/t/t5002-archive-attr-pattern.sh index 0c847fb454..6667d159ab 100755 --- a/t/t5002-archive-attr-pattern.sh +++ b/t/t5002-archive-attr-pattern.sh @@ -27,6 +27,25 @@ test_expect_success 'setup' ' echo ignored-only-if-dir/ export-ignore >>.git/info/attributes && git add ignored-only-if-dir && + mkdir -p ignored-without-slash && + echo "ignored without slash" >ignored-without-slash/foo && + git add ignored-without-slash/foo && + echo "ignored-without-slash export-ignore" >>.git/info/attributes && + + mkdir -p wildcard-without-slash && + echo "ignored without slash" >wildcard-without-slash/foo && + git add wildcard-without-slash/foo && + echo "wild*-without-slash export-ignore" >>.git/info/attributes && + + mkdir -p deep/and/slashless && + echo "ignored without slash" >deep/and/slashless/foo && + git add deep/and/slashless/foo && + echo "deep/and/slashless export-ignore" >>.git/info/attributes && + + mkdir -p deep/with/wildcard && + echo "ignored without slash" >deep/with/wildcard/foo && + git add deep/with/wildcard/foo && + echo "deep/*t*/wildcard export-ignore" >>.git/info/attributes && mkdir -p one-level-lower/two-levels-lower/ignored-only-if-dir && echo ignored by ignored dir >one-level-lower/two-levels-lower/ignored-only-if-dir/ignored-by-ignored-dir && @@ -49,6 +68,14 @@ test_expect_exists archive/not-ignored-dir/ignored-only-if-dir test_expect_exists archive/not-ignored-dir/ test_expect_missing archive/ignored-only-if-dir/ test_expect_missing archive/ignored-ony-if-dir/ignored-by-ignored-dir +test_expect_missing archive/ignored-without-slash/ && +test_expect_missing archive/ignored-without-slash/foo && +test_expect_missing archive/wildcard-without-slash/ +test_expect_missing archive/wildcard-without-slash/foo && +test_expect_missing archive/deep/and/slashless/ && +test_expect_missing archive/deep/and/slashless/foo && +test_expect_missing archive/deep/with/wildcard/ && +test_expect_missing archive/deep/with/wildcard/foo && test_expect_exists archive/one-level-lower/ test_expect_missing archive/one-level-lower/two-levels-lower/ignored-only-if-dir/ test_expect_missing archive/one-level-lower/two-levels-lower/ignored-ony-if-dir/ignored-by-ignored-dir