From 692663303f67371a2e4af4e3d353b042f754c036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Mon, 15 Oct 2012 13:24:34 +0700 Subject: [PATCH 1/6] exclude: stricten a length check in EXC_FLAG_ENDSWITH case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This block of code deals with the "basename" part only, which has the length of "pathlen - (basename - pathname)". Stricten the length check and remove "pathname" from the main expression to avoid confusion. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dir.c b/dir.c index 0015cc54f4..b0ae417b37 100644 --- a/dir.c +++ b/dir.c @@ -534,8 +534,9 @@ int excluded_from_list(const char *pathname, if (!strcmp_icase(exclude, basename)) return to_exclude; } else if (x->flags & EXC_FLAG_ENDSWITH) { - if (x->patternlen - 1 <= pathlen && - !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1)) + int len = pathlen - (basename - pathname); + if (x->patternlen - 1 <= len && + !strcmp_icase(exclude + 1, basename + len - x->patternlen + 1)) return to_exclude; } else { if (fnmatch_icase(exclude, basename, 0) == 0) From 593cb8802eb43b17e6344790278cea7e6ec68b43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Mon, 15 Oct 2012 13:24:35 +0700 Subject: [PATCH 2/6] exclude: split basename matching code into a separate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/dir.c b/dir.c index b0ae417b37..d9b5561466 100644 --- a/dir.c +++ b/dir.c @@ -503,6 +503,25 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) dir->basebuf[baselen] = '\0'; } +static int match_basename(const char *basename, int basenamelen, + const char *pattern, int prefix, int patternlen, + int flags) +{ + if (prefix == patternlen) { + if (!strcmp_icase(pattern, basename)) + return 1; + } else if (flags & EXC_FLAG_ENDSWITH) { + if (patternlen - 1 <= basenamelen && + !strcmp_icase(pattern + 1, + basename + basenamelen - patternlen + 1)) + return 1; + } else { + if (fnmatch_icase(pattern, basename, 0) == 0) + return 1; + } + return 0; +} + /* Scan the list and let the last match determine the fate. * Return 1 for exclude, 0 for include and -1 for undecided. */ @@ -529,19 +548,11 @@ int excluded_from_list(const char *pathname, } if (x->flags & EXC_FLAG_NODIR) { - /* match basename */ - if (prefix == x->patternlen) { - if (!strcmp_icase(exclude, basename)) - return to_exclude; - } else if (x->flags & EXC_FLAG_ENDSWITH) { - int len = pathlen - (basename - pathname); - if (x->patternlen - 1 <= len && - !strcmp_icase(exclude + 1, basename + len - x->patternlen + 1)) - return to_exclude; - } else { - if (fnmatch_icase(exclude, basename, 0) == 0) - return to_exclude; - } + if (match_basename(basename, + pathlen - (basename - pathname), + exclude, prefix, x->patternlen, + x->flags)) + return to_exclude; continue; } From a3ea4d7199870595eb3a264c4b41e725b4417bc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Mon, 15 Oct 2012 13:24:36 +0700 Subject: [PATCH 3/6] exclude: fix a bug in prefix compare optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When "namelen" becomes zero at this stage, we have matched the fixed part, but whether it actually matches the pattern still depends on the pattern in "exclude". As demonstrated in t3001, path "three/a.3" exists and it matches the "three/a.3" part in pattern "three/a.3[abc]", but that does not mean a true match. Don't be too optimistic and let fnmatch() do the job. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 2 +- t/t3001-ls-files-others-exclude.sh | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/dir.c b/dir.c index d9b5561466..22d0b7b726 100644 --- a/dir.c +++ b/dir.c @@ -585,7 +585,7 @@ int excluded_from_list(const char *pathname, namelen -= prefix; } - if (!namelen || !fnmatch_icase(exclude, name, FNM_PATHNAME)) + if (!fnmatch_icase(exclude, name, FNM_PATHNAME)) return to_exclude; } return -1; /* undecided */ diff --git a/t/t3001-ls-files-others-exclude.sh b/t/t3001-ls-files-others-exclude.sh index c8fe978267..dc2f0458fd 100755 --- a/t/t3001-ls-files-others-exclude.sh +++ b/t/t3001-ls-files-others-exclude.sh @@ -214,4 +214,10 @@ test_expect_success 'subdirectory ignore (l1)' ' test_cmp expect actual ' +test_expect_success 'pattern matches prefix completely' ' + : >expect && + git ls-files -i -o --exclude "/three/a.3[abc]" >actual && + test_cmp expect actual +' + test_done From b559263216146b3f452c35ea1b12937d5533c89e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Mon, 15 Oct 2012 13:24:37 +0700 Subject: [PATCH 4/6] exclude: split pathname matching code into a separate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 85 +++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/dir.c b/dir.c index 22d0b7b726..32d1c90e58 100644 --- a/dir.c +++ b/dir.c @@ -522,6 +522,53 @@ static int match_basename(const char *basename, int basenamelen, return 0; } +static int match_pathname(const char *pathname, int pathlen, + const char *base, int baselen, + const char *pattern, int prefix, int patternlen, + int flags) +{ + const char *name; + int namelen; + + /* + * match with FNM_PATHNAME; the pattern has base implicitly + * in front of it. + */ + if (*pattern == '/') { + pattern++; + prefix--; + } + + /* + * baselen does not count the trailing slash. base[] may or + * may not end with a trailing slash though. + */ + if (pathlen < baselen + 1 || + (baselen && pathname[baselen] != '/') || + strncmp_icase(pathname, base, baselen)) + return 0; + + namelen = baselen ? pathlen - baselen - 1 : pathlen; + name = pathname + pathlen - namelen; + + if (prefix) { + /* + * if the non-wildcard part is longer than the + * remaining pathname, surely it cannot match. + */ + if (prefix > namelen) + return 0; + + if (strncmp_icase(pattern, name, prefix)) + return 0; + pattern += prefix; + name += prefix; + namelen -= prefix; + } + + return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0; +} + /* Scan the list and let the last match determine the fate. * Return 1 for exclude, 0 for include and -1 for undecided. */ @@ -536,9 +583,9 @@ int excluded_from_list(const char *pathname, for (i = el->nr - 1; 0 <= i; i--) { struct exclude *x = el->excludes[i]; - const char *name, *exclude = x->pattern; + const char *exclude = x->pattern; int to_exclude = x->to_exclude; - int namelen, prefix = x->nowildcardlen; + int prefix = x->nowildcardlen; if (x->flags & EXC_FLAG_MUSTBEDIR) { if (*dtype == DT_UNKNOWN) @@ -556,36 +603,10 @@ int excluded_from_list(const char *pathname, continue; } - /* match with FNM_PATHNAME: - * exclude has base (baselen long) implicitly in front of it. - */ - if (*exclude == '/') { - exclude++; - prefix--; - } - - if (pathlen < x->baselen || - (x->baselen && pathname[x->baselen-1] != '/') || - strncmp_icase(pathname, x->base, x->baselen)) - continue; - - namelen = x->baselen ? pathlen - x->baselen : pathlen; - name = pathname + pathlen - namelen; - - /* if the non-wildcard part is longer than the - remaining pathname, surely it cannot match */ - if (prefix > namelen) - continue; - - if (prefix) { - if (strncmp_icase(exclude, name, prefix)) - continue; - exclude += prefix; - name += prefix; - namelen -= prefix; - } - - if (!fnmatch_icase(exclude, name, FNM_PATHNAME)) + assert(x->baselen == 0 || x->base[x->baselen - 1] == '/'); + if (match_pathname(pathname, pathlen, + x->base, x->baselen ? x->baselen - 1 : 0, + exclude, prefix, x->patternlen, x->flags)) return to_exclude; } return -1; /* undecided */ From 84460eec8dcc82417840bf4be2eb0c13d14b1c94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Mon, 15 Oct 2012 13:24:38 +0700 Subject: [PATCH 5/6] gitignore: make pattern parsing code a separate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function can later be reused by attr.c. Also turn to_exclude field into a flag. Signed-off-by: Junio C Hamano Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 71 +++++++++++++++++++++++++++++++++++++++++------------------ dir.h | 2 +- 2 files changed, 50 insertions(+), 23 deletions(-) diff --git a/dir.c b/dir.c index 32d1c90e58..c4e64a5bc9 100644 --- a/dir.c +++ b/dir.c @@ -308,42 +308,69 @@ static int no_wildcard(const char *string) return string[simple_length(string)] == '\0'; } +static void parse_exclude_pattern(const char **pattern, + int *patternlen, + int *flags, + int *nowildcardlen) +{ + const char *p = *pattern; + size_t i, len; + + *flags = 0; + if (*p == '!') { + *flags |= EXC_FLAG_NEGATIVE; + p++; + } + len = strlen(p); + if (len && p[len - 1] == '/') { + len--; + *flags |= EXC_FLAG_MUSTBEDIR; + } + for (i = 0; i < len; i++) { + if (p[i] == '/') + break; + } + if (i == len) + *flags |= EXC_FLAG_NODIR; + *nowildcardlen = simple_length(p); + /* + * we should have excluded the trailing slash from 'p' too, + * but that's one more allocation. Instead just make sure + * nowildcardlen does not exceed real patternlen + */ + if (*nowildcardlen > len) + *nowildcardlen = len; + if (*p == '*' && no_wildcard(p + 1)) + *flags |= EXC_FLAG_ENDSWITH; + *pattern = p; + *patternlen = len; +} + void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *which) { struct exclude *x; - size_t len; - int to_exclude = 1; - int flags = 0; + int patternlen; + int flags; + int nowildcardlen; - if (*string == '!') { - to_exclude = 0; - string++; - } - len = strlen(string); - if (len && string[len - 1] == '/') { + parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen); + if (flags & EXC_FLAG_MUSTBEDIR) { char *s; - x = xmalloc(sizeof(*x) + len); + x = xmalloc(sizeof(*x) + patternlen + 1); s = (char *)(x+1); - memcpy(s, string, len - 1); - s[len - 1] = '\0'; - string = s; + memcpy(s, string, patternlen); + s[patternlen] = '\0'; x->pattern = s; - flags = EXC_FLAG_MUSTBEDIR; } else { x = xmalloc(sizeof(*x)); x->pattern = string; } - x->to_exclude = to_exclude; - x->patternlen = strlen(string); + x->patternlen = patternlen; + x->nowildcardlen = nowildcardlen; x->base = base; x->baselen = baselen; x->flags = flags; - if (!strchr(string, '/')) - x->flags |= EXC_FLAG_NODIR; - x->nowildcardlen = simple_length(string); - if (*string == '*' && no_wildcard(string+1)) - x->flags |= EXC_FLAG_ENDSWITH; ALLOC_GROW(which->excludes, which->nr + 1, which->alloc); which->excludes[which->nr++] = x; } @@ -584,7 +611,7 @@ int excluded_from_list(const char *pathname, for (i = el->nr - 1; 0 <= i; i--) { struct exclude *x = el->excludes[i]; const char *exclude = x->pattern; - int to_exclude = x->to_exclude; + int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1; int prefix = x->nowildcardlen; if (x->flags & EXC_FLAG_MUSTBEDIR) { diff --git a/dir.h b/dir.h index 893465a1e8..41ea32d957 100644 --- a/dir.h +++ b/dir.h @@ -11,6 +11,7 @@ struct dir_entry { #define EXC_FLAG_NODIR 1 #define EXC_FLAG_ENDSWITH 4 #define EXC_FLAG_MUSTBEDIR 8 +#define EXC_FLAG_NEGATIVE 16 struct exclude_list { int nr; @@ -21,7 +22,6 @@ struct exclude_list { int nowildcardlen; const char *base; int baselen; - int to_exclude; int flags; } **excludes; }; From 82dce998c2028b6ee96691921b7037a6e182ec89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Mon, 15 Oct 2012 13:24:39 +0700 Subject: [PATCH 6/6] attr: more matching optimizations from .gitignore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .gitattributes and .gitignore share the same pattern syntax but has separate matching implementation. Over the years, ignore's implementation accumulates more optimizations while attr's stays the same. This patch reuses the core matching functions that are also used by excluded_from_list. excluded_from_list and path_matches can't be merged due to differences in exclude and attr, for example: * "!pattern" syntax is forbidden in .gitattributes. As an attribute can be unset (i.e. set to a special value "false") or made back to unspecified (i.e. not even set to "false"), "!pattern attr" is unclear which one it means. * we support attaching attributes to directories, but git-core internally does not currently make use of attributes on directories. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/gitattributes.txt | 1 + attr.c | 52 ++++++++++++++++++++------------- dir.c | 22 +++++++------- dir.h | 11 +++++++ t/t0003-attributes.sh | 10 +++++++ 5 files changed, 64 insertions(+), 32 deletions(-) diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 80120ea14f..b7c0e6577e 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -56,6 +56,7 @@ When more than one pattern matches the path, a later line overrides an earlier line. This overriding is done per attribute. The rules how the pattern matches paths are the same as in `.gitignore` files; see linkgit:gitignore[5]. +Unlike `.gitignore`, negative patterns are forbidden. When deciding what attributes are assigned to a path, git consults `$GIT_DIR/info/attributes` file (which has the highest diff --git a/attr.c b/attr.c index e7caee4ddd..2fc6353628 100644 --- a/attr.c +++ b/attr.c @@ -115,6 +115,13 @@ struct attr_state { const char *setto; }; +struct pattern { + const char *pattern; + int patternlen; + int nowildcardlen; + int flags; /* EXC_FLAG_* */ +}; + /* * One rule, as from a .gitattributes file. * @@ -131,7 +138,7 @@ struct attr_state { */ struct match_attr { union { - char *pattern; + struct pattern pat; struct git_attr *attr; } u; char is_macro; @@ -241,9 +248,16 @@ static struct match_attr *parse_attr_line(const char *line, const char *src, if (is_macro) res->u.attr = git_attr_internal(name, namelen); else { - res->u.pattern = (char *)&(res->state[num_attr]); - memcpy(res->u.pattern, name, namelen); - res->u.pattern[namelen] = 0; + char *p = (char *)&(res->state[num_attr]); + memcpy(p, name, namelen); + res->u.pat.pattern = p; + parse_exclude_pattern(&res->u.pat.pattern, + &res->u.pat.patternlen, + &res->u.pat.flags, + &res->u.pat.nowildcardlen); + if (res->u.pat.flags & EXC_FLAG_NEGATIVE) + die(_("Negative patterns are forbidden in git attributes\n" + "Use '\\!' for literal leading exclamation.")); } res->is_macro = is_macro; res->num_attr = num_attr; @@ -640,25 +654,21 @@ static void prepare_attr_stack(const char *path) static int path_matches(const char *pathname, int pathlen, const char *basename, - const char *pattern, + const struct pattern *pat, const char *base, int baselen) { - if (!strchr(pattern, '/')) { - return (fnmatch_icase(pattern, basename, 0) == 0); + const char *pattern = pat->pattern; + int prefix = pat->nowildcardlen; + + if (pat->flags & EXC_FLAG_NODIR) { + return match_basename(basename, + pathlen - (basename - pathname), + pattern, prefix, + pat->patternlen, pat->flags); } - /* - * match with FNM_PATHNAME; the pattern has base implicitly - * in front of it. - */ - if (*pattern == '/') - pattern++; - if (pathlen < baselen || - (baselen && pathname[baselen] != '/') || - strncmp(pathname, base, baselen)) - return 0; - if (baselen != 0) - baselen++; - return fnmatch_icase(pattern, pathname + baselen, FNM_PATHNAME) == 0; + return match_pathname(pathname, pathlen, + base, baselen, + pattern, prefix, pat->patternlen, pat->flags); } static int macroexpand_one(int attr_nr, int rem); @@ -696,7 +706,7 @@ static int fill(const char *path, int pathlen, const char *basename, if (a->is_macro) continue; if (path_matches(path, pathlen, basename, - a->u.pattern, base, stk->originlen)) + &a->u.pat, base, stk->originlen)) rem = fill_one("fill", a, rem); } return rem; diff --git a/dir.c b/dir.c index c4e64a5bc9..ee8e7115a8 100644 --- a/dir.c +++ b/dir.c @@ -308,10 +308,10 @@ static int no_wildcard(const char *string) return string[simple_length(string)] == '\0'; } -static void parse_exclude_pattern(const char **pattern, - int *patternlen, - int *flags, - int *nowildcardlen) +void parse_exclude_pattern(const char **pattern, + int *patternlen, + int *flags, + int *nowildcardlen) { const char *p = *pattern; size_t i, len; @@ -530,9 +530,9 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) dir->basebuf[baselen] = '\0'; } -static int match_basename(const char *basename, int basenamelen, - const char *pattern, int prefix, int patternlen, - int flags) +int match_basename(const char *basename, int basenamelen, + const char *pattern, int prefix, int patternlen, + int flags) { if (prefix == patternlen) { if (!strcmp_icase(pattern, basename)) @@ -549,10 +549,10 @@ static int match_basename(const char *basename, int basenamelen, return 0; } -static int match_pathname(const char *pathname, int pathlen, - const char *base, int baselen, - const char *pattern, int prefix, int patternlen, - int flags) +int match_pathname(const char *pathname, int pathlen, + const char *base, int baselen, + const char *pattern, int prefix, int patternlen, + int flags) { const char *name; int namelen; diff --git a/dir.h b/dir.h index 41ea32d957..f5c89e3b80 100644 --- a/dir.h +++ b/dir.h @@ -80,6 +80,16 @@ extern int excluded_from_list(const char *pathname, int pathlen, const char *bas int *dtype, struct exclude_list *el); struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len); +/* + * these implement the matching logic for dir.c:excluded_from_list and + * attr.c:path_matches() + */ +extern int match_basename(const char *, int, + const char *, int, int, int); +extern int match_pathname(const char *, int, + const char *, int, + const char *, int, int, int); + /* * The excluded() API is meant for callers that check each level of leading * directory hierarchies with excluded() to avoid recursing into excluded @@ -97,6 +107,7 @@ extern int path_excluded(struct path_exclude_check *, const char *, int namelen, extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, char **buf_p, struct exclude_list *which, int check_index); extern void add_excludes_from_file(struct dir_struct *, const char *fname); +extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen); extern void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *which); extern void free_excludes(struct exclude_list *el); diff --git a/t/t0003-attributes.sh b/t/t0003-attributes.sh index 51f3045ba4..f6c21ea4ea 100755 --- a/t/t0003-attributes.sh +++ b/t/t0003-attributes.sh @@ -206,6 +206,16 @@ test_expect_success 'root subdir attribute test' ' attr_check subdir/a/i unspecified ' +test_expect_success 'negative patterns' ' + echo "!f test=bar" >.gitattributes && + test_must_fail git check-attr test -- f +' + +test_expect_success 'patterns starting with exclamation' ' + echo "\!f test=foo" >.gitattributes && + attr_check "!f" foo +' + test_expect_success 'setup bare' ' git clone --bare . bare.git && cd bare.git