Merge branch 'nd/attr-match-optim-more'

Start laying the foundation to build the "wildmatch" after we can
agree on its desired semantics.

* nd/attr-match-optim-more:
  attr: more matching optimizations from .gitignore
  gitignore: make pattern parsing code a separate function
  exclude: split pathname matching code into a separate function
  exclude: fix a bug in prefix compare optimization
  exclude: split basename matching code into a separate function
  exclude: stricten a length check in EXC_FLAG_ENDSWITH case
This commit is contained in:
Jeff King 2012-11-09 12:42:25 -05:00
commit 5f836422ab
6 changed files with 186 additions and 88 deletions

View File

@ -56,6 +56,7 @@ When more than one pattern matches the path, a later line
overrides an earlier line. This overriding is done per overrides an earlier line. This overriding is done per
attribute. The rules how the pattern matches paths are the attribute. The rules how the pattern matches paths are the
same as in `.gitignore` files; see linkgit:gitignore[5]. same as in `.gitignore` files; see linkgit:gitignore[5].
Unlike `.gitignore`, negative patterns are forbidden.
When deciding what attributes are assigned to a path, git When deciding what attributes are assigned to a path, git
consults `$GIT_DIR/info/attributes` file (which has the highest consults `$GIT_DIR/info/attributes` file (which has the highest

52
attr.c
View File

@ -115,6 +115,13 @@ struct attr_state {
const char *setto; const char *setto;
}; };
struct pattern {
const char *pattern;
int patternlen;
int nowildcardlen;
int flags; /* EXC_FLAG_* */
};
/* /*
* One rule, as from a .gitattributes file. * One rule, as from a .gitattributes file.
* *
@ -131,7 +138,7 @@ struct attr_state {
*/ */
struct match_attr { struct match_attr {
union { union {
char *pattern; struct pattern pat;
struct git_attr *attr; struct git_attr *attr;
} u; } u;
char is_macro; char is_macro;
@ -241,9 +248,16 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
if (is_macro) if (is_macro)
res->u.attr = git_attr_internal(name, namelen); res->u.attr = git_attr_internal(name, namelen);
else { else {
res->u.pattern = (char *)&(res->state[num_attr]); char *p = (char *)&(res->state[num_attr]);
memcpy(res->u.pattern, name, namelen); memcpy(p, name, namelen);
res->u.pattern[namelen] = 0; res->u.pat.pattern = p;
parse_exclude_pattern(&res->u.pat.pattern,
&res->u.pat.patternlen,
&res->u.pat.flags,
&res->u.pat.nowildcardlen);
if (res->u.pat.flags & EXC_FLAG_NEGATIVE)
die(_("Negative patterns are forbidden in git attributes\n"
"Use '\\!' for literal leading exclamation."));
} }
res->is_macro = is_macro; res->is_macro = is_macro;
res->num_attr = num_attr; res->num_attr = num_attr;
@ -648,25 +662,21 @@ static void prepare_attr_stack(const char *path)
static int path_matches(const char *pathname, int pathlen, static int path_matches(const char *pathname, int pathlen,
const char *basename, const char *basename,
const char *pattern, const struct pattern *pat,
const char *base, int baselen) const char *base, int baselen)
{ {
if (!strchr(pattern, '/')) { const char *pattern = pat->pattern;
return (fnmatch_icase(pattern, basename, 0) == 0); int prefix = pat->nowildcardlen;
if (pat->flags & EXC_FLAG_NODIR) {
return match_basename(basename,
pathlen - (basename - pathname),
pattern, prefix,
pat->patternlen, pat->flags);
} }
/* return match_pathname(pathname, pathlen,
* match with FNM_PATHNAME; the pattern has base implicitly base, baselen,
* in front of it. pattern, prefix, pat->patternlen, pat->flags);
*/
if (*pattern == '/')
pattern++;
if (pathlen < baselen ||
(baselen && pathname[baselen] != '/') ||
strncmp(pathname, base, baselen))
return 0;
if (baselen != 0)
baselen++;
return fnmatch_icase(pattern, pathname + baselen, FNM_PATHNAME) == 0;
} }
static int macroexpand_one(int attr_nr, int rem); static int macroexpand_one(int attr_nr, int rem);
@ -704,7 +714,7 @@ static int fill(const char *path, int pathlen, const char *basename,
if (a->is_macro) if (a->is_macro)
continue; continue;
if (path_matches(path, pathlen, basename, if (path_matches(path, pathlen, basename,
a->u.pattern, base, stk->originlen)) &a->u.pat, base, stk->originlen))
rem = fill_one("fill", a, rem); rem = fill_one("fill", a, rem);
} }
return rem; return rem;

190
dir.c
View File

@ -308,42 +308,69 @@ static int no_wildcard(const char *string)
return string[simple_length(string)] == '\0'; return string[simple_length(string)] == '\0';
} }
/*
 * Classify an exclude/attribute pattern without copying it.
 *
 * On entry *pattern points at the NUL-terminated pattern text.  On
 * return:
 *   *pattern        points past a leading '!', if there was one;
 *   *patternlen     is the length of the text, not counting one
 *                   trailing '/';
 *   *flags          holds the EXC_FLAG_* bits derived from the text;
 *   *nowildcardlen  is simple_length() of the text, capped at
 *                   *patternlen.
 */
void parse_exclude_pattern(const char **pattern,
			   int *patternlen,
			   int *flags,
			   int *nowildcardlen)
{
	const char *text = *pattern;
	size_t textlen, pos;
	int bits = 0;
	int literal_len;

	/* A leading '!' negates the pattern and is not part of it. */
	if (*text == '!') {
		bits |= EXC_FLAG_NEGATIVE;
		text++;
	}

	/* One trailing '/' means "directories only"; do not count it. */
	textlen = strlen(text);
	if (textlen && text[textlen - 1] == '/') {
		bits |= EXC_FLAG_MUSTBEDIR;
		textlen--;
	}

	/* No '/' within the counted part: the pattern is basename-only. */
	pos = 0;
	while (pos < textlen && text[pos] != '/')
		pos++;
	if (pos == textlen)
		bits |= EXC_FLAG_NODIR;

	/*
	 * The trailing slash is still physically present in 'text'
	 * (stripping it would need a copy), so simple_length() may
	 * count it.  Never report more than the real pattern length.
	 */
	literal_len = simple_length(text);
	if (literal_len > textlen)
		literal_len = textlen;

	/* "*" followed only by non-wildcard text. */
	if (*text == '*' && no_wildcard(text + 1))
		bits |= EXC_FLAG_ENDSWITH;

	*flags = bits;
	*nowildcardlen = literal_len;
	*pattern = text;
	*patternlen = textlen;
}
void add_exclude(const char *string, const char *base, void add_exclude(const char *string, const char *base,
int baselen, struct exclude_list *which) int baselen, struct exclude_list *which)
{ {
struct exclude *x; struct exclude *x;
size_t len; int patternlen;
int to_exclude = 1; int flags;
int flags = 0; int nowildcardlen;
if (*string == '!') { parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen);
to_exclude = 0; if (flags & EXC_FLAG_MUSTBEDIR) {
string++;
}
len = strlen(string);
if (len && string[len - 1] == '/') {
char *s; char *s;
x = xmalloc(sizeof(*x) + len); x = xmalloc(sizeof(*x) + patternlen + 1);
s = (char *)(x+1); s = (char *)(x+1);
memcpy(s, string, len - 1); memcpy(s, string, patternlen);
s[len - 1] = '\0'; s[patternlen] = '\0';
string = s;
x->pattern = s; x->pattern = s;
flags = EXC_FLAG_MUSTBEDIR;
} else { } else {
x = xmalloc(sizeof(*x)); x = xmalloc(sizeof(*x));
x->pattern = string; x->pattern = string;
} }
x->to_exclude = to_exclude; x->patternlen = patternlen;
x->patternlen = strlen(string); x->nowildcardlen = nowildcardlen;
x->base = base; x->base = base;
x->baselen = baselen; x->baselen = baselen;
x->flags = flags; x->flags = flags;
if (!strchr(string, '/'))
x->flags |= EXC_FLAG_NODIR;
x->nowildcardlen = simple_length(string);
if (*string == '*' && no_wildcard(string+1))
x->flags |= EXC_FLAG_ENDSWITH;
ALLOC_GROW(which->excludes, which->nr + 1, which->alloc); ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
which->excludes[which->nr++] = x; which->excludes[which->nr++] = x;
} }
@ -505,6 +532,72 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
dir->basebuf[baselen] = '\0'; dir->basebuf[baselen] = '\0';
} }
/*
 * Match a basename-only pattern (EXC_FLAG_NODIR) against 'basename'.
 *
 * 'prefix' is the pattern's nowildcardlen and 'patternlen' its length;
 * when the two are equal the pattern is compared as a plain string.
 * With EXC_FLAG_ENDSWITH the text after the leading '*' is compared
 * against the tail of the basename.  Anything else goes through
 * fnmatch_icase().
 *
 * Returns 1 on a match, 0 otherwise.
 *
 * NOTE(review): the two string comparisons below never consult
 * 'patternlen' for how much of 'pattern' to compare; presumably they
 * run to the terminating NUL.  A caller whose pattern text extends
 * past patternlen (e.g. keeps a trailing '/') would then never match
 * here -- confirm against the callers.
 */
int match_basename(const char *basename, int basenamelen,
		   const char *pattern, int prefix, int patternlen,
		   int flags)
{
	/* No wildcard anywhere in the pattern. */
	if (prefix == patternlen)
		return !strcmp_icase(pattern, basename);

	/* "*literal": compare 'literal' with the end of the basename. */
	if (flags & EXC_FLAG_ENDSWITH) {
		int suffixlen = patternlen - 1;

		if (suffixlen > basenamelen)
			return 0;
		return !strcmp_icase(pattern + 1,
				     basename + basenamelen - suffixlen);
	}

	return fnmatch_icase(pattern, basename, 0) == 0;
}
/*
 * Match a pattern that contains a directory separator against
 * 'pathname', using FNM_PATHNAME semantics.  The pattern is taken to
 * have 'base' implicitly in front of it.
 *
 * 'baselen' does not count a trailing slash; base[] itself may or may
 * not end with one.  'prefix' is the pattern's nowildcardlen.
 *
 * Returns 1 on a match, 0 otherwise.
 */
int match_pathname(const char *pathname, int pathlen,
		   const char *base, int baselen,
		   const char *pattern, int prefix, int patternlen,
		   int flags)
{
	const char *rest;
	int restlen;

	/* A leading '/' only anchors the pattern to 'base'; skip it. */
	if (*pattern == '/') {
		pattern++;
		prefix--;
	}

	/* The path must be "<base>/<something>" (or anything if no base). */
	if (pathlen < baselen + 1)
		return 0;
	if (baselen && pathname[baselen] != '/')
		return 0;
	if (strncmp_icase(pathname, base, baselen))
		return 0;

	/* What is left of the path after "<base>/". */
	if (baselen) {
		rest = pathname + baselen + 1;
		restlen = pathlen - baselen - 1;
	} else {
		rest = pathname;
		restlen = pathlen;
	}

	if (prefix) {
		/*
		 * The non-wildcard part cannot match if it is longer
		 * than what remains of the pathname.
		 */
		if (prefix > restlen)
			return 0;
		if (strncmp_icase(pattern, rest, prefix))
			return 0;
		pattern += prefix;
		rest += prefix;
	}

	return fnmatch_icase(pattern, rest, FNM_PATHNAME) == 0;
}
/* Scan the list and let the last match determine the fate. /* Scan the list and let the last match determine the fate.
* Return 1 for exclude, 0 for include and -1 for undecided. * Return 1 for exclude, 0 for include and -1 for undecided.
*/ */
@ -519,9 +612,9 @@ int excluded_from_list(const char *pathname,
for (i = el->nr - 1; 0 <= i; i--) { for (i = el->nr - 1; 0 <= i; i--) {
struct exclude *x = el->excludes[i]; struct exclude *x = el->excludes[i];
const char *name, *exclude = x->pattern; const char *exclude = x->pattern;
int to_exclude = x->to_exclude; int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
int namelen, prefix = x->nowildcardlen; int prefix = x->nowildcardlen;
if (x->flags & EXC_FLAG_MUSTBEDIR) { if (x->flags & EXC_FLAG_MUSTBEDIR) {
if (*dtype == DT_UNKNOWN) if (*dtype == DT_UNKNOWN)
@ -531,51 +624,18 @@ int excluded_from_list(const char *pathname,
} }
if (x->flags & EXC_FLAG_NODIR) { if (x->flags & EXC_FLAG_NODIR) {
/* match basename */ if (match_basename(basename,
if (prefix == x->patternlen) { pathlen - (basename - pathname),
if (!strcmp_icase(exclude, basename)) exclude, prefix, x->patternlen,
x->flags))
return to_exclude; return to_exclude;
} else if (x->flags & EXC_FLAG_ENDSWITH) {
if (x->patternlen - 1 <= pathlen &&
!strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
return to_exclude;
} else {
if (fnmatch_icase(exclude, basename, 0) == 0)
return to_exclude;
}
continue; continue;
} }
/* match with FNM_PATHNAME: assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
* exclude has base (baselen long) implicitly in front of it. if (match_pathname(pathname, pathlen,
*/ x->base, x->baselen ? x->baselen - 1 : 0,
if (*exclude == '/') { exclude, prefix, x->patternlen, x->flags))
exclude++;
prefix--;
}
if (pathlen < x->baselen ||
(x->baselen && pathname[x->baselen-1] != '/') ||
strncmp_icase(pathname, x->base, x->baselen))
continue;
namelen = x->baselen ? pathlen - x->baselen : pathlen;
name = pathname + pathlen - namelen;
/* if the non-wildcard part is longer than the
remaining pathname, surely it cannot match */
if (prefix > namelen)
continue;
if (prefix) {
if (strncmp_icase(exclude, name, prefix))
continue;
exclude += prefix;
name += prefix;
namelen -= prefix;
}
if (!namelen || !fnmatch_icase(exclude, name, FNM_PATHNAME))
return to_exclude; return to_exclude;
} }
return -1; /* undecided */ return -1; /* undecided */

13
dir.h
View File

@ -11,6 +11,7 @@ struct dir_entry {
#define EXC_FLAG_NODIR 1 #define EXC_FLAG_NODIR 1
#define EXC_FLAG_ENDSWITH 4 #define EXC_FLAG_ENDSWITH 4
#define EXC_FLAG_MUSTBEDIR 8 #define EXC_FLAG_MUSTBEDIR 8
#define EXC_FLAG_NEGATIVE 16
struct exclude_list { struct exclude_list {
int nr; int nr;
@ -21,7 +22,6 @@ struct exclude_list {
int nowildcardlen; int nowildcardlen;
const char *base; const char *base;
int baselen; int baselen;
int to_exclude;
int flags; int flags;
} **excludes; } **excludes;
}; };
@ -80,6 +80,16 @@ extern int excluded_from_list(const char *pathname, int pathlen, const char *bas
int *dtype, struct exclude_list *el); int *dtype, struct exclude_list *el);
struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len); struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len);
/*
 * These implement the matching logic shared by
 * dir.c:excluded_from_list() and attr.c:path_matches().
 *
 * Both return 1 when the name matches the pattern and 0 otherwise.
 * 'prefix' is the pattern's nowildcardlen, 'patternlen' its length and
 * 'flags' its EXC_FLAG_* bits.
 *
 * match_basename() is for patterns flagged EXC_FLAG_NODIR and looks
 * only at the last path component.
 *
 * match_pathname() matches the full path with FNM_PATHNAME; the
 * pattern has 'base' implicitly in front of it, and 'baselen' does not
 * count a trailing slash.
 */
extern int match_basename(const char *basename, int basenamelen,
			  const char *pattern, int prefix, int patternlen,
			  int flags);
extern int match_pathname(const char *pathname, int pathlen,
			  const char *base, int baselen,
			  const char *pattern, int prefix, int patternlen,
			  int flags);
/* /*
* The excluded() API is meant for callers that check each level of leading * The excluded() API is meant for callers that check each level of leading
* directory hierarchies with excluded() to avoid recursing into excluded * directory hierarchies with excluded() to avoid recursing into excluded
@ -97,6 +107,7 @@ extern int path_excluded(struct path_exclude_check *, const char *, int namelen,
extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
char **buf_p, struct exclude_list *which, int check_index); char **buf_p, struct exclude_list *which, int check_index);
extern void add_excludes_from_file(struct dir_struct *, const char *fname); extern void add_excludes_from_file(struct dir_struct *, const char *fname);
extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
extern void add_exclude(const char *string, const char *base, extern void add_exclude(const char *string, const char *base,
int baselen, struct exclude_list *which); int baselen, struct exclude_list *which);
extern void free_excludes(struct exclude_list *el); extern void free_excludes(struct exclude_list *el);

View File

@ -196,6 +196,16 @@ test_expect_success 'root subdir attribute test' '
attr_check subdir/a/i unspecified attr_check subdir/a/i unspecified
' '
test_expect_success 'negative patterns' '
echo "!f test=bar" >.gitattributes &&
test_must_fail git check-attr test -- f
'
test_expect_success 'patterns starting with exclamation' '
echo "\!f test=foo" >.gitattributes &&
attr_check "!f" foo
'
test_expect_success 'setup bare' ' test_expect_success 'setup bare' '
git clone --bare . bare.git && git clone --bare . bare.git &&
cd bare.git cd bare.git

View File

@ -214,4 +214,10 @@ test_expect_success 'subdirectory ignore (l1)' '
test_cmp expect actual test_cmp expect actual
' '
test_expect_success 'pattern matches prefix completely' '
: >expect &&
git ls-files -i -o --exclude "/three/a.3[abc]" >actual &&
test_cmp expect actual
'
test_done test_done