Merge branch 'nd/attr-match-optim-more'

Start laying the foundation to build the "wildmatch" after we can
agree on its desired semantics.

* nd/attr-match-optim-more:
  attr: more matching optimizations from .gitignore
  gitignore: make pattern parsing code a separate function
  exclude: split pathname matching code into a separate function
  exclude: fix a bug in prefix compare optimization
  exclude: split basename matching code into a separate function
  exclude: stricten a length check in EXC_FLAG_ENDSWITH case
This commit is contained in:
Jeff King 2012-11-09 12:42:25 -05:00
commit 5f836422ab
6 changed files with 186 additions and 88 deletions

View File

@ -56,6 +56,7 @@ When more than one pattern matches the path, a later line
overrides an earlier line. This overriding is done per
attribute. The rules by which the pattern matches paths are the
same as in `.gitignore` files; see linkgit:gitignore[5].
Unlike `.gitignore`, negative patterns are forbidden.
When deciding what attributes are assigned to a path, git
consults `$GIT_DIR/info/attributes` file (which has the highest

52
attr.c
View File

@ -115,6 +115,13 @@ struct attr_state {
const char *setto;
};
struct pattern {
const char *pattern;
int patternlen;
int nowildcardlen;
int flags; /* EXC_FLAG_* */
};
/*
* One rule, as from a .gitattributes file.
*
@ -131,7 +138,7 @@ struct attr_state {
*/
struct match_attr {
union {
char *pattern;
struct pattern pat;
struct git_attr *attr;
} u;
char is_macro;
@ -241,9 +248,16 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
if (is_macro)
res->u.attr = git_attr_internal(name, namelen);
else {
res->u.pattern = (char *)&(res->state[num_attr]);
memcpy(res->u.pattern, name, namelen);
res->u.pattern[namelen] = 0;
char *p = (char *)&(res->state[num_attr]);
memcpy(p, name, namelen);
res->u.pat.pattern = p;
parse_exclude_pattern(&res->u.pat.pattern,
&res->u.pat.patternlen,
&res->u.pat.flags,
&res->u.pat.nowildcardlen);
if (res->u.pat.flags & EXC_FLAG_NEGATIVE)
die(_("Negative patterns are forbidden in git attributes\n"
"Use '\\!' for literal leading exclamation."));
}
res->is_macro = is_macro;
res->num_attr = num_attr;
@ -648,25 +662,21 @@ static void prepare_attr_stack(const char *path)
static int path_matches(const char *pathname, int pathlen,
const char *basename,
const char *pattern,
const struct pattern *pat,
const char *base, int baselen)
{
if (!strchr(pattern, '/')) {
return (fnmatch_icase(pattern, basename, 0) == 0);
const char *pattern = pat->pattern;
int prefix = pat->nowildcardlen;
if (pat->flags & EXC_FLAG_NODIR) {
return match_basename(basename,
pathlen - (basename - pathname),
pattern, prefix,
pat->patternlen, pat->flags);
}
/*
* match with FNM_PATHNAME; the pattern has base implicitly
* in front of it.
*/
if (*pattern == '/')
pattern++;
if (pathlen < baselen ||
(baselen && pathname[baselen] != '/') ||
strncmp(pathname, base, baselen))
return 0;
if (baselen != 0)
baselen++;
return fnmatch_icase(pattern, pathname + baselen, FNM_PATHNAME) == 0;
return match_pathname(pathname, pathlen,
base, baselen,
pattern, prefix, pat->patternlen, pat->flags);
}
static int macroexpand_one(int attr_nr, int rem);
@ -704,7 +714,7 @@ static int fill(const char *path, int pathlen, const char *basename,
if (a->is_macro)
continue;
if (path_matches(path, pathlen, basename,
a->u.pattern, base, stk->originlen))
&a->u.pat, base, stk->originlen))
rem = fill_one("fill", a, rem);
}
return rem;

190
dir.c
View File

@ -308,42 +308,69 @@ static int no_wildcard(const char *string)
return string[simple_length(string)] == '\0';
}
/*
 * Analyse one pattern string and report its properties.
 *
 * On entry *pattern points at the NUL-terminated pattern.  On return:
 *
 *  - *pattern points past a leading '!', if there was one;
 *  - *patternlen is the length of the pattern, not counting a
 *    trailing '/';
 *  - *flags is a combination of EXC_FLAG_NEGATIVE (leading '!'),
 *    EXC_FLAG_MUSTBEDIR (trailing '/'), EXC_FLAG_NODIR (no '/'
 *    within the first *patternlen bytes) and EXC_FLAG_ENDSWITH
 *    (leading '*' and no_wildcard() accepts the remainder);
 *  - *nowildcardlen is simple_length() of the pattern, capped at
 *    *patternlen.
 *
 * No copy of the string is made, so a trailing '/' is still present
 * in the buffer right after the first *patternlen bytes.
 */
void parse_exclude_pattern(const char **pattern,
			   int *patternlen,
			   int *flags,
			   int *nowildcardlen)
{
	const char *str = *pattern;
	size_t length;
	int bits = 0;
	int literal;

	if (*str == '!') {
		bits |= EXC_FLAG_NEGATIVE;
		str++;
	}

	length = strlen(str);
	if (length && str[length - 1] == '/') {
		length--;
		bits |= EXC_FLAG_MUSTBEDIR;
	}

	/* only slashes inside the counted part make it a path pattern */
	if (!memchr(str, '/', length))
		bits |= EXC_FLAG_NODIR;

	/*
	 * The trailing slash is still part of 'str' (stripping it would
	 * need one more allocation), so simple_length() may count it.
	 * Cap the result so it never exceeds the real pattern length.
	 */
	literal = simple_length(str);
	if (literal > length)
		literal = length;

	if (*str == '*' && no_wildcard(str + 1))
		bits |= EXC_FLAG_ENDSWITH;

	*flags = bits;
	*nowildcardlen = literal;
	*pattern = str;
	*patternlen = length;
}
void add_exclude(const char *string, const char *base,
int baselen, struct exclude_list *which)
{
struct exclude *x;
size_t len;
int to_exclude = 1;
int flags = 0;
int patternlen;
int flags;
int nowildcardlen;
if (*string == '!') {
to_exclude = 0;
string++;
}
len = strlen(string);
if (len && string[len - 1] == '/') {
parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen);
if (flags & EXC_FLAG_MUSTBEDIR) {
char *s;
x = xmalloc(sizeof(*x) + len);
x = xmalloc(sizeof(*x) + patternlen + 1);
s = (char *)(x+1);
memcpy(s, string, len - 1);
s[len - 1] = '\0';
string = s;
memcpy(s, string, patternlen);
s[patternlen] = '\0';
x->pattern = s;
flags = EXC_FLAG_MUSTBEDIR;
} else {
x = xmalloc(sizeof(*x));
x->pattern = string;
}
x->to_exclude = to_exclude;
x->patternlen = strlen(string);
x->patternlen = patternlen;
x->nowildcardlen = nowildcardlen;
x->base = base;
x->baselen = baselen;
x->flags = flags;
if (!strchr(string, '/'))
x->flags |= EXC_FLAG_NODIR;
x->nowildcardlen = simple_length(string);
if (*string == '*' && no_wildcard(string+1))
x->flags |= EXC_FLAG_ENDSWITH;
ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
which->excludes[which->nr++] = x;
}
@ -505,6 +532,72 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
dir->basebuf[baselen] = '\0';
}
/*
 * Match a pattern that contains no directory separator against the
 * last path component.
 *
 * basename/basenamelen: the component to test and its length.
 * pattern/patternlen:   the pattern and its length.
 * prefix:               the pattern's nowildcardlen.
 * flags:                EXC_FLAG_* bits of the pattern.
 *
 * Returns 1 on a match and 0 otherwise.
 *
 * The comparisons are done with strcmp_icase() / fnmatch_icase(),
 * which are not given any length; both strings are therefore compared
 * up to their terminating NUL.
 */
int match_basename(const char *basename, int basenamelen,
		   const char *pattern, int prefix, int patternlen,
		   int flags)
{
	/* the whole pattern is literal: a plain string comparison */
	if (prefix == patternlen)
		return !strcmp_icase(pattern, basename);

	/* "*literal": compare the literal against the tail of basename */
	if (flags & EXC_FLAG_ENDSWITH) {
		int suffixlen = patternlen - 1;

		if (suffixlen > basenamelen)
			return 0;
		return !strcmp_icase(pattern + 1,
				     basename + (basenamelen - suffixlen));
	}

	/* anything else needs real wildcard matching */
	return fnmatch_icase(pattern, basename, 0) == 0;
}
/*
 * Match a pattern that contains a directory separator against a full
 * pathname.  The pattern is taken to have "base" implicitly in front
 * of it, and the remainder is matched with FNM_PATHNAME.
 *
 * pathname/pathlen: the path to test and its length.
 * base/baselen:     the directory the pattern is relative to.  baselen
 *                   does not count a trailing slash; base[] itself may
 *                   or may not end with one.
 * pattern:          the pattern; one leading '/' is skipped.
 * prefix:           the pattern's nowildcardlen.
 * patternlen/flags: accepted but not used by this function.
 *
 * Returns 1 on a match and 0 otherwise.
 */
int match_pathname(const char *pathname, int pathlen,
		   const char *base, int baselen,
		   const char *pattern, int prefix, int patternlen,
		   int flags)
{
	const char *rest;
	int restlen;

	/* a leading slash only anchors the pattern; drop it */
	if (*pattern == '/') {
		pattern++;
		prefix--;
	}

	/* pathname must be "<base>/<something>" (or anything if no base) */
	if (pathlen < baselen + 1)
		return 0;
	if (baselen && pathname[baselen] != '/')
		return 0;
	if (strncmp_icase(pathname, base, baselen))
		return 0;

	/* what is left of pathname after "<base>/" */
	if (baselen)
		restlen = pathlen - baselen - 1;
	else
		restlen = pathlen;
	rest = pathname + pathlen - restlen;

	if (prefix) {
		/*
		 * The literal head of the pattern is compared directly;
		 * if it is longer than what is left of the pathname it
		 * surely cannot match.
		 */
		if (prefix > restlen)
			return 0;
		if (strncmp_icase(pattern, rest, prefix))
			return 0;
		pattern += prefix;
		rest += prefix;
	}

	return !fnmatch_icase(pattern, rest, FNM_PATHNAME);
}
/* Scan the list and let the last match determine the fate.
* Return 1 for exclude, 0 for include and -1 for undecided.
*/
@ -519,9 +612,9 @@ int excluded_from_list(const char *pathname,
for (i = el->nr - 1; 0 <= i; i--) {
struct exclude *x = el->excludes[i];
const char *name, *exclude = x->pattern;
int to_exclude = x->to_exclude;
int namelen, prefix = x->nowildcardlen;
const char *exclude = x->pattern;
int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
int prefix = x->nowildcardlen;
if (x->flags & EXC_FLAG_MUSTBEDIR) {
if (*dtype == DT_UNKNOWN)
@ -531,51 +624,18 @@ int excluded_from_list(const char *pathname,
}
if (x->flags & EXC_FLAG_NODIR) {
/* match basename */
if (prefix == x->patternlen) {
if (!strcmp_icase(exclude, basename))
if (match_basename(basename,
pathlen - (basename - pathname),
exclude, prefix, x->patternlen,
x->flags))
return to_exclude;
} else if (x->flags & EXC_FLAG_ENDSWITH) {
if (x->patternlen - 1 <= pathlen &&
!strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
return to_exclude;
} else {
if (fnmatch_icase(exclude, basename, 0) == 0)
return to_exclude;
}
continue;
}
/* match with FNM_PATHNAME:
* exclude has base (baselen long) implicitly in front of it.
*/
if (*exclude == '/') {
exclude++;
prefix--;
}
if (pathlen < x->baselen ||
(x->baselen && pathname[x->baselen-1] != '/') ||
strncmp_icase(pathname, x->base, x->baselen))
continue;
namelen = x->baselen ? pathlen - x->baselen : pathlen;
name = pathname + pathlen - namelen;
/* if the non-wildcard part is longer than the
remaining pathname, surely it cannot match */
if (prefix > namelen)
continue;
if (prefix) {
if (strncmp_icase(exclude, name, prefix))
continue;
exclude += prefix;
name += prefix;
namelen -= prefix;
}
if (!namelen || !fnmatch_icase(exclude, name, FNM_PATHNAME))
assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
if (match_pathname(pathname, pathlen,
x->base, x->baselen ? x->baselen - 1 : 0,
exclude, prefix, x->patternlen, x->flags))
return to_exclude;
}
return -1; /* undecided */

13
dir.h
View File

@ -11,6 +11,7 @@ struct dir_entry {
#define EXC_FLAG_NODIR 1
#define EXC_FLAG_ENDSWITH 4
#define EXC_FLAG_MUSTBEDIR 8
#define EXC_FLAG_NEGATIVE 16
struct exclude_list {
int nr;
@ -21,7 +22,6 @@ struct exclude_list {
int nowildcardlen;
const char *base;
int baselen;
int to_exclude;
int flags;
} **excludes;
};
@ -80,6 +80,16 @@ extern int excluded_from_list(const char *pathname, int pathlen, const char *bas
int *dtype, struct exclude_list *el);
struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len);
/*
 * These implement the matching logic for dir.c:excluded_from_list and
 * attr.c:path_matches().  Both return 1 on a match and 0 otherwise.
 *
 * match_basename(basename, basenamelen,
 *                pattern, prefix, patternlen, flags)
 *
 * match_pathname(pathname, pathlen,
 *                base, baselen,
 *                pattern, prefix, patternlen, flags)
 *
 * "prefix" is the pattern's nowildcardlen and "flags" its EXC_FLAG_*
 * bits.
 */
extern int match_basename(const char *, int,
const char *, int, int, int);
extern int match_pathname(const char *, int,
const char *, int,
const char *, int, int, int);
/*
* The excluded() API is meant for callers that check each level of leading
* directory hierarchies with excluded() to avoid recursing into excluded
@ -97,6 +107,7 @@ extern int path_excluded(struct path_exclude_check *, const char *, int namelen,
extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
char **buf_p, struct exclude_list *which, int check_index);
extern void add_excludes_from_file(struct dir_struct *, const char *fname);
extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
extern void add_exclude(const char *string, const char *base,
int baselen, struct exclude_list *which);
extern void free_excludes(struct exclude_list *el);

View File

@ -196,6 +196,16 @@ test_expect_success 'root subdir attribute test' '
attr_check subdir/a/i unspecified
'
test_expect_success 'negative patterns' '
echo "!f test=bar" >.gitattributes &&
test_must_fail git check-attr test -- f
'
test_expect_success 'patterns starting with exclamation' '
echo "\!f test=foo" >.gitattributes &&
attr_check "!f" foo
'
test_expect_success 'setup bare' '
git clone --bare . bare.git &&
cd bare.git

View File

@ -214,4 +214,10 @@ test_expect_success 'subdirectory ignore (l1)' '
test_cmp expect actual
'
test_expect_success 'pattern matches prefix completely' '
: >expect &&
git ls-files -i -o --exclude "/three/a.3[abc]" >actual &&
test_cmp expect actual
'
test_done