Merge branch 'nd/attr-match-optim-more'
Start laying the foundation to build the "wildmatch" after we can agree on its desired semantics.

* nd/attr-match-optim-more:
  attr: more matching optimizations from .gitignore
  gitignore: make pattern parsing code a separate function
  exclude: split pathname matching code into a separate function
  exclude: fix a bug in prefix compare optimization
  exclude: split basename matching code into a separate function
  exclude: stricten a length check in EXC_FLAG_ENDSWITH case
This commit is contained in:
commit
5f836422ab
@ -56,6 +56,7 @@ When more than one pattern matches the path, a later line
|
||||
overrides an earlier line. This overriding is done per
|
||||
attribute. The rules by which the pattern matches paths are the
|
||||
same as in `.gitignore` files; see linkgit:gitignore[5].
|
||||
Unlike `.gitignore`, negative patterns are forbidden.
|
||||
|
||||
When deciding what attributes are assigned to a path, git
|
||||
consults `$GIT_DIR/info/attributes` file (which has the highest
|
||||
|
52
attr.c
52
attr.c
@ -115,6 +115,13 @@ struct attr_state {
|
||||
const char *setto;
|
||||
};
|
||||
|
||||
/*
 * Parsed form of one attribute-rule pattern.  parse_attr_line() hands the
 * addresses of these fields to parse_exclude_pattern(), which fills them in.
 */
struct pattern {
|
||||
const char *pattern; /* pattern text; after parsing it points past any leading '!' */
|
||||
int patternlen; /* length of the pattern, not counting a trailing '/' */
|
||||
int nowildcardlen; /* simple_length() of the pattern, capped at patternlen */
|
||||
int flags; /* EXC_FLAG_* bits set by parse_exclude_pattern() */
|
||||
};
|
||||
|
||||
/*
|
||||
* One rule, as from a .gitattributes file.
|
||||
*
|
||||
@ -131,7 +138,7 @@ struct attr_state {
|
||||
*/
|
||||
struct match_attr {
|
||||
union {
|
||||
char *pattern;
|
||||
struct pattern pat;
|
||||
struct git_attr *attr;
|
||||
} u;
|
||||
char is_macro;
|
||||
@ -241,9 +248,16 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
|
||||
if (is_macro)
|
||||
res->u.attr = git_attr_internal(name, namelen);
|
||||
else {
|
||||
res->u.pattern = (char *)&(res->state[num_attr]);
|
||||
memcpy(res->u.pattern, name, namelen);
|
||||
res->u.pattern[namelen] = 0;
|
||||
char *p = (char *)&(res->state[num_attr]);
|
||||
memcpy(p, name, namelen);
|
||||
res->u.pat.pattern = p;
|
||||
parse_exclude_pattern(&res->u.pat.pattern,
|
||||
&res->u.pat.patternlen,
|
||||
&res->u.pat.flags,
|
||||
&res->u.pat.nowildcardlen);
|
||||
if (res->u.pat.flags & EXC_FLAG_NEGATIVE)
|
||||
die(_("Negative patterns are forbidden in git attributes\n"
|
||||
"Use '\\!' for literal leading exclamation."));
|
||||
}
|
||||
res->is_macro = is_macro;
|
||||
res->num_attr = num_attr;
|
||||
@ -648,25 +662,21 @@ static void prepare_attr_stack(const char *path)
|
||||
|
||||
static int path_matches(const char *pathname, int pathlen,
|
||||
const char *basename,
|
||||
const char *pattern,
|
||||
const struct pattern *pat,
|
||||
const char *base, int baselen)
|
||||
{
|
||||
if (!strchr(pattern, '/')) {
|
||||
return (fnmatch_icase(pattern, basename, 0) == 0);
|
||||
const char *pattern = pat->pattern;
|
||||
int prefix = pat->nowildcardlen;
|
||||
|
||||
if (pat->flags & EXC_FLAG_NODIR) {
|
||||
return match_basename(basename,
|
||||
pathlen - (basename - pathname),
|
||||
pattern, prefix,
|
||||
pat->patternlen, pat->flags);
|
||||
}
|
||||
/*
|
||||
* match with FNM_PATHNAME; the pattern has base implicitly
|
||||
* in front of it.
|
||||
*/
|
||||
if (*pattern == '/')
|
||||
pattern++;
|
||||
if (pathlen < baselen ||
|
||||
(baselen && pathname[baselen] != '/') ||
|
||||
strncmp(pathname, base, baselen))
|
||||
return 0;
|
||||
if (baselen != 0)
|
||||
baselen++;
|
||||
return fnmatch_icase(pattern, pathname + baselen, FNM_PATHNAME) == 0;
|
||||
return match_pathname(pathname, pathlen,
|
||||
base, baselen,
|
||||
pattern, prefix, pat->patternlen, pat->flags);
|
||||
}
|
||||
|
||||
static int macroexpand_one(int attr_nr, int rem);
|
||||
@ -704,7 +714,7 @@ static int fill(const char *path, int pathlen, const char *basename,
|
||||
if (a->is_macro)
|
||||
continue;
|
||||
if (path_matches(path, pathlen, basename,
|
||||
a->u.pattern, base, stk->originlen))
|
||||
&a->u.pat, base, stk->originlen))
|
||||
rem = fill_one("fill", a, rem);
|
||||
}
|
||||
return rem;
|
||||
|
190
dir.c
190
dir.c
@ -308,42 +308,69 @@ static int no_wildcard(const char *string)
|
||||
return string[simple_length(string)] == '\0';
|
||||
}
|
||||
|
||||
void parse_exclude_pattern(const char **pattern,
|
||||
int *patternlen,
|
||||
int *flags,
|
||||
int *nowildcardlen)
|
||||
{
|
||||
const char *p = *pattern;
|
||||
size_t i, len;
|
||||
|
||||
*flags = 0;
|
||||
if (*p == '!') {
|
||||
*flags |= EXC_FLAG_NEGATIVE;
|
||||
p++;
|
||||
}
|
||||
len = strlen(p);
|
||||
if (len && p[len - 1] == '/') {
|
||||
len--;
|
||||
*flags |= EXC_FLAG_MUSTBEDIR;
|
||||
}
|
||||
for (i = 0; i < len; i++) {
|
||||
if (p[i] == '/')
|
||||
break;
|
||||
}
|
||||
if (i == len)
|
||||
*flags |= EXC_FLAG_NODIR;
|
||||
*nowildcardlen = simple_length(p);
|
||||
/*
|
||||
* we should have excluded the trailing slash from 'p' too,
|
||||
* but that's one more allocation. Instead just make sure
|
||||
* nowildcardlen does not exceed real patternlen
|
||||
*/
|
||||
if (*nowildcardlen > len)
|
||||
*nowildcardlen = len;
|
||||
if (*p == '*' && no_wildcard(p + 1))
|
||||
*flags |= EXC_FLAG_ENDSWITH;
|
||||
*pattern = p;
|
||||
*patternlen = len;
|
||||
}
|
||||
|
||||
void add_exclude(const char *string, const char *base,
|
||||
int baselen, struct exclude_list *which)
|
||||
{
|
||||
struct exclude *x;
|
||||
size_t len;
|
||||
int to_exclude = 1;
|
||||
int flags = 0;
|
||||
int patternlen;
|
||||
int flags;
|
||||
int nowildcardlen;
|
||||
|
||||
if (*string == '!') {
|
||||
to_exclude = 0;
|
||||
string++;
|
||||
}
|
||||
len = strlen(string);
|
||||
if (len && string[len - 1] == '/') {
|
||||
parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen);
|
||||
if (flags & EXC_FLAG_MUSTBEDIR) {
|
||||
char *s;
|
||||
x = xmalloc(sizeof(*x) + len);
|
||||
x = xmalloc(sizeof(*x) + patternlen + 1);
|
||||
s = (char *)(x+1);
|
||||
memcpy(s, string, len - 1);
|
||||
s[len - 1] = '\0';
|
||||
string = s;
|
||||
memcpy(s, string, patternlen);
|
||||
s[patternlen] = '\0';
|
||||
x->pattern = s;
|
||||
flags = EXC_FLAG_MUSTBEDIR;
|
||||
} else {
|
||||
x = xmalloc(sizeof(*x));
|
||||
x->pattern = string;
|
||||
}
|
||||
x->to_exclude = to_exclude;
|
||||
x->patternlen = strlen(string);
|
||||
x->patternlen = patternlen;
|
||||
x->nowildcardlen = nowildcardlen;
|
||||
x->base = base;
|
||||
x->baselen = baselen;
|
||||
x->flags = flags;
|
||||
if (!strchr(string, '/'))
|
||||
x->flags |= EXC_FLAG_NODIR;
|
||||
x->nowildcardlen = simple_length(string);
|
||||
if (*string == '*' && no_wildcard(string+1))
|
||||
x->flags |= EXC_FLAG_ENDSWITH;
|
||||
ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
|
||||
which->excludes[which->nr++] = x;
|
||||
}
|
||||
@ -505,6 +532,72 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
|
||||
dir->basebuf[baselen] = '\0';
|
||||
}
|
||||
|
||||
int match_basename(const char *basename, int basenamelen,
|
||||
const char *pattern, int prefix, int patternlen,
|
||||
int flags)
|
||||
{
|
||||
if (prefix == patternlen) {
|
||||
if (!strcmp_icase(pattern, basename))
|
||||
return 1;
|
||||
} else if (flags & EXC_FLAG_ENDSWITH) {
|
||||
if (patternlen - 1 <= basenamelen &&
|
||||
!strcmp_icase(pattern + 1,
|
||||
basename + basenamelen - patternlen + 1))
|
||||
return 1;
|
||||
} else {
|
||||
if (fnmatch_icase(pattern, basename, 0) == 0)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Decide whether a pattern containing a '/' matches "pathname".  The
 * pattern is taken relative to "base": base is implicitly in front of it.
 *
 * pathname, pathlen:   the path being tested and its length.
 * base, baselen:       directory the pattern is relative to.  baselen does
 *                      not count a trailing slash; base[] itself may or may
 *                      not end with one.
 * pattern, patternlen: the pattern text and how many of its bytes count.
 * prefix:              length of the leading wildcard-free part of pattern.
 * flags:               EXC_FLAG_* bits; not consulted by this function.
 *
 * Returns 1 on a match, 0 otherwise.
 */
int match_pathname(const char *pathname, int pathlen,
		   const char *base, int baselen,
		   const char *pattern, int prefix, int patternlen,
		   int flags)
{
	const char *name;
	int namelen;

	/*
	 * match with FNM_PATHNAME; the pattern has base implicitly
	 * in front of it.
	 */
	if (*pattern == '/') {
		pattern++;
		patternlen--;
		prefix--;
	}

	/*
	 * baselen does not count the trailing slash. base[] may or
	 * may not end with a trailing slash though.
	 */
	if (pathlen < baselen + 1 ||
	    (baselen && pathname[baselen] != '/') ||
	    strncmp_icase(pathname, base, baselen))
		return 0;

	/* "name" is what remains of pathname after "base/". */
	namelen = baselen ? pathlen - baselen - 1 : pathlen;
	name = pathname + pathlen - namelen;

	if (prefix) {
		/*
		 * if the non-wildcard part is longer than the
		 * remaining pathname, surely it cannot match.
		 */
		if (prefix > namelen)
			return 0;

		if (strncmp_icase(pattern, name, prefix))
			return 0;
		pattern += prefix;
		patternlen -= prefix;
		name += prefix;
		namelen -= prefix;

		/*
		 * The literal prefix was the whole pattern and it consumed
		 * the whole name: that is a match.  Deciding it here keeps
		 * bytes beyond patternlen (e.g. an uncounted trailing '/')
		 * from reaching fnmatch.
		 */
		if (!patternlen && !namelen)
			return 1;
	}

	return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0;
}
|
||||
|
||||
/* Scan the list and let the last match determine the fate.
|
||||
* Return 1 for exclude, 0 for include and -1 for undecided.
|
||||
*/
|
||||
@ -519,9 +612,9 @@ int excluded_from_list(const char *pathname,
|
||||
|
||||
for (i = el->nr - 1; 0 <= i; i--) {
|
||||
struct exclude *x = el->excludes[i];
|
||||
const char *name, *exclude = x->pattern;
|
||||
int to_exclude = x->to_exclude;
|
||||
int namelen, prefix = x->nowildcardlen;
|
||||
const char *exclude = x->pattern;
|
||||
int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
|
||||
int prefix = x->nowildcardlen;
|
||||
|
||||
if (x->flags & EXC_FLAG_MUSTBEDIR) {
|
||||
if (*dtype == DT_UNKNOWN)
|
||||
@ -531,51 +624,18 @@ int excluded_from_list(const char *pathname,
|
||||
}
|
||||
|
||||
if (x->flags & EXC_FLAG_NODIR) {
|
||||
/* match basename */
|
||||
if (prefix == x->patternlen) {
|
||||
if (!strcmp_icase(exclude, basename))
|
||||
if (match_basename(basename,
|
||||
pathlen - (basename - pathname),
|
||||
exclude, prefix, x->patternlen,
|
||||
x->flags))
|
||||
return to_exclude;
|
||||
} else if (x->flags & EXC_FLAG_ENDSWITH) {
|
||||
if (x->patternlen - 1 <= pathlen &&
|
||||
!strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
|
||||
return to_exclude;
|
||||
} else {
|
||||
if (fnmatch_icase(exclude, basename, 0) == 0)
|
||||
return to_exclude;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* match with FNM_PATHNAME:
|
||||
* exclude has base (baselen long) implicitly in front of it.
|
||||
*/
|
||||
if (*exclude == '/') {
|
||||
exclude++;
|
||||
prefix--;
|
||||
}
|
||||
|
||||
if (pathlen < x->baselen ||
|
||||
(x->baselen && pathname[x->baselen-1] != '/') ||
|
||||
strncmp_icase(pathname, x->base, x->baselen))
|
||||
continue;
|
||||
|
||||
namelen = x->baselen ? pathlen - x->baselen : pathlen;
|
||||
name = pathname + pathlen - namelen;
|
||||
|
||||
/* if the non-wildcard part is longer than the
|
||||
remaining pathname, surely it cannot match */
|
||||
if (prefix > namelen)
|
||||
continue;
|
||||
|
||||
if (prefix) {
|
||||
if (strncmp_icase(exclude, name, prefix))
|
||||
continue;
|
||||
exclude += prefix;
|
||||
name += prefix;
|
||||
namelen -= prefix;
|
||||
}
|
||||
|
||||
if (!namelen || !fnmatch_icase(exclude, name, FNM_PATHNAME))
|
||||
assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
|
||||
if (match_pathname(pathname, pathlen,
|
||||
x->base, x->baselen ? x->baselen - 1 : 0,
|
||||
exclude, prefix, x->patternlen, x->flags))
|
||||
return to_exclude;
|
||||
}
|
||||
return -1; /* undecided */
|
||||
|
13
dir.h
13
dir.h
@ -11,6 +11,7 @@ struct dir_entry {
|
||||
#define EXC_FLAG_NODIR 1
|
||||
#define EXC_FLAG_ENDSWITH 4
|
||||
#define EXC_FLAG_MUSTBEDIR 8
|
||||
#define EXC_FLAG_NEGATIVE 16
|
||||
|
||||
struct exclude_list {
|
||||
int nr;
|
||||
@ -21,7 +22,6 @@ struct exclude_list {
|
||||
int nowildcardlen;
|
||||
const char *base;
|
||||
int baselen;
|
||||
int to_exclude;
|
||||
int flags;
|
||||
} **excludes;
|
||||
};
|
||||
@ -80,6 +80,16 @@ extern int excluded_from_list(const char *pathname, int pathlen, const char *bas
|
||||
int *dtype, struct exclude_list *el);
|
||||
struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len);
|
||||
|
||||
/*
 * These implement the matching logic shared by dir.c:excluded_from_list()
 * and attr.c:path_matches().
 *
 * "prefix" is the length of the leading wildcard-free part of "pattern",
 * "patternlen" is the pattern length and "flags" holds EXC_FLAG_* bits.
 * Both return 1 on a match and 0 otherwise.
 */
extern int match_basename(const char *basename, int basenamelen,
			  const char *pattern, int prefix, int patternlen,
			  int flags);
extern int match_pathname(const char *pathname, int pathlen,
			  const char *base, int baselen,
			  const char *pattern, int prefix, int patternlen,
			  int flags);
|
||||
|
||||
/*
|
||||
* The excluded() API is meant for callers that check each level of leading
|
||||
* directory hierarchies with excluded() to avoid recursing into excluded
|
||||
@ -97,6 +107,7 @@ extern int path_excluded(struct path_exclude_check *, const char *, int namelen,
|
||||
extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
|
||||
char **buf_p, struct exclude_list *which, int check_index);
|
||||
extern void add_excludes_from_file(struct dir_struct *, const char *fname);
|
||||
extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
|
||||
extern void add_exclude(const char *string, const char *base,
|
||||
int baselen, struct exclude_list *which);
|
||||
extern void free_excludes(struct exclude_list *el);
|
||||
|
@ -196,6 +196,16 @@ test_expect_success 'root subdir attribute test' '
|
||||
attr_check subdir/a/i unspecified
|
||||
'
|
||||
|
||||
test_expect_success 'negative patterns' '
|
||||
echo "!f test=bar" >.gitattributes &&
|
||||
test_must_fail git check-attr test -- f
|
||||
'
|
||||
|
||||
test_expect_success 'patterns starting with exclamation' '
|
||||
echo "\!f test=foo" >.gitattributes &&
|
||||
attr_check "!f" foo
|
||||
'
|
||||
|
||||
test_expect_success 'setup bare' '
|
||||
git clone --bare . bare.git &&
|
||||
cd bare.git
|
||||
|
@ -214,4 +214,10 @@ test_expect_success 'subdirectory ignore (l1)' '
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_expect_success 'pattern matches prefix completely' '
|
||||
: >expect &&
|
||||
git ls-files -i -o --exclude "/three/a.3[abc]" >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_done
|
||||
|
Loading…
Reference in New Issue
Block a user