Merge branch 'nd/attr-match-optim-more'
Start laying the foundation to build the "wildmatch" after we can agree on its desired semantics.

* nd/attr-match-optim-more:
  attr: more matching optimizations from .gitignore
  gitignore: make pattern parsing code a separate function
  exclude: split pathname matching code into a separate function
  exclude: fix a bug in prefix compare optimization
  exclude: split basename matching code into a separate function
  exclude: stricten a length check in EXC_FLAG_ENDSWITH case
This commit is contained in:
commit
5f836422ab
@ -56,6 +56,7 @@ When more than one pattern matches the path, a later line
|
|||||||
overrides an earlier line. This overriding is done per
|
overrides an earlier line. This overriding is done per
|
||||||
attribute. The rules how the pattern matches paths are the
|
attribute. The rules how the pattern matches paths are the
|
||||||
same as in `.gitignore` files; see linkgit:gitignore[5].
|
same as in `.gitignore` files; see linkgit:gitignore[5].
|
||||||
|
Unlike `.gitignore`, negative patterns are forbidden.
|
||||||
|
|
||||||
When deciding what attributes are assigned to a path, git
|
When deciding what attributes are assigned to a path, git
|
||||||
consults `$GIT_DIR/info/attributes` file (which has the highest
|
consults `$GIT_DIR/info/attributes` file (which has the highest
|
||||||
|
52
attr.c
52
attr.c
@ -115,6 +115,13 @@ struct attr_state {
|
|||||||
const char *setto;
|
const char *setto;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct pattern {
|
||||||
|
const char *pattern;
|
||||||
|
int patternlen;
|
||||||
|
int nowildcardlen;
|
||||||
|
int flags; /* EXC_FLAG_* */
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* One rule, as from a .gitattributes file.
|
* One rule, as from a .gitattributes file.
|
||||||
*
|
*
|
||||||
@ -131,7 +138,7 @@ struct attr_state {
|
|||||||
*/
|
*/
|
||||||
struct match_attr {
|
struct match_attr {
|
||||||
union {
|
union {
|
||||||
char *pattern;
|
struct pattern pat;
|
||||||
struct git_attr *attr;
|
struct git_attr *attr;
|
||||||
} u;
|
} u;
|
||||||
char is_macro;
|
char is_macro;
|
||||||
@ -241,9 +248,16 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
|
|||||||
if (is_macro)
|
if (is_macro)
|
||||||
res->u.attr = git_attr_internal(name, namelen);
|
res->u.attr = git_attr_internal(name, namelen);
|
||||||
else {
|
else {
|
||||||
res->u.pattern = (char *)&(res->state[num_attr]);
|
char *p = (char *)&(res->state[num_attr]);
|
||||||
memcpy(res->u.pattern, name, namelen);
|
memcpy(p, name, namelen);
|
||||||
res->u.pattern[namelen] = 0;
|
res->u.pat.pattern = p;
|
||||||
|
parse_exclude_pattern(&res->u.pat.pattern,
|
||||||
|
&res->u.pat.patternlen,
|
||||||
|
&res->u.pat.flags,
|
||||||
|
&res->u.pat.nowildcardlen);
|
||||||
|
if (res->u.pat.flags & EXC_FLAG_NEGATIVE)
|
||||||
|
die(_("Negative patterns are forbidden in git attributes\n"
|
||||||
|
"Use '\\!' for literal leading exclamation."));
|
||||||
}
|
}
|
||||||
res->is_macro = is_macro;
|
res->is_macro = is_macro;
|
||||||
res->num_attr = num_attr;
|
res->num_attr = num_attr;
|
||||||
@ -648,25 +662,21 @@ static void prepare_attr_stack(const char *path)
|
|||||||
|
|
||||||
static int path_matches(const char *pathname, int pathlen,
|
static int path_matches(const char *pathname, int pathlen,
|
||||||
const char *basename,
|
const char *basename,
|
||||||
const char *pattern,
|
const struct pattern *pat,
|
||||||
const char *base, int baselen)
|
const char *base, int baselen)
|
||||||
{
|
{
|
||||||
if (!strchr(pattern, '/')) {
|
const char *pattern = pat->pattern;
|
||||||
return (fnmatch_icase(pattern, basename, 0) == 0);
|
int prefix = pat->nowildcardlen;
|
||||||
|
|
||||||
|
if (pat->flags & EXC_FLAG_NODIR) {
|
||||||
|
return match_basename(basename,
|
||||||
|
pathlen - (basename - pathname),
|
||||||
|
pattern, prefix,
|
||||||
|
pat->patternlen, pat->flags);
|
||||||
}
|
}
|
||||||
/*
|
return match_pathname(pathname, pathlen,
|
||||||
* match with FNM_PATHNAME; the pattern has base implicitly
|
base, baselen,
|
||||||
* in front of it.
|
pattern, prefix, pat->patternlen, pat->flags);
|
||||||
*/
|
|
||||||
if (*pattern == '/')
|
|
||||||
pattern++;
|
|
||||||
if (pathlen < baselen ||
|
|
||||||
(baselen && pathname[baselen] != '/') ||
|
|
||||||
strncmp(pathname, base, baselen))
|
|
||||||
return 0;
|
|
||||||
if (baselen != 0)
|
|
||||||
baselen++;
|
|
||||||
return fnmatch_icase(pattern, pathname + baselen, FNM_PATHNAME) == 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int macroexpand_one(int attr_nr, int rem);
|
static int macroexpand_one(int attr_nr, int rem);
|
||||||
@ -704,7 +714,7 @@ static int fill(const char *path, int pathlen, const char *basename,
|
|||||||
if (a->is_macro)
|
if (a->is_macro)
|
||||||
continue;
|
continue;
|
||||||
if (path_matches(path, pathlen, basename,
|
if (path_matches(path, pathlen, basename,
|
||||||
a->u.pattern, base, stk->originlen))
|
&a->u.pat, base, stk->originlen))
|
||||||
rem = fill_one("fill", a, rem);
|
rem = fill_one("fill", a, rem);
|
||||||
}
|
}
|
||||||
return rem;
|
return rem;
|
||||||
|
190
dir.c
190
dir.c
@ -308,42 +308,69 @@ static int no_wildcard(const char *string)
|
|||||||
return string[simple_length(string)] == '\0';
|
return string[simple_length(string)] == '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void parse_exclude_pattern(const char **pattern,
|
||||||
|
int *patternlen,
|
||||||
|
int *flags,
|
||||||
|
int *nowildcardlen)
|
||||||
|
{
|
||||||
|
const char *p = *pattern;
|
||||||
|
size_t i, len;
|
||||||
|
|
||||||
|
*flags = 0;
|
||||||
|
if (*p == '!') {
|
||||||
|
*flags |= EXC_FLAG_NEGATIVE;
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
len = strlen(p);
|
||||||
|
if (len && p[len - 1] == '/') {
|
||||||
|
len--;
|
||||||
|
*flags |= EXC_FLAG_MUSTBEDIR;
|
||||||
|
}
|
||||||
|
for (i = 0; i < len; i++) {
|
||||||
|
if (p[i] == '/')
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (i == len)
|
||||||
|
*flags |= EXC_FLAG_NODIR;
|
||||||
|
*nowildcardlen = simple_length(p);
|
||||||
|
/*
|
||||||
|
* we should have excluded the trailing slash from 'p' too,
|
||||||
|
* but that's one more allocation. Instead just make sure
|
||||||
|
* nowildcardlen does not exceed real patternlen
|
||||||
|
*/
|
||||||
|
if (*nowildcardlen > len)
|
||||||
|
*nowildcardlen = len;
|
||||||
|
if (*p == '*' && no_wildcard(p + 1))
|
||||||
|
*flags |= EXC_FLAG_ENDSWITH;
|
||||||
|
*pattern = p;
|
||||||
|
*patternlen = len;
|
||||||
|
}
|
||||||
|
|
||||||
void add_exclude(const char *string, const char *base,
|
void add_exclude(const char *string, const char *base,
|
||||||
int baselen, struct exclude_list *which)
|
int baselen, struct exclude_list *which)
|
||||||
{
|
{
|
||||||
struct exclude *x;
|
struct exclude *x;
|
||||||
size_t len;
|
int patternlen;
|
||||||
int to_exclude = 1;
|
int flags;
|
||||||
int flags = 0;
|
int nowildcardlen;
|
||||||
|
|
||||||
if (*string == '!') {
|
parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen);
|
||||||
to_exclude = 0;
|
if (flags & EXC_FLAG_MUSTBEDIR) {
|
||||||
string++;
|
|
||||||
}
|
|
||||||
len = strlen(string);
|
|
||||||
if (len && string[len - 1] == '/') {
|
|
||||||
char *s;
|
char *s;
|
||||||
x = xmalloc(sizeof(*x) + len);
|
x = xmalloc(sizeof(*x) + patternlen + 1);
|
||||||
s = (char *)(x+1);
|
s = (char *)(x+1);
|
||||||
memcpy(s, string, len - 1);
|
memcpy(s, string, patternlen);
|
||||||
s[len - 1] = '\0';
|
s[patternlen] = '\0';
|
||||||
string = s;
|
|
||||||
x->pattern = s;
|
x->pattern = s;
|
||||||
flags = EXC_FLAG_MUSTBEDIR;
|
|
||||||
} else {
|
} else {
|
||||||
x = xmalloc(sizeof(*x));
|
x = xmalloc(sizeof(*x));
|
||||||
x->pattern = string;
|
x->pattern = string;
|
||||||
}
|
}
|
||||||
x->to_exclude = to_exclude;
|
x->patternlen = patternlen;
|
||||||
x->patternlen = strlen(string);
|
x->nowildcardlen = nowildcardlen;
|
||||||
x->base = base;
|
x->base = base;
|
||||||
x->baselen = baselen;
|
x->baselen = baselen;
|
||||||
x->flags = flags;
|
x->flags = flags;
|
||||||
if (!strchr(string, '/'))
|
|
||||||
x->flags |= EXC_FLAG_NODIR;
|
|
||||||
x->nowildcardlen = simple_length(string);
|
|
||||||
if (*string == '*' && no_wildcard(string+1))
|
|
||||||
x->flags |= EXC_FLAG_ENDSWITH;
|
|
||||||
ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
|
ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
|
||||||
which->excludes[which->nr++] = x;
|
which->excludes[which->nr++] = x;
|
||||||
}
|
}
|
||||||
@ -505,6 +532,72 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
|
|||||||
dir->basebuf[baselen] = '\0';
|
dir->basebuf[baselen] = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int match_basename(const char *basename, int basenamelen,
|
||||||
|
const char *pattern, int prefix, int patternlen,
|
||||||
|
int flags)
|
||||||
|
{
|
||||||
|
if (prefix == patternlen) {
|
||||||
|
if (!strcmp_icase(pattern, basename))
|
||||||
|
return 1;
|
||||||
|
} else if (flags & EXC_FLAG_ENDSWITH) {
|
||||||
|
if (patternlen - 1 <= basenamelen &&
|
||||||
|
!strcmp_icase(pattern + 1,
|
||||||
|
basename + basenamelen - patternlen + 1))
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
if (fnmatch_icase(pattern, basename, 0) == 0)
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int match_pathname(const char *pathname, int pathlen,
|
||||||
|
const char *base, int baselen,
|
||||||
|
const char *pattern, int prefix, int patternlen,
|
||||||
|
int flags)
|
||||||
|
{
|
||||||
|
const char *name;
|
||||||
|
int namelen;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* match with FNM_PATHNAME; the pattern has base implicitly
|
||||||
|
* in front of it.
|
||||||
|
*/
|
||||||
|
if (*pattern == '/') {
|
||||||
|
pattern++;
|
||||||
|
prefix--;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* baselen does not count the trailing slash. base[] may or
|
||||||
|
* may not end with a trailing slash though.
|
||||||
|
*/
|
||||||
|
if (pathlen < baselen + 1 ||
|
||||||
|
(baselen && pathname[baselen] != '/') ||
|
||||||
|
strncmp_icase(pathname, base, baselen))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
namelen = baselen ? pathlen - baselen - 1 : pathlen;
|
||||||
|
name = pathname + pathlen - namelen;
|
||||||
|
|
||||||
|
if (prefix) {
|
||||||
|
/*
|
||||||
|
* if the non-wildcard part is longer than the
|
||||||
|
* remaining pathname, surely it cannot match.
|
||||||
|
*/
|
||||||
|
if (prefix > namelen)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (strncmp_icase(pattern, name, prefix))
|
||||||
|
return 0;
|
||||||
|
pattern += prefix;
|
||||||
|
name += prefix;
|
||||||
|
namelen -= prefix;
|
||||||
|
}
|
||||||
|
|
||||||
|
return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Scan the list and let the last match determine the fate.
|
/* Scan the list and let the last match determine the fate.
|
||||||
* Return 1 for exclude, 0 for include and -1 for undecided.
|
* Return 1 for exclude, 0 for include and -1 for undecided.
|
||||||
*/
|
*/
|
||||||
@ -519,9 +612,9 @@ int excluded_from_list(const char *pathname,
|
|||||||
|
|
||||||
for (i = el->nr - 1; 0 <= i; i--) {
|
for (i = el->nr - 1; 0 <= i; i--) {
|
||||||
struct exclude *x = el->excludes[i];
|
struct exclude *x = el->excludes[i];
|
||||||
const char *name, *exclude = x->pattern;
|
const char *exclude = x->pattern;
|
||||||
int to_exclude = x->to_exclude;
|
int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
|
||||||
int namelen, prefix = x->nowildcardlen;
|
int prefix = x->nowildcardlen;
|
||||||
|
|
||||||
if (x->flags & EXC_FLAG_MUSTBEDIR) {
|
if (x->flags & EXC_FLAG_MUSTBEDIR) {
|
||||||
if (*dtype == DT_UNKNOWN)
|
if (*dtype == DT_UNKNOWN)
|
||||||
@ -531,51 +624,18 @@ int excluded_from_list(const char *pathname,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (x->flags & EXC_FLAG_NODIR) {
|
if (x->flags & EXC_FLAG_NODIR) {
|
||||||
/* match basename */
|
if (match_basename(basename,
|
||||||
if (prefix == x->patternlen) {
|
pathlen - (basename - pathname),
|
||||||
if (!strcmp_icase(exclude, basename))
|
exclude, prefix, x->patternlen,
|
||||||
|
x->flags))
|
||||||
return to_exclude;
|
return to_exclude;
|
||||||
} else if (x->flags & EXC_FLAG_ENDSWITH) {
|
|
||||||
if (x->patternlen - 1 <= pathlen &&
|
|
||||||
!strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
|
|
||||||
return to_exclude;
|
|
||||||
} else {
|
|
||||||
if (fnmatch_icase(exclude, basename, 0) == 0)
|
|
||||||
return to_exclude;
|
|
||||||
}
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* match with FNM_PATHNAME:
|
assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
|
||||||
* exclude has base (baselen long) implicitly in front of it.
|
if (match_pathname(pathname, pathlen,
|
||||||
*/
|
x->base, x->baselen ? x->baselen - 1 : 0,
|
||||||
if (*exclude == '/') {
|
exclude, prefix, x->patternlen, x->flags))
|
||||||
exclude++;
|
|
||||||
prefix--;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pathlen < x->baselen ||
|
|
||||||
(x->baselen && pathname[x->baselen-1] != '/') ||
|
|
||||||
strncmp_icase(pathname, x->base, x->baselen))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
namelen = x->baselen ? pathlen - x->baselen : pathlen;
|
|
||||||
name = pathname + pathlen - namelen;
|
|
||||||
|
|
||||||
/* if the non-wildcard part is longer than the
|
|
||||||
remaining pathname, surely it cannot match */
|
|
||||||
if (prefix > namelen)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (prefix) {
|
|
||||||
if (strncmp_icase(exclude, name, prefix))
|
|
||||||
continue;
|
|
||||||
exclude += prefix;
|
|
||||||
name += prefix;
|
|
||||||
namelen -= prefix;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!namelen || !fnmatch_icase(exclude, name, FNM_PATHNAME))
|
|
||||||
return to_exclude;
|
return to_exclude;
|
||||||
}
|
}
|
||||||
return -1; /* undecided */
|
return -1; /* undecided */
|
||||||
|
13
dir.h
13
dir.h
@ -11,6 +11,7 @@ struct dir_entry {
|
|||||||
#define EXC_FLAG_NODIR 1
|
#define EXC_FLAG_NODIR 1
|
||||||
#define EXC_FLAG_ENDSWITH 4
|
#define EXC_FLAG_ENDSWITH 4
|
||||||
#define EXC_FLAG_MUSTBEDIR 8
|
#define EXC_FLAG_MUSTBEDIR 8
|
||||||
|
#define EXC_FLAG_NEGATIVE 16
|
||||||
|
|
||||||
struct exclude_list {
|
struct exclude_list {
|
||||||
int nr;
|
int nr;
|
||||||
@ -21,7 +22,6 @@ struct exclude_list {
|
|||||||
int nowildcardlen;
|
int nowildcardlen;
|
||||||
const char *base;
|
const char *base;
|
||||||
int baselen;
|
int baselen;
|
||||||
int to_exclude;
|
|
||||||
int flags;
|
int flags;
|
||||||
} **excludes;
|
} **excludes;
|
||||||
};
|
};
|
||||||
@ -80,6 +80,16 @@ extern int excluded_from_list(const char *pathname, int pathlen, const char *bas
|
|||||||
int *dtype, struct exclude_list *el);
|
int *dtype, struct exclude_list *el);
|
||||||
struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len);
|
struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* these implement the matching logic for dir.c:excluded_from_list and
|
||||||
|
* attr.c:path_matches()
|
||||||
|
*/
|
||||||
|
extern int match_basename(const char *, int,
|
||||||
|
const char *, int, int, int);
|
||||||
|
extern int match_pathname(const char *, int,
|
||||||
|
const char *, int,
|
||||||
|
const char *, int, int, int);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The excluded() API is meant for callers that check each level of leading
|
* The excluded() API is meant for callers that check each level of leading
|
||||||
* directory hierarchies with excluded() to avoid recursing into excluded
|
* directory hierarchies with excluded() to avoid recursing into excluded
|
||||||
@ -97,6 +107,7 @@ extern int path_excluded(struct path_exclude_check *, const char *, int namelen,
|
|||||||
extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
|
extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
|
||||||
char **buf_p, struct exclude_list *which, int check_index);
|
char **buf_p, struct exclude_list *which, int check_index);
|
||||||
extern void add_excludes_from_file(struct dir_struct *, const char *fname);
|
extern void add_excludes_from_file(struct dir_struct *, const char *fname);
|
||||||
|
extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
|
||||||
extern void add_exclude(const char *string, const char *base,
|
extern void add_exclude(const char *string, const char *base,
|
||||||
int baselen, struct exclude_list *which);
|
int baselen, struct exclude_list *which);
|
||||||
extern void free_excludes(struct exclude_list *el);
|
extern void free_excludes(struct exclude_list *el);
|
||||||
|
@ -196,6 +196,16 @@ test_expect_success 'root subdir attribute test' '
|
|||||||
attr_check subdir/a/i unspecified
|
attr_check subdir/a/i unspecified
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'negative patterns' '
|
||||||
|
echo "!f test=bar" >.gitattributes &&
|
||||||
|
test_must_fail git check-attr test -- f
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'patterns starting with exclamation' '
|
||||||
|
echo "\!f test=foo" >.gitattributes &&
|
||||||
|
attr_check "!f" foo
|
||||||
|
'
|
||||||
|
|
||||||
test_expect_success 'setup bare' '
|
test_expect_success 'setup bare' '
|
||||||
git clone --bare . bare.git &&
|
git clone --bare . bare.git &&
|
||||||
cd bare.git
|
cd bare.git
|
||||||
|
@ -214,4 +214,10 @@ test_expect_success 'subdirectory ignore (l1)' '
|
|||||||
test_cmp expect actual
|
test_cmp expect actual
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'pattern matches prefix completely' '
|
||||||
|
: >expect &&
|
||||||
|
git ls-files -i -o --exclude "/three/a.3[abc]" >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
test_done
|
test_done
|
||||||
|
Loading…
Reference in New Issue
Block a user