Merge branch 'nd/pathspec-wildcard'
Optimize matching paths with common forms of pathspecs that contain wildcard characters. * nd/pathspec-wildcard: tree_entry_interesting: do basedir compare on wildcard patterns when possible pathspec: apply "*.c" optimization from exclude pathspec: do exact comparison on the leading non-wildcard part pathspec: save the non-wildcard length part
This commit is contained in:
commit
990a4fea96
@ -337,7 +337,7 @@ void overlay_tree_on_cache(const char *tree_name, const char *prefix)
|
||||
matchbuf[0] = prefix;
|
||||
matchbuf[1] = NULL;
|
||||
init_pathspec(&pathspec, matchbuf);
|
||||
pathspec.items[0].use_wildcard = 0;
|
||||
pathspec.items[0].nowildcard_len = pathspec.items[0].len;
|
||||
} else
|
||||
init_pathspec(&pathspec, NULL);
|
||||
if (read_tree(tree, 1, &pathspec))
|
||||
|
@ -168,7 +168,7 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix)
|
||||
|
||||
init_pathspec(&pathspec, get_pathspec(prefix, argv + 1));
|
||||
for (i = 0; i < pathspec.nr; i++)
|
||||
pathspec.items[i].use_wildcard = 0;
|
||||
pathspec.items[i].nowildcard_len = pathspec.items[i].len;
|
||||
pathspec.has_wildcard = 0;
|
||||
tree = parse_tree_indirect(sha1);
|
||||
if (!tree)
|
||||
|
5
cache.h
5
cache.h
@ -473,6 +473,8 @@ extern int index_name_is_other(const struct index_state *, const char *, int);
|
||||
extern int ie_match_stat(const struct index_state *, struct cache_entry *, struct stat *, unsigned int);
|
||||
extern int ie_modified(const struct index_state *, struct cache_entry *, struct stat *, unsigned int);
|
||||
|
||||
#define PATHSPEC_ONESTAR 1 /* the pathspec pattern sastisfies GFNM_ONESTAR */
|
||||
|
||||
struct pathspec {
|
||||
const char **raw; /* get_pathspec() result, not freed by free_pathspec() */
|
||||
int nr;
|
||||
@ -482,7 +484,8 @@ struct pathspec {
|
||||
struct pathspec_item {
|
||||
const char *match;
|
||||
int len;
|
||||
unsigned int use_wildcard:1;
|
||||
int nowildcard_len;
|
||||
int flags;
|
||||
} *items;
|
||||
};
|
||||
|
||||
|
36
dir.c
36
dir.c
@ -34,6 +34,28 @@ int fnmatch_icase(const char *pattern, const char *string, int flags)
|
||||
return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0));
|
||||
}
|
||||
|
||||
inline int git_fnmatch(const char *pattern, const char *string,
|
||||
int flags, int prefix)
|
||||
{
|
||||
int fnm_flags = 0;
|
||||
if (flags & GFNM_PATHNAME)
|
||||
fnm_flags |= FNM_PATHNAME;
|
||||
if (prefix > 0) {
|
||||
if (strncmp(pattern, string, prefix))
|
||||
return FNM_NOMATCH;
|
||||
pattern += prefix;
|
||||
string += prefix;
|
||||
}
|
||||
if (flags & GFNM_ONESTAR) {
|
||||
int pattern_len = strlen(++pattern);
|
||||
int string_len = strlen(string);
|
||||
return string_len < pattern_len ||
|
||||
strcmp(pattern,
|
||||
string + string_len - pattern_len);
|
||||
}
|
||||
return fnmatch(pattern, string, fnm_flags);
|
||||
}
|
||||
|
||||
static size_t common_prefix_len(const char **pathspec)
|
||||
{
|
||||
const char *n, *first;
|
||||
@ -230,7 +252,10 @@ static int match_pathspec_item(const struct pathspec_item *item, int prefix,
|
||||
return MATCHED_RECURSIVELY;
|
||||
}
|
||||
|
||||
if (item->use_wildcard && !fnmatch(match, name, 0))
|
||||
if (item->nowildcard_len < item->len &&
|
||||
!git_fnmatch(match, name,
|
||||
item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
|
||||
item->nowildcard_len - prefix))
|
||||
return MATCHED_FNMATCH;
|
||||
|
||||
return 0;
|
||||
@ -1429,9 +1454,14 @@ int init_pathspec(struct pathspec *pathspec, const char **paths)
|
||||
|
||||
item->match = path;
|
||||
item->len = strlen(path);
|
||||
item->use_wildcard = !no_wildcard(path);
|
||||
if (item->use_wildcard)
|
||||
item->nowildcard_len = simple_length(path);
|
||||
item->flags = 0;
|
||||
if (item->nowildcard_len < item->len) {
|
||||
pathspec->has_wildcard = 1;
|
||||
if (path[item->nowildcard_len] == '*' &&
|
||||
no_wildcard(path + item->nowildcard_len + 1))
|
||||
item->flags |= PATHSPEC_ONESTAR;
|
||||
}
|
||||
}
|
||||
|
||||
qsort(pathspec->items, pathspec->nr,
|
||||
|
9
dir.h
9
dir.h
@ -139,4 +139,13 @@ extern int strcmp_icase(const char *a, const char *b);
|
||||
extern int strncmp_icase(const char *a, const char *b, size_t count);
|
||||
extern int fnmatch_icase(const char *pattern, const char *string, int flags);
|
||||
|
||||
/*
|
||||
* The prefix part of pattern must not contains wildcards.
|
||||
*/
|
||||
#define GFNM_PATHNAME 1 /* similar to FNM_PATHNAME */
|
||||
#define GFNM_ONESTAR 2 /* there is only _one_ wildcard, a star */
|
||||
|
||||
extern int git_fnmatch(const char *pattern, const char *string,
|
||||
int flags, int prefix);
|
||||
|
||||
#endif
|
||||
|
77
tree-walk.c
77
tree-walk.c
@ -572,6 +572,54 @@ static int match_dir_prefix(const char *base,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform matching on the leading non-wildcard part of
|
||||
* pathspec. item->nowildcard_len must be greater than zero. Return
|
||||
* non-zero if base is matched.
|
||||
*/
|
||||
static int match_wildcard_base(const struct pathspec_item *item,
|
||||
const char *base, int baselen,
|
||||
int *matched)
|
||||
{
|
||||
const char *match = item->match;
|
||||
/* the wildcard part is not considered in this function */
|
||||
int matchlen = item->nowildcard_len;
|
||||
|
||||
if (baselen) {
|
||||
int dirlen;
|
||||
/*
|
||||
* Return early if base is longer than the
|
||||
* non-wildcard part but it does not match.
|
||||
*/
|
||||
if (baselen >= matchlen) {
|
||||
*matched = matchlen;
|
||||
return !strncmp(base, match, matchlen);
|
||||
}
|
||||
|
||||
dirlen = matchlen;
|
||||
while (dirlen && match[dirlen - 1] != '/')
|
||||
dirlen--;
|
||||
|
||||
/*
|
||||
* Return early if base is shorter than the
|
||||
* non-wildcard part but it does not match. Note that
|
||||
* base ends with '/' so we are sure it really matches
|
||||
* directory
|
||||
*/
|
||||
if (strncmp(base, match, baselen))
|
||||
return 0;
|
||||
*matched = baselen;
|
||||
} else
|
||||
*matched = 0;
|
||||
/*
|
||||
* we could have checked entry against the non-wildcard part
|
||||
* that is not in base and does similar never_interesting
|
||||
* optimization as in match_entry. For now just be happy with
|
||||
* base comparison.
|
||||
*/
|
||||
return entry_interesting;
|
||||
}
|
||||
|
||||
/*
|
||||
* Is a tree entry interesting given the pathspec we have?
|
||||
*
|
||||
@ -602,7 +650,7 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
|
||||
const struct pathspec_item *item = ps->items+i;
|
||||
const char *match = item->match;
|
||||
const char *base_str = base->buf + base_offset;
|
||||
int matchlen = item->len;
|
||||
int matchlen = item->len, matched = 0;
|
||||
|
||||
if (baselen >= matchlen) {
|
||||
/* If it doesn't match, move along... */
|
||||
@ -626,8 +674,10 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
|
||||
&never_interesting))
|
||||
return entry_interesting;
|
||||
|
||||
if (item->use_wildcard) {
|
||||
if (!fnmatch(match + baselen, entry->path, 0))
|
||||
if (item->nowildcard_len < item->len) {
|
||||
if (!git_fnmatch(match + baselen, entry->path,
|
||||
item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
|
||||
item->nowildcard_len - baselen))
|
||||
return entry_interesting;
|
||||
|
||||
/*
|
||||
@ -642,17 +692,34 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
|
||||
}
|
||||
|
||||
match_wildcards:
|
||||
if (!item->use_wildcard)
|
||||
if (item->nowildcard_len == item->len)
|
||||
continue;
|
||||
|
||||
if (item->nowildcard_len &&
|
||||
!match_wildcard_base(item, base_str, baselen, &matched))
|
||||
return entry_not_interesting;
|
||||
|
||||
/*
|
||||
* Concatenate base and entry->path into one and do
|
||||
* fnmatch() on it.
|
||||
*
|
||||
* While we could avoid concatenation in certain cases
|
||||
* [1], which saves a memcpy and potentially a
|
||||
* realloc, it turns out not worth it. Measurement on
|
||||
* linux-2.6 does not show any clear improvements,
|
||||
* partly because of the nowildcard_len optimization
|
||||
* in git_fnmatch(). Avoid micro-optimizations here.
|
||||
*
|
||||
* [1] if match_wildcard_base() says the base
|
||||
* directory is already matched, we only need to match
|
||||
* the rest, which is shorter so _in theory_ faster.
|
||||
*/
|
||||
|
||||
strbuf_add(base, entry->path, pathlen);
|
||||
|
||||
if (!fnmatch(match, base->buf + base_offset, 0)) {
|
||||
if (!git_fnmatch(match, base->buf + base_offset,
|
||||
item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
|
||||
item->nowildcard_len)) {
|
||||
strbuf_setlen(base, base_offset + baselen);
|
||||
return entry_interesting;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user