ref-filter: limit traversal to prefix

When we are matching refnames against a pattern, then we know that the
beginning of any refname that can match the pattern has to match the
part of the pattern up to the first glob character. For example, if
the pattern is `refs/heads/foo*bar`, then it can only match a
reference that has the prefix `refs/heads/foo`.

So pass that prefix to `for_each_fullref_in()`. This lets the ref code
avoid passing us the full set of refs, and in some cases avoid reading
them in the first place.

Note that this applies only when the `match_as_path` flag is set
(i.e., when `for-each-ref` is the caller), as the matching rules for
git-branch and git-tag are subtly different.

This could be generalized to the case of multiple patterns, but (a) it
probably doesn't come up that often, and (b) it is more awkward to
deal with multiple patterns (e.g., the patterns might not be
disjoint). So, since this is just an optimization, punt on the case of
multiple patterns.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Jeff King 2017-05-22 16:17:54 +02:00 committed by Junio C Hamano
parent c1da06c6f1
commit cfe004a5a9

View File

@ -1665,6 +1665,68 @@ static int filter_pattern_match(struct ref_filter *filter, const char *refname)
return match_pattern(filter, refname);
}
/*
* Find the longest prefix of pattern we can pass to
* `for_each_fullref_in()`, namely the part of pattern preceding the
* first glob character. (Note that `for_each_fullref_in()` is
* perfectly happy working with a prefix that doesn't end at a
* pathname component boundary.)
*/
static void find_longest_prefix(struct strbuf *out, const char *pattern)
{
const char *p;
for (p = pattern; *p && !is_glob_special(*p); p++)
;
strbuf_add(out, pattern, p - pattern);
}
/*
* This is the same as for_each_fullref_in(), but it tries to iterate
* only over the patterns we'll care about. Note that it _doesn't_ do a full
* pattern match, so the callback still has to match each ref individually.
*/
static int for_each_fullref_in_pattern(struct ref_filter *filter,
each_ref_fn cb,
void *cb_data,
int broken)
{
struct strbuf prefix = STRBUF_INIT;
int ret;
if (!filter->match_as_path) {
/*
* in this case, the patterns are applied after
* prefixes like "refs/heads/" etc. are stripped off,
* so we have to look at everything:
*/
return for_each_fullref_in("", cb, cb_data, broken);
}
if (!filter->name_patterns[0]) {
/* no patterns; we have to look at everything */
return for_each_fullref_in("", cb, cb_data, broken);
}
if (filter->name_patterns[1]) {
/*
* multiple patterns; in theory this could still work as long
* as the patterns are disjoint. We'd just make multiple calls
* to for_each_ref(). But if they're not disjoint, we'd end up
* reporting the same ref multiple times. So let's punt on that
* for now.
*/
return for_each_fullref_in("", cb, cb_data, broken);
}
find_longest_prefix(&prefix, filter->name_patterns[0]);
ret = for_each_fullref_in(prefix.buf, cb, cb_data, broken);
strbuf_release(&prefix);
return ret;
}
/*
* Given a ref (sha1, refname), check if the ref belongs to the array
* of sha1s. If the given ref is a tag, check if the given tag points
@ -1911,7 +1973,7 @@ int filter_refs(struct ref_array *array, struct ref_filter *filter, unsigned int
else if (filter->kind == FILTER_REFS_TAGS)
ret = for_each_fullref_in("refs/tags/", ref_filter_handler, &ref_cbdata, broken);
else if (filter->kind & FILTER_REFS_ALL)
ret = for_each_fullref_in("", ref_filter_handler, &ref_cbdata, broken);
ret = for_each_fullref_in_pattern(filter, ref_filter_handler, &ref_cbdata, broken);
if (!ret && (filter->kind & FILTER_REFS_DETACHED_HEAD))
head_ref(ref_filter_handler, &ref_cbdata);
}