attr: avoid heavy work when we know the specified attr is not defined

If we have never seen attr 'X' in any .gitattributes file we have
examined so far, we can be sure that 'X' is not defined. So there is no
need to go over the whole attr stack to look for attr 'X'. This is the
purpose behind the new field maybe_real.

This optimization breaks down if macros are involved because we can't
know for sure what macro would expand to 'X' at attr parsing time. But
if we go the pessimistic way and assume all macros are expanded, we hit
the builtin "binary" macro. At least the "diff" attr defined in this
macro will disable this optimization for git-grep. So we wait until
any attr line _may_ reference a macro before we turn this off.

In git.git, this reduces the number of fill_one() calls for "git grep
abcdefghi" from ~5348 to 2955. The optimization stops when it reads
t/.gitattributes, which uses the 'binary' macro. We could probably reduce
it further by limiting the 'binary' reference to t/ and subdirs only
in this case.

"git grep" is actually a good example to justify this patch. The
command checks "diff" attribute on every file. People usually don't
define this attribute. But they pay the attr lookup penalty anyway
without this patch, proportional to the number of attr lines they have
in the repo.

Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Nguyễn Thái Ngọc Duy 2014-12-28 06:39:48 +07:00 committed by Junio C Hamano
parent fad32bcd83
commit 06a604e670

33
attr.c
View File

@ -33,9 +33,11 @@ struct git_attr {
unsigned h; unsigned h;
int attr_nr; int attr_nr;
int maybe_macro; int maybe_macro;
int maybe_real;
char name[FLEX_ARRAY]; char name[FLEX_ARRAY];
}; };
static int attr_nr; static int attr_nr;
static int cannot_trust_maybe_real;
static struct git_attr_check *check_all_attr; static struct git_attr_check *check_all_attr;
static struct git_attr *(git_attr_hash[HASHSIZE]); static struct git_attr *(git_attr_hash[HASHSIZE]);
@ -97,6 +99,7 @@ static struct git_attr *git_attr_internal(const char *name, int len)
a->next = git_attr_hash[pos]; a->next = git_attr_hash[pos];
a->attr_nr = attr_nr++; a->attr_nr = attr_nr++;
a->maybe_macro = 0; a->maybe_macro = 0;
a->maybe_real = 0;
git_attr_hash[pos] = a; git_attr_hash[pos] = a;
REALLOC_ARRAY(check_all_attr, attr_nr); REALLOC_ARRAY(check_all_attr, attr_nr);
@ -269,6 +272,10 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
/* Second pass to fill the attr_states */ /* Second pass to fill the attr_states */
for (cp = states, i = 0; *cp; i++) { for (cp = states, i = 0; *cp; i++) {
cp = parse_attr(src, lineno, cp, &(res->state[i])); cp = parse_attr(src, lineno, cp, &(res->state[i]));
if (!is_macro)
res->state[i].attr->maybe_real = 1;
if (res->state[i].attr->maybe_macro)
cannot_trust_maybe_real = 1;
} }
return res; return res;
@ -710,10 +717,13 @@ static int macroexpand_one(int nr, int rem)
} }
/* /*
* Collect all attributes for path into the array pointed to by * Collect attributes for path into the array pointed to by
* check_all_attr. * check_all_attr. If num is non-zero, only attributes in check[] are
* collected. Otherwise all attributes are collected.
*/ */
static void collect_all_attrs(const char *path) static void collect_some_attrs(const char *path, int num,
struct git_attr_check *check)
{ {
struct attr_stack *stk; struct attr_stack *stk;
int i, pathlen, rem, dirlen; int i, pathlen, rem, dirlen;
@ -736,6 +746,19 @@ static void collect_all_attrs(const char *path)
prepare_attr_stack(path, dirlen); prepare_attr_stack(path, dirlen);
for (i = 0; i < attr_nr; i++) for (i = 0; i < attr_nr; i++)
check_all_attr[i].value = ATTR__UNKNOWN; check_all_attr[i].value = ATTR__UNKNOWN;
if (num && !cannot_trust_maybe_real) {
rem = 0;
for (i = 0; i < num; i++) {
if (!check[i].attr->maybe_real) {
struct git_attr_check *c;
c = check_all_attr + check[i].attr->attr_nr;
c->value = ATTR__UNSET;
rem++;
}
}
if (rem == num)
return;
}
rem = attr_nr; rem = attr_nr;
for (stk = attr_stack; 0 < rem && stk; stk = stk->prev) for (stk = attr_stack; 0 < rem && stk; stk = stk->prev)
@ -746,7 +769,7 @@ int git_check_attr(const char *path, int num, struct git_attr_check *check)
{ {
int i; int i;
collect_all_attrs(path); collect_some_attrs(path, num, check);
for (i = 0; i < num; i++) { for (i = 0; i < num; i++) {
const char *value = check_all_attr[check[i].attr->attr_nr].value; const char *value = check_all_attr[check[i].attr->attr_nr].value;
@ -762,7 +785,7 @@ int git_all_attrs(const char *path, int *num, struct git_attr_check **check)
{ {
int i, count, j; int i, count, j;
collect_all_attrs(path); collect_some_attrs(path, 0, NULL);
/* Count the number of attributes that are set. */ /* Count the number of attributes that are set. */
count = 0; count = 0;