From aa7710e064a9ee644e1e86bd6f89193200ac4ccd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 28 Dec 2014 06:39:46 +0700 Subject: [PATCH 1/3] attr.c: rename arg name attr_nr to avoid shadowing the global one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Helped-by: Junio C Hamano Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- attr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/attr.c b/attr.c index cd5469770a..a1758bf537 100644 --- a/attr.c +++ b/attr.c @@ -681,13 +681,13 @@ static int fill(const char *path, int pathlen, int basename_offset, return rem; } -static int macroexpand_one(int attr_nr, int rem) +static int macroexpand_one(int nr, int rem) { struct attr_stack *stk; struct match_attr *a = NULL; int i; - if (check_all_attr[attr_nr].value != ATTR__TRUE) + if (check_all_attr[nr].value != ATTR__TRUE) return rem; for (stk = attr_stack; !a && stk; stk = stk->prev) @@ -695,7 +695,7 @@ static int macroexpand_one(int attr_nr, int rem) struct match_attr *ma = stk->attrs[i]; if (!ma->is_macro) continue; - if (ma->u.attr->attr_nr == attr_nr) + if (ma->u.attr->attr_nr == nr) a = ma; } From fad32bcd83ec4e4f88013e4a0b05f42e32e2c6f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 28 Dec 2014 06:39:47 +0700 Subject: [PATCH 2/3] attr: do not attempt to expand when we know it's not a macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep track of all recognized macros in the new "maybe_macro" field. If this field is true, it _may_ be a macro (depending on what's in the current attr stack). But if the field is false, it's definitely not a macro, no need to go through the whole attr stack in macroexpand_one() to search for one. Without this, "git grep abcdefghi" on git.git hits the inner loop in macroexpand_one() 2481 times. 
With this, it's 66 times. Helped-by: Eric Sunshine Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- attr.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/attr.c b/attr.c index a1758bf537..b80e52b157 100644 --- a/attr.c +++ b/attr.c @@ -32,6 +32,7 @@ struct git_attr { struct git_attr *next; unsigned h; int attr_nr; + int maybe_macro; char name[FLEX_ARRAY]; }; static int attr_nr; @@ -95,6 +96,7 @@ static struct git_attr *git_attr_internal(const char *name, int len) a->h = hval; a->next = git_attr_hash[pos]; a->attr_nr = attr_nr++; + a->maybe_macro = 0; git_attr_hash[pos] = a; REALLOC_ARRAY(check_all_attr, attr_nr); @@ -244,9 +246,10 @@ static struct match_attr *parse_attr_line(const char *line, const char *src, sizeof(*res) + sizeof(struct attr_state) * num_attr + (is_macro ? 0 : namelen + 1)); - if (is_macro) + if (is_macro) { res->u.attr = git_attr_internal(name, namelen); - else { + res->u.attr->maybe_macro = 1; + } else { char *p = (char *)&(res->state[num_attr]); memcpy(p, name, namelen); res->u.pat.pattern = p; @@ -687,7 +690,8 @@ static int macroexpand_one(int nr, int rem) struct match_attr *a = NULL; int i; - if (check_all_attr[nr].value != ATTR__TRUE) + if (check_all_attr[nr].value != ATTR__TRUE || + !check_all_attr[nr].attr->maybe_macro) return rem; for (stk = attr_stack; !a && stk; stk = stk->prev) From 06a604e67051b9342c158432a49e42a5440f9280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 28 Dec 2014 06:39:48 +0700 Subject: [PATCH 3/3] attr: avoid heavy work when we know the specified attr is not defined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we have never seen attr 'X' in any .gitattributes file we have examined so far, we can be sure that 'X' is not defined. So no need to go over all the attr stack to look for attr 'X'. This is the purpose behind this new field maybe_real. 
This optimization breaks down if macros are involved because we can't know for sure what macro would expand to 'X' at attr parsing time. But if we go the pessimistic way and assume all macros are expanded, we hit the builtin "binary" macro. At least the "diff" attr defined in this macro will disable this optimization for git-grep. So we wait until any attr lines _may_ reference a macro before we turn this off. In git.git, this reduces the number of fill_one() calls for "git grep abcdefghi" from ~5348 to 2955. The optimization stops when it reads t/.gitattributes, which uses the 'binary' macro. We could probably reduce it further by limiting the 'binary' reference to t/ and subdirs only in this case. "git grep" is actually a good example to justify this patch. The command checks the "diff" attribute on every file. People usually don't define this attribute. But they pay the attr lookup penalty anyway without this patch, proportional to the number of attr lines they have in the repo. Helped-by: Junio C Hamano Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- attr.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/attr.c b/attr.c index b80e52b157..1f9eebd2dd 100644 --- a/attr.c +++ b/attr.c @@ -33,9 +33,11 @@ struct git_attr { unsigned h; int attr_nr; int maybe_macro; + int maybe_real; char name[FLEX_ARRAY]; }; static int attr_nr; +static int cannot_trust_maybe_real; static struct git_attr_check *check_all_attr; static struct git_attr *(git_attr_hash[HASHSIZE]); @@ -97,6 +99,7 @@ static struct git_attr *git_attr_internal(const char *name, int len) a->next = git_attr_hash[pos]; a->attr_nr = attr_nr++; a->maybe_macro = 0; + a->maybe_real = 0; git_attr_hash[pos] = a; REALLOC_ARRAY(check_all_attr, attr_nr); @@ -269,6 +272,10 @@ static struct match_attr *parse_attr_line(const char *line, const char *src, /* Second pass to fill the attr_states */ for (cp = states, i = 0; *cp; i++) { cp = parse_attr(src, lineno, 
cp, &(res->state[i])); + if (!is_macro) + res->state[i].attr->maybe_real = 1; + if (res->state[i].attr->maybe_macro) + cannot_trust_maybe_real = 1; } return res; @@ -710,10 +717,13 @@ static int macroexpand_one(int nr, int rem) } /* - * Collect all attributes for path into the array pointed to by - * check_all_attr. + * Collect attributes for path into the array pointed to by + * check_all_attr. If num is non-zero, only attributes in check[] are + * collected. Otherwise all attributes are collected. */ -static void collect_all_attrs(const char *path) +static void collect_some_attrs(const char *path, int num, + struct git_attr_check *check) + { struct attr_stack *stk; int i, pathlen, rem, dirlen; @@ -736,6 +746,19 @@ static void collect_all_attrs(const char *path) prepare_attr_stack(path, dirlen); for (i = 0; i < attr_nr; i++) check_all_attr[i].value = ATTR__UNKNOWN; + if (num && !cannot_trust_maybe_real) { + rem = 0; + for (i = 0; i < num; i++) { + if (!check[i].attr->maybe_real) { + struct git_attr_check *c; + c = check_all_attr + check[i].attr->attr_nr; + c->value = ATTR__UNSET; + rem++; + } + } + if (rem == num) + return; + } rem = attr_nr; for (stk = attr_stack; 0 < rem && stk; stk = stk->prev) @@ -746,7 +769,7 @@ int git_check_attr(const char *path, int num, struct git_attr_check *check) { int i; - collect_all_attrs(path); + collect_some_attrs(path, num, check); for (i = 0; i < num; i++) { const char *value = check_all_attr[check[i].attr->attr_nr].value; @@ -762,7 +785,7 @@ int git_all_attrs(const char *path, int *num, struct git_attr_check **check) { int i, count, j; - collect_all_attrs(path); + collect_some_attrs(path, 0, NULL); /* Count the number of attributes that are set. */ count = 0;