From 45e7ca0f0e1042c26d56b578165365c3f70c0121 Mon Sep 17 00:00:00 2001 From: Brandon Casey Date: Thu, 18 Sep 2008 17:40:48 -0500 Subject: [PATCH 1/6] diff.c: return pattern entry pointer rather than just the hunk header pattern This is in preparation for associating a flag with each pattern which will control how the pattern is interpreted. For example, as a basic or extended regular expression. Signed-off-by: Brandon Casey Signed-off-by: Junio C Hamano --- diff.c | 55 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/diff.c b/diff.c index 5e01b2bb27..406a76a203 100644 --- a/diff.c +++ b/diff.c @@ -94,32 +94,35 @@ static int parse_lldiff_command(const char *var, const char *ep, const char *val * to define a customized regexp to find the beginning of a function to * be used for hunk header lines of "diff -p" style output. */ -static struct funcname_pattern { +struct funcname_pattern_entry { char *name; char *pattern; - struct funcname_pattern *next; +}; +static struct funcname_pattern_list { + struct funcname_pattern_list *next; + struct funcname_pattern_entry e; } *funcname_pattern_list; static int parse_funcname_pattern(const char *var, const char *ep, const char *value) { const char *name; int namelen; - struct funcname_pattern *pp; + struct funcname_pattern_list *pp; name = var + 5; /* "diff." */ namelen = ep - name; for (pp = funcname_pattern_list; pp; pp = pp->next) - if (!strncmp(pp->name, name, namelen) && !pp->name[namelen]) + if (!strncmp(pp->e.name, name, namelen) && !pp->e.name[namelen]) break; if (!pp) { pp = xcalloc(1, sizeof(*pp)); - pp->name = xmemdupz(name, namelen); + pp->e.name = xmemdupz(name, namelen); pp->next = funcname_pattern_list; funcname_pattern_list = pp; } - free(pp->pattern); - pp->pattern = xstrdup(value); + free(pp->e.pattern); + pp->e.pattern = xstrdup(value); return 0; } @@ -1377,20 +1380,17 @@ int diff_filespec_is_binary(struct diff_filespec *one) return one->is_binary; } -static const char *funcname_pattern(const char *ident) +static const struct funcname_pattern_entry *funcname_pattern(const char *ident) { - struct funcname_pattern *pp; + struct funcname_pattern_list *pp; for (pp = funcname_pattern_list; pp; pp = pp->next) - if (!strcmp(ident, pp->name)) - return pp->pattern; + if (!strcmp(ident, pp->e.name)) + return &pp->e; return NULL; } -static struct builtin_funcname_pattern { - const char *name; - const char *pattern; -} builtin_funcname_pattern[] = { +static const struct funcname_pattern_entry builtin_funcname_pattern[] = { { "java", "!^[ ]*\\(catch\\|do\\|for\\|if\\|instanceof\\|" "new\\|return\\|switch\\|throw\\|while\\)\n" "^[ ]*\\(\\([ ]*" @@ -1407,9 +1407,10 @@ static struct builtin_funcname_pattern { { "ruby", "^\\s*\\(\\(class\\|module\\|def\\)\\s.*\\)$" }, }; -static const char *diff_funcname_pattern(struct diff_filespec *one) +static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one) { - const char *ident, *pattern; + const char *ident; + const struct funcname_pattern_entry *pe; int i; diff_filespec_check_attr(one); @@ -1424,9 +1425,9 @@ static const char *diff_funcname_pattern(struct diff_filespec *one) return funcname_pattern("default"); /* Look up custom "funcname.$ident" regexp from config. */ - pattern = funcname_pattern(ident); - if (pattern) - return pattern; + pe = funcname_pattern(ident); + if (pe) + return pe; /* * And define built-in fallback patterns here. Note that @@ -1434,7 +1435,7 @@ static const char *diff_funcname_pattern(struct diff_filespec *one) */ for (i = 0; i < ARRAY_SIZE(builtin_funcname_pattern); i++) if (!strcmp(ident, builtin_funcname_pattern[i].name)) - return builtin_funcname_pattern[i].pattern; + return &builtin_funcname_pattern[i]; return NULL; } @@ -1512,11 +1513,11 @@ static void builtin_diff(const char *name_a, xdemitconf_t xecfg; xdemitcb_t ecb; struct emit_callback ecbdata; - const char *funcname_pattern; + const struct funcname_pattern_entry *pe; - funcname_pattern = diff_funcname_pattern(one); - if (!funcname_pattern) - funcname_pattern = diff_funcname_pattern(two); + pe = diff_funcname_pattern(one); + if (!pe) + pe = diff_funcname_pattern(two); memset(&xecfg, 0, sizeof(xecfg)); memset(&ecbdata, 0, sizeof(ecbdata)); @@ -1528,8 +1529,8 @@ static void builtin_diff(const char *name_a, xpp.flags = XDF_NEED_MINIMAL | o->xdl_opts; xecfg.ctxlen = o->context; xecfg.flags = XDL_EMIT_FUNCNAMES; - if (funcname_pattern) - xdiff_set_find_func(&xecfg, funcname_pattern); + if (pe) + xdiff_set_find_func(&xecfg, pe->pattern); if (!diffopts) ; else if (!prefixcmp(diffopts, "--unified=")) From a013585b20ac757b0e75a72181ffa44674f35235 Mon Sep 17 00:00:00 2001 From: Brandon Casey Date: Thu, 18 Sep 2008 17:42:48 -0500 Subject: [PATCH 2/6] diff.c: associate a flag with each pattern and use it for compiling regex This is in preparation for allowing extended regular expression patterns. Signed-off-by: Brandon Casey Signed-off-by: Junio C Hamano --- diff.c | 21 ++++++++++++--------- xdiff-interface.c | 4 ++-- xdiff-interface.h | 2 +- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/diff.c b/diff.c index 406a76a203..6881cf4efa 100644 --- a/diff.c +++ b/diff.c @@ -97,13 +97,14 @@ static int parse_lldiff_command(const char *var, const char *ep, const char *val struct funcname_pattern_entry { char *name; char *pattern; + int cflags; }; static struct funcname_pattern_list { struct funcname_pattern_list *next; struct funcname_pattern_entry e; } *funcname_pattern_list; -static int parse_funcname_pattern(const char *var, const char *ep, const char *value) +static int parse_funcname_pattern(const char *var, const char *ep, const char *value, int cflags) { const char *name; int namelen; @@ -123,6 +124,7 @@ static int parse_funcname_pattern(const char *var, const char *ep, const char *v } free(pp->e.pattern); pp->e.pattern = xstrdup(value); + pp->e.cflags = cflags; return 0; } @@ -185,7 +187,8 @@ int git_diff_basic_config(const char *var, const char *value, void *cb) if (!strcmp(ep, ".funcname")) { if (!value) return config_error_nonbool(var); - return parse_funcname_pattern(var, ep, value); + return parse_funcname_pattern(var, ep, value, + 0); } } } @@ -1395,16 +1398,16 @@ static const struct funcname_pattern_entry builtin_funcname_pattern[] = { "new\\|return\\|switch\\|throw\\|while\\)\n" "^[ ]*\\(\\([ ]*" "[A-Za-z_][A-Za-z_0-9]*\\)\\{2,\\}" - "[ ]*([^;]*\\)$" }, + "[ ]*([^;]*\\)$", 0 }, { "pascal", "^\\(\\(procedure\\|function\\|constructor\\|" "destructor\\|interface\\|implementation\\|" "initialization\\|finalization\\)[ \t]*.*\\)$" "\\|" - "^\\(.*=[ \t]*\\(class\\|record\\).*\\)$" - }, - { "bibtex", "\\(@[a-zA-Z]\\{1,\\}[ \t]*{\\{0,1\\}[ \t]*[^ \t\"@',\\#}{~%]*\\).*$" }, - { "tex", "^\\(\\\\\\(\\(sub\\)*section\\|chapter\\|part\\)\\*\\{0,1\\}{.*\\)$" }, - { "ruby", "^\\s*\\(\\(class\\|module\\|def\\)\\s.*\\)$" }, + "^\\(.*=[ \t]*\\(class\\|record\\).*\\)$", + 0 }, + { "bibtex", "\\(@[a-zA-Z]\\{1,\\}[ \t]*{\\{0,1\\}[ \t]*[^ \t\"@',\\#}{~%]*\\).*$", 0 }, + { "tex", "^\\(\\\\\\(\\(sub\\)*section\\|chapter\\|part\\)\\*\\{0,1\\}{.*\\)$", 0 }, + { "ruby", "^\\s*\\(\\(class\\|module\\|def\\)\\s.*\\)$", 0 }, }; static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one) @@ -1530,7 +1533,7 @@ static void builtin_diff(const char *name_a, xecfg.ctxlen = o->context; xecfg.flags = XDL_EMIT_FUNCNAMES; if (pe) - xdiff_set_find_func(&xecfg, pe->pattern); + xdiff_set_find_func(&xecfg, pe->pattern, pe->cflags); if (!diffopts) ; else if (!prefixcmp(diffopts, "--unified=")) diff --git a/xdiff-interface.c b/xdiff-interface.c index 61dc5c5470..2c81f40cb6 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -206,7 +206,7 @@ static long ff_regexp(const char *line, long len, return result; } -void xdiff_set_find_func(xdemitconf_t *xecfg, const char *value) +void xdiff_set_find_func(xdemitconf_t *xecfg, const char *value, int cflags) { int i; struct ff_regs *regs; @@ -231,7 +231,7 @@ void xdiff_set_find_func(xdemitconf_t *xecfg, const char *value) expression = buffer = xstrndup(value, ep - value); else expression = value; - if (regcomp(®->re, expression, 0)) + if (regcomp(®->re, expression, cflags)) die("Invalid regexp to look for hunk header: %s", expression); free(buffer); value = ep + 1; diff --git a/xdiff-interface.h b/xdiff-interface.h index f7f791d96b..33cab9dd59 100644 --- a/xdiff-interface.h +++ b/xdiff-interface.h @@ -21,6 +21,6 @@ int parse_hunk_header(char *line, int len, int read_mmfile(mmfile_t *ptr, const char *filename); int buffer_is_binary(const char *ptr, unsigned long size); -extern void xdiff_set_find_func(xdemitconf_t *xecfg, const char *line); +extern void xdiff_set_find_func(xdemitconf_t *xecfg, const char *line, int cflags); #endif From 45d9414fa5599b41578625961b53e18a9b9148c7 Mon Sep 17 00:00:00 2001 From: Brandon Casey Date: Thu, 18 Sep 2008 17:44:33 -0500 Subject: [PATCH 3/6] diff.*.xfuncname which uses "extended" regex's for hunk header selection Currently, the hunk headers produced by 'diff -p' are customizable by setting the diff.*.funcname option in the config file. The 'funcname' option takes a basic regular expression. This functionality was designed using the GNU regex library which, by default, allows using backslashed versions of some extended regular expression operators, even in Basic Regular Expression mode. For example, the following characters, when backslashed, are interpreted according to the extended regular expression rules: ?, +, and |. As such, the builtin funcname patterns were created using some extended regular expression operators. Other platforms which adhere more strictly to the POSIX spec do not interpret the backslashed extended RE operators in Basic Regular Expression mode. This causes the pattern matching for the builtin funcname patterns to fail on those platforms. Introduce a new option 'xfuncname' which uses extended regular expressions, and advertise it _instead_ of funcname. Since most users are on GNU platforms, the majority of funcname patterns are created and tested there. Advertising only xfuncname should help to avoid the creation of non-portable patterns which work with GNU regex but not elsewhere. Additionally, the extended regular expressions may be less ugly and complicated compared to the basic RE since many common special operators do not need to be backslashed. For example, the GNU Basic RE: ^[ ]*\\(\\(public\\|static\\).*\\)$ becomes the following Extended RE: ^[ ]*((public|static).*)$ Signed-off-by: Brandon Casey Signed-off-by: Junio C Hamano --- Documentation/gitattributes.txt | 4 ++-- diff.c | 5 +++++ t/t4018-diff-funcname.sh | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 94e6752aa2..9259637609 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -288,13 +288,13 @@ for paths. *.tex diff=tex ------------------------ -Then, you would define "diff.tex.funcname" configuration to +Then, you would define "diff.tex.xfuncname" configuration to specify a regular expression that matches a line that you would want to appear as the hunk header, like this: ------------------------ [diff "tex"] - funcname = "^\\(\\\\\\(sub\\)*section{.*\\)$" + xfuncname = "^(\\\\(sub)*section\\{.*)$" ------------------------ Note. A single level of backslashes are eaten by the diff --git a/diff.c b/diff.c index 6881cf4efa..dabb4b4a02 100644 --- a/diff.c +++ b/diff.c @@ -189,6 +189,11 @@ int git_diff_basic_config(const char *var, const char *value, void *cb) return config_error_nonbool(var); return parse_funcname_pattern(var, ep, value, 0); + } else if (!strcmp(ep, ".xfuncname")) { + if (!value) + return config_error_nonbool(var); + return parse_funcname_pattern(var, ep, value, + REG_EXTENDED); } } } diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh index 18bcd9713d..602d68f092 100755 --- a/t/t4018-diff-funcname.sh +++ b/t/t4018-diff-funcname.sh @@ -58,7 +58,7 @@ test_expect_success 'last regexp must not be negated' ' ' test_expect_success 'alternation in pattern' ' - git config diff.java.funcname "^[ ]*\\(\\(public\\|static\\).*\\)$" + git config diff.java.xfuncname "^[ ]*((public|static).*)$" && git diff --no-index Beer.java Beer-correct.java | grep "^@@.*@@ public static void main(" ' From 6a6baf9b4e819a0bbfd70627f966cd7144dd8301 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 19 Sep 2008 23:45:04 -0700 Subject: [PATCH 4/6] diff: use extended regexp to find hunk headers Using ERE elements such as "|" (alternation) by backquoting in BRE is a GNU extension and should not be done in portable programs. Signed-off-by: Junio C Hamano --- diff.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/diff.c b/diff.c index dabb4b4a02..175a044a34 100644 --- a/diff.c +++ b/diff.c @@ -1399,20 +1399,23 @@ static const struct funcname_pattern_entry *funcname_pattern(const char *ident) } static const struct funcname_pattern_entry builtin_funcname_pattern[] = { - { "java", "!^[ ]*\\(catch\\|do\\|for\\|if\\|instanceof\\|" - "new\\|return\\|switch\\|throw\\|while\\)\n" - "^[ ]*\\(\\([ ]*" - "[A-Za-z_][A-Za-z_0-9]*\\)\\{2,\\}" - "[ ]*([^;]*\\)$", 0 }, - { "pascal", "^\\(\\(procedure\\|function\\|constructor\\|" - "destructor\\|interface\\|implementation\\|" - "initialization\\|finalization\\)[ \t]*.*\\)$" - "\\|" - "^\\(.*=[ \t]*\\(class\\|record\\).*\\)$", - 0 }, - { "bibtex", "\\(@[a-zA-Z]\\{1,\\}[ \t]*{\\{0,1\\}[ \t]*[^ \t\"@',\\#}{~%]*\\).*$", 0 }, - { "tex", "^\\(\\\\\\(\\(sub\\)*section\\|chapter\\|part\\)\\*\\{0,1\\}{.*\\)$", 0 }, - { "ruby", "^\\s*\\(\\(class\\|module\\|def\\)\\s.*\\)$", 0 }, + { "java", + "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n" + "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$", + REG_EXTENDED }, + { "pascal", + "^((procedure|function|constructor|destructor|interface|" + "implementation|initialization|finalization)[ \t]*.*)$" + "|" + "^(.*=[ \t]*(class|record).*)$", + REG_EXTENDED }, + { "bibtex", "(@[a-zA-Z]{1,}[ \t]*\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", + REG_EXTENDED }, + { "tex", + "^(\\\\((sub)*section|chapter|part)\\*{0,1}\{.*)$", + REG_EXTENDED }, + { "ruby", "^[ \t]*((class|module|def)[ \t].*)$", + REG_EXTENDED }, }; static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one) From 96d1a8e9d44fd635fad8466dbe0aab6d73495c9f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 20 Sep 2008 15:30:12 -0700 Subject: [PATCH 5/6] diff hunk pattern: fix misconverted "\{" tex macro introducers Pointed out by Brandon Casey. Signed-off-by: Junio C Hamano --- diff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/diff.c b/diff.c index 175a044a34..a283738616 100644 --- a/diff.c +++ b/diff.c @@ -1409,10 +1409,10 @@ static const struct funcname_pattern_entry builtin_funcname_pattern[] = { "|" "^(.*=[ \t]*(class|record).*)$", REG_EXTENDED }, - { "bibtex", "(@[a-zA-Z]{1,}[ \t]*\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", + { "bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", REG_EXTENDED }, { "tex", - "^(\\\\((sub)*section|chapter|part)\\*{0,1}\{.*)$", + "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", REG_EXTENDED }, { "ruby", "^[ \t]*((class|module|def)[ \t].*)$", REG_EXTENDED }, From e3bf5e43fd9db9391ebc876ef118dbb431853d69 Mon Sep 17 00:00:00 2001 From: Brandon Casey Date: Mon, 22 Sep 2008 18:19:05 -0500 Subject: [PATCH 6/6] t4018-diff-funcname: test syntax of builtin xfuncname patterns Signed-off-by: Brandon Casey Signed-off-by: Junio C Hamano --- t/t4018-diff-funcname.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh index 602d68f092..99fff973eb 100755 --- a/t/t4018-diff-funcname.sh +++ b/t/t4018-diff-funcname.sh @@ -32,7 +32,18 @@ EOF sed 's/beer\\/beer,\\/' < Beer.java > Beer-correct.java +builtin_patterns="bibtex java pascal ruby tex" +for p in $builtin_patterns +do + test_expect_success "builtin $p pattern compiles" ' + echo "*.java diff=$p" > .gitattributes && + ! ( git diff --no-index Beer.java Beer-correct.java 2>&1 | + grep "fatal" > /dev/null ) + ' +done + test_expect_success 'default behaviour' ' + rm -f .gitattributes && git diff --no-index Beer.java Beer-correct.java | grep "^@@.*@@ public class Beer" '