From e7881c3594d22eb711654d2febfd577dc4994f34 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 5 Oct 2008 17:42:35 -0400 Subject: [PATCH 1/4] t4012: use test_cmp instead of cmp This makes erroneous output slightly easier to see. We also flip the argument order to match our usual style. Signed-off-by: Jeff King Signed-off-by: Shawn O. Pearce --- t/t4012-diff-binary.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t4012-diff-binary.sh b/t/t4012-diff-binary.sh index 421f4bba3f..3cf5b5c4ea 100755 --- a/t/t4012-diff-binary.sh +++ b/t/t4012-diff-binary.sh @@ -25,11 +25,11 @@ cat > expected <<\EOF EOF test_expect_success 'diff without --binary' \ 'git diff | git apply --stat --summary >current && - cmp current expected' + test_cmp expected current' test_expect_success 'diff with --binary' \ 'git diff --binary | git apply --stat --summary >current && - cmp current expected' + test_cmp expected current' # apply needs to be able to skip the binary material correctly # in order to report the line number of a corrupt patch. From be58e70dbadf3cb3f4aa5829d513d886ae8bc460 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 5 Oct 2008 17:43:21 -0400 Subject: [PATCH 2/4] diff: unify external diff and funcname parsing code Both sets of code assume that one specifies a diff profile as a gitattribute via the "diff=foo" attribute. They then pull information about that profile from the config as diff.foo.*. The code for each is currently completely separate from the other, which has several disadvantages: - there is duplication as we maintain code to create and search the separate lists of external drivers and funcname patterns - it is difficult to add new profile options, since it is unclear where they should go - the code is difficult to follow, as we rely on the "check if this file is binary" code to find the funcname pattern as a side effect. This is the first step in refactoring the binary-checking code. This patch factors out these diff profiles into "userdiff" drivers. A file with "diff=foo" uses the "foo" driver, which is specified by a single struct. Note that one major difference between the two pieces of code is that the funcname patterns are always loaded, whereas external drivers are loaded only for the "git diff" porcelain; the new code takes care to retain that situation. Signed-off-by: Jeff King Signed-off-by: Shawn O. Pearce --- Makefile | 2 + diff.c | 251 ++++++----------------------------------------------- userdiff.c | 160 ++++++++++++++++++++++++++++++++++ userdiff.h | 23 +++++ 4 files changed, 212 insertions(+), 224 deletions(-) create mode 100644 userdiff.c create mode 100644 userdiff.h diff --git a/Makefile b/Makefile index 308dc70b5d..d6f3695c97 100644 --- a/Makefile +++ b/Makefile @@ -389,6 +389,7 @@ LIB_H += transport.h LIB_H += tree.h LIB_H += tree-walk.h LIB_H += unpack-trees.h +LIB_H += userdiff.h LIB_H += utf8.h LIB_H += wt-status.h @@ -485,6 +486,7 @@ LIB_OBJS += tree-diff.o LIB_OBJS += tree.o LIB_OBJS += tree-walk.o LIB_OBJS += unpack-trees.o +LIB_OBJS += userdiff.o LIB_OBJS += usage.o LIB_OBJS += utf8.o LIB_OBJS += walker.o diff --git a/diff.c b/diff.c index 1c6be897b2..d50355e842 100644 --- a/diff.c +++ b/diff.c @@ -11,6 +11,7 @@ #include "attr.h" #include "run-command.h" #include "utf8.h" +#include "userdiff.h" #ifdef NO_FAST_WORKING_DIRECTORY #define FAST_WORKING_DIRECTORY 0 @@ -56,80 +57,6 @@ static int parse_diff_color_slot(const char *var, int ofs) die("bad config variable '%s'", var); } -static struct ll_diff_driver { - const char *name; - struct ll_diff_driver *next; - const char *cmd; -} *user_diff, **user_diff_tail; - -/* - * Currently there is only "diff..command" variable; - * because there are "diff.color." variables, we are parsing - * this in a bit convoluted way to allow low level diff driver - * called "color". - */ -static int parse_lldiff_command(const char *var, const char *ep, const char *value) -{ - const char *name; - int namelen; - struct ll_diff_driver *drv; - - name = var + 5; - namelen = ep - name; - for (drv = user_diff; drv; drv = drv->next) - if (!strncmp(drv->name, name, namelen) && !drv->name[namelen]) - break; - if (!drv) { - drv = xcalloc(1, sizeof(struct ll_diff_driver)); - drv->name = xmemdupz(name, namelen); - if (!user_diff_tail) - user_diff_tail = &user_diff; - *user_diff_tail = drv; - user_diff_tail = &(drv->next); - } - - return git_config_string(&(drv->cmd), var, value); -} - -/* - * 'diff..funcname' attribute can be specified in the configuration - * to define a customized regexp to find the beginning of a function to - * be used for hunk header lines of "diff -p" style output. - */ -struct funcname_pattern_entry { - char *name; - char *pattern; - int cflags; -}; -static struct funcname_pattern_list { - struct funcname_pattern_list *next; - struct funcname_pattern_entry e; -} *funcname_pattern_list; - -static int parse_funcname_pattern(const char *var, const char *ep, const char *value, int cflags) -{ - const char *name; - int namelen; - struct funcname_pattern_list *pp; - - name = var + 5; /* "diff." */ - namelen = ep - name; - - for (pp = funcname_pattern_list; pp; pp = pp->next) - if (!strncmp(pp->e.name, name, namelen) && !pp->e.name[namelen]) - break; - if (!pp) { - pp = xcalloc(1, sizeof(*pp)); - pp->e.name = xmemdupz(name, namelen); - pp->next = funcname_pattern_list; - funcname_pattern_list = pp; - } - free(pp->e.pattern); - pp->e.pattern = xstrdup(value); - pp->e.cflags = cflags; - return 0; -} - /* * These are to give UI layer defaults. * The core-level commands such as git-diff-files should @@ -162,11 +89,11 @@ int git_diff_ui_config(const char *var, const char *value, void *cb) } if (!strcmp(var, "diff.external")) return git_config_string(&external_diff_cmd_cfg, var, value); - if (!prefixcmp(var, "diff.")) { - const char *ep = strrchr(var, '.'); - if (ep != var + 4 && !strcmp(ep, ".command")) - return parse_lldiff_command(var, ep, value); + switch (userdiff_config_porcelain(var, value)) { + case 0: break; + case -1: return -1; + default: return 0; } return git_diff_basic_config(var, value, cb); @@ -193,21 +120,10 @@ int git_diff_basic_config(const char *var, const char *value, void *cb) return 0; } - if (!prefixcmp(var, "diff.")) { - const char *ep = strrchr(var, '.'); - if (ep != var + 4) { - if (!strcmp(ep, ".funcname")) { - if (!value) - return config_error_nonbool(var); - return parse_funcname_pattern(var, ep, value, - 0); - } else if (!strcmp(ep, ".xfuncname")) { - if (!value) - return config_error_nonbool(var); - return parse_funcname_pattern(var, ep, value, - REG_EXTENDED); - } - } + switch (userdiff_config_basic(var, value)) { + case 0: break; + case -1: return -1; + default: return 0; } return git_color_default_config(var, value, cb); @@ -1352,46 +1268,24 @@ static void emit_binary_diff(FILE *file, mmfile_t *one, mmfile_t *two) emit_binary_diff_body(file, two, one); } -static void setup_diff_attr_check(struct git_attr_check *check) -{ - static struct git_attr *attr_diff; - - if (!attr_diff) { - attr_diff = git_attr("diff", 4); - } - check[0].attr = attr_diff; -} - static void diff_filespec_check_attr(struct diff_filespec *one) { - struct git_attr_check attr_diff_check; + struct userdiff_driver *drv; int check_from_data = 0; if (one->checked_attr) return; - setup_diff_attr_check(&attr_diff_check); + drv = userdiff_find_by_path(one->path); one->is_binary = 0; - one->funcname_pattern_ident = NULL; - if (!git_checkattr(one->path, 1, &attr_diff_check)) { - const char *value; - - /* binaryness */ - value = attr_diff_check.value; - if (ATTR_TRUE(value)) - ; - else if (ATTR_FALSE(value)) - one->is_binary = 1; - else - check_from_data = 1; - - /* funcname pattern ident */ - if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value)) - ; - else - one->funcname_pattern_ident = value; - } + /* binaryness */ + if (drv == USERDIFF_ATTR_TRUE) + ; + else if (drv == USERDIFF_ATTR_FALSE) + one->is_binary = 1; + else + check_from_data = 1; if (check_from_data) { if (!one->data && DIFF_FILE_VALID(one)) @@ -1408,80 +1302,12 @@ int diff_filespec_is_binary(struct diff_filespec *one) return one->is_binary; } -static const struct funcname_pattern_entry *funcname_pattern(const char *ident) +static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespec *one) { - struct funcname_pattern_list *pp; - - for (pp = funcname_pattern_list; pp; pp = pp->next) - if (!strcmp(ident, pp->e.name)) - return &pp->e; - return NULL; -} - -static const struct funcname_pattern_entry builtin_funcname_pattern[] = { - { "bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", - REG_EXTENDED }, - { "html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", REG_EXTENDED }, - { "java", - "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n" - "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$", - REG_EXTENDED }, - { "objc", - /* Negate C statements that can look like functions */ - "!^[ \t]*(do|for|if|else|return|switch|while)\n" - /* Objective-C methods */ - "^[ \t]*([-+][ \t]*\\([ \t]*[A-Za-z_][A-Za-z_0-9* \t]*\\)[ \t]*[A-Za-z_].*)$\n" - /* C functions */ - "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$\n" - /* Objective-C class/protocol definitions */ - "^(@(implementation|interface|protocol)[ \t].*)$", - REG_EXTENDED }, - { "pascal", - "^((procedure|function|constructor|destructor|interface|" - "implementation|initialization|finalization)[ \t]*.*)$" - "\n" - "^(.*=[ \t]*(class|record).*)$", - REG_EXTENDED }, - { "php", "^[\t ]*((function|class).*)", REG_EXTENDED }, - { "python", "^[ \t]*((class|def)[ \t].*)$", REG_EXTENDED }, - { "ruby", "^[ \t]*((class|module|def)[ \t].*)$", - REG_EXTENDED }, - { "tex", - "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", - REG_EXTENDED }, -}; - -static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one) -{ - const char *ident; - const struct funcname_pattern_entry *pe; - int i; - - diff_filespec_check_attr(one); - ident = one->funcname_pattern_ident; - - if (!ident) - /* - * If the config file has "funcname.default" defined, that - * regexp is used; otherwise NULL is returned and xemit uses - * the built-in default. - */ - return funcname_pattern("default"); - - /* Look up custom "funcname.$ident" regexp from config. */ - pe = funcname_pattern(ident); - if (pe) - return pe; - - /* - * And define built-in fallback patterns here. Note that - * these can be overridden by the user's config settings. - */ - for (i = 0; i < ARRAY_SIZE(builtin_funcname_pattern); i++) - if (!strcmp(ident, builtin_funcname_pattern[i].name)) - return &builtin_funcname_pattern[i]; - - return NULL; + struct userdiff_driver *drv = userdiff_find_by_path(one->path); + if (!drv) + drv = userdiff_find_by_name("default"); + return drv && drv->funcname.pattern ? &drv->funcname : NULL; } void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b) @@ -1579,7 +1405,7 @@ static void builtin_diff(const char *name_a, xdemitconf_t xecfg; xdemitcb_t ecb; struct emit_callback ecbdata; - const struct funcname_pattern_entry *pe; + const struct userdiff_funcname *pe; pe = diff_funcname_pattern(one); if (!pe) @@ -2117,29 +1943,6 @@ static void run_external_diff(const char *pgm, } } -static const char *external_diff_attr(const char *name) -{ - struct git_attr_check attr_diff_check; - - if (!name) - return NULL; - - setup_diff_attr_check(&attr_diff_check); - if (!git_checkattr(name, 1, &attr_diff_check)) { - const char *value = attr_diff_check.value; - if (!ATTR_TRUE(value) && - !ATTR_FALSE(value) && - !ATTR_UNSET(value)) { - struct ll_diff_driver *drv; - - for (drv = user_diff; drv; drv = drv->next) - if (!strcmp(drv->name, value)) - return drv->cmd; - } - } - return NULL; -} - static void run_diff_cmd(const char *pgm, const char *name, const char *other, @@ -2153,9 +1956,9 @@ static void run_diff_cmd(const char *pgm, if (!DIFF_OPT_TST(o, ALLOW_EXTERNAL)) pgm = NULL; else { - const char *cmd = external_diff_attr(attr_path); - if (cmd) - pgm = cmd; + struct userdiff_driver *drv = userdiff_find_by_path(attr_path); + if (drv && drv->external) + pgm = drv->external; } if (pgm) { diff --git a/userdiff.c b/userdiff.c new file mode 100644 index 0000000000..80e2857abb --- /dev/null +++ b/userdiff.c @@ -0,0 +1,160 @@ +#include "userdiff.h" +#include "cache.h" +#include "attr.h" + +static struct userdiff_driver *drivers; +static int ndrivers; +static int drivers_alloc; + +#define FUNCNAME(name, pattern) \ + { name, NULL, { pattern, REG_EXTENDED } } +static struct userdiff_driver builtin_drivers[] = { +FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"), +FUNCNAME("java", + "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n" + "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$"), +FUNCNAME("objc", + /* Negate C statements that can look like functions */ + "!^[ \t]*(do|for|if|else|return|switch|while)\n" + /* Objective-C methods */ + "^[ \t]*([-+][ \t]*\\([ \t]*[A-Za-z_][A-Za-z_0-9* \t]*\\)[ \t]*[A-Za-z_].*)$\n" + /* C functions */ + "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$\n" + /* Objective-C class/protocol definitions */ + "^(@(implementation|interface|protocol)[ \t].*)$"), +FUNCNAME("pascal", + "^((procedure|function|constructor|destructor|interface|" + "implementation|initialization|finalization)[ \t]*.*)$" + "\n" + "^(.*=[ \t]*(class|record).*)$"), +FUNCNAME("php", "^[\t ]*((function|class).*)"), +FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"), +FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"), +FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"), +FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"), +}; +#undef FUNCNAME + +static struct userdiff_driver driver_true = { + "diff=true", + NULL, + { NULL, 0 } +}; +struct userdiff_driver *USERDIFF_ATTR_TRUE = &driver_true; + +static struct userdiff_driver driver_false = { + "!diff", + NULL, + { NULL, 0 } +}; +struct userdiff_driver *USERDIFF_ATTR_FALSE = &driver_false; + +static struct userdiff_driver *userdiff_find_by_namelen(const char *k, int len) +{ + int i; + for (i = 0; i < ndrivers; i++) { + struct userdiff_driver *drv = drivers + i; + if (!strncmp(drv->name, k, len) && !drv->name[len]) + return drv; + } + for (i = 0; i < ARRAY_SIZE(builtin_drivers); i++) { + struct userdiff_driver *drv = builtin_drivers + i; + if (!strncmp(drv->name, k, len) && !drv->name[len]) + return drv; + } + return NULL; +} + +static struct userdiff_driver *parse_driver(const char *var, + const char *value, const char *type) +{ + struct userdiff_driver *drv; + const char *dot; + const char *name; + int namelen; + + if (prefixcmp(var, "diff.")) + return NULL; + dot = strrchr(var, '.'); + if (dot == var + 4) + return NULL; + if (strcmp(type, dot+1)) + return NULL; + + name = var + 5; + namelen = dot - name; + drv = userdiff_find_by_namelen(name, namelen); + if (!drv) { + ALLOC_GROW(drivers, ndrivers+1, drivers_alloc); + drv = &drivers[ndrivers++]; + memset(drv, 0, sizeof(*drv)); + drv->name = xmemdupz(name, namelen); + } + return drv; +} + +static int parse_funcname(struct userdiff_funcname *f, const char *k, + const char *v, int cflags) +{ + if (git_config_string(&f->pattern, k, v) < 0) + return -1; + f->cflags = cflags; + return 1; +} + +static int parse_string(const char **d, const char *k, const char *v) +{ + if (git_config_string(d, k, v) < 0) + return -1; + return 1; +} + +int userdiff_config_basic(const char *k, const char *v) +{ + struct userdiff_driver *drv; + + if ((drv = parse_driver(k, v, "funcname"))) + return parse_funcname(&drv->funcname, k, v, 0); + if ((drv = parse_driver(k, v, "xfuncname"))) + return parse_funcname(&drv->funcname, k, v, REG_EXTENDED); + + return 0; +} + +int userdiff_config_porcelain(const char *k, const char *v) +{ + struct userdiff_driver *drv; + + if ((drv = parse_driver(k, v, "command"))) + return parse_string(&drv->external, k, v); + + return 0; +} + +struct userdiff_driver *userdiff_find_by_name(const char *name) { + int len = strlen(name); + return userdiff_find_by_namelen(name, len); +} + +struct userdiff_driver *userdiff_find_by_path(const char *path) +{ + static struct git_attr *attr; + struct git_attr_check check; + + if (!attr) + attr = git_attr("diff", 4); + check.attr = attr; + + if (!path) + return NULL; + if (git_checkattr(path, 1, &check)) + return NULL; + + if (ATTR_TRUE(check.value)) + return &driver_true; + if (ATTR_FALSE(check.value)) + return &driver_false; + if (ATTR_UNSET(check.value)) + return NULL; + return userdiff_find_by_name(check.value); +} diff --git a/userdiff.h b/userdiff.h new file mode 100644 index 0000000000..c64c5f5669 --- /dev/null +++ b/userdiff.h @@ -0,0 +1,23 @@ +#ifndef USERDIFF_H +#define USERDIFF_H + +struct userdiff_funcname { + const char *pattern; + int cflags; +}; + +struct userdiff_driver { + const char *name; + const char *external; + struct userdiff_funcname funcname; +}; + +extern struct userdiff_driver *USERDIFF_ATTR_TRUE; +extern struct userdiff_driver *USERDIFF_ATTR_FALSE; + +int userdiff_config_basic(const char *k, const char *v); +int userdiff_config_porcelain(const char *k, const char *v); +struct userdiff_driver *userdiff_find_by_name(const char *name); +struct userdiff_driver *userdiff_find_by_path(const char *path); + +#endif /* USERDIFF */ From 122aa6f9c000d0d286898e2eb7b3504ac6cb9ebd Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 5 Oct 2008 17:43:36 -0400 Subject: [PATCH 3/4] diff: introduce diff..binary The "diff" gitattribute is somewhat overloaded right now. It can say one of three things: 1. this file is definitely binary, or definitely not (i.e., diff or !diff) 2. this file should use an external diff engine (i.e., diff=foo, diff.foo.command = custom-script) 3. this file should use particular funcname patterns (i.e., diff=foo, diff.foo.(x?)funcname = some-regex) Most of the time, there is no conflict between these uses, since using one implies that the other is irrelevant (e.g., an external diff engine will decide for itself whether the file is binary). However, there is at least one conflicting situation: there is no way to say "use the regular rules to determine whether this file is binary, but if we do diff it textually, use this funcname pattern." That is, currently setting diff=foo indicates that the file is definitely text. This patch introduces a "binary" config option for a diff driver, so that one can explicitly set diff.foo.binary. We default this value to "don't know". That is, setting a diff attribute to "foo" and using "diff.foo.funcname" will have no effect on the binaryness of a file. To get the current behavior, one can set diff.foo.binary to true. This patch also has one additional advantage: it cleans up the interface to the userdiff code a bit. Before, calling code had to know more about whether attributes were false, true, or unset to determine binaryness. Now that binaryness is a property of a driver, we can represent these situations just by passing back a driver struct. Signed-off-by: Jeff King Signed-off-by: Shawn O. Pearce --- diff.c | 52 ++++++++++++++++++++++------------------------------ diffcore.h | 8 ++++++-- userdiff.c | 19 ++++++++++++++++--- userdiff.h | 4 +--- 4 files changed, 45 insertions(+), 38 deletions(-) diff --git a/diff.c b/diff.c index d50355e842..dabd7f50ec 100644 --- a/diff.c +++ b/diff.c @@ -1268,46 +1268,37 @@ static void emit_binary_diff(FILE *file, mmfile_t *one, mmfile_t *two) emit_binary_diff_body(file, two, one); } -static void diff_filespec_check_attr(struct diff_filespec *one) +void diff_filespec_load_driver(struct diff_filespec *one) { - struct userdiff_driver *drv; - int check_from_data = 0; - - if (one->checked_attr) - return; - - drv = userdiff_find_by_path(one->path); - one->is_binary = 0; - - /* binaryness */ - if (drv == USERDIFF_ATTR_TRUE) - ; - else if (drv == USERDIFF_ATTR_FALSE) - one->is_binary = 1; - else - check_from_data = 1; - - if (check_from_data) { - if (!one->data && DIFF_FILE_VALID(one)) - diff_populate_filespec(one, 0); - - if (one->data) - one->is_binary = buffer_is_binary(one->data, one->size); - } + if (!one->driver) + one->driver = userdiff_find_by_path(one->path); + if (!one->driver) + one->driver = userdiff_find_by_name("default"); } int diff_filespec_is_binary(struct diff_filespec *one) { - diff_filespec_check_attr(one); + if (one->is_binary == -1) { + diff_filespec_load_driver(one); + if (one->driver->binary != -1) + one->is_binary = one->driver->binary; + else { + if (!one->data && DIFF_FILE_VALID(one)) + diff_populate_filespec(one, 0); + if (one->data) + one->is_binary = buffer_is_binary(one->data, + one->size); + if (one->is_binary == -1) + one->is_binary = 0; + } + } return one->is_binary; } static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespec *one) { - struct userdiff_driver *drv = userdiff_find_by_path(one->path); - if (!drv) - drv = userdiff_find_by_name("default"); - return drv && drv->funcname.pattern ? &drv->funcname : NULL; + diff_filespec_load_driver(one); + return one->driver->funcname.pattern ? &one->driver->funcname : NULL; } void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b) @@ -1559,6 +1550,7 @@ struct diff_filespec *alloc_filespec(const char *path) spec->path = (char *)(spec + 1); memcpy(spec->path, path, namelen+1); spec->count = 1; + spec->is_binary = -1; return spec; } diff --git a/diffcore.h b/diffcore.h index 8ae35785fd..713cca785c 100644 --- a/diffcore.h +++ b/diffcore.h @@ -22,6 +22,8 @@ #define MINIMUM_BREAK_SIZE 400 /* do not break a file smaller than this */ +struct userdiff_driver; + struct diff_filespec { unsigned char sha1[20]; char *path; @@ -40,8 +42,10 @@ struct diff_filespec { #define DIFF_FILE_VALID(spec) (((spec)->mode) != 0) unsigned should_free : 1; /* data should be free()'ed */ unsigned should_munmap : 1; /* data should be munmap()'ed */ - unsigned checked_attr : 1; - unsigned is_binary : 1; /* data should be considered "binary" */ + + struct userdiff_driver *driver; + /* data should be considered "binary"; -1 means "don't know yet" */ + int is_binary; }; extern struct diff_filespec *alloc_filespec(const char *); diff --git a/userdiff.c b/userdiff.c index 80e2857abb..58478a6912 100644 --- a/userdiff.c +++ b/userdiff.c @@ -7,7 +7,7 @@ static int ndrivers; static int drivers_alloc; #define FUNCNAME(name, pattern) \ - { name, NULL, { pattern, REG_EXTENDED } } + { name, NULL, -1, { pattern, REG_EXTENDED } } static struct userdiff_driver builtin_drivers[] = { FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"), FUNCNAME("java", @@ -32,22 +32,23 @@ FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"), FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"), FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"), FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"), +{ "default", NULL, -1, { NULL, 0 } }, }; #undef FUNCNAME static struct userdiff_driver driver_true = { "diff=true", NULL, + 0, { NULL, 0 } }; -struct userdiff_driver *USERDIFF_ATTR_TRUE = &driver_true; static struct userdiff_driver driver_false = { "!diff", NULL, + 1, { NULL, 0 } }; -struct userdiff_driver *USERDIFF_ATTR_FALSE = &driver_false; static struct userdiff_driver *userdiff_find_by_namelen(const char *k, int len) { @@ -89,6 +90,7 @@ static struct userdiff_driver *parse_driver(const char *var, drv = &drivers[ndrivers++]; memset(drv, 0, sizeof(*drv)); drv->name = xmemdupz(name, namelen); + drv->binary = -1; } return drv; } @@ -109,6 +111,15 @@ static int parse_string(const char **d, const char *k, const char *v) return 1; } +static int parse_tristate(int *b, const char *k, const char *v) +{ + if (v && !strcasecmp(v, "auto")) + *b = -1; + else + *b = git_config_bool(k, v); + return 1; +} + int userdiff_config_basic(const char *k, const char *v) { struct userdiff_driver *drv; @@ -117,6 +128,8 @@ int userdiff_config_basic(const char *k, const char *v) return parse_funcname(&drv->funcname, k, v, 0); if ((drv = parse_driver(k, v, "xfuncname"))) return parse_funcname(&drv->funcname, k, v, REG_EXTENDED); + if ((drv = parse_driver(k, v, "binary"))) + return parse_tristate(&drv->binary, k, v); return 0; } diff --git a/userdiff.h b/userdiff.h index c64c5f5669..1c1eb042b4 100644 --- a/userdiff.h +++ b/userdiff.h @@ -9,12 +9,10 @@ struct userdiff_funcname { struct userdiff_driver { const char *name; const char *external; + int binary; struct userdiff_funcname funcname; }; -extern struct userdiff_driver *USERDIFF_ATTR_TRUE; -extern struct userdiff_driver *USERDIFF_ATTR_FALSE; - int userdiff_config_basic(const char *k, const char *v); int userdiff_config_porcelain(const char *k, const char *v); struct userdiff_driver *userdiff_find_by_name(const char *name); From 9cb92c390cefd3bf3f71bbda12eb04893c861361 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 5 Oct 2008 17:43:45 -0400 Subject: [PATCH 4/4] diff: add filter for converting binary to text When diffing binary files, it is sometimes nice to see the differences of a canonical text form rather than either a binary patch or simply "binary files differ." Until now, the only option for doing this was to define an external diff command to perform the diff. This was a lot of work, since the external command needed to take care of doing the diff itself (including mode changes), and lost the benefit of git's colorization and other options. This patch adds a text conversion option, which converts a file to its canonical format before performing the diff. This is less flexible than an arbitrary external diff, but is much less work to set up. For example: $ echo '*.jpg diff=exif' >>.gitattributes $ git config diff.exif.textconv exiftool $ git config diff.exif.binary false allows one to see jpg diffs represented by the text output of exiftool. Signed-off-by: Jeff King Signed-off-by: Shawn O. Pearce --- diff.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- userdiff.c | 2 ++ userdiff.h | 1 + 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/diff.c b/diff.c index dabd7f50ec..e368fef14f 100644 --- a/diff.c +++ b/diff.c @@ -38,6 +38,9 @@ static char diff_colors[][COLOR_MAXLEN] = { "\033[41m", /* WHITESPACE (red background) */ }; +static void diff_filespec_load_driver(struct diff_filespec *one); +static char *run_textconv(const char *, struct diff_filespec *, size_t *); + static int parse_diff_color_slot(const char *var, int ofs) { if (!strcasecmp(var+ofs, "plain")) @@ -290,8 +293,19 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one) } else if (diff_populate_filespec(one, 0)) return -1; - mf->ptr = one->data; - mf->size = one->size; + + diff_filespec_load_driver(one); + if (one->driver->textconv) { + size_t size; + mf->ptr = run_textconv(one->driver->textconv, one, &size); + if (!mf->ptr) + return -1; + mf->size = size; + } + else { + mf->ptr = one->data; + mf->size = one->size; + } return 0; } @@ -3373,3 +3387,34 @@ void diff_unmerge(struct diff_options *options, fill_filespec(one, sha1, mode); diff_queue(&diff_queued_diff, one, two)->is_unmerged = 1; } + +static char *run_textconv(const char *pgm, struct diff_filespec *spec, + size_t *outsize) +{ + struct diff_tempfile temp; + const char *argv[3]; + const char **arg = argv; + struct child_process child; + struct strbuf buf = STRBUF_INIT; + + prepare_temp_file(spec->path, &temp, spec); + *arg++ = pgm; + *arg++ = temp.name; + *arg = NULL; + + memset(&child, 0, sizeof(child)); + child.argv = argv; + child.out = -1; + if (start_command(&child) != 0 || + strbuf_read(&buf, child.out, 0) < 0 || + finish_command(&child) != 0) { + if (temp.name == temp.tmp_path) + unlink(temp.name); + error("error running textconv command '%s'", pgm); + return NULL; + } + if (temp.name == temp.tmp_path) + unlink(temp.name); + + return strbuf_detach(&buf, outsize); +} diff --git a/userdiff.c b/userdiff.c index 58478a6912..d95257ab3b 100644 --- a/userdiff.c +++ b/userdiff.c @@ -140,6 +140,8 @@ int userdiff_config_porcelain(const char *k, const char *v) if ((drv = parse_driver(k, v, "command"))) return parse_string(&drv->external, k, v); + if ((drv = parse_driver(k, v, "textconv"))) + return parse_string(&drv->textconv, k, v); return 0; } diff --git a/userdiff.h b/userdiff.h index 1c1eb042b4..f29c18ffb3 100644 --- a/userdiff.h +++ b/userdiff.h @@ -11,6 +11,7 @@ struct userdiff_driver { const char *external; int binary; struct userdiff_funcname funcname; + const char *textconv; }; int userdiff_config_basic(const char *k, const char *v);