From 11e95e16e8ef70c6c757d50a99f9b310e6c795f4 Mon Sep 17 00:00:00 2001 From: John Cai Date: Mon, 20 Feb 2023 21:04:41 +0000 Subject: [PATCH 1/2] diff: consolidate diff algorithm option parsing A subsequent commit will need the ability to tell if the diff algorithm was set through the command line through setting a new member of diff_options. While this logic can be added to the diff_opt_diff_algorithm() callback, the `--minimal` and `--histogram` options are handled via OPT_BIT without a callback. Remedy this by consolidating the options parsing logic for --minimal and --histogram into one callback. This way we can modify `diff_options` in that function. As an additional refactor, the logic that sets the diff algorithm in diff_opt_diff_algorithm() can be refactored into a helper that will allow multiple callsites to set the diff algorithm. Signed-off-by: John Cai Signed-off-by: Junio C Hamano --- diff.c | 57 +++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/diff.c b/diff.c index 1054a4b732..3b97f5eca2 100644 --- a/diff.c +++ b/diff.c @@ -3437,6 +3437,22 @@ static int diff_filepair_is_phoney(struct diff_filespec *one, return !DIFF_FILE_VALID(one) && !DIFF_FILE_VALID(two); } +static int set_diff_algorithm(struct diff_options *opts, + const char *alg) +{ + long value = parse_algorithm_value(alg); + + if (value < 0) + return -1; + + /* clear out previous settings */ + DIFF_XDL_CLR(opts, NEED_MINIMAL); + opts->xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK; + opts->xdl_opts |= value; + + return 0; +} + static void builtin_diff(const char *name_a, const char *name_b, struct diff_filespec *one, @@ -5117,17 +5133,28 @@ static int diff_opt_diff_algorithm(const struct option *opt, const char *arg, int unset) { struct diff_options *options = opt->value; - long value = parse_algorithm_value(arg); BUG_ON_OPT_NEG(unset); - if (value < 0) + + if (set_diff_algorithm(options, arg)) return error(_("option diff-algorithm 
accepts \"myers\", " "\"minimal\", \"patience\" and \"histogram\"")); - /* clear out previous settings */ - DIFF_XDL_CLR(options, NEED_MINIMAL); - options->xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK; - options->xdl_opts |= value; + return 0; +} + +static int diff_opt_diff_algorithm_no_arg(const struct option *opt, + const char *arg, int unset) +{ + struct diff_options *options = opt->value; + + BUG_ON_OPT_NEG(unset); + BUG_ON_OPT_ARG(arg); + + if (set_diff_algorithm(options, opt->long_name)) + BUG("available diff algorithms include \"myers\", " + "\"minimal\", \"patience\" and \"histogram\""); + return 0; } @@ -5260,7 +5287,6 @@ static int diff_opt_patience(const struct option *opt, BUG_ON_OPT_NEG(unset); BUG_ON_OPT_ARG(arg); - options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF); /* * Both --patience and --anchored use PATIENCE_DIFF * internally, so remove any anchors previously @@ -5269,7 +5295,8 @@ static int diff_opt_patience(const struct option *opt, for (i = 0; i < options->anchors_nr; i++) free(options->anchors[i]); options->anchors_nr = 0; - return 0; + + return set_diff_algorithm(options, "patience"); } static int diff_opt_ignore_regex(const struct option *opt, @@ -5571,9 +5598,10 @@ static void prep_parse_options(struct diff_options *options) N_("prevent rename/copy detection if the number of rename/copy targets exceeds given limit")), OPT_GROUP(N_("Diff algorithm options")), - OPT_BIT(0, "minimal", &options->xdl_opts, - N_("produce the smallest possible diff"), - XDF_NEED_MINIMAL), + OPT_CALLBACK_F(0, "minimal", options, NULL, + N_("produce the smallest possible diff"), + PARSE_OPT_NONEG | PARSE_OPT_NOARG, + diff_opt_diff_algorithm_no_arg), OPT_BIT_F('w', "ignore-all-space", &options->xdl_opts, N_("ignore whitespace when comparing lines"), XDF_IGNORE_WHITESPACE, PARSE_OPT_NONEG), @@ -5599,9 +5627,10 @@ static void prep_parse_options(struct diff_options *options) N_("generate diff using the \"patience diff\" algorithm"), PARSE_OPT_NONEG | PARSE_OPT_NOARG, 
diff_opt_patience), - OPT_BITOP(0, "histogram", &options->xdl_opts, - N_("generate diff using the \"histogram diff\" algorithm"), - XDF_HISTOGRAM_DIFF, XDF_DIFF_ALGORITHM_MASK), + OPT_CALLBACK_F(0, "histogram", options, NULL, + N_("generate diff using the \"histogram diff\" algorithm"), + PARSE_OPT_NONEG | PARSE_OPT_NOARG, + diff_opt_diff_algorithm_no_arg), OPT_CALLBACK_F(0, "diff-algorithm", options, N_(""), N_("choose a diff algorithm"), PARSE_OPT_NONEG, diff_opt_diff_algorithm), From a4cf900ee734ce9bb73d57c5dfbb1da4a5a88bd3 Mon Sep 17 00:00:00 2001 From: John Cai Date: Mon, 20 Feb 2023 21:04:42 +0000 Subject: [PATCH 2/2] diff: teach diff to read algorithm from diff driver It can be useful to specify diff algorithms per file type. For example, one may want to use the minimal diff algorithm for .json files, another for .c files, etc. The diff machinery already checks attributes for a diff driver. Teach the diff driver parser a new type "algorithm" to look for in the config, which will be used if a driver has been specified through the attributes. Enforce precedence of the diff algorithm by favoring the command line option, then looking at the driver attributes & config combination, then finally the diff.algorithm config. To enforce precedence order, use a new `ignore_driver_algorithm` member during options parsing to indicate the diff algorithm was set via command line args. Signed-off-by: John Cai Signed-off-by: Junio C Hamano --- Documentation/gitattributes.txt | 31 +++++++++++++++++++++++++++ diff.c | 33 ++++++++++++++++++++-------- diff.h | 1 + t/lib-diff-alternative.sh | 38 ++++++++++++++++++++++++++++++++- userdiff.c | 4 +++- userdiff.h | 1 + 6 files changed, 97 insertions(+), 11 deletions(-) diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 4b36d51beb..966e969631 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -758,6 +758,37 @@ with the above configuration, i.e. 
`j-c-diff`, with 7 parameters, just like `GIT_EXTERNAL_DIFF` program is called. See linkgit:git[1] for details. +Setting the internal diff algorithm +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The diff algorithm can be set through the `diff.algorithm` config key, but +sometimes it may be helpful to set the diff algorithm per path. For example, +one may want to use the `minimal` diff algorithm for .json files, and the +`histogram` for .c files, and so on without having to pass in the algorithm +through the command line each time. + +First, in `.gitattributes`, assign the `diff` attribute for paths. + +------------------------ +*.json diff=<name> +------------------------ + +Then, define a "diff.<name>.algorithm" configuration to specify the diff +algorithm, choosing from `myers`, `patience`, `minimal`, or `histogram`. + +---------------------------------------------------------------- +[diff "<name>"] + algorithm = histogram +---------------------------------------------------------------- + +This diff algorithm applies to user facing diff output like git-diff(1), +git-show(1) and is used for the `--stat` output as well. The merge machinery +will not use the diff algorithm set through this method. + +NOTE: If `diff.<name>.command` is defined for path with the +`diff=<name>` attribute, it is executed as an external diff driver +(see above), and adding `diff.<name>.algorithm` has no effect, as the +algorithm is not passed to the external diff driver. 
Defining a custom hunk-header ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/diff.c b/diff.c index 3b97f5eca2..b006467dc2 100644 --- a/diff.c +++ b/diff.c @@ -4460,15 +4460,13 @@ static void run_diff_cmd(const char *pgm, const char *xfrm_msg = NULL; int complete_rewrite = (p->status == DIFF_STATUS_MODIFIED) && p->score; int must_show_header = 0; + struct userdiff_driver *drv = NULL; - - if (o->flags.allow_external) { - struct userdiff_driver *drv; - + if (o->flags.allow_external || !o->ignore_driver_algorithm) drv = userdiff_find_by_path(o->repo->index, attr_path); - if (drv && drv->external) - pgm = drv->external; - } + + if (o->flags.allow_external && drv && drv->external) + pgm = drv->external; if (msg) { /* @@ -4485,12 +4483,16 @@ static void run_diff_cmd(const char *pgm, run_external_diff(pgm, name, other, one, two, xfrm_msg, o); return; } - if (one && two) + if (one && two) { + if (!o->ignore_driver_algorithm && drv && drv->algorithm) + set_diff_algorithm(o, drv->algorithm); + builtin_diff(name, other ? 
other : name, one, two, xfrm_msg, must_show_header, o, complete_rewrite); - else + } else { fprintf(o->file, "* Unmerged path %s\n", name); + } } static void diff_fill_oid_info(struct diff_filespec *one, struct index_state *istate) @@ -4587,6 +4589,14 @@ static void run_diffstat(struct diff_filepair *p, struct diff_options *o, const char *name; const char *other; + if (!o->ignore_driver_algorithm) { + struct userdiff_driver *drv = userdiff_find_by_path(o->repo->index, + p->one->path); + + if (drv && drv->algorithm) + set_diff_algorithm(o, drv->algorithm); + } + if (DIFF_PAIR_UNMERGED(p)) { /* unmerged */ builtin_diffstat(p->one->path, NULL, NULL, NULL, @@ -5140,6 +5150,8 @@ static int diff_opt_diff_algorithm(const struct option *opt, return error(_("option diff-algorithm accepts \"myers\", " "\"minimal\", \"patience\" and \"histogram\"")); + options->ignore_driver_algorithm = 1; + return 0; } @@ -5155,6 +5167,8 @@ static int diff_opt_diff_algorithm_no_arg(const struct option *opt, BUG("available diff algorithms include \"myers\", " "\"minimal\", \"patience\" and \"histogram\""); + options->ignore_driver_algorithm = 1; + return 0; } @@ -5295,6 +5309,7 @@ static int diff_opt_patience(const struct option *opt, for (i = 0; i < options->anchors_nr; i++) free(options->anchors[i]); options->anchors_nr = 0; + options->ignore_driver_algorithm = 1; return set_diff_algorithm(options, "patience"); } diff --git a/diff.h b/diff.h index fd33caeb25..7b1fe25a53 100644 --- a/diff.h +++ b/diff.h @@ -333,6 +333,7 @@ struct diff_options { int prefix_length; const char *stat_sep; int xdl_opts; + int ignore_driver_algorithm; /* see Documentation/diff-options.txt */ char **anchors; diff --git a/t/lib-diff-alternative.sh b/t/lib-diff-alternative.sh index 8d1e408bb5..a8f5d3274a 100644 --- a/t/lib-diff-alternative.sh +++ b/t/lib-diff-alternative.sh @@ -105,10 +105,46 @@ index $file1..$file2 100644 } EOF + cat >expect_diffstat < file2 | 21 ++++++++++----------- + 1 file changed, 10 
insertions(+), 11 deletions(-) +EOF + STRATEGY=$1 + test_expect_success "$STRATEGY diff from attributes" ' + echo "file* diff=driver" >.gitattributes && + git config diff.driver.algorithm "$STRATEGY" && + test_must_fail git diff --no-index file1 file2 > output && + cat expect && + cat output && + test_cmp expect output + ' + + test_expect_success "$STRATEGY diff from attributes has valid diffstat" ' + echo "file* diff=driver" >.gitattributes && + git config diff.driver.algorithm "$STRATEGY" && + test_must_fail git diff --stat --no-index file1 file2 > output && + test_cmp expect_diffstat output + ' + test_expect_success "$STRATEGY diff" ' - test_must_fail git diff --no-index "--$STRATEGY" file1 file2 > output && + test_must_fail git diff --no-index "--diff-algorithm=$STRATEGY" file1 file2 > output && + test_cmp expect output + ' + + test_expect_success "$STRATEGY diff command line precedence before attributes" ' + echo "file* diff=driver" >.gitattributes && + git config diff.driver.algorithm myers && + test_must_fail git diff --no-index "--diff-algorithm=$STRATEGY" file1 file2 > output && + test_cmp expect output + ' + + test_expect_success "$STRATEGY diff attributes precedence before config" ' + git config diff.algorithm default && + echo "file* diff=driver" >.gitattributes && + git config diff.driver.algorithm "$STRATEGY" && + test_must_fail git diff --no-index file1 file2 > output && test_cmp expect output ' diff --git a/userdiff.c b/userdiff.c index 151d9a5278..8719a5111c 100644 --- a/userdiff.c +++ b/userdiff.c @@ -293,7 +293,7 @@ PATTERNS("scheme", "|([^][)(}{[ \t])+"), PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+"), -{ "default", NULL, -1, { NULL, 0 } }, +{ "default", NULL, NULL, -1, { NULL, 0 } }, }; #undef PATTERNS #undef IPATTERN @@ -393,6 +393,8 @@ int userdiff_config(const char *k, const char *v) return parse_bool(&drv->textconv_want_cache, k, v); if (!strcmp(type, "wordregex")) return 
git_config_string(&drv->word_regex, k, v); + if (!strcmp(type, "algorithm")) + return git_config_string(&drv->algorithm, k, v); return 0; } diff --git a/userdiff.h b/userdiff.h index aee91bc77e..24419db697 100644 --- a/userdiff.h +++ b/userdiff.h @@ -14,6 +14,7 @@ struct userdiff_funcname { struct userdiff_driver { const char *name; const char *external; + const char *algorithm; int binary; struct userdiff_funcname funcname; const char *word_regex;