diff: teach diff to read algorithm from diff driver

It can be useful to specify diff algorithms per file type. For example,
one may want to use the minimal diff algorithm for .json files, another
for .c files, etc.

The diff machinery already checks attributes for a diff driver. Teach
the diff driver parser a new type "algorithm" to look for in the
config, which will be used if a driver has been specified through the
attributes.

Enforce precedence of the diff algorithm by favoring the command line
option, then looking at the driver attributes & config combination, then
finally the diff.algorithm config.

To enforce precedence order, use a new `ignore_driver_algorithm` member
during options parsing to indicate the diff algorithm was set via command
line args.

Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
John Cai 2023-02-20 21:04:42 +00:00 committed by Junio C Hamano
parent 11e95e16e8
commit a4cf900ee7
6 changed files with 97 additions and 11 deletions

View File

@ -758,6 +758,37 @@ with the above configuration, i.e. `j-c-diff`, with 7
parameters, just like `GIT_EXTERNAL_DIFF` program is called.
See linkgit:git[1] for details.
Setting the internal diff algorithm
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The diff algorithm can be set through the `diff.algorithm` config key, but
sometimes it may be helpful to set the diff algorithm per path. For example,
one may want to use the `minimal` diff algorithm for .json files, and the
`histogram` algorithm for .c files, and so on, without having to pass in the
algorithm through the command line each time.
First, in `.gitattributes`, assign the `diff` attribute for paths.
------------------------
*.json diff=<name>
------------------------
Then, define a `diff.<name>.algorithm` configuration to specify the diff
algorithm, choosing from `myers`, `patience`, `minimal`, or `histogram`.
----------------------------------------------------------------
[diff "<name>"]
algorithm = histogram
----------------------------------------------------------------
This diff algorithm applies to user-facing diff output like git-diff(1) and
git-show(1), and is used for the `--stat` output as well. The merge machinery
will not use the diff algorithm set through this method.
NOTE: If `diff.<name>.command` is defined for a path with the
`diff=<name>` attribute, it is executed as an external diff driver
(see above), and adding `diff.<name>.algorithm` has no effect, as the
algorithm is not passed to the external diff driver.
Defining a custom hunk-header
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

33
diff.c
View File

@ -4460,15 +4460,13 @@ static void run_diff_cmd(const char *pgm,
const char *xfrm_msg = NULL;
int complete_rewrite = (p->status == DIFF_STATUS_MODIFIED) && p->score;
int must_show_header = 0;
struct userdiff_driver *drv = NULL;
if (o->flags.allow_external) {
struct userdiff_driver *drv;
if (o->flags.allow_external || !o->ignore_driver_algorithm)
drv = userdiff_find_by_path(o->repo->index, attr_path);
if (drv && drv->external)
pgm = drv->external;
}
if (o->flags.allow_external && drv && drv->external)
pgm = drv->external;
if (msg) {
/*
@ -4485,12 +4483,16 @@ static void run_diff_cmd(const char *pgm,
run_external_diff(pgm, name, other, one, two, xfrm_msg, o);
return;
}
if (one && two)
if (one && two) {
if (!o->ignore_driver_algorithm && drv && drv->algorithm)
set_diff_algorithm(o, drv->algorithm);
builtin_diff(name, other ? other : name,
one, two, xfrm_msg, must_show_header,
o, complete_rewrite);
else
} else {
fprintf(o->file, "* Unmerged path %s\n", name);
}
}
static void diff_fill_oid_info(struct diff_filespec *one, struct index_state *istate)
@ -4587,6 +4589,14 @@ static void run_diffstat(struct diff_filepair *p, struct diff_options *o,
const char *name;
const char *other;
if (!o->ignore_driver_algorithm) {
struct userdiff_driver *drv = userdiff_find_by_path(o->repo->index,
p->one->path);
if (drv && drv->algorithm)
set_diff_algorithm(o, drv->algorithm);
}
if (DIFF_PAIR_UNMERGED(p)) {
/* unmerged */
builtin_diffstat(p->one->path, NULL, NULL, NULL,
@ -5140,6 +5150,8 @@ static int diff_opt_diff_algorithm(const struct option *opt,
return error(_("option diff-algorithm accepts \"myers\", "
"\"minimal\", \"patience\" and \"histogram\""));
options->ignore_driver_algorithm = 1;
return 0;
}
@ -5155,6 +5167,8 @@ static int diff_opt_diff_algorithm_no_arg(const struct option *opt,
BUG("available diff algorithms include \"myers\", "
"\"minimal\", \"patience\" and \"histogram\"");
options->ignore_driver_algorithm = 1;
return 0;
}
@ -5295,6 +5309,7 @@ static int diff_opt_patience(const struct option *opt,
for (i = 0; i < options->anchors_nr; i++)
free(options->anchors[i]);
options->anchors_nr = 0;
options->ignore_driver_algorithm = 1;
return set_diff_algorithm(options, "patience");
}

1
diff.h
View File

@ -333,6 +333,7 @@ struct diff_options {
int prefix_length;
const char *stat_sep;
int xdl_opts;
int ignore_driver_algorithm;
/* see Documentation/diff-options.txt */
char **anchors;

View File

@ -105,10 +105,46 @@ index $file1..$file2 100644
}
EOF
cat >expect_diffstat <<EOF
file1 => file2 | 21 ++++++++++-----------
1 file changed, 10 insertions(+), 11 deletions(-)
EOF
STRATEGY=$1
test_expect_success "$STRATEGY diff from attributes" '
echo "file* diff=driver" >.gitattributes &&
git config diff.driver.algorithm "$STRATEGY" &&
test_must_fail git diff --no-index file1 file2 > output &&
cat expect &&
cat output &&
test_cmp expect output
'
test_expect_success "$STRATEGY diff from attributes has valid diffstat" '
echo "file* diff=driver" >.gitattributes &&
git config diff.driver.algorithm "$STRATEGY" &&
test_must_fail git diff --stat --no-index file1 file2 > output &&
test_cmp expect_diffstat output
'
test_expect_success "$STRATEGY diff" '
test_must_fail git diff --no-index "--$STRATEGY" file1 file2 > output &&
test_must_fail git diff --no-index "--diff-algorithm=$STRATEGY" file1 file2 > output &&
test_cmp expect output
'
test_expect_success "$STRATEGY diff command line precedence before attributes" '
echo "file* diff=driver" >.gitattributes &&
git config diff.driver.algorithm myers &&
test_must_fail git diff --no-index "--diff-algorithm=$STRATEGY" file1 file2 > output &&
test_cmp expect output
'
test_expect_success "$STRATEGY diff attributes precedence before config" '
git config diff.algorithm default &&
echo "file* diff=driver" >.gitattributes &&
git config diff.driver.algorithm "$STRATEGY" &&
test_must_fail git diff --no-index file1 file2 > output &&
test_cmp expect output
'

View File

@ -293,7 +293,7 @@ PATTERNS("scheme",
"|([^][)(}{[ \t])+"),
PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
"\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+"),
{ "default", NULL, -1, { NULL, 0 } },
{ "default", NULL, NULL, -1, { NULL, 0 } },
};
#undef PATTERNS
#undef IPATTERN
@ -393,6 +393,8 @@ int userdiff_config(const char *k, const char *v)
return parse_bool(&drv->textconv_want_cache, k, v);
if (!strcmp(type, "wordregex"))
return git_config_string(&drv->word_regex, k, v);
if (!strcmp(type, "algorithm"))
return git_config_string(&drv->algorithm, k, v);
return 0;
}

View File

@ -14,6 +14,7 @@ struct userdiff_funcname {
struct userdiff_driver {
const char *name;
const char *external;
const char *algorithm;
int binary;
struct userdiff_funcname funcname;
const char *word_regex;