Merge branch 'ab/pickaxe-pcre2'

Rewrite the backend for "diff -G/-S" to use pcre2 engine when
available.

* ab/pickaxe-pcre2: (22 commits)
  xdiff-interface: replace discard_hunk_line() with a flag
  xdiff users: use designated initializers for out_line
  pickaxe -G: don't special-case create/delete
  pickaxe -G: terminate early on matching lines
  xdiff-interface: allow early return from xdiff_emit_line_fn
  xdiff-interface: prepare for allowing early return
  pickaxe -S: slightly optimize contains()
  pickaxe: rename variables in has_changes() for brevity
  pickaxe -S: support content with NULs under --pickaxe-regex
  pickaxe: assert that we must have a needle under -G or -S
  pickaxe: refactor function selection in diffcore-pickaxe()
  perf: add performance test for pickaxe
  pickaxe/style: consolidate declarations and assignments
  diff.h: move pickaxe fields together again
  pickaxe: die when --find-object and --pickaxe-all are combined
  pickaxe: die when -G and --pickaxe-regex are combined
  pickaxe tests: add missing test for --no-pickaxe-regex being an error
  pickaxe tests: test for -G, -S and --find-object incompatibility
  pickaxe tests: add test for "log -S" not being a regex
  pickaxe tests: add test for diffgrep_consume() internals
  ...
This commit is contained in:
Junio C Hamano 2021-07-13 16:52:50 -07:00
commit 4da281e84d
14 changed files with 312 additions and 107 deletions

View File

@ -107,15 +107,12 @@ static void show_diff(struct merge_list *entry)
mmfile_t src, dst;
xpparam_t xpp;
xdemitconf_t xecfg;
xdemitcb_t ecb;
xdemitcb_t ecb = { .out_line = show_outf };
memset(&xpp, 0, sizeof(xpp));
xpp.flags = 0;
memset(&xecfg, 0, sizeof(xecfg));
xecfg.ctxlen = 3;
ecb.out_hunk = NULL;
ecb.out_line = show_outf;
ecb.priv = NULL;
src.ptr = origin(entry, &size);
if (!src.ptr)

View File

@ -28,7 +28,7 @@ static int diff_two(const char *file1, const char *label1,
{
xpparam_t xpp;
xdemitconf_t xecfg;
xdemitcb_t ecb;
xdemitcb_t ecb = { .out_line = outf };
mmfile_t minus, plus;
int ret;
@ -41,8 +41,6 @@ static int diff_two(const char *file1, const char *label1,
xpp.flags = 0;
memset(&xecfg, 0, sizeof(xecfg));
xecfg.ctxlen = 3;
ecb.out_hunk = NULL;
ecb.out_line = outf;
ret = xdi_diff(&minus, &plus, &xpp, &xecfg, &ecb);
free(minus.ptr);

View File

@ -403,11 +403,11 @@ static void consume_hunk(void *state_,
state->sline[state->nb-1].p_lno[state->n] = state->ob;
}
static void consume_line(void *state_, char *line, unsigned long len)
static int consume_line(void *state_, char *line, unsigned long len)
{
struct combine_diff_state *state = state_;
if (!state->lost_bucket)
return; /* not in any hunk yet */
return 0; /* not in any hunk yet */
switch (line[0]) {
case '-':
append_lost(state->lost_bucket, state->n, line+1, len-1);
@ -417,6 +417,7 @@ static void consume_line(void *state_, char *line, unsigned long len)
state->lno++;
break;
}
return 0;
}
static void combine_diff(struct repository *r,

39
diff.c
View File

@ -2340,7 +2340,7 @@ static void find_lno(const char *line, struct emit_callback *ecbdata)
ecbdata->lno_in_postimage = strtol(p + 1, NULL, 10);
}
static void fn_out_consume(void *priv, char *line, unsigned long len)
static int fn_out_consume(void *priv, char *line, unsigned long len)
{
struct emit_callback *ecbdata = priv;
struct diff_options *o = ecbdata->opt;
@ -2376,7 +2376,7 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
len = sane_truncate_line(line, len);
find_lno(line, ecbdata);
emit_hunk_header(ecbdata, line, len);
return;
return 0;
}
if (ecbdata->diff_words) {
@ -2386,11 +2386,11 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
if (line[0] == '-') {
diff_words_append(line, len,
&ecbdata->diff_words->minus);
return;
return 0;
} else if (line[0] == '+') {
diff_words_append(line, len,
&ecbdata->diff_words->plus);
return;
return 0;
} else if (starts_with(line, "\\ ")) {
/*
* Eat the "no newline at eof" marker as if we
@ -2399,11 +2399,11 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
* defer processing. If this is the end of
* preimage, more "+" lines may come after it.
*/
return;
return 0;
}
diff_words_flush(ecbdata);
emit_diff_symbol(o, s, line, len, 0);
return;
return 0;
}
switch (line[0]) {
@ -2427,6 +2427,7 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
line, len, 0);
break;
}
return 0;
}
static void pprint_rename(struct strbuf *name, const char *a, const char *b)
@ -2526,7 +2527,7 @@ static struct diffstat_file *diffstat_add(struct diffstat_t *diffstat,
return x;
}
static void diffstat_consume(void *priv, char *line, unsigned long len)
static int diffstat_consume(void *priv, char *line, unsigned long len)
{
struct diffstat_t *diffstat = priv;
struct diffstat_file *x = diffstat->files[diffstat->nr - 1];
@ -2535,6 +2536,7 @@ static void diffstat_consume(void *priv, char *line, unsigned long len)
x->added++;
else if (line[0] == '-')
x->deleted++;
return 0;
}
const char mime_boundary_leader[] = "------------";
@ -3212,7 +3214,7 @@ static void checkdiff_consume_hunk(void *priv,
data->lineno = nb - 1;
}
static void checkdiff_consume(void *priv, char *line, unsigned long len)
static int checkdiff_consume(void *priv, char *line, unsigned long len)
{
struct checkdiff_t *data = priv;
int marker_size = data->conflict_marker_size;
@ -3236,7 +3238,7 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len)
}
bad = ws_check(line + 1, len - 1, data->ws_rule);
if (!bad)
return;
return 0;
data->status |= bad;
err = whitespace_error_string(bad);
fprintf(data->o->file, "%s%s:%d: %s.\n",
@ -3248,6 +3250,7 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len)
} else if (line[0] == ' ') {
data->lineno++;
}
return 0;
}
static unsigned char *deflate_it(char *data,
@ -3726,7 +3729,8 @@ static void builtin_diffstat(const char *name_a, const char *name_b,
xpp.anchors_nr = o->anchors_nr;
xecfg.ctxlen = o->context;
xecfg.interhunkctxlen = o->interhunkcontext;
if (xdi_diff_outf(&mf1, &mf2, discard_hunk_line,
xecfg.flags = XDL_EMIT_NO_HUNK_HDR;
if (xdi_diff_outf(&mf1, &mf2, NULL,
diffstat_consume, diffstat, &xpp, &xecfg))
die("unable to generate diffstat for %s", one->path);
@ -4632,6 +4636,12 @@ void diff_setup_done(struct diff_options *options)
if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_MASK))
die(_("-G, -S and --find-object are mutually exclusive"));
if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_G_REGEX_MASK))
die(_("-G and --pickaxe-regex are mutually exclusive, use --pickaxe-regex with -S"));
if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_ALL_OBJFIND_MASK))
die(_("---pickaxe-all and --find-object are mutually exclusive, use --pickaxe-all with -G and -S"));
/*
* Most of the time we can say "there are changes"
* only by checking if there are changed paths, but
@ -6119,17 +6129,18 @@ void flush_one_hunk(struct object_id *result, git_hash_ctx *ctx)
}
}
static void patch_id_consume(void *priv, char *line, unsigned long len)
static int patch_id_consume(void *priv, char *line, unsigned long len)
{
struct patch_id_t *data = priv;
int new_len;
if (len > 12 && starts_with(line, "\\ "))
return;
return 0;
new_len = remove_space(line, len);
the_hash_algo->update_fn(data->ctx, line, new_len);
data->patchlen += new_len;
return 0;
}
static void patch_id_add_string(git_hash_ctx *ctx, const char *str)
@ -6227,8 +6238,8 @@ static int diff_get_patch_id(struct diff_options *options, struct object_id *oid
xpp.flags = 0;
xecfg.ctxlen = 3;
xecfg.flags = 0;
if (xdi_diff_outf(&mf1, &mf2, discard_hunk_line,
xecfg.flags = XDL_EMIT_NO_HUNK_HDR;
if (xdi_diff_outf(&mf1, &mf2, NULL,
patch_id_consume, &data, &xpp, &xecfg))
return error("unable to generate patch-id diff for %s",
p->one->path);

7
diff.h
View File

@ -265,6 +265,7 @@ struct diff_options {
* postimage of the diff_queue.
*/
const char *pickaxe;
unsigned pickaxe_opts;
/* -I<regex> */
regex_t **ignore_regex;
@ -304,8 +305,6 @@ struct diff_options {
/* The output format used when `diff_flush()` is run. */
int output_format;
unsigned pickaxe_opts;
/* Affects the way detection logic for complete rewrites, renames and
* copies.
*/
@ -556,6 +555,10 @@ int git_config_rename(const char *var, const char *value);
#define DIFF_PICKAXE_KINDS_MASK (DIFF_PICKAXE_KIND_S | \
DIFF_PICKAXE_KIND_G | \
DIFF_PICKAXE_KIND_OBJFIND)
#define DIFF_PICKAXE_KINDS_G_REGEX_MASK (DIFF_PICKAXE_KIND_G | \
DIFF_PICKAXE_REGEX)
#define DIFF_PICKAXE_KINDS_ALL_OBJFIND_MASK (DIFF_PICKAXE_ALL | \
DIFF_PICKAXE_KIND_OBJFIND)
#define DIFF_PICKAXE_IGNORE_CASE 32

View File

@ -19,38 +19,31 @@ struct diffgrep_cb {
int hit;
};
static void diffgrep_consume(void *priv, char *line, unsigned long len)
static int diffgrep_consume(void *priv, char *line, unsigned long len)
{
struct diffgrep_cb *data = priv;
regmatch_t regmatch;
if (line[0] != '+' && line[0] != '-')
return;
return 0;
if (data->hit)
/*
* NEEDSWORK: we should have a way to terminate the
* caller early.
*/
return;
data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1,
&regmatch, 0);
BUG("Already matched in diffgrep_consume! Broken xdiff_emit_line_fn?");
if (!regexec_buf(data->regexp, line + 1, len - 1, 1,
&regmatch, 0)) {
data->hit = 1;
return 1;
}
return 0;
}
static int diff_grep(mmfile_t *one, mmfile_t *two,
struct diff_options *o,
regex_t *regexp, kwset_t kws)
{
regmatch_t regmatch;
struct diffgrep_cb ecbdata;
xpparam_t xpp;
xdemitconf_t xecfg;
if (!one)
return !regexec_buf(regexp, two->ptr, two->size,
1, &regmatch, 0);
if (!two)
return !regexec_buf(regexp, one->ptr, one->size,
1, &regmatch, 0);
int ret;
/*
* We have both sides; need to run textual diff and see if
@ -60,38 +53,47 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
memset(&xecfg, 0, sizeof(xecfg));
ecbdata.regexp = regexp;
ecbdata.hit = 0;
xecfg.flags = XDL_EMIT_NO_HUNK_HDR;
xecfg.ctxlen = o->context;
xecfg.interhunkctxlen = o->interhunkcontext;
if (xdi_diff_outf(one, two, discard_hunk_line, diffgrep_consume,
&ecbdata, &xpp, &xecfg))
return 0;
return ecbdata.hit;
/*
* An xdiff error might be our "data->hit" from above. See the
* comment for xdiff_emit_line_fn in xdiff-interface.h
*/
ret = xdi_diff_outf(one, two, NULL, diffgrep_consume,
&ecbdata, &xpp, &xecfg);
if (ecbdata.hit)
return 1;
if (ret)
return ret;
return 0;
}
static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws,
unsigned int limit)
{
unsigned int cnt;
unsigned long sz;
const char *data;
sz = mf->size;
data = mf->ptr;
cnt = 0;
unsigned int cnt = 0;
unsigned long sz = mf->size;
const char *data = mf->ptr;
if (regexp) {
regmatch_t regmatch;
int flags = 0;
while (sz && *data &&
while (sz &&
!regexec_buf(regexp, data, sz, 1, &regmatch, flags)) {
flags |= REG_NOTBOL;
data += regmatch.rm_eo;
sz -= regmatch.rm_eo;
if (sz && *data && regmatch.rm_so == regmatch.rm_eo) {
if (sz && regmatch.rm_so == regmatch.rm_eo) {
data++;
sz--;
}
cnt++;
if (limit && cnt == limit)
return cnt;
}
} else { /* Classic exact string match */
@ -103,6 +105,9 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
sz -= offset + kwsm.size[0];
data += offset + kwsm.size[0];
cnt++;
if (limit && cnt == limit)
return cnt;
}
}
return cnt;
@ -112,9 +117,9 @@ static int has_changes(mmfile_t *one, mmfile_t *two,
struct diff_options *o,
regex_t *regexp, kwset_t kws)
{
unsigned int one_contains = one ? contains(one, regexp, kws) : 0;
unsigned int two_contains = two ? contains(two, regexp, kws) : 0;
return one_contains != two_contains;
unsigned int c1 = one ? contains(one, regexp, kws, 0) : 0;
unsigned int c2 = two ? contains(two, regexp, kws, c1 + 1) : 0;
return c1 != c2;
}
static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
@ -136,9 +141,6 @@ static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
oidset_contains(o->objfind, &p->two->oid));
}
if (!o->pickaxe[0])
return 0;
if (o->flags.allow_textconv) {
textconv_one = get_textconv(o->repo, p->one);
textconv_two = get_textconv(o->repo, p->two);
@ -163,9 +165,7 @@ static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
mf1.size = fill_textconv(o->repo, textconv_one, p->one, &mf1.ptr);
mf2.size = fill_textconv(o->repo, textconv_two, p->two, &mf2.ptr);
ret = fn(DIFF_FILE_VALID(p->one) ? &mf1 : NULL,
DIFF_FILE_VALID(p->two) ? &mf2 : NULL,
o, regexp, kws);
ret = fn(&mf1, &mf2, o, regexp, kws);
if (textconv_one)
free(mf1.ptr);
@ -232,13 +232,31 @@ void diffcore_pickaxe(struct diff_options *o)
int opts = o->pickaxe_opts;
regex_t regex, *regexp = NULL;
kwset_t kws = NULL;
pickaxe_fn fn;
if (opts & ~DIFF_PICKAXE_KIND_OBJFIND &&
(!needle || !*needle))
BUG("should have needle under -G or -S");
if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) {
int cflags = REG_EXTENDED | REG_NEWLINE;
if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE)
cflags |= REG_ICASE;
regcomp_or_die(&regex, needle, cflags);
regexp = &regex;
if (opts & DIFF_PICKAXE_KIND_G)
fn = diff_grep;
else if (opts & DIFF_PICKAXE_REGEX)
fn = has_changes;
else
/*
* We don't need to check the combination of
* -G and --pickaxe-regex, by the time we get
* here diff.c has already died if they're
* combined. See the usage tests in
* t4209-log-pickaxe.sh.
*/
BUG("unreachable");
} else if (opts & DIFF_PICKAXE_KIND_S) {
if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE &&
has_non_ascii(needle)) {
@ -255,10 +273,14 @@ void diffcore_pickaxe(struct diff_options *o)
kwsincr(kws, needle, strlen(needle));
kwsprep(kws);
}
fn = has_changes;
} else if (opts & DIFF_PICKAXE_KIND_OBJFIND) {
fn = NULL;
} else {
BUG("unknown pickaxe_opts flag");
}
pickaxe(&diff_queued_diff, o, regexp, kws,
(opts & DIFF_PICKAXE_KIND_G) ? diff_grep : has_changes);
pickaxe(&diff_queued_diff, o, regexp, kws, fn);
if (regexp)
regfree(regexp);

View File

@ -274,9 +274,10 @@ static void find_exact_matches(struct string_list *a, struct string_list *b)
hashmap_clear(&map);
}
static void diffsize_consume(void *data, char *line, unsigned long len)
static int diffsize_consume(void *data, char *line, unsigned long len)
{
(*(int *)data)++;
return 0;
}
static void diffsize_hunk(void *data, long ob, long on, long nb, long nn,

70
t/perf/p4209-pickaxe.sh Executable file
View File

@ -0,0 +1,70 @@
#!/bin/sh
test_description="Test pickaxe performance"
. ./perf-lib.sh
test_perf_default_repo
# Not --max-count, as that's the number of matching commit, so it's
# unbounded. We want to limit our revision walk here.
from_rev_desc=
from_rev=
max_count=1000
if test_have_prereq EXPENSIVE
then
max_count=10000
fi
from_rev=" $(git rev-list HEAD | head -n $max_count | tail -n 1).."
from_rev_desc=" <limit-rev>.."
for icase in \
'' \
'-i '
do
# -S (no regex)
for pattern in \
'int main' \
'æ'
do
for opts in \
'-S'
do
test_perf "git log $icase$opts'$pattern'$from_rev_desc" "
git log --pretty=format:%H $icase$opts'$pattern'$from_rev
"
done
done
# -S (regex)
for pattern in \
'(int|void|null)' \
'if *\([^ ]+ & ' \
'[àáâãäåæñøùúûüýþ]'
do
for opts in \
'--pickaxe-regex -S'
do
test_perf "git log $icase$opts'$pattern'$from_rev_desc" "
git log --pretty=format:%H $icase$opts'$pattern'$from_rev
"
done
done
# -G
for pattern in \
'(int|void|null)' \
'if *\([^ ]+ & ' \
'[àáâãäåæñøùúûüýþ]'
do
for opts in \
'-G'
do
test_perf "git log $icase$opts'$pattern'$from_rev_desc" "
git log --pretty=format:%H $icase$opts'$pattern'$from_rev
"
done
done
done
test_done

View File

@ -55,6 +55,43 @@ test_expect_success setup '
git rev-parse --verify HEAD >expect_second
'
test_expect_success 'usage' '
test_expect_code 129 git log -S 2>err &&
test_i18ngrep "switch.*requires a value" err &&
test_expect_code 129 git log -G 2>err &&
test_i18ngrep "switch.*requires a value" err &&
test_expect_code 128 git log -Gregex -Sstring 2>err &&
grep "mutually exclusive" err &&
test_expect_code 128 git log -Gregex --find-object=HEAD 2>err &&
grep "mutually exclusive" err &&
test_expect_code 128 git log -Sstring --find-object=HEAD 2>err &&
grep "mutually exclusive" err &&
test_expect_code 128 git log --pickaxe-all --find-object=HEAD 2>err &&
grep "mutually exclusive" err
'
test_expect_success 'usage: --pickaxe-regex' '
test_expect_code 128 git log -Gregex --pickaxe-regex 2>err &&
grep "mutually exclusive" err
'
test_expect_success 'usage: --no-pickaxe-regex' '
cat >expect <<-\EOF &&
fatal: unrecognized argument: --no-pickaxe-regex
EOF
test_expect_code 128 git log -Sstring --no-pickaxe-regex 2>actual &&
test_cmp expect actual &&
test_expect_code 128 git log -Gstring --no-pickaxe-regex 2>err &&
test_cmp expect actual
'
test_log expect_initial --grep initial
test_log expect_nomatch --grep InItial
test_log_icase expect_initial --grep InItial
@ -106,38 +143,83 @@ test_expect_success 'log -S --no-textconv (missing textconv tool)' '
rm .gitattributes
'
test_expect_success 'setup log -[GS] plain & regex' '
test_create_repo GS-plain &&
test_commit -C GS-plain --append A data.txt "a" &&
test_commit -C GS-plain --append B data.txt "a a" &&
test_commit -C GS-plain --append C data.txt "b" &&
test_commit -C GS-plain --append D data.txt "[b]" &&
test_commit -C GS-plain E data.txt "" &&
# We also include E, the deletion commit
git -C GS-plain log --grep="[ABE]" >A-to-B-then-E-log &&
git -C GS-plain log --grep="[CDE]" >C-to-D-then-E-log &&
git -C GS-plain log --grep="[DE]" >D-then-E-log &&
git -C GS-plain log >full-log
'
test_expect_success 'log -G trims diff new/old [-+]' '
git -C GS-plain log -G"[+-]a" >log &&
test_must_be_empty log &&
git -C GS-plain log -G"^a" >log &&
test_cmp log A-to-B-then-E-log
'
test_expect_success 'log -S<pat> is not a regex, but -S<pat> --pickaxe-regex is' '
git -C GS-plain log -S"a" >log &&
test_cmp log A-to-B-then-E-log &&
git -C GS-plain log -S"[a]" >log &&
test_must_be_empty log &&
git -C GS-plain log -S"[a]" --pickaxe-regex >log &&
test_cmp log A-to-B-then-E-log &&
git -C GS-plain log -S"[b]" >log &&
test_cmp log D-then-E-log &&
git -C GS-plain log -S"[b]" --pickaxe-regex >log &&
test_cmp log C-to-D-then-E-log
'
test_expect_success 'setup log -[GS] binary & --text' '
git checkout --orphan GS-binary-and-text &&
git read-tree --empty &&
printf "a\na\0a\n" >data.bin &&
git add data.bin &&
git commit -m "create binary file" data.bin &&
printf "a\na\0a\n" >>data.bin &&
git commit -m "modify binary file" data.bin &&
git rm data.bin &&
git commit -m "delete binary file" data.bin &&
git log >full-log
test_create_repo GS-bin-txt &&
test_commit -C GS-bin-txt --printf A data.bin "a\na\0a\n" &&
test_commit -C GS-bin-txt --append --printf B data.bin "a\na\0a\n" &&
test_commit -C GS-bin-txt C data.bin "" &&
git -C GS-bin-txt log >full-log
'
test_expect_success 'log -G ignores binary files' '
git log -Ga >log &&
git -C GS-bin-txt log -Ga >log &&
test_must_be_empty log
'
test_expect_success 'log -G looks into binary files with -a' '
git log -a -Ga >log &&
git -C GS-bin-txt log -a -Ga >log &&
test_cmp log full-log
'
test_expect_success 'log -G looks into binary files with textconv filter' '
test_when_finished "rm .gitattributes" &&
echo "* diff=bin" >.gitattributes &&
git -c diff.bin.textconv=cat log -Ga >log &&
test_when_finished "rm GS-bin-txt/.gitattributes" &&
(
cd GS-bin-txt &&
echo "* diff=bin" >.gitattributes &&
git -c diff.bin.textconv=cat log -Ga >../log
) &&
test_cmp log full-log
'
test_expect_success 'log -S looks into binary files' '
git log -Sa >log &&
git -C GS-bin-txt log -Sa >log &&
test_cmp log full-log
'
test_expect_success 'log -S --pickaxe-regex looks into binary files' '
git -C GS-bin-txt log --pickaxe-regex -Sa >log &&
test_cmp log full-log &&
git -C GS-bin-txt log --pickaxe-regex -S"[a]" >log &&
test_cmp log full-log
'

View File

@ -59,7 +59,7 @@ test_expect_success 'setup' "
git commit -m.
"
# Simple fixed-string matching that can use kwset (no -i && non-ASCII)
# Simple fixed-string matching
nul_match P P P '-F' 'yQf'
nul_match P P P '-F' 'yQx'
nul_match P P P '-Fi' 'YQf'
@ -78,7 +78,7 @@ nul_match P P P '-Fi' '[Y]QF'
nul_match P P P '-F' 'æQ[ð]'
nul_match P P P '-F' '[æ]Qð'
# The -F kwset codepath can't handle -i && non-ASCII...
# Matching pattern and subject case with -i
nul_match P 1 1 '-i' '[æ]Qð'
# ...PCRE v2 only matches non-ASCII with -i casefolding under UTF-8

View File

@ -31,29 +31,36 @@ static int xdiff_out_hunk(void *priv_,
return 0;
}
static void consume_one(void *priv_, char *s, unsigned long size)
static int consume_one(void *priv_, char *s, unsigned long size)
{
struct xdiff_emit_state *priv = priv_;
char *ep;
while (size) {
unsigned long this_size;
int ret;
ep = memchr(s, '\n', size);
this_size = (ep == NULL) ? size : (ep - s + 1);
priv->line_fn(priv->consume_callback_data, s, this_size);
ret = priv->line_fn(priv->consume_callback_data, s, this_size);
if (ret)
return ret;
size -= this_size;
s += this_size;
}
return 0;
}
static int xdiff_outf(void *priv_, mmbuffer_t *mb, int nbuf)
{
struct xdiff_emit_state *priv = priv_;
int i;
int stop = 0;
if (!priv->line_fn)
return 0;
for (i = 0; i < nbuf; i++) {
if (stop)
return 1;
if (mb[i].ptr[mb[i].size-1] != '\n') {
/* Incomplete line */
strbuf_add(&priv->remainder, mb[i].ptr, mb[i].size);
@ -62,17 +69,21 @@ static int xdiff_outf(void *priv_, mmbuffer_t *mb, int nbuf)
/* we have a complete line */
if (!priv->remainder.len) {
consume_one(priv, mb[i].ptr, mb[i].size);
stop = consume_one(priv, mb[i].ptr, mb[i].size);
continue;
}
strbuf_add(&priv->remainder, mb[i].ptr, mb[i].size);
consume_one(priv, priv->remainder.buf, priv->remainder.len);
stop = consume_one(priv, priv->remainder.buf, priv->remainder.len);
strbuf_reset(&priv->remainder);
}
if (stop)
return -1;
if (priv->remainder.len) {
consume_one(priv, priv->remainder.buf, priv->remainder.len);
stop = consume_one(priv, priv->remainder.buf, priv->remainder.len);
strbuf_reset(&priv->remainder);
}
if (stop)
return -1;
return 0;
}
@ -115,12 +126,6 @@ int xdi_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t co
return xdl_diff(&a, &b, xpp, xecfg, xecb);
}
void discard_hunk_line(void *priv,
long ob, long on, long nb, long nn,
const char *func, long funclen)
{
}
int xdi_diff_outf(mmfile_t *mf1, mmfile_t *mf2,
xdiff_emit_hunk_fn hunk_fn,
xdiff_emit_line_fn line_fn,

View File

@ -11,7 +11,28 @@
*/
#define MAX_XDIFF_SIZE (1024UL * 1024 * 1023)
typedef void (*xdiff_emit_line_fn)(void *, char *, unsigned long);
/**
* The `xdiff_emit_line_fn` function can return 1 to abort early, or 0
* to continue processing. Note that doing so is an all-or-nothing
* affair, as returning 1 will return all the way to the top-level,
* e.g. the xdi_diff_outf() call to generate the diff.
*
* Thus returning 1 means you won't be getting any more diff lines. If
* you need something in-between those two options you'll to use
* `xdl_emit_hunk_consume_func_t` and implement your own version of
* xdl_emit_diff().
*
* We may extend the interface in the future to understand other more
* granular return values. While you should return 1 to exit early,
* doing so will currently make your early return indistinguishable
* from an error internal to xdiff, xdiff itself will see that
* non-zero return and translate it to -1.
*
* See "diff_grep" in diffcore-pickaxe.c for a trick to work around
* this, i.e. using the "consume_callback_data" to note the desired
* early return.
*/
typedef int (*xdiff_emit_line_fn)(void *, char *, unsigned long);
typedef void (*xdiff_emit_hunk_fn)(void *data,
long old_begin, long old_nr,
long new_begin, long new_nr,
@ -32,14 +53,6 @@ void xdiff_clear_find_func(xdemitconf_t *xecfg);
int git_xmerge_config(const char *var, const char *value, void *cb);
extern int git_xmerge_style;
/*
* Can be used as a no-op hunk_fn for xdi_diff_outf(), since a NULL
* one just sends the hunk line to the line_fn callback).
*/
void discard_hunk_line(void *priv,
long ob, long on, long nb, long nn,
const char *func, long funclen);
/*
* Compare the strings l1 with l2 which are of size s1 and s2 respectively.
* Returns 1 if the strings are deemed equal, 0 otherwise.

View File

@ -50,6 +50,7 @@ extern "C" {
/* xdemitconf_t.flags */
#define XDL_EMIT_FUNCNAMES (1 << 0)
#define XDL_EMIT_NO_HUNK_HDR (1 << 1)
#define XDL_EMIT_FUNCCONTEXT (1 << 2)
/* merge simplification levels */

View File

@ -278,7 +278,8 @@ pre_context_calculation:
s1 - 1, funclineprev);
funclineprev = s1 - 1;
}
if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
if (!(xecfg->flags & XDL_EMIT_NO_HUNK_HDR) &&
xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
func_line.buf, func_line.len, ecb) < 0)
return -1;