diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 641362d056..b95ade2c1b 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -45,6 +45,8 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
 		      long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
 		      xdalgoenv_t *xenv);
 static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2);
+static int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo);
+
 
 
 
@@ -395,6 +397,110 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1,
 }
 
 
+static int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo) {
+	long ix, ixo, ixs, ixref, grpsiz, nrec = xdf->nrec;
+	char *rchg = xdf->rchg, *rchgo = xdfo->rchg;
+	xrecord_t **recs = xdf->recs;
+
+	/*
+	 * This is the same as what GNU diff does. Move change groups back
+	 * and forward for a consistent and pretty diff output. This also
+	 * helps in finding joinable change groups and reduces the diff size.
+	 */
+	for (ix = ixo = 0;;) {
+		/*
+		 * Find the first changed line in the to-be-compacted file.
+		 * We need to keep track of both indexes, so if we find a
+		 * changed-lines group in the other file while scanning the
+		 * to-be-compacted file, we need to skip it properly. Note
+		 * that loops that are testing for changed lines on rchg* do
+		 * not need index bounding since the array is prepared with
+		 * a zero at position -1 and N.
+		 */
+		for (; ix < nrec && !rchg[ix]; ix++)
+			while (rchgo[ixo++]);
+		if (ix == nrec)
+			break;
+
+		/*
+		 * Record the start of a changed-group in the to-be-compacted file
+		 * and find the end of it, on both to-be-compacted and other file
+		 * indexes (ix and ixo).
+		 */
+		ixs = ix;
+		for (ix++; rchg[ix]; ix++);
+		for (; rchgo[ixo]; ixo++);
+
+		do {
+			grpsiz = ix - ixs;
+
+			/*
+			 * If the line before the current change group is equal to
+			 * the last line of the current change group, shift the
+			 * group backward.
+			 */
+			while (ixs > 0 && recs[ixs - 1]->ha == recs[ix - 1]->ha &&
+			       XDL_RECMATCH(recs[ixs - 1], recs[ix - 1])) {
+				rchg[--ixs] = 1;
+				rchg[--ix] = 0;
+
+				/*
+				 * This change might have joined two change groups,
+				 * so we try to take this scenario into account by moving
+				 * the start index accordingly (and so the other-file
+				 * end-of-group index).
+				 */
+				for (; rchg[ixs - 1]; ixs--);
+				while (rchgo[--ixo]);
+			}
+
+			/*
+			 * Record the end-of-group position in case we are matched
+			 * with a group of changes in the other file (that is, the
+			 * change record before the end-of-group index in the other
+			 * file is set).
+			 */
+			ixref = rchgo[ixo - 1] ? ix: nrec;
+
+			/*
+			 * If the first line of the current change group is equal to
+			 * the line after the current change group, shift the
+			 * group forward.
+			 */
+			while (ix < nrec && recs[ixs]->ha == recs[ix]->ha &&
+			       XDL_RECMATCH(recs[ixs], recs[ix])) {
+				rchg[ixs++] = 0;
+				rchg[ix++] = 1;
+
+				/*
+				 * This change might have joined two change groups,
+				 * so we try to take this scenario into account by moving
+				 * the start index accordingly (and so the other-file
+				 * end-of-group index). Keep tracking the reference
+				 * index in case we are shifting together with a
+				 * corresponding group of changes in the other file.
+				 */
+				for (; rchg[ix]; ix++);
+				while (rchgo[++ixo])
+					ixref = ix;
+			}
+		} while (grpsiz != ix - ixs);
+
+		/*
+		 * Try to move back the possibly merged group of changes, to match
+		 * the recorded position in the other file.
+		 */
+		while (ixref < ix) {
+			rchg[--ixs] = 1;
+			rchg[--ix] = 0;
+			while (rchgo[--ixo]);
+		}
+	}
+
+	return 0;
+}
+
+
 int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) {
 	xdchange_t *cscr = NULL, *xch;
 	char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg;
@@ -440,13 +546,13 @@ int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 
 		return -1;
 	}
-
-	if (xdl_build_script(&xe, &xscr) < 0) {
+	if (xdl_change_compact(&xe.xdf1, &xe.xdf2) < 0 ||
+	    xdl_change_compact(&xe.xdf2, &xe.xdf1) < 0 ||
+	    xdl_build_script(&xe, &xscr) < 0) {
 
 		xdl_free_env(&xe);
 		return -1;
 	}
-
 	if (xscr) {
 		if (xdl_emit_diff(&xe, xscr, ecb, xecfg) < 0) {
 
@@ -454,10 +560,8 @@ int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 			xdl_free_env(&xe);
 			return -1;
 		}
-
 		xdl_free_script(xscr);
 	}
-
 	xdl_free_env(&xe);
 
 	return 0;
diff --git a/xdiff/xmacros.h b/xdiff/xmacros.h
index 4c2fde80c1..78f02603b8 100644
--- a/xdiff/xmacros.h
+++ b/xdiff/xmacros.h
@@ -33,6 +33,7 @@
 #define XDL_ISDIGIT(c) ((c) >= '0' && (c) <= '9')
 #define XDL_HASHLONG(v, b) (((unsigned long)(v) * GR_PRIME) >> ((CHAR_BIT * sizeof(unsigned long)) - (b)))
 #define XDL_PTRFREE(p) do { if (p) { xdl_free(p); (p) = NULL; } } while (0)
+#define XDL_RECMATCH(r1, r2) ((r1)->size == (r2)->size && memcmp((r1)->ptr, (r2)->ptr, (r1)->size) == 0)
 #define XDL_LE32_PUT(p, v) \
 do { \
 	unsigned char *__p = (unsigned char *) (p); \