xdiffi: fix typos and touch up comments
Inspired by the thoroughly stale https://github.com/git/git/pull/159,
this patch fixes a couple of typos, rewraps and clarifies some comments.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
5fa0f5238b
commit
03d3b1297c
@ -38,9 +38,9 @@ typedef struct s_xdpsplit {
|
|||||||
* Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
|
* Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
|
||||||
* the forward diagonal starting from (off1, off2) and the backward diagonal
|
* the forward diagonal starting from (off1, off2) and the backward diagonal
|
||||||
* starting from (lim1, lim2). If the K values on the same diagonal crosses
|
* starting from (lim1, lim2). If the K values on the same diagonal crosses
|
||||||
* returns the furthest point of reach. We might end up having to expensive
|
* returns the furthest point of reach. We might encounter expensive edge cases
|
||||||
* cases using this algorithm is full, so a little bit of heuristic is needed
|
* using this algorithm, so a little bit of heuristic is needed to cut the
|
||||||
* to cut the search and to return a suboptimal point.
|
* search and to return a suboptimal point.
|
||||||
*/
|
*/
|
||||||
static long xdl_split(unsigned long const *ha1, long off1, long lim1,
|
static long xdl_split(unsigned long const *ha1, long off1, long lim1,
|
||||||
unsigned long const *ha2, long off2, long lim2,
|
unsigned long const *ha2, long off2, long lim2,
|
||||||
@ -63,11 +63,13 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
|
|||||||
int got_snake = 0;
|
int got_snake = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We need to extent the diagonal "domain" by one. If the next
|
* We need to extend the diagonal "domain" by one. If the next
|
||||||
* values exits the box boundaries we need to change it in the
|
* values exits the box boundaries we need to change it in the
|
||||||
* opposite direction because (max - min) must be a power of two.
|
* opposite direction because (max - min) must be a power of
|
||||||
|
* two.
|
||||||
|
*
|
||||||
* Also we initialize the external K value to -1 so that we can
|
* Also we initialize the external K value to -1 so that we can
|
||||||
* avoid extra conditions check inside the core loop.
|
* avoid extra conditions in the check inside the core loop.
|
||||||
*/
|
*/
|
||||||
if (fmin > dmin)
|
if (fmin > dmin)
|
||||||
kvdf[--fmin - 1] = -1;
|
kvdf[--fmin - 1] = -1;
|
||||||
@ -98,11 +100,13 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We need to extent the diagonal "domain" by one. If the next
|
* We need to extend the diagonal "domain" by one. If the next
|
||||||
* values exits the box boundaries we need to change it in the
|
* values exits the box boundaries we need to change it in the
|
||||||
* opposite direction because (max - min) must be a power of two.
|
* opposite direction because (max - min) must be a power of
|
||||||
|
* two.
|
||||||
|
*
|
||||||
* Also we initialize the external K value to -1 so that we can
|
* Also we initialize the external K value to -1 so that we can
|
||||||
* avoid extra conditions check inside the core loop.
|
* avoid extra conditions in the check inside the core loop.
|
||||||
*/
|
*/
|
||||||
if (bmin > dmin)
|
if (bmin > dmin)
|
||||||
kvdb[--bmin - 1] = XDL_LINE_MAX;
|
kvdb[--bmin - 1] = XDL_LINE_MAX;
|
||||||
@ -138,7 +142,7 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
|
|||||||
/*
|
/*
|
||||||
* If the edit cost is above the heuristic trigger and if
|
* If the edit cost is above the heuristic trigger and if
|
||||||
* we got a good snake, we sample current diagonals to see
|
* we got a good snake, we sample current diagonals to see
|
||||||
* if some of the, have reached an "interesting" path. Our
|
* if some of them have reached an "interesting" path. Our
|
||||||
* measure is a function of the distance from the diagonal
|
* measure is a function of the distance from the diagonal
|
||||||
* corner (i1 + i2) penalized with the distance from the
|
* corner (i1 + i2) penalized with the distance from the
|
||||||
* mid diagonal itself. If this value is above the current
|
* mid diagonal itself. If this value is above the current
|
||||||
@ -196,8 +200,9 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Enough is enough. We spent too much time here and now we collect
|
* Enough is enough. We spent too much time here and now we
|
||||||
* the furthest reaching path using the (i1 + i2) measure.
|
* collect the furthest reaching path using the (i1 + i2)
|
||||||
|
* measure.
|
||||||
*/
|
*/
|
||||||
if (ec >= xenv->mxcost) {
|
if (ec >= xenv->mxcost) {
|
||||||
long fbest, fbest1, bbest, bbest1;
|
long fbest, fbest1, bbest, bbest1;
|
||||||
@ -244,9 +249,9 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling
|
* Rule: "Divide et Impera" (divide & conquer). Recursively split the box in
|
||||||
* the box splitting function. Note that the real job (marking changed lines)
|
* sub-boxes by calling the box splitting function. Note that the real job
|
||||||
* is done in the two boundary reaching checks.
|
* (marking changed lines) is done in the two boundary reaching checks.
|
||||||
*/
|
*/
|
||||||
int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
|
int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
|
||||||
diffdata_t *dd2, long off2, long lim2,
|
diffdata_t *dd2, long off2, long lim2,
|
||||||
@ -323,7 +328,9 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate and setup K vectors to be used by the differential algorithm.
|
* Allocate and setup K vectors to be used by the differential
|
||||||
|
* algorithm.
|
||||||
|
*
|
||||||
* One is to store the forward path and one to store the backward path.
|
* One is to store the forward path and one to store the backward path.
|
||||||
*/
|
*/
|
||||||
ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
|
ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
|
||||||
@ -394,8 +401,8 @@ static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags)
|
|||||||
/*
|
/*
|
||||||
* If a line is indented more than this, get_indent() just returns this value.
|
* If a line is indented more than this, get_indent() just returns this value.
|
||||||
* This avoids having to do absurd amounts of work for data that are not
|
* This avoids having to do absurd amounts of work for data that are not
|
||||||
* human-readable text, and also ensures that the output of get_indent fits within
|
* human-readable text, and also ensures that the output of get_indent fits
|
||||||
* an int.
|
* within an int.
|
||||||
*/
|
*/
|
||||||
#define MAX_INDENT 200
|
#define MAX_INDENT 200
|
||||||
|
|
||||||
@ -429,9 +436,9 @@ static int get_indent(xrecord_t *rec)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If more than this number of consecutive blank rows are found, just return this
|
* If more than this number of consecutive blank rows are found, just return
|
||||||
* value. This avoids requiring O(N^2) work for pathological cases, and also
|
* this value. This avoids requiring O(N^2) work for pathological cases, and
|
||||||
* ensures that the output of score_split fits in an int.
|
* also ensures that the output of score_split fits in an int.
|
||||||
*/
|
*/
|
||||||
#define MAX_BLANKS 20
|
#define MAX_BLANKS 20
|
||||||
|
|
||||||
@ -443,8 +450,8 @@ struct split_measurement {
|
|||||||
int end_of_file;
|
int end_of_file;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* How much is the line immediately following the split indented (or -1 if
|
* How much is the line immediately following the split indented (or -1
|
||||||
* the line is blank):
|
* if the line is blank):
|
||||||
*/
|
*/
|
||||||
int indent;
|
int indent;
|
||||||
|
|
||||||
@ -454,8 +461,8 @@ struct split_measurement {
|
|||||||
int pre_blank;
|
int pre_blank;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* How much is the nearest non-blank line above the split indented (or -1
|
* How much is the nearest non-blank line above the split indented (or
|
||||||
* if there is no such line)?
|
* -1 if there is no such line)?
|
||||||
*/
|
*/
|
||||||
int pre_indent;
|
int pre_indent;
|
||||||
|
|
||||||
@ -581,13 +588,13 @@ static void measure_split(const xdfile_t *xdf, long split,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute a badness score for the hypothetical split whose measurements are
|
* Compute a badness score for the hypothetical split whose measurements are
|
||||||
* stored in m. The weight factors were determined empirically using the tools and
|
* stored in m. The weight factors were determined empirically using the tools
|
||||||
* corpus described in
|
* and corpus described in
|
||||||
*
|
*
|
||||||
* https://github.com/mhagger/diff-slider-tools
|
* https://github.com/mhagger/diff-slider-tools
|
||||||
*
|
*
|
||||||
* Also see that project if you want to improve the weights based on, for example,
|
* Also see that project if you want to improve the weights based on, for
|
||||||
* a larger or more diverse corpus.
|
* example, a larger or more diverse corpus.
|
||||||
*/
|
*/
|
||||||
static void score_add_split(const struct split_measurement *m, struct split_score *s)
|
static void score_add_split(const struct split_measurement *m, struct split_score *s)
|
||||||
{
|
{
|
||||||
@ -809,13 +816,16 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
|
|||||||
group_init(xdfo, &go);
|
group_init(xdfo, &go);
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
/* If the group is empty in the to-be-compacted file, skip it: */
|
/*
|
||||||
|
* If the group is empty in the to-be-compacted file, skip it:
|
||||||
|
*/
|
||||||
if (g.end == g.start)
|
if (g.end == g.start)
|
||||||
goto next;
|
goto next;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now shift the change up and then down as far as possible in
|
* Now shift the change up and then down as far as possible in
|
||||||
* each direction. If it bumps into any other changes, merge them.
|
* each direction. If it bumps into any other changes, merge
|
||||||
|
* them.
|
||||||
*/
|
*/
|
||||||
do {
|
do {
|
||||||
groupsize = g.end - g.start;
|
groupsize = g.end - g.start;
|
||||||
@ -858,17 +868,17 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
|
|||||||
* If the group can be shifted, then we can possibly use this
|
* If the group can be shifted, then we can possibly use this
|
||||||
* freedom to produce a more intuitive diff.
|
* freedom to produce a more intuitive diff.
|
||||||
*
|
*
|
||||||
* The group is currently shifted as far down as possible, so the
|
* The group is currently shifted as far down as possible, so
|
||||||
* heuristics below only have to handle upwards shifts.
|
* the heuristics below only have to handle upwards shifts.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (g.end == earliest_end) {
|
if (g.end == earliest_end) {
|
||||||
/* no shifting was possible */
|
/* no shifting was possible */
|
||||||
} else if (end_matching_other != -1) {
|
} else if (end_matching_other != -1) {
|
||||||
/*
|
/*
|
||||||
* Move the possibly merged group of changes back to line
|
* Move the possibly merged group of changes back to
|
||||||
* up with the last group of changes from the other file
|
* line up with the last group of changes from the
|
||||||
* that it can align with.
|
* other file that it can align with.
|
||||||
*/
|
*/
|
||||||
while (go.end == go.start) {
|
while (go.end == go.start) {
|
||||||
if (group_slide_up(xdf, &g, flags))
|
if (group_slide_up(xdf, &g, flags))
|
||||||
@ -879,14 +889,15 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
|
|||||||
} else if (flags & XDF_INDENT_HEURISTIC) {
|
} else if (flags & XDF_INDENT_HEURISTIC) {
|
||||||
/*
|
/*
|
||||||
* Indent heuristic: a group of pure add/delete lines
|
* Indent heuristic: a group of pure add/delete lines
|
||||||
* implies two splits, one between the end of the "before"
|
* implies two splits, one between the end of the
|
||||||
* context and the start of the group, and another between
|
* "before" context and the start of the group, and
|
||||||
* the end of the group and the beginning of the "after"
|
* another between the end of the group and the
|
||||||
* context. Some splits are aesthetically better and some
|
* beginning of the "after" context. Some splits are
|
||||||
* are worse. We compute a badness "score" for each split,
|
* aesthetically better and some are worse. We compute
|
||||||
* and add the scores for the two splits to define a
|
* a badness "score" for each split, and add the scores
|
||||||
* "score" for each position that the group can be shifted
|
* for the two splits to define a "score" for each
|
||||||
* to. Then we pick the shift with the lowest score.
|
* position that the group can be shifted to. Then we
|
||||||
|
* pick the shift with the lowest score.
|
||||||
*/
|
*/
|
||||||
long shift, best_shift = -1;
|
long shift, best_shift = -1;
|
||||||
struct split_score best_score;
|
struct split_score best_score;
|
||||||
|
Loading…
Reference in New Issue
Block a user