combine-diff: coalesce lost lines optimally
This replaces the greedy implementation to coalesce lost lines by using dynamic programming to find the Longest Common Subsequence. The O(n²) time complexity is obviously bigger than previous implementation but it can produce shorter diff results (and most likely easier to read). List of lost lines is now doubly-linked because we reverse-read it when reading the direction matrix. Signed-off-by: Antoine Pelisse <apelisse@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
fa04ae0be8
commit
99d3206010
255
combine-diff.c
255
combine-diff.c
@ -74,16 +74,24 @@ static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr,
|
||||
|
||||
/* Lines lost from parent */
|
||||
struct lline {
|
||||
struct lline *next;
|
||||
struct lline *next, *prev;
|
||||
int len;
|
||||
unsigned long parent_map;
|
||||
char line[FLEX_ARRAY];
|
||||
};
|
||||
|
||||
/* Lines lost from current parent (before coalescing) */
|
||||
struct plost {
|
||||
struct lline *lost_head, *lost_tail;
|
||||
int len;
|
||||
};
|
||||
|
||||
/* Lines surviving in the merge result */
|
||||
struct sline {
|
||||
struct lline *lost_head, **lost_tail;
|
||||
struct lline *next_lost;
|
||||
/* Accumulated and coalesced lost lines */
|
||||
struct lline *lost;
|
||||
int lenlost;
|
||||
struct plost plost;
|
||||
char *bol;
|
||||
int len;
|
||||
/* bit 0 up to (N-1) are on if the parent has this line (i.e.
|
||||
@ -95,34 +103,6 @@ struct sline {
|
||||
unsigned long *p_lno;
|
||||
};
|
||||
|
||||
static char *grab_blob(const unsigned char *sha1, unsigned int mode,
|
||||
unsigned long *size, struct userdiff_driver *textconv,
|
||||
const char *path)
|
||||
{
|
||||
char *blob;
|
||||
enum object_type type;
|
||||
|
||||
if (S_ISGITLINK(mode)) {
|
||||
blob = xmalloc(100);
|
||||
*size = snprintf(blob, 100,
|
||||
"Subproject commit %s\n", sha1_to_hex(sha1));
|
||||
} else if (is_null_sha1(sha1)) {
|
||||
/* deleted blob */
|
||||
*size = 0;
|
||||
return xcalloc(1, 1);
|
||||
} else if (textconv) {
|
||||
struct diff_filespec *df = alloc_filespec(path);
|
||||
fill_filespec(df, sha1, 1, mode);
|
||||
*size = fill_textconv(textconv, df, &blob);
|
||||
free_filespec(df);
|
||||
} else {
|
||||
blob = read_sha1_file(sha1, &type, size);
|
||||
if (type != OBJ_BLOB)
|
||||
die("object '%s' is not a blob!", sha1_to_hex(sha1));
|
||||
}
|
||||
return blob;
|
||||
}
|
||||
|
||||
static int match_string_spaces(const char *line1, int len1,
|
||||
const char *line2, int len2,
|
||||
long flags)
|
||||
@ -163,36 +143,180 @@ static int match_string_spaces(const char *line1, int len1,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void append_lost(struct sline *sline, int n, const char *line, int len, long flags)
|
||||
enum coalesce_direction { MATCH, BASE, NEW };
|
||||
|
||||
/* Coalesce new lines into base by finding LCS */
|
||||
static struct lline *coalesce_lines(struct lline *base, int *lenbase,
|
||||
struct lline *new, int lennew,
|
||||
unsigned long parent, long flags)
|
||||
{
|
||||
int **lcs;
|
||||
enum coalesce_direction **directions;
|
||||
struct lline *baseend, *newend = NULL;
|
||||
int i, j, origbaselen = *lenbase;
|
||||
|
||||
if (new == NULL)
|
||||
return base;
|
||||
|
||||
if (base == NULL) {
|
||||
*lenbase = lennew;
|
||||
return new;
|
||||
}
|
||||
|
||||
/*
|
||||
* Coalesce new lines into base by finding the LCS
|
||||
* - Create the table to run dynamic programing
|
||||
* - Compute the LCS
|
||||
* - Then reverse read the direction structure:
|
||||
* - If we have MATCH, assign parent to base flag, and consume
|
||||
* both baseend and newend
|
||||
* - Else if we have BASE, consume baseend
|
||||
* - Else if we have NEW, insert newend lline into base and
|
||||
* consume newend
|
||||
*/
|
||||
lcs = xcalloc(origbaselen + 1, sizeof(int*));
|
||||
directions = xcalloc(origbaselen + 1, sizeof(enum coalesce_direction*));
|
||||
for (i = 0; i < origbaselen + 1; i++) {
|
||||
lcs[i] = xcalloc(lennew + 1, sizeof(int));
|
||||
directions[i] = xcalloc(lennew + 1, sizeof(enum coalesce_direction));
|
||||
directions[i][0] = BASE;
|
||||
}
|
||||
for (j = 1; j < lennew + 1; j++)
|
||||
directions[0][j] = NEW;
|
||||
|
||||
for (i = 1, baseend = base; i < origbaselen + 1; i++) {
|
||||
for (j = 1, newend = new; j < lennew + 1; j++) {
|
||||
if (match_string_spaces(baseend->line, baseend->len,
|
||||
newend->line, newend->len, flags)) {
|
||||
lcs[i][j] = lcs[i - 1][j - 1] + 1;
|
||||
directions[i][j] = MATCH;
|
||||
} else if (lcs[i][j - 1] >= lcs[i - 1][j]) {
|
||||
lcs[i][j] = lcs[i][j - 1];
|
||||
directions[i][j] = NEW;
|
||||
} else {
|
||||
lcs[i][j] = lcs[i - 1][j];
|
||||
directions[i][j] = BASE;
|
||||
}
|
||||
if (newend->next)
|
||||
newend = newend->next;
|
||||
}
|
||||
if (baseend->next)
|
||||
baseend = baseend->next;
|
||||
}
|
||||
|
||||
for (i = 0; i < origbaselen + 1; i++)
|
||||
free(lcs[i]);
|
||||
free(lcs);
|
||||
|
||||
/* At this point, baseend and newend point to the end of each lists */
|
||||
i--;
|
||||
j--;
|
||||
while (i != 0 || j != 0) {
|
||||
if (directions[i][j] == MATCH) {
|
||||
baseend->parent_map |= 1<<parent;
|
||||
baseend = baseend->prev;
|
||||
newend = newend->prev;
|
||||
i--;
|
||||
j--;
|
||||
} else if (directions[i][j] == NEW) {
|
||||
struct lline *lline;
|
||||
|
||||
lline = newend;
|
||||
/* Remove lline from new list and update newend */
|
||||
if (lline->prev)
|
||||
lline->prev->next = lline->next;
|
||||
else
|
||||
new = lline->next;
|
||||
if (lline->next)
|
||||
lline->next->prev = lline->prev;
|
||||
|
||||
newend = lline->prev;
|
||||
j--;
|
||||
|
||||
/* Add lline to base list */
|
||||
if (baseend) {
|
||||
lline->next = baseend->next;
|
||||
lline->prev = baseend;
|
||||
if (lline->prev)
|
||||
lline->prev->next = lline;
|
||||
}
|
||||
else {
|
||||
lline->next = base;
|
||||
base = lline;
|
||||
}
|
||||
(*lenbase)++;
|
||||
|
||||
if (lline->next)
|
||||
lline->next->prev = lline;
|
||||
|
||||
} else {
|
||||
baseend = baseend->prev;
|
||||
i--;
|
||||
}
|
||||
}
|
||||
|
||||
newend = new;
|
||||
while (newend) {
|
||||
struct lline *lline = newend;
|
||||
newend = newend->next;
|
||||
free(lline);
|
||||
}
|
||||
|
||||
for (i = 0; i < origbaselen + 1; i++)
|
||||
free(directions[i]);
|
||||
free(directions);
|
||||
|
||||
return base;
|
||||
}
|
||||
|
||||
static char *grab_blob(const unsigned char *sha1, unsigned int mode,
|
||||
unsigned long *size, struct userdiff_driver *textconv,
|
||||
const char *path)
|
||||
{
|
||||
char *blob;
|
||||
enum object_type type;
|
||||
|
||||
if (S_ISGITLINK(mode)) {
|
||||
blob = xmalloc(100);
|
||||
*size = snprintf(blob, 100,
|
||||
"Subproject commit %s\n", sha1_to_hex(sha1));
|
||||
} else if (is_null_sha1(sha1)) {
|
||||
/* deleted blob */
|
||||
*size = 0;
|
||||
return xcalloc(1, 1);
|
||||
} else if (textconv) {
|
||||
struct diff_filespec *df = alloc_filespec(path);
|
||||
fill_filespec(df, sha1, 1, mode);
|
||||
*size = fill_textconv(textconv, df, &blob);
|
||||
free_filespec(df);
|
||||
} else {
|
||||
blob = read_sha1_file(sha1, &type, size);
|
||||
if (type != OBJ_BLOB)
|
||||
die("object '%s' is not a blob!", sha1_to_hex(sha1));
|
||||
}
|
||||
return blob;
|
||||
}
|
||||
|
||||
static void append_lost(struct sline *sline, int n, const char *line, int len)
|
||||
{
|
||||
struct lline *lline;
|
||||
unsigned long this_mask = (1UL<<n);
|
||||
if (line[len-1] == '\n')
|
||||
len--;
|
||||
|
||||
/* Check to see if we can squash things */
|
||||
if (sline->lost_head) {
|
||||
lline = sline->next_lost;
|
||||
while (lline) {
|
||||
if (match_string_spaces(lline->line, lline->len,
|
||||
line, len, flags)) {
|
||||
lline->parent_map |= this_mask;
|
||||
sline->next_lost = lline->next;
|
||||
return;
|
||||
}
|
||||
lline = lline->next;
|
||||
}
|
||||
}
|
||||
|
||||
lline = xmalloc(sizeof(*lline) + len + 1);
|
||||
lline->len = len;
|
||||
lline->next = NULL;
|
||||
lline->prev = sline->plost.lost_tail;
|
||||
if (lline->prev)
|
||||
lline->prev->next = lline;
|
||||
else
|
||||
sline->plost.lost_head = lline;
|
||||
sline->plost.lost_tail = lline;
|
||||
sline->plost.len++;
|
||||
lline->parent_map = this_mask;
|
||||
memcpy(lline->line, line, len);
|
||||
lline->line[len] = 0;
|
||||
*sline->lost_tail = lline;
|
||||
sline->lost_tail = &lline->next;
|
||||
sline->next_lost = NULL;
|
||||
}
|
||||
|
||||
struct combine_diff_state {
|
||||
@ -203,7 +327,6 @@ struct combine_diff_state {
|
||||
int n;
|
||||
struct sline *sline;
|
||||
struct sline *lost_bucket;
|
||||
long flags;
|
||||
};
|
||||
|
||||
static void consume_line(void *state_, char *line, unsigned long len)
|
||||
@ -236,14 +359,13 @@ static void consume_line(void *state_, char *line, unsigned long len)
|
||||
xcalloc(state->num_parent,
|
||||
sizeof(unsigned long));
|
||||
state->sline[state->nb-1].p_lno[state->n] = state->ob;
|
||||
state->lost_bucket->next_lost = state->lost_bucket->lost_head;
|
||||
return;
|
||||
}
|
||||
if (!state->lost_bucket)
|
||||
return; /* not in any hunk yet */
|
||||
switch (line[0]) {
|
||||
case '-':
|
||||
append_lost(state->lost_bucket, state->n, line+1, len-1, state->flags);
|
||||
append_lost(state->lost_bucket, state->n, line+1, len-1);
|
||||
break;
|
||||
case '+':
|
||||
state->sline[state->lno-1].flag |= state->nmask;
|
||||
@ -276,7 +398,6 @@ static void combine_diff(const unsigned char *parent, unsigned int mode,
|
||||
xpp.flags = flags;
|
||||
memset(&xecfg, 0, sizeof(xecfg));
|
||||
memset(&state, 0, sizeof(state));
|
||||
state.flags = flags;
|
||||
state.nmask = nmask;
|
||||
state.sline = sline;
|
||||
state.lno = 1;
|
||||
@ -298,8 +419,18 @@ static void combine_diff(const unsigned char *parent, unsigned int mode,
|
||||
struct lline *ll;
|
||||
sline[lno].p_lno[n] = p_lno;
|
||||
|
||||
/* Coalesce new lines */
|
||||
if (sline[lno].plost.lost_head) {
|
||||
struct sline *sl = &sline[lno];
|
||||
sl->lost = coalesce_lines(sl->lost, &sl->lenlost,
|
||||
sl->plost.lost_head,
|
||||
sl->plost.len, n, flags);
|
||||
sl->plost.lost_head = sl->plost.lost_tail = NULL;
|
||||
sl->plost.len = 0;
|
||||
}
|
||||
|
||||
/* How many lines would this sline advance the p_lno? */
|
||||
ll = sline[lno].lost_head;
|
||||
ll = sline[lno].lost;
|
||||
while (ll) {
|
||||
if (ll->parent_map & nmask)
|
||||
p_lno++; /* '-' means parent had it */
|
||||
@ -319,7 +450,7 @@ static int interesting(struct sline *sline, unsigned long all_mask)
|
||||
/* If some parents lost lines here, or if we have added to
|
||||
* some parent, it is interesting.
|
||||
*/
|
||||
return ((sline->flag & all_mask) || sline->lost_head);
|
||||
return ((sline->flag & all_mask) || sline->lost);
|
||||
}
|
||||
|
||||
static unsigned long adjust_hunk_tail(struct sline *sline,
|
||||
@ -502,7 +633,7 @@ static int make_hunks(struct sline *sline, unsigned long cnt,
|
||||
has_interesting = 0;
|
||||
for (j = i; j < hunk_end && !has_interesting; j++) {
|
||||
unsigned long this_diff = sline[j].flag & all_mask;
|
||||
struct lline *ll = sline[j].lost_head;
|
||||
struct lline *ll = sline[j].lost;
|
||||
if (this_diff) {
|
||||
/* This has some changes. Is it the
|
||||
* same as others?
|
||||
@ -656,7 +787,7 @@ static void dump_sline(struct sline *sline, const char *line_prefix,
|
||||
int j;
|
||||
unsigned long p_mask;
|
||||
struct sline *sl = &sline[lno++];
|
||||
ll = (sl->flag & no_pre_delete) ? NULL : sl->lost_head;
|
||||
ll = (sl->flag & no_pre_delete) ? NULL : sl->lost;
|
||||
while (ll) {
|
||||
printf("%s%s", line_prefix, c_old);
|
||||
for (j = 0; j < num_parent; j++) {
|
||||
@ -707,7 +838,7 @@ static void reuse_combine_diff(struct sline *sline, unsigned long cnt,
|
||||
jmask = (1UL<<j);
|
||||
|
||||
for (lno = 0; lno <= cnt; lno++) {
|
||||
struct lline *ll = sline->lost_head;
|
||||
struct lline *ll = sline->lost;
|
||||
sline->p_lno[i] = sline->p_lno[j];
|
||||
while (ll) {
|
||||
if (ll->parent_map & jmask)
|
||||
@ -966,10 +1097,6 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
|
||||
|
||||
sline = xcalloc(cnt+2, sizeof(*sline));
|
||||
sline[0].bol = result;
|
||||
for (lno = 0; lno <= cnt + 1; lno++) {
|
||||
sline[lno].lost_tail = &sline[lno].lost_head;
|
||||
sline[lno].flag = 0;
|
||||
}
|
||||
for (lno = 0, cp = result; cp < result + result_size; cp++) {
|
||||
if (*cp == '\n') {
|
||||
sline[lno].len = cp - sline[lno].bol;
|
||||
@ -1019,8 +1146,8 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
|
||||
free(result);
|
||||
|
||||
for (lno = 0; lno < cnt; lno++) {
|
||||
if (sline[lno].lost_head) {
|
||||
struct lline *ll = sline[lno].lost_head;
|
||||
if (sline[lno].lost) {
|
||||
struct lline *ll = sline[lno].lost;
|
||||
while (ll) {
|
||||
struct lline *tmp = ll;
|
||||
ll = ll->next;
|
||||
|
@ -224,4 +224,133 @@ test_expect_success 'check combined output (ignore all spaces)' '
|
||||
compare_diff_patch expected actual
|
||||
'
|
||||
|
||||
test_expect_success 'combine diff coalesce simple' '
|
||||
>test &&
|
||||
git add test &&
|
||||
git commit -m initial &&
|
||||
test_seq 4 >test &&
|
||||
git commit -a -m empty1 &&
|
||||
git branch side1 &&
|
||||
git checkout HEAD^ &&
|
||||
test_seq 5 >test &&
|
||||
git commit -a -m empty2 &&
|
||||
test_must_fail git merge side1 &&
|
||||
>test &&
|
||||
git commit -a -m merge &&
|
||||
git show >actual.tmp &&
|
||||
sed -e "1,/^@@@/d" < actual.tmp >actual &&
|
||||
tr -d Q <<-\EOF >expected &&
|
||||
--1
|
||||
--2
|
||||
--3
|
||||
--4
|
||||
- 5
|
||||
EOF
|
||||
compare_diff_patch expected actual
|
||||
'
|
||||
|
||||
test_expect_success 'combine diff coalesce tricky' '
|
||||
>test &&
|
||||
git add test &&
|
||||
git commit -m initial --allow-empty &&
|
||||
cat <<-\EOF >test &&
|
||||
3
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
EOF
|
||||
git commit -a -m empty1 &&
|
||||
git branch -f side1 &&
|
||||
git checkout HEAD^ &&
|
||||
cat <<-\EOF >test &&
|
||||
1
|
||||
3
|
||||
5
|
||||
4
|
||||
EOF
|
||||
git commit -a -m empty2 &&
|
||||
git branch -f side2 &&
|
||||
test_must_fail git merge side1 &&
|
||||
>test &&
|
||||
git commit -a -m merge &&
|
||||
git show >actual.tmp &&
|
||||
sed -e "1,/^@@@/d" < actual.tmp >actual &&
|
||||
tr -d Q <<-\EOF >expected &&
|
||||
-3
|
||||
--1
|
||||
-2
|
||||
--3
|
||||
- 5
|
||||
--4
|
||||
EOF
|
||||
compare_diff_patch expected actual &&
|
||||
git checkout -f side1 &&
|
||||
test_must_fail git merge side2 &&
|
||||
>test &&
|
||||
git commit -a -m merge &&
|
||||
git show >actual.tmp &&
|
||||
sed -e "1,/^@@@/d" < actual.tmp >actual &&
|
||||
tr -d Q <<-\EOF >expected &&
|
||||
- 3
|
||||
--1
|
||||
- 2
|
||||
--3
|
||||
-5
|
||||
--4
|
||||
EOF
|
||||
compare_diff_patch expected actual
|
||||
'
|
||||
|
||||
test_expect_failure 'combine diff coalesce three parents' '
|
||||
>test &&
|
||||
git add test &&
|
||||
git commit -m initial --allow-empty &&
|
||||
cat <<-\EOF >test &&
|
||||
3
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
EOF
|
||||
git commit -a -m empty1 &&
|
||||
git checkout -B side1 &&
|
||||
git checkout HEAD^ &&
|
||||
cat <<-\EOF >test &&
|
||||
1
|
||||
3
|
||||
7
|
||||
5
|
||||
4
|
||||
EOF
|
||||
git commit -a -m empty2 &&
|
||||
git branch -f side2 &&
|
||||
git checkout HEAD^ &&
|
||||
cat <<-\EOF >test &&
|
||||
3
|
||||
1
|
||||
6
|
||||
5
|
||||
4
|
||||
EOF
|
||||
git commit -a -m empty3 &&
|
||||
>test &&
|
||||
git add test &&
|
||||
TREE=$(git write-tree) &&
|
||||
COMMIT=$(git commit-tree -p HEAD -p side1 -p side2 -m merge $TREE) &&
|
||||
git show $COMMIT >actual.tmp &&
|
||||
sed -e "1,/^@@@/d" < actual.tmp >actual &&
|
||||
tr -d Q <<-\EOF >expected &&
|
||||
-- 3
|
||||
---1
|
||||
- 6
|
||||
- 2
|
||||
--3
|
||||
-7
|
||||
- -5
|
||||
---4
|
||||
EOF
|
||||
compare_diff_patch expected actual
|
||||
'
|
||||
|
||||
test_done
|
||||
|
Loading…
Reference in New Issue
Block a user