[PATCH] Diff-helper update
This patch adds a framework and a stub implementation of rename detection to the diff-helper program. The current stub code is just enough to detect pure renames in diff-tree output and nothing fancier. The plan is perhaps to use the same delta code for similarity evaluation purposes once Nico's delta storage patch is merged. Signed-off-by: Junio C Hamano <junkio@cox.net> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
e7bd907db6
commit
915838c3cb
@ -9,7 +9,7 @@ git-diff-helper - Generates patch format output for git-diff-*
|
|||||||
|
|
||||||
SYNOPSIS
|
SYNOPSIS
|
||||||
--------
|
--------
|
||||||
'git-diff-helper' [-z] [-R]
|
'git-diff-helper' [-z] [-R] [-r]
|
||||||
|
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
-----------
|
-----------
|
||||||
@ -28,7 +28,12 @@ OPTIONS
|
|||||||
|
|
||||||
git-diff-cache <tree> | git-diff-helper -R file.c
|
git-diff-cache <tree> | git-diff-helper -R file.c
|
||||||
|
|
||||||
would show a diff to bring the working file back to what is in the <tree>.
|
would show a diff to bring the working file back to what
|
||||||
|
is in the <tree>.
|
||||||
|
|
||||||
|
-r::
|
||||||
|
Detect renames.
|
||||||
|
|
||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
@ -37,7 +42,8 @@ The section on generating patches in link:git-diff-cache.html[git-diff-cache]
|
|||||||
|
|
||||||
Author
|
Author
|
||||||
------
|
------
|
||||||
Written by Linus Torvalds <torvalds@osdl.org>
|
Written by Junio C Hamano <junkio@cox.net>
|
||||||
|
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
--------------
|
--------------
|
||||||
|
141
diff-helper.c
141
diff-helper.c
@ -21,6 +21,129 @@ static int matches_pathspec(const char *name, const char **spec, int cnt)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Rename-detection switch.  main() sets it to 1 when the -r option is
 * given; while it is set, one-sided changes are held back instead of
 * being shown immediately, and flush_renames() runs after all input
 * has been read.
 */
static int detect_rename = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We do not detect circular renames. Just hold created and deleted
|
||||||
|
* entries and later attempt to match them up. If they do not match,
|
||||||
|
* then spit them out as deletes or creates as original.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static struct diff_spec_hold {
|
||||||
|
struct diff_spec_hold *next;
|
||||||
|
struct diff_spec_hold *matched;
|
||||||
|
struct diff_spec old, new;
|
||||||
|
char path[1];
|
||||||
|
} *createdfile, *deletedfile;
|
||||||
|
|
||||||
|
static void hold_spec(const char *path,
|
||||||
|
struct diff_spec *old, struct diff_spec *new)
|
||||||
|
{
|
||||||
|
struct diff_spec_hold **list, *elem;
|
||||||
|
list = (! old->file_valid) ? &createdfile : &deletedfile;
|
||||||
|
elem = xmalloc(sizeof(*elem) + strlen(path));
|
||||||
|
strcpy(elem->path, path);
|
||||||
|
elem->next = *list;
|
||||||
|
*list = elem;
|
||||||
|
elem->old = *old;
|
||||||
|
elem->new = *new;
|
||||||
|
elem->matched = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MINIMUM_SCORE 7000
|
||||||
|
int estimate_similarity(struct diff_spec *one, struct diff_spec *two)
|
||||||
|
{
|
||||||
|
/* Return how similar they are, representing the score as an
|
||||||
|
* integer between 0 and 10000.
|
||||||
|
*
|
||||||
|
* This version is very dumb and detects exact matches only.
|
||||||
|
* Wnen Nico's delta stuff gets in, I'll use the delta
|
||||||
|
* algorithm to estimate the similarity score in core.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (one->sha1_valid && two->sha1_valid &&
|
||||||
|
!memcmp(one->blob_sha1, two->blob_sha1, 20))
|
||||||
|
return 10000;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void flush_renames(const char **spec, int cnt, int reverse)
|
||||||
|
{
|
||||||
|
struct diff_spec_hold *rename_src, *rename_dst, *elem;
|
||||||
|
struct diff_spec_hold *leftover = NULL;
|
||||||
|
int score, best_score;
|
||||||
|
|
||||||
|
while (createdfile) {
|
||||||
|
rename_dst = createdfile;
|
||||||
|
createdfile = rename_dst->next;
|
||||||
|
best_score = MINIMUM_SCORE;
|
||||||
|
rename_src = NULL;
|
||||||
|
for (elem = deletedfile;
|
||||||
|
elem;
|
||||||
|
elem = elem->next) {
|
||||||
|
if (elem->matched)
|
||||||
|
continue;
|
||||||
|
score = estimate_similarity(&elem->old,
|
||||||
|
&rename_dst->new);
|
||||||
|
if (best_score < score) {
|
||||||
|
rename_src = elem;
|
||||||
|
best_score = score;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (rename_src) {
|
||||||
|
rename_src->matched = rename_dst;
|
||||||
|
rename_dst->matched = rename_src;
|
||||||
|
|
||||||
|
if (!cnt ||
|
||||||
|
matches_pathspec(rename_src->path, spec, cnt) ||
|
||||||
|
matches_pathspec(rename_dst->path, spec, cnt)) {
|
||||||
|
if (reverse)
|
||||||
|
run_external_diff(rename_dst->path,
|
||||||
|
rename_src->path,
|
||||||
|
&rename_dst->new,
|
||||||
|
&rename_src->old);
|
||||||
|
else
|
||||||
|
run_external_diff(rename_src->path,
|
||||||
|
rename_dst->path,
|
||||||
|
&rename_src->old,
|
||||||
|
&rename_dst->new);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
rename_dst->next = leftover;
|
||||||
|
leftover = rename_dst;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* unmatched deletes */
|
||||||
|
for (elem = deletedfile; elem; elem = elem->next) {
|
||||||
|
if (elem->matched)
|
||||||
|
continue;
|
||||||
|
if (!cnt ||
|
||||||
|
matches_pathspec(elem->path, spec, cnt)) {
|
||||||
|
if (reverse)
|
||||||
|
run_external_diff(elem->path, NULL,
|
||||||
|
&elem->new, &elem->old);
|
||||||
|
else
|
||||||
|
run_external_diff(elem->path, NULL,
|
||||||
|
&elem->old, &elem->new);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* unmatched creates */
|
||||||
|
for (elem = leftover; elem; elem = elem->next) {
|
||||||
|
if (!cnt ||
|
||||||
|
matches_pathspec(elem->path, spec, cnt)) {
|
||||||
|
if (reverse)
|
||||||
|
run_external_diff(elem->path, NULL,
|
||||||
|
&elem->new, &elem->old);
|
||||||
|
else
|
||||||
|
run_external_diff(elem->path, NULL,
|
||||||
|
&elem->old, &elem->new);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int parse_oneside_change(const char *cp, struct diff_spec *one,
|
static int parse_oneside_change(const char *cp, struct diff_spec *one,
|
||||||
char *path)
|
char *path)
|
||||||
{
|
{
|
||||||
@ -100,17 +223,24 @@ static int parse_diff_raw_output(const char *buf,
|
|||||||
default:
|
default:
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (detect_rename && old.file_valid != new.file_valid) {
|
||||||
|
/* hold these */
|
||||||
|
hold_spec(path, &old, &new);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (!cnt || matches_pathspec(path, spec, cnt)) {
|
if (!cnt || matches_pathspec(path, spec, cnt)) {
|
||||||
if (reverse)
|
if (reverse)
|
||||||
run_external_diff(path, &new, &old);
|
run_external_diff(path, NULL, &new, &old);
|
||||||
else
|
else
|
||||||
run_external_diff(path, &old, &new);
|
run_external_diff(path, NULL, &old, &new);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *diff_helper_usage =
|
static const char *diff_helper_usage =
|
||||||
"git-diff-helper [-R] [-z] paths...";
|
"git-diff-helper [-r] [-R] [-z] paths...";
|
||||||
|
|
||||||
int main(int ac, const char **av) {
|
int main(int ac, const char **av) {
|
||||||
struct strbuf sb;
|
struct strbuf sb;
|
||||||
@ -124,6 +254,8 @@ int main(int ac, const char **av) {
|
|||||||
reverse = 1;
|
reverse = 1;
|
||||||
else if (av[1][1] == 'z')
|
else if (av[1][1] == 'z')
|
||||||
line_termination = 0;
|
line_termination = 0;
|
||||||
|
else if (av[1][1] == 'r')
|
||||||
|
detect_rename = 1;
|
||||||
else
|
else
|
||||||
usage(diff_helper_usage);
|
usage(diff_helper_usage);
|
||||||
ac--; av++;
|
ac--; av++;
|
||||||
@ -139,5 +271,8 @@ int main(int ac, const char **av) {
|
|||||||
if (status)
|
if (status)
|
||||||
fprintf(stderr, "cannot parse %s\n", sb.buf);
|
fprintf(stderr, "cannot parse %s\n", sb.buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (detect_rename)
|
||||||
|
flush_renames(av+1, ac-1, reverse);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
34
diff.c
34
diff.c
@ -79,7 +79,8 @@ static struct diff_tempfile {
|
|||||||
char tmp_path[50];
|
char tmp_path[50];
|
||||||
} diff_temp[2];
|
} diff_temp[2];
|
||||||
|
|
||||||
static void builtin_diff(const char *name,
|
static void builtin_diff(const char *name_a,
|
||||||
|
const char *name_b,
|
||||||
struct diff_tempfile *temp)
|
struct diff_tempfile *temp)
|
||||||
{
|
{
|
||||||
int i, next_at;
|
int i, next_at;
|
||||||
@ -88,9 +89,12 @@ static void builtin_diff(const char *name,
|
|||||||
const char *input_name_sq[2];
|
const char *input_name_sq[2];
|
||||||
const char *path0[2];
|
const char *path0[2];
|
||||||
const char *path1[2];
|
const char *path1[2];
|
||||||
const char *name_sq = sq_expand(name);
|
const char *name_sq[2];
|
||||||
char *cmd;
|
char *cmd;
|
||||||
|
|
||||||
|
name_sq[0] = sq_expand(name_a);
|
||||||
|
name_sq[1] = sq_expand(name_b);
|
||||||
|
|
||||||
/* diff_cmd and diff_arg have 6 %s in total which makes
|
/* diff_cmd and diff_arg have 6 %s in total which makes
|
||||||
* the sum of these strings 12 bytes larger than required.
|
* the sum of these strings 12 bytes larger than required.
|
||||||
* we use 2 spaces around diff-opts, and we need to count
|
* we use 2 spaces around diff-opts, and we need to count
|
||||||
@ -105,7 +109,7 @@ static void builtin_diff(const char *name,
|
|||||||
path1[i] = "";
|
path1[i] = "";
|
||||||
} else {
|
} else {
|
||||||
path0[i] = i ? "b/" : "a/";
|
path0[i] = i ? "b/" : "a/";
|
||||||
path1[i] = name_sq;
|
path1[i] = name_sq[i];
|
||||||
}
|
}
|
||||||
cmd_size += (strlen(path0[i]) + strlen(path1[i]) +
|
cmd_size += (strlen(path0[i]) + strlen(path1[i]) +
|
||||||
strlen(input_name_sq[i]));
|
strlen(input_name_sq[i]));
|
||||||
@ -122,7 +126,7 @@ static void builtin_diff(const char *name,
|
|||||||
next_at += snprintf(cmd+next_at, cmd_size-next_at,
|
next_at += snprintf(cmd+next_at, cmd_size-next_at,
|
||||||
diff_arg, input_name_sq[0], input_name_sq[1]);
|
diff_arg, input_name_sq[0], input_name_sq[1]);
|
||||||
|
|
||||||
printf("diff --git a/%s b/%s\n", name, name);
|
printf("diff --git a/%s b/%s\n", name_a, name_b);
|
||||||
if (!path1[0][0])
|
if (!path1[0][0])
|
||||||
printf("new file mode %s\n", temp[1].mode);
|
printf("new file mode %s\n", temp[1].mode);
|
||||||
else if (!path1[1][0])
|
else if (!path1[1][0])
|
||||||
@ -132,6 +136,10 @@ static void builtin_diff(const char *name,
|
|||||||
printf("old mode %s\n", temp[0].mode);
|
printf("old mode %s\n", temp[0].mode);
|
||||||
printf("new mode %s\n", temp[1].mode);
|
printf("new mode %s\n", temp[1].mode);
|
||||||
}
|
}
|
||||||
|
if (strcmp(name_a, name_b)) {
|
||||||
|
printf("rename old %s\n", name_a);
|
||||||
|
printf("rename new %s\n", name_b);
|
||||||
|
}
|
||||||
if (strncmp(temp[0].mode, temp[1].mode, 3))
|
if (strncmp(temp[0].mode, temp[1].mode, 3))
|
||||||
/* we do not run diff between different kind
|
/* we do not run diff between different kind
|
||||||
* of objects.
|
* of objects.
|
||||||
@ -157,7 +165,7 @@ static int work_tree_matches(const char *name, const unsigned char *sha1)
|
|||||||
* benchmark with my previous version that always reads cache
|
* benchmark with my previous version that always reads cache
|
||||||
* shows that it makes things worse for diff-tree comparing
|
* shows that it makes things worse for diff-tree comparing
|
||||||
* two linux-2.6 kernel trees in an already checked out work
|
* two linux-2.6 kernel trees in an already checked out work
|
||||||
* tree. This is because most diff-tree comparison deals with
|
* tree. This is because most diff-tree comparisons deal with
|
||||||
* only a small number of files, while reading the cache is
|
* only a small number of files, while reading the cache is
|
||||||
* expensive for a large project, and its cost outweighs the
|
* expensive for a large project, and its cost outweighs the
|
||||||
* savings we get by not inflating the object to a temporary
|
* savings we get by not inflating the object to a temporary
|
||||||
@ -294,6 +302,7 @@ static void remove_tempfile_on_signal(int signo)
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
void run_external_diff(const char *name,
|
void run_external_diff(const char *name,
|
||||||
|
const char *other,
|
||||||
struct diff_spec *one,
|
struct diff_spec *one,
|
||||||
struct diff_spec *two)
|
struct diff_spec *two)
|
||||||
{
|
{
|
||||||
@ -304,7 +313,7 @@ void run_external_diff(const char *name,
|
|||||||
|
|
||||||
if (one && two) {
|
if (one && two) {
|
||||||
prepare_temp_file(name, &temp[0], one);
|
prepare_temp_file(name, &temp[0], one);
|
||||||
prepare_temp_file(name, &temp[1], two);
|
prepare_temp_file(other ? : name, &temp[1], two);
|
||||||
if (! atexit_asked &&
|
if (! atexit_asked &&
|
||||||
(temp[0].name == temp[0].tmp_path ||
|
(temp[0].name == temp[0].tmp_path ||
|
||||||
temp[1].name == temp[1].tmp_path)) {
|
temp[1].name == temp[1].tmp_path)) {
|
||||||
@ -320,7 +329,8 @@ void run_external_diff(const char *name,
|
|||||||
die("unable to fork");
|
die("unable to fork");
|
||||||
if (!pid) {
|
if (!pid) {
|
||||||
const char *pgm = external_diff();
|
const char *pgm = external_diff();
|
||||||
if (pgm) {
|
/* not passing rename patch to external ones */
|
||||||
|
if (!other && pgm) {
|
||||||
if (one && two)
|
if (one && two)
|
||||||
execlp(pgm, pgm,
|
execlp(pgm, pgm,
|
||||||
name,
|
name,
|
||||||
@ -334,7 +344,7 @@ void run_external_diff(const char *name,
|
|||||||
* otherwise we use the built-in one.
|
* otherwise we use the built-in one.
|
||||||
*/
|
*/
|
||||||
if (one && two)
|
if (one && two)
|
||||||
builtin_diff(name, temp);
|
builtin_diff(name, other ? : name, temp);
|
||||||
else
|
else
|
||||||
printf("* Unmerged path %s\n", name);
|
printf("* Unmerged path %s\n", name);
|
||||||
exit(0);
|
exit(0);
|
||||||
@ -379,7 +389,7 @@ void diff_addremove(int addremove, unsigned mode,
|
|||||||
strcpy(concatpath, base);
|
strcpy(concatpath, base);
|
||||||
strcat(concatpath, path);
|
strcat(concatpath, path);
|
||||||
}
|
}
|
||||||
run_external_diff(path ? concatpath : base, one, two);
|
run_external_diff(path ? concatpath : base, NULL, one, two);
|
||||||
}
|
}
|
||||||
|
|
||||||
void diff_change(unsigned old_mode, unsigned new_mode,
|
void diff_change(unsigned old_mode, unsigned new_mode,
|
||||||
@ -400,10 +410,10 @@ void diff_change(unsigned old_mode, unsigned new_mode,
|
|||||||
strcpy(concatpath, base);
|
strcpy(concatpath, base);
|
||||||
strcat(concatpath, path);
|
strcat(concatpath, path);
|
||||||
}
|
}
|
||||||
run_external_diff(path ? concatpath : base, &spec[0], &spec[1]);
|
run_external_diff(path ? concatpath : base, NULL, &spec[0], &spec[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void diff_unmerge(const char *path)
|
void diff_unmerge(const char *path)
|
||||||
{
|
{
|
||||||
run_external_diff(path, NULL, NULL);
|
run_external_diff(path, NULL, NULL, NULL);
|
||||||
}
|
}
|
||||||
|
2
diff.h
2
diff.h
@ -31,7 +31,7 @@ struct diff_spec {
|
|||||||
unsigned file_valid : 1; /* if false the file does not even exist */
|
unsigned file_valid : 1; /* if false the file does not even exist */
|
||||||
};
|
};
|
||||||
|
|
||||||
extern void run_external_diff(const char *name,
|
extern void run_external_diff(const char *name, const char *other,
|
||||||
struct diff_spec *, struct diff_spec *);
|
struct diff_spec *, struct diff_spec *);
|
||||||
|
|
||||||
#endif /* DIFF_H */
|
#endif /* DIFF_H */
|
||||||
|
Loading…
Reference in New Issue
Block a user