From 52e9578985fb636ec1d3f6cf794fdadd5ec896fc Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 21 May 2005 02:40:01 -0700 Subject: [PATCH] [PATCH] Introducing software archaeologist's tool "pickaxe". This steals the "pickaxe" feature from JIT and make it available to the bare Plumbing layer. From the command line, the user gives a string he is intersted in. Using the diff-core infrastructure previously introduced, it filters the differences to limit the output only to the diffs between and where the string appears only in one but not in the other. For example: $ ./git-rev-list HEAD | ./git-diff-tree -Sdiff-tree-helper --stdin -M would show the diffs that touch the string "diff-tree-helper". In real software-archaeologist application, you would typically look for a few to several lines of code and see where that code came from. The "pickaxe" module runs after "rename/copy detection" module, so it even crosses the file rename boundary, as the above example demonstrates. Signed-off-by: Junio C Hamano Signed-off-by: Linus Torvalds --- Documentation/git-diff-cache.txt | 6 +++- Documentation/git-diff-files.txt | 6 +++- Documentation/git-diff-helper.txt | 6 +++- Documentation/git-diff-tree.txt | 5 ++- Makefile | 3 +- diff-cache.c | 11 ++++-- diff-files.c | 9 +++-- diff-helper.c | 10 ++++-- diff-tree.c | 15 ++++++--- diff.c | 23 ++++++++----- diff.h | 1 + diffcore-pickaxe.c | 56 +++++++++++++++++++++++++++++++ diffcore-rename.c | 29 ++++++---------- diffcore.h | 11 +++--- 14 files changed, 140 insertions(+), 51 deletions(-) create mode 100644 diffcore-pickaxe.c diff --git a/Documentation/git-diff-cache.txt b/Documentation/git-diff-cache.txt index e3ed7131a4..2cfdffd87c 100644 --- a/Documentation/git-diff-cache.txt +++ b/Documentation/git-diff-cache.txt @@ -9,7 +9,7 @@ git-diff-cache - Compares content and mode of blobs between the cache and reposi SYNOPSIS -------- -'git-diff-cache' [-p] [-r] [-z] [-m] [-M] [-R] [-C] [--cached] +'git-diff-cache' [-p] [-r] [-z] [-m] [-M] [-R] [-C] [-S] [--cached] DESCRIPTION ----------- @@ -39,6 +39,10 @@ OPTIONS -C:: Detect copies as well as renames; implies -p. +-S:: + Look for differences that contains the change in . + + -R:: Output diff in reverse. diff --git a/Documentation/git-diff-files.txt b/Documentation/git-diff-files.txt index 1eae3d0333..51a3d0fcd6 100644 --- a/Documentation/git-diff-files.txt +++ b/Documentation/git-diff-files.txt @@ -9,7 +9,7 @@ git-diff-files - Compares files in the working tree and the cache SYNOPSIS -------- -'git-diff-files' [-p] [-q] [-r] [-z] [-M] [-C] [-R] [...] +'git-diff-files' [-p] [-q] [-r] [-z] [-M] [-C] [-R] [-S] [...] DESCRIPTION ----------- @@ -35,6 +35,10 @@ OPTIONS -C:: Detect copies as well as renames; implies -p. +-S:: + Look for differences that contains the change in . + + -r:: This flag does not mean anything. It is there only to match git-diff-tree. Unlike git-diff-tree, git-diff-files always looks diff --git a/Documentation/git-diff-helper.txt b/Documentation/git-diff-helper.txt index 302789e9d1..2036c6cc1b 100644 --- a/Documentation/git-diff-helper.txt +++ b/Documentation/git-diff-helper.txt @@ -9,7 +9,7 @@ git-diff-helper - Generates patch format output for git-diff-* SYNOPSIS -------- -'git-diff-helper' [-z] [-R] [-M] [-C] +'git-diff-helper' [-z] [-R] [-M] [-C] [-S] DESCRIPTION ----------- @@ -37,6 +37,10 @@ OPTIONS -C:: Detect copies as well as renames. +-S:: + Look for differences that contains the change in . + + See Also -------- The section on generating patches in link:git-diff-cache.html[git-diff-cache] diff --git a/Documentation/git-diff-tree.txt b/Documentation/git-diff-tree.txt index f4e95a97c3..bdc8d5a53b 100644 --- a/Documentation/git-diff-tree.txt +++ b/Documentation/git-diff-tree.txt @@ -9,7 +9,7 @@ git-diff-tree - Compares the content and mode of blobs found via two tree object SYNOPSIS -------- -'git-diff-tree' [-p] [-r] [-z] [--stdin] [-M] [-R] [-C] [-m] [-s] [-v] []\* +'git-diff-tree' [-p] [-r] [-z] [--stdin] [-M] [-R] [-C] [-S] [-m] [-s] [-v] []\* DESCRIPTION ----------- @@ -43,6 +43,9 @@ OPTIONS -R:: Output diff in reverse. +-S:: + Look for differences that contains the change in . + -r:: recurse diff --git a/Makefile b/Makefile index f61bfe00d2..b13edabc20 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,7 @@ LIB_H += strbuf.h LIB_OBJS += strbuf.o LIB_H += diff.h -LIB_OBJS += diff.o diffcore-rename.o +LIB_OBJS += diff.o diffcore-rename.o diffcore-pickaxe.o LIB_OBJS += gitenv.o @@ -125,6 +125,7 @@ strbuf.o: $(LIB_H) gitenv.o: $(LIB_H) diff.o: $(LIB_H) diffcore-rename.o : $(LIB_H) +diffcore-pickaxe.o : $(LIB_H) test: all make -C t/ all diff --git a/diff-cache.c b/diff-cache.c index 383302abbc..a5deb8c7a4 100644 --- a/diff-cache.c +++ b/diff-cache.c @@ -8,6 +8,7 @@ static int line_termination = '\n'; static int detect_rename = 0; static int reverse_diff = 0; static int diff_score_opt = 0; +static char *pickaxe = 0; /* A file entry went away or appeared */ static void show_file(const char *prefix, struct cache_entry *ce, unsigned char *sha1, unsigned int mode) @@ -153,7 +154,7 @@ static void mark_merge_entries(void) } static char *diff_cache_usage = -"git-diff-cache [-p] [-r] [-z] [-m] [-M] [-C] [-R] [--cached] "; +"git-diff-cache [-p] [-r] [-z] [-m] [-M] [-C] [-R] [-S] [--cached] "; int main(int argc, char **argv) { @@ -194,6 +195,10 @@ int main(int argc, char **argv) reverse_diff = 1; continue; } + if (!strcmp(arg, "-S")) { + pickaxe = arg + 2; + continue; + } if (!strcmp(arg, "-m")) { match_nonexisting = 1; continue; @@ -208,8 +213,8 @@ int main(int argc, char **argv) if (argc != 2 || get_sha1(argv[1], tree_sha1)) usage(diff_cache_usage); - diff_setup(detect_rename, diff_score_opt, reverse_diff, - (generate_patch ? -1 : line_termination), + diff_setup(detect_rename, diff_score_opt, pickaxe, + reverse_diff, (generate_patch ? -1 : line_termination), NULL, 0); mark_merge_entries(); diff --git a/diff-files.c b/diff-files.c index d020254922..d3b80a0725 100644 --- a/diff-files.c +++ b/diff-files.c @@ -7,13 +7,14 @@ #include "diff.h" static const char *diff_files_usage = -"git-diff-files [-p] [-q] [-r] [-z] [-M] [-C] [-R] [paths...]"; +"git-diff-files [-p] [-q] [-r] [-z] [-M] [-C] [-R] [-S] [paths...]"; static int generate_patch = 0; static int line_termination = '\n'; static int detect_rename = 0; static int reverse_diff = 0; static int diff_score_opt = 0; +static char *pickaxe = 0; static int silent = 0; static int matches_pathspec(struct cache_entry *ce, char **spec, int cnt) @@ -67,6 +68,8 @@ int main(int argc, char **argv) line_termination = 0; else if (!strcmp(argv[1], "-R")) reverse_diff = 1; + else if (!strcmp(argv[1], "-S")) + pickaxe = argv[1] + 2; else if (!strncmp(argv[1], "-M", 2)) { diff_score_opt = diff_scoreopt_parse(argv[1]); detect_rename = generate_patch = 1; @@ -89,8 +92,8 @@ int main(int argc, char **argv) exit(1); } - diff_setup(detect_rename, diff_score_opt, reverse_diff, - (generate_patch ? -1 : line_termination), + diff_setup(detect_rename, diff_score_opt, pickaxe, + reverse_diff, (generate_patch ? -1 : line_termination), NULL, 0); for (i = 0; i < entries; i++) { diff --git a/diff-helper.c b/diff-helper.c index 4e966db769..568d5ae356 100644 --- a/diff-helper.c +++ b/diff-helper.c @@ -9,6 +9,7 @@ static int detect_rename = 0; static int diff_score_opt = 0; static int generate_patch = 1; +static char *pickaxe = 0; static int parse_oneside_change(const char *cp, int *mode, unsigned char *sha1, char *path) @@ -93,7 +94,7 @@ static int parse_diff_raw_output(const char *buf) } static const char *diff_helper_usage = - "git-diff-helper [-z] [-R] [-M] [-C] paths..."; + "git-diff-helper [-z] [-R] [-M] [-C] [-S] paths..."; int main(int ac, const char **av) { struct strbuf sb; @@ -117,14 +118,17 @@ int main(int ac, const char **av) { detect_rename = 2; diff_score_opt = diff_scoreopt_parse(av[1]); } + else if (av[1][1] == 'S') { + pickaxe = av[1] + 2; + } else usage(diff_helper_usage); ac--; av++; } /* the remaining parameters are paths patterns */ - diff_setup(detect_rename, diff_score_opt, reverse, - (generate_patch ? -1 : line_termination), + diff_setup(detect_rename, diff_score_opt, pickaxe, + reverse, (generate_patch ? -1 : line_termination), av+1, ac-1); while (1) { diff --git a/diff-tree.c b/diff-tree.c index aa4944686b..233a250668 100644 --- a/diff-tree.c +++ b/diff-tree.c @@ -13,6 +13,7 @@ static int generate_patch = 0; static int detect_rename = 0; static int reverse_diff = 0; static int diff_score_opt = 0; +static char *pickaxe = 0; static const char *header = NULL; static const char *header_prefix = ""; @@ -271,8 +272,8 @@ static int diff_tree_sha1_top(const unsigned char *old, { int ret; - diff_setup(detect_rename, diff_score_opt, reverse_diff, - (generate_patch ? -1 : line_termination), + diff_setup(detect_rename, diff_score_opt, pickaxe, + reverse_diff, (generate_patch ? -1 : line_termination), NULL, 0); ret = diff_tree_sha1(old, new, base); diff_flush(); @@ -285,8 +286,8 @@ static int diff_root_tree(const unsigned char *new, const char *base) void *tree; unsigned long size; - diff_setup(detect_rename, diff_score_opt, reverse_diff, - (generate_patch ? -1 : line_termination), + diff_setup(detect_rename, diff_score_opt, pickaxe, + reverse_diff, (generate_patch ? -1 : line_termination), NULL, 0); tree = read_object_with_reference(new, "tree", &size, NULL); if (!tree) @@ -430,7 +431,7 @@ static int diff_tree_stdin(char *line) } static char *diff_tree_usage = -"git-diff-tree [-p] [-r] [-z] [--stdin] [-M] [-C] [-R] [-m] [-s] [-v] "; +"git-diff-tree [-p] [-r] [-z] [--stdin] [-M] [-C] [-R] [-S] [-m] [-s] [-v] "; int main(int argc, char **argv) { @@ -473,6 +474,10 @@ int main(int argc, char **argv) recursive = generate_patch = 1; continue; } + if (!strncmp(arg, "-S", 2)) { + pickaxe = arg + 2; + continue; + } if (!strncmp(arg, "-M", 2)) { detect_rename = recursive = generate_patch = 1; diff_score_opt = diff_scoreopt_parse(arg); diff --git a/diff.c b/diff.c index d908ef3fde..13d5e3edf4 100644 --- a/diff.c +++ b/diff.c @@ -17,6 +17,7 @@ static int reverse_diff; static int diff_raw_output = -1; static const char **pathspec; static int speccnt; +static const char *pickaxe; static int minimum_score; static const char *external_diff(void) @@ -511,8 +512,9 @@ int diff_scoreopt_parse(const char *opt) return MAX_SCORE * num / scale; } -void diff_setup(int detect_rename_, int minimum_score_, int reverse_diff_, - int diff_raw_output_, +void diff_setup(int detect_rename_, int minimum_score_, + char *pickaxe_, + int reverse_diff_, int diff_raw_output_, const char **pathspec_, int speccnt_) { detect_rename = detect_rename_; @@ -521,15 +523,16 @@ void diff_setup(int detect_rename_, int minimum_score_, int reverse_diff_, diff_raw_output = diff_raw_output_; speccnt = speccnt_; minimum_score = minimum_score_ ? : DEFAULT_MINIMUM_SCORE; + pickaxe = pickaxe_; } static struct diff_queue_struct queued_diff; -struct diff_file_pair *diff_queue(struct diff_queue_struct *queue, +struct diff_filepair *diff_queue(struct diff_queue_struct *queue, struct diff_filespec *one, struct diff_filespec *two) { - struct diff_file_pair *dp = xmalloc(sizeof(*dp)); + struct diff_filepair *dp = xmalloc(sizeof(*dp)); dp->one = one; dp->two = two; dp->xfrm_msg = 0; @@ -549,7 +552,7 @@ static const char *git_object_type(unsigned mode) return S_ISDIR(mode) ? "tree" : "blob"; } -static void diff_flush_raw(struct diff_file_pair *p) +static void diff_flush_raw(struct diff_filepair *p) { struct diff_filespec *it; int addremove; @@ -583,7 +586,7 @@ static void diff_flush_raw(struct diff_file_pair *p) sha1_to_hex(it->sha1), it->path, diff_raw_output); } -static void diff_flush_patch(struct diff_file_pair *p) +static void diff_flush_patch(struct diff_filepair *p) { const char *name, *other; @@ -600,7 +603,7 @@ static int identical(struct diff_filespec *one, struct diff_filespec *two) { /* This function is written stricter than necessary to support * the currently implemented transformers, but the idea is to - * let transformers to produce diff_file_pairs any way they want, + * let transformers to produce diff_filepairs any way they want, * and filter and clean them up here before producing the output. */ @@ -623,7 +626,7 @@ static int identical(struct diff_filespec *one, struct diff_filespec *two) return 0; } -static void diff_flush_one(struct diff_file_pair *p) +static void diff_flush_one(struct diff_filepair *p) { if (identical(p->one, p->two)) return; @@ -640,11 +643,13 @@ void diff_flush(void) if (detect_rename) diff_detect_rename(q, detect_rename, minimum_score); + if (pickaxe) + diff_pickaxe(q, pickaxe); for (i = 0; i < q->nr; i++) diff_flush_one(q->queue[i]); for (i = 0; i < q->nr; i++) { - struct diff_file_pair *p = q->queue[i]; + struct diff_filepair *p = q->queue[i]; diff_free_filespec_data(p->one); diff_free_filespec_data(p->two); free(p->xfrm_msg); diff --git a/diff.h b/diff.h index 86a645afee..97d39ac961 100644 --- a/diff.h +++ b/diff.h @@ -20,6 +20,7 @@ extern void diff_unmerge(const char *path); extern int diff_scoreopt_parse(const char *opt); extern void diff_setup(int detect_rename, int minimum_score, + char *pickaxe, int reverse, int raw_output, const char **spec, int cnt); diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c new file mode 100644 index 0000000000..ee22e36abc --- /dev/null +++ b/diffcore-pickaxe.c @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2005 Junio C Hamano + */ +#include "cache.h" +#include "diff.h" +#include "diffcore.h" +#include "delta.h" + +static int contains(struct diff_filespec *one, + const char *needle, unsigned long len) +{ + unsigned long offset, sz; + const char *data; + if (diff_populate_filespec(one)) + return 0; + sz = one->size; + data = one->data; + for (offset = 0; offset + len <= sz; offset++) + if (!strncmp(needle, data + offset, len)) + return 1; + return 0; +} + +void diff_pickaxe(struct diff_queue_struct *q, const char *needle) +{ + unsigned long len = strlen(needle); + int i; + struct diff_queue_struct outq; + outq.queue = NULL; + outq.nr = outq.alloc = 0; + + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + if (!p->one->file_valid) { + if (!p->two->file_valid) + continue; /* ignore nonsense */ + /* created */ + if (contains(p->two, needle, len)) + diff_queue(&outq, p->one, p->two); + } + else if (!p->two->file_valid) { + if (contains(p->one, needle, len)) + diff_queue(&outq, p->one, p->two); + } + else if (contains(p->one, needle, len) != + contains(p->two, needle, len)) + diff_queue(&outq, p->one, p->two); + } + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + free(p); + } + free(q->queue); + *q = outq; + return; +} diff --git a/diffcore-rename.c b/diffcore-rename.c index 8aa8f841c4..6dd753bc63 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -129,7 +129,7 @@ static void record_rename_pair(struct diff_queue_struct *outq, * To achieve this sort order, we give xform_work the number * above. */ - struct diff_file_pair *dp = diff_queue(outq, src, dst); + struct diff_filepair *dp = diff_queue(outq, src, dst); dp->xfrm_work = (rank * 2 + 1) | (score<xfrm_flags |= RENAME_DST_MATCHED; } @@ -148,7 +148,7 @@ static void debug_filespec(struct diff_filespec *s, int x, const char *one) s->size, s->xfrm_flags); } -static void debug_filepair(const struct diff_file_pair *p, int i) +static void debug_filepair(const struct diff_filepair *p, int i) { debug_filespec(p->one, i, "one"); debug_filespec(p->two, i, "two"); @@ -165,7 +165,7 @@ static void debug_queue(const char *msg, struct diff_queue_struct *q) fprintf(stderr, "%s\n", msg); fprintf(stderr, "q->nr = %d\n", q->nr); for (i = 0; i < q->nr; i++) { - struct diff_file_pair *p = q->queue[i]; + struct diff_filepair *p = q->queue[i]; debug_filepair(p, i); } } @@ -180,8 +180,8 @@ static void debug_queue(const char *msg, struct diff_queue_struct *q) */ static int rank_compare(const void *a_, const void *b_) { - const struct diff_file_pair *a = *(const struct diff_file_pair **)a_; - const struct diff_file_pair *b = *(const struct diff_file_pair **)b_; + const struct diff_filepair *a = *(const struct diff_filepair **)a_; + const struct diff_filepair *b = *(const struct diff_filepair **)b_; int a_rank = a->xfrm_work & ((1<xfrm_work & ((1<nr) { - struct diff_file_pair *p = q->queue[i++]; + struct diff_filepair *p = q->queue[i++]; if (!p->two->file_valid) continue; /* removed is fine */ if (strcmp(p->one->path, it->path)) @@ -243,15 +243,8 @@ void diff_detect_rename(struct diff_queue_struct *q, srcs[0] = &deleted; srcs[1] = &stay; - /* NEEDSWORK: - * (1) make sure we properly ignore but pass trees. - * - * (2) make sure we do right thing on the same path deleted - * and created in the same patch. - */ - for (i = 0; i < q->nr; i++) { - struct diff_file_pair *p = q->queue[i]; + struct diff_filepair *p = q->queue[i]; if (!p->one->file_valid) if (!p->two->file_valid) continue; /* ignore nonsense */ @@ -340,11 +333,11 @@ void diff_detect_rename(struct diff_queue_struct *q, * See comments at the top of record_rename_pair for numbers used * to assign xfrm_work. * - * Note that we have not annotated the diff_file_pair with any comment + * Note that we have not annotated the diff_filepair with any comment * so there is nothing other than p to free. */ for (i = 0; i < q->nr; i++) { - struct diff_file_pair *dp, *p = q->queue[i]; + struct diff_filepair *dp, *p = q->queue[i]; if (!p->one->file_valid) { if (p->two->file_valid) { /* creation */ @@ -378,7 +371,7 @@ void diff_detect_rename(struct diff_queue_struct *q, /* Copy it out to q, removing duplicates. */ for (i = 0; i < outq.nr; i++) { - struct diff_file_pair *p = outq.queue[i]; + struct diff_filepair *p = outq.queue[i]; if (!p->one->file_valid) { /* created */ if (p->two->xfrm_flags & RENAME_DST_MATCHED) @@ -395,7 +388,7 @@ void diff_detect_rename(struct diff_queue_struct *q, } else if (strcmp(p->one->path, p->two->path)) { /* rename or copy */ - struct diff_file_pair *dp = + struct diff_filepair *dp = diff_queue(q, p->one, p->two); int msglen = (strlen(p->one->path) + strlen(p->two->path) + 100); diff --git a/diffcore.h b/diffcore.h index 5fa7067603..c3809ef858 100644 --- a/diffcore.h +++ b/diffcore.h @@ -38,7 +38,7 @@ extern void fill_filespec(struct diff_filespec *, const unsigned char *, extern int diff_populate_filespec(struct diff_filespec *); extern void diff_free_filespec_data(struct diff_filespec *); -struct diff_file_pair { +struct diff_filepair { struct diff_filespec *one; struct diff_filespec *two; char *xfrm_msg; @@ -47,14 +47,15 @@ struct diff_file_pair { }; struct diff_queue_struct { - struct diff_file_pair **queue; + struct diff_filepair **queue; int alloc; int nr; }; -extern struct diff_file_pair *diff_queue(struct diff_queue_struct *, - struct diff_filespec *, - struct diff_filespec *); +extern struct diff_filepair *diff_queue(struct diff_queue_struct *, + struct diff_filespec *, + struct diff_filespec *); extern void diff_detect_rename(struct diff_queue_struct *, int, int); +extern void diff_pickaxe(struct diff_queue_struct *, const char *); #endif