From 1732a1fd942f00b9a77a47acc09df0cd62c770bd Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 29 Jan 2007 17:36:22 -0800 Subject: [PATCH] git-blame: somewhat better commenting. Signed-off-by: Junio C Hamano --- builtin-blame.c | 293 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 255 insertions(+), 38 deletions(-) diff --git a/builtin-blame.c b/builtin-blame.c index 02bda5e196..3033e9bdad 100644 --- a/builtin-blame.c +++ b/builtin-blame.c @@ -77,6 +77,10 @@ struct origin { char path[FLEX_ARRAY]; }; +/* + * Given an origin, prepare mmfile_t structure to be used by the + * diff machinery + */ static char *fill_origin_blob(struct origin *o, mmfile_t *file) { if (!o->file.ptr) { @@ -91,6 +95,10 @@ static char *fill_origin_blob(struct origin *o, mmfile_t *file) return file->ptr; } +/* + * Origin is refcounted and usually we keep the blob contents to be + * reused. + */ static inline struct origin *origin_incref(struct origin *o) { if (o) @@ -108,6 +116,11 @@ static void origin_decref(struct origin *o) } } +/* + * Each group of lines is described by a blame_entry; it can be split + * as we pass blame to the parents. They form a linked list in the + * scoreboard structure, sorted by the target line number. + */ struct blame_entry { struct blame_entry *prev; struct blame_entry *next; @@ -134,19 +147,24 @@ struct blame_entry { int s_lno; /* how significant this entry is -- cached to avoid - * scanning the lines over and over + * scanning the lines over and over. */ unsigned score; }; +/* + * The current state of the blame assignment. + */ struct scoreboard { /* the final commit (i.e. where we started digging from) */ struct commit *final; const char *path; - /* the contents in the final; pointed into by buf pointers of - * blame_entries + /* + * The contents in the final image. + * Used by many functions to obtain contents of the nth line, + * indexed with scoreboard.lineno[blame_entry.lno]. 
*/ const char *final_buf; unsigned long final_buf_size; @@ -171,6 +189,11 @@ static int cmp_suspect(struct origin *a, struct origin *b) static void sanity_check_refcnt(struct scoreboard *); +/* + * If two blame entries that are next to each other came from + * contiguous lines in the same origin (i.e. pair), + * merge them together. + */ static void coalesce(struct scoreboard *sb) { struct blame_entry *ent, *next; @@ -194,6 +217,12 @@ static void coalesce(struct scoreboard *sb) sanity_check_refcnt(sb); } +/* + * Given a commit and a path in it, create a new origin structure. + * The callers that add blame to the scoreboard should use + * get_origin() to obtain shared, refcounted copy instead of calling + * this function directly. + */ static struct origin *make_origin(struct commit *commit, const char *path) { struct origin *o; @@ -204,6 +233,9 @@ static struct origin *make_origin(struct commit *commit, const char *path) return o; } +/* + * Locate an existing origin or create a new one. + */ static struct origin *get_origin(struct scoreboard *sb, struct commit *commit, const char *path) @@ -218,6 +250,13 @@ static struct origin *get_origin(struct scoreboard *sb, return make_origin(commit, path); } +/* + * Fill the blob_sha1 field of an origin if it hasn't, so that later + * call to fill_origin_blob() can use it to locate the data. blob_sha1 + * for an origin is also used to pass the blame for the entire file to + * the parent to detect the case where a child's blob is identical to + * that of its parent's. + */ static int fill_blob_sha1(struct origin *origin) { unsigned mode; @@ -238,6 +277,10 @@ static int fill_blob_sha1(struct origin *origin) return -1; } +/* + * We have an origin -- check if the same path exists in the + * parent and return an origin structure to represent it. 
+ */ static struct origin *find_origin(struct scoreboard *sb, struct commit *parent, struct origin *origin) @@ -247,12 +290,26 @@ static struct origin *find_origin(struct scoreboard *sb, const char *paths[2]; if (parent->util) { - /* This is a freestanding copy of origin and not - * refcounted. + /* + * Each commit object can cache one origin in that + * commit. This is a freestanding copy of origin and + * not refcounted. */ struct origin *cached = parent->util; if (!strcmp(cached->path, origin->path)) { + /* + * The same path between origin and its parent + * without renaming -- the most common case. + */ porigin = get_origin(sb, parent, cached->path); + + /* + * If the origin was newly created (i.e. get_origin + * would call make_origin if none is found in the + * scoreboard), it does not know the blob_sha1, + * so copy it. Otherwise porigin was in the + * scoreboard and already knows blob_sha1. + */ if (porigin->refcnt == 1) hashcpy(porigin->blob_sha1, cached->blob_sha1); return porigin; @@ -309,7 +366,13 @@ static struct origin *find_origin(struct scoreboard *sb, } diff_flush(&diff_opts); if (porigin) { + /* + * Create a freestanding copy that is not part of + * the refcounted origin found in the scoreboard, and + * cache it in the commit. + */ struct origin *cached; + cached = make_origin(porigin->commit, porigin->path); hashcpy(cached->blob_sha1, porigin->blob_sha1); parent->util = cached; @@ -317,6 +380,10 @@ static struct origin *find_origin(struct scoreboard *sb, return porigin; } +/* + * We have an origin -- find the path that corresponds to it in its + * parent and return an origin structure to represent it. + */ static struct origin *find_rename(struct scoreboard *sb, struct commit *parent, struct origin *origin) @@ -353,6 +420,9 @@ static struct origin *find_rename(struct scoreboard *sb, return porigin; } +/* + * Parsing of patch chunks... 
+ */ struct chunk { /* line number in postimage; up to but not including this * line is the same as preimage @@ -454,6 +524,11 @@ static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o, return state.ret; } +/* + * Run diff between two origins and grab the patch output, so that + * we can pass blame for lines origin is currently suspected for + * to its parent. + */ static struct patch *get_patch(struct origin *parent, struct origin *origin) { mmfile_t file_p, file_o; @@ -474,6 +549,10 @@ static void free_patch(struct patch *p) free(p); } +/* + * Link in a new blame entry to the scoreboard. Entries that cover the + * same line range have been removed from the scoreboard previously. + */ static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e) { struct blame_entry *ent, *prev = NULL; @@ -497,6 +576,12 @@ static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e) e->next->prev = e; } +/* + * src typically is on-stack; we want to copy the information in it to + * a malloced blame_entry that is already on the linked list of the + * scoreboard. The origin of dst loses a refcnt while the origin of src + * gains one. + */ static void dup_entry(struct blame_entry *dst, struct blame_entry *src) { struct blame_entry *p, *n; @@ -516,25 +601,25 @@ static const char *nth_line(struct scoreboard *sb, int lno) return sb->final_buf + sb->lineno[lno]; } +/* + * It is known that lines between tlno to same came from parent, and e + * has an overlap with that range. It is also known that parent's + * line plno corresponds to e's line tlno. + * + * <---- e -----> + * <------> + * <------------> + * <------------> + * <------------------> + * + * Split e into potentially three parts; before this chunk, the chunk + * to be blamed for the parent, and after that portion. 
+ */ static void split_overlap(struct blame_entry *split, struct blame_entry *e, int tlno, int plno, int same, struct origin *parent) { - /* it is known that lines between tlno to same came from - * parent, and e has an overlap with that range. it also is - * known that parent's line plno corresponds to e's line tlno. - * - * <---- e -----> - * <------> - * <------------> - * <------------> - * <------------------> - * - * Potentially we need to split e into three parts; before - * this chunk, the chunk to be blamed for parent, and after - * that portion. - */ int chunk_end_lno; memset(split, 0, sizeof(struct blame_entry [3])); @@ -564,11 +649,20 @@ static void split_overlap(struct blame_entry *split, chunk_end_lno = e->lno + e->num_lines; split[1].num_lines = chunk_end_lno - split[1].lno; + /* + * if it turns out there is nothing to blame the parent for, + * forget about the splitting. !split[1].suspect signals this. + */ if (split[1].num_lines < 1) return; split[1].suspect = origin_incref(parent); } +/* + * split_overlap() divided an existing blame e into up to three parts + * in split. Adjust the linked list of blames in the scoreboard to + * reflect the split. + */ static void split_blame(struct scoreboard *sb, struct blame_entry *split, struct blame_entry *e) @@ -576,21 +670,27 @@ static void split_blame(struct scoreboard *sb, struct blame_entry *new_entry; if (split[0].suspect && split[2].suspect) { - /* we need to split e into two and add another for parent */ + /* The first part (reuse storage for the existing entry e) */ dup_entry(e, &split[0]); + /* The last part -- me */ new_entry = xmalloc(sizeof(*new_entry)); memcpy(new_entry, &(split[2]), sizeof(struct blame_entry)); add_blame_entry(sb, new_entry); + /* ... 
and the middle part -- parent */ new_entry = xmalloc(sizeof(*new_entry)); memcpy(new_entry, &(split[1]), sizeof(struct blame_entry)); add_blame_entry(sb, new_entry); } else if (!split[0].suspect && !split[2].suspect) - /* parent covers the entire area */ + /* + * The parent covers the entire area; reuse storage for + * e and replace it with the parent. + */ dup_entry(e, &split[1]); else if (split[0].suspect) { + /* me and then parent */ dup_entry(e, &split[0]); new_entry = xmalloc(sizeof(*new_entry)); @@ -598,6 +698,7 @@ static void split_blame(struct scoreboard *sb, add_blame_entry(sb, new_entry); } else { + /* parent and then me */ dup_entry(e, &split[1]); new_entry = xmalloc(sizeof(*new_entry)); @@ -628,6 +729,10 @@ static void split_blame(struct scoreboard *sb, } } +/* + * After splitting the blame, the origins used by the + * on-stack blame_entry should lose one refcnt each. + */ static void decref_split(struct blame_entry *split) { int i; @@ -636,6 +741,10 @@ static void decref_split(struct blame_entry *split) origin_decref(split[i].suspect); } +/* + * Helper for blame_chunk(). blame_entry e is known to overlap with + * the patch hunk; split it and pass blame to the parent. + */ static void blame_overlap(struct scoreboard *sb, struct blame_entry *e, int tlno, int plno, int same, struct origin *parent) @@ -648,6 +757,9 @@ static void blame_overlap(struct scoreboard *sb, struct blame_entry *e, decref_split(split); } +/* + * Find the line number of the last line the target is suspected for. + */ static int find_last_in_target(struct scoreboard *sb, struct origin *target) { struct blame_entry *e; @@ -662,6 +774,11 @@ static int find_last_in_target(struct scoreboard *sb, struct origin *target) return last_in_target; } +/* + * Process one hunk from the patch between the current suspect for + * blame_entry e and its parent. Find and split the overlap, and + * pass blame to the overlapping part to the parent. 
+ */ static void blame_chunk(struct scoreboard *sb, int tlno, int plno, int same, struct origin *target, struct origin *parent) @@ -678,6 +795,11 @@ static void blame_chunk(struct scoreboard *sb, } } +/* + * We are looking at the origin 'target' and aiming to pass blame + * for the lines it is suspected for to its parent. Run diff to find + * which lines came from parent and pass blame for them. + */ static int pass_blame_to_parent(struct scoreboard *sb, struct origin *target, struct origin *parent) @@ -698,13 +820,22 @@ static int pass_blame_to_parent(struct scoreboard *sb, plno = chunk->p_next; tlno = chunk->t_next; } - /* rest (i.e. anything above tlno) are the same as parent */ + /* The rest (i.e. anything after tlno) are the same as the parent */ blame_chunk(sb, tlno, plno, last_in_target, target, parent); free_patch(patch); return 0; } +/* + * The lines in blame_entry after splitting blames many times can become + * very small and trivial, and at some point it becomes pointless to + * blame the parents. E.g. "\t\t}\n\t}\n\n" appears everywhere in any + * ordinary C program, and it is not worth saying it was copied from + * a totally unrelated file in the parent. + * + * Compute how trivial the lines in the blame_entry are. + */ static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e) { unsigned score; @@ -726,6 +857,12 @@ static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e) return score; } +/* + * best_so_far[] and this[] are both a split of an existing blame_entry + * that passes blame to the parent. Maintain best_so_far the best split + * so far, by comparing this and best_so_far and copying this into + * best_so_far as needed. 
+ */ static void copy_split_if_better(struct scoreboard *sb, struct blame_entry *best_so_far, struct blame_entry *this) @@ -745,6 +882,11 @@ static void copy_split_if_better(struct scoreboard *sb, memcpy(best_so_far, this, sizeof(struct blame_entry [3])); } +/* + * Find the lines from parent that are the same as ent so that + * we can pass blames to it. file_p has the blob contents for + * the parent. + */ static void find_copy_in_blob(struct scoreboard *sb, struct blame_entry *ent, struct origin *parent, @@ -757,6 +899,9 @@ static void find_copy_in_blob(struct scoreboard *sb, struct patch *patch; int i, plno, tlno; + /* + * Prepare mmfile that contains only the lines in ent. + */ cp = nth_line(sb, ent->lno); file_o.ptr = (char*) cp; cnt = ent->num_lines; @@ -792,6 +937,10 @@ static void find_copy_in_blob(struct scoreboard *sb, free_patch(patch); } +/* + * See if lines currently target is suspected for can be attributed to + * parent. + */ static int find_move_in_parent(struct scoreboard *sb, struct origin *target, struct origin *parent) @@ -826,12 +975,15 @@ static int find_move_in_parent(struct scoreboard *sb, return 0; } - struct blame_list { struct blame_entry *ent; struct blame_entry split[3]; }; +/* + * Count the number of entries the target is suspected for, + * and prepare a list of entry and the best split. + */ static struct blame_list *setup_blame_list(struct scoreboard *sb, struct origin *target, int *num_ents_p) @@ -840,9 +992,6 @@ static struct blame_list *setup_blame_list(struct scoreboard *sb, int num_ents, i; struct blame_list *blame_list = NULL; - /* Count the number of entries the target is suspected for, - * and prepare a list of entry and the best split. 
- */ for (e = sb->ent, num_ents = 0; e; e = e->next) if (!e->guilty && !cmp_suspect(e->suspect, target)) num_ents++; @@ -856,6 +1005,11 @@ static struct blame_list *setup_blame_list(struct scoreboard *sb, return blame_list; } +/* + * For lines target is suspected for, see if we can find code movement + * across file boundary from the parent commit. porigin is the path + * in the parent we already tried. + */ static int find_copy_in_parent(struct scoreboard *sb, struct origin *target, struct commit *parent, @@ -956,7 +1110,8 @@ static int find_copy_in_parent(struct scoreboard *sb, return retval; } -/* The blobs of origin and porigin exactly match, so everything +/* + * The blobs of origin and porigin exactly match, so everything * origin is suspected for can be blamed on the parent. */ static void pass_whole_blame(struct scoreboard *sb, @@ -1041,7 +1196,7 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) } /* - * Optionally run "miff" to find moves in parents' files here. + * Optionally find moves in parents' files. */ if (opt & PICKAXE_BLAME_MOVE) for (i = 0, parent = commit->parents; @@ -1055,7 +1210,7 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) } /* - * Optionally run "ciff" to find copies from parents' files here. + * Optionally find copies from parents' files. */ if (opt & PICKAXE_BLAME_COPY) for (i = 0, parent = commit->parents; @@ -1072,6 +1227,9 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) origin_decref(parent_origin[i]); } +/* + * Information on commits, used for output. 
+ */ struct commit_info { char *author; @@ -1088,6 +1246,9 @@ struct commit_info char *summary; }; +/* + * Parse author/committer line in the commit object buffer + */ static void get_ac_line(const char *inbuf, const char *what, int bufsz, char *person, char **mail, unsigned long *time, char **tz) @@ -1142,7 +1303,8 @@ static void get_commit_info(struct commit *commit, static char committer_buf[1024]; static char summary_buf[1024]; - /* We've operated without save_commit_buffer, so + /* + * We've operated without save_commit_buffer, so * we now need to populate them for output. */ if (!commit->buffer) { @@ -1182,6 +1344,10 @@ static void get_commit_info(struct commit *commit, summary_buf[len] = 0; } +/* + * To allow LF and other nonportable characters in pathnames, + * they are c-style quoted as needed. + */ static void write_filename_info(const char *path) { printf("filename "); @@ -1189,6 +1355,10 @@ static void write_filename_info(const char *path) putchar('\n'); } +/* + * The blame_entry is found to be guilty for the range. Mark it + * as such, and show it in incremental output. + */ static void found_guilty_entry(struct blame_entry *ent) { if (ent->guilty) @@ -1220,6 +1390,11 @@ static void found_guilty_entry(struct blame_entry *ent) } } +/* + * The main loop -- while the scoreboard has lines whose true origin + * is still unknown, pick one blame_entry, and allow its current + * suspect to pass blames to its parents. + */ static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt) { while (1) { @@ -1234,12 +1409,16 @@ static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt) if (!suspect) return; /* all done */ + /* + * We will use this suspect later in the loop, + * so hold onto it in the meantime. 
+ */ origin_incref(suspect); commit = suspect->commit; if (!commit->object.parsed) parse_commit(commit); if (!(commit->object.flags & UNINTERESTING) && - !(revs->max_age != -1 && commit->date < revs->max_age)) + !(revs->max_age != -1 && commit->date < revs->max_age)) pass_blame(sb, suspect, opt); else { commit->object.flags |= UNINTERESTING; @@ -1431,6 +1610,10 @@ static void output(struct scoreboard *sb, int option) } } +/* + * To allow quick access to the contents of nth line in the + * final image, prepare an index in the scoreboard. + */ static int prepare_lines(struct scoreboard *sb) { const char *buf = sb->final_buf; @@ -1458,6 +1641,11 @@ static int prepare_lines(struct scoreboard *sb) return sb->num_lines; } +/* + * Add phony grafts for use with -S; this is primarily to + * support git-cvsserver that wants to give a linear history + * to its clients. + */ static int read_ancestry(const char *graft_file) { FILE *fp = fopen(graft_file, "r"); @@ -1475,6 +1663,9 @@ static int read_ancestry(const char *graft_file) return 0; } +/* + * How many columns do we need to show line numbers in decimal? + */ static int lineno_width(int lines) { int i, width; @@ -1484,6 +1675,10 @@ static int lineno_width(int lines) return width; } +/* + * How many columns do we need to show line numbers, authors, + * and filenames? + */ static void find_alignment(struct scoreboard *sb, int *option) { int longest_src_lines = 0; @@ -1522,6 +1717,10 @@ static void find_alignment(struct scoreboard *sb, int *option) max_score_digits = lineno_width(largest_score); } +/* + * For debugging -- origin is refcounted, and this asserts that + * we do not underflow. + */ static void sanity_check_refcnt(struct scoreboard *sb) { int baa = 0; @@ -1543,8 +1742,9 @@ static void sanity_check_refcnt(struct scoreboard *sb) ent->suspect->refcnt = -ent->suspect->refcnt; } for (ent = sb->ent; ent; ent = ent->next) { - /* then pick each and see if they have the the correct - * refcnt. + /* + * ... 
then pick each and see if they have the + * correct refcnt. */ int found; struct blame_entry *e; @@ -1574,6 +1774,10 @@ static void sanity_check_refcnt(struct scoreboard *sb) } } +/* + * Used for the command line parsing; check if the path exists + * in the working tree. + */ static int has_path_in_work_tree(const char *path) { struct stat st; @@ -1596,6 +1800,9 @@ static const char *add_prefix(const char *prefix, const char *path) return prefix_path(prefix, strlen(prefix), path); } +/* + * Parsing of (comma separated) one item in the -L option + */ static const char *parse_loc(const char *spec, struct scoreboard *sb, long lno, long begin, long *ret) @@ -1670,6 +1877,9 @@ static const char *parse_loc(const char *spec, } } +/* + * Parsing of -L option + */ static void prepare_blame_range(struct scoreboard *sb, const char *bottomtop, long lno, @@ -1788,7 +1998,8 @@ int cmd_blame(int argc, const char **argv, const char *prefix) if (!blame_copy_score) blame_copy_score = BLAME_DEFAULT_COPY_SCORE; - /* We have collected options unknown to us in argv[1..unk] + /* + * We have collected options unknown to us in argv[1..unk] * which are to be passed to revision machinery if we are * going to do the "bottom" procesing. * @@ -1868,7 +2079,8 @@ int cmd_blame(int argc, const char **argv, const char *prefix) if (final_commit_name) argv[unk++] = final_commit_name; - /* Now we got rev and path. We do not want the path pruning + /* + * Now we got rev and path. We do not want the path pruning * but we may want "bottom" processing. */ argv[unk++] = "--"; /* terminate the rev name */ @@ -1878,7 +2090,8 @@ int cmd_blame(int argc, const char **argv, const char *prefix) setup_revisions(unk, argv, &revs, "HEAD"); memset(&sb, 0, sizeof(sb)); - /* There must be one and only one positive commit in the + /* + * There must be one and only one positive commit in the * revs->pending array. 
*/ for (i = 0; i < revs.pending.nr; i++) { @@ -1899,7 +2112,10 @@ int cmd_blame(int argc, const char **argv, const char *prefix) } if (!sb.final) { - /* "--not A B -- path" without anything positive */ + /* + * "--not A B -- path" without anything positive; + * default to HEAD. + */ unsigned char head_sha1[20]; final_commit_name = "HEAD"; @@ -1909,7 +2125,8 @@ int cmd_blame(int argc, const char **argv, const char *prefix) add_pending_object(&revs, &(sb.final->object), "HEAD"); } - /* If we have bottom, this will mark the ancestors of the + /* + * If we have bottom, this will mark the ancestors of the * bottom commits we would reach while traversing as * uninteresting. */