git-commit-vandalism/blame.c

900 lines
20 KiB
C
Raw Normal View History

/*
* Copyright (C) 2006, Fredrik Kuivinen <freku045@student.liu.se>
*/
#include <assert.h>
#include <time.h>
#include <sys/time.h>
#include <math.h>
#include "cache.h"
#include "refs.h"
#include "tag.h"
#include "commit.h"
#include "tree.h"
#include "blob.h"
#include "diff.h"
#include "diffcore.h"
#include "revision.h"
#include "xdiff-interface.h"
#define DEBUG 0
static const char blame_usage[] = "[-c] [-l] [--] file [commit]\n"
" -c, --compability Use the same output mode as git-annotate (Default: off)\n"
" -l, --long Show long commit SHA1 (Default: off)\n"
" -h, --help This message";
static struct commit **blame_lines;
static int num_blame_lines;
static char* blame_contents;
static int blame_len;
struct util_info {
int *line_map;
unsigned char sha1[20]; /* blob sha, not commit! */
char *buf;
unsigned long size;
int num_lines;
const char* pathname;
void* topo_data;
};
struct chunk {
int off1, len1; // ---
int off2, len2; // +++
};
struct patch {
struct chunk *chunks;
int num;
};
static void get_blob(struct commit *commit);
/* Only used for statistics */
static int num_get_patch = 0;
static int num_commits = 0;
static int patch_time = 0;
struct blame_diff_state {
struct xdiff_emit_state xm;
struct patch *ret;
};
static void process_u0_diff(void *state_, char *line, unsigned long len)
{
struct blame_diff_state *state = state_;
struct chunk *chunk;
if (len < 4 || line[0] != '@' || line[1] != '@')
return;
if (DEBUG)
printf("chunk line: %.*s", (int)len, line);
state->ret->num++;
state->ret->chunks = xrealloc(state->ret->chunks,
sizeof(struct chunk) * state->ret->num);
chunk = &state->ret->chunks[state->ret->num - 1];
assert(!strncmp(line, "@@ -", 4));
if (parse_hunk_header(line, len,
&chunk->off1, &chunk->len1,
&chunk->off2, &chunk->len2)) {
state->ret->num--;
return;
}
if (chunk->len1 == 0)
chunk->off1++;
if (chunk->len2 == 0)
chunk->off2++;
if (chunk->off1 > 0)
chunk->off1--;
if (chunk->off2 > 0)
chunk->off2--;
assert(chunk->off1 >= 0);
assert(chunk->off2 >= 0);
}
static struct patch *get_patch(struct commit *commit, struct commit *other)
{
struct blame_diff_state state;
xpparam_t xpp;
xdemitconf_t xecfg;
mmfile_t file_c, file_o;
xdemitcb_t ecb;
struct util_info *info_c = (struct util_info *)commit->object.util;
struct util_info *info_o = (struct util_info *)other->object.util;
struct timeval tv_start, tv_end;
get_blob(commit);
file_c.ptr = info_c->buf;
file_c.size = info_c->size;
get_blob(other);
file_o.ptr = info_o->buf;
file_o.size = info_o->size;
gettimeofday(&tv_start, NULL);
xpp.flags = XDF_NEED_MINIMAL;
xecfg.ctxlen = 0;
xecfg.flags = 0;
ecb.outf = xdiff_outf;
ecb.priv = &state;
memset(&state, 0, sizeof(state));
state.xm.consume = process_u0_diff;
state.ret = xmalloc(sizeof(struct patch));
state.ret->chunks = NULL;
state.ret->num = 0;
xdl_diff(&file_c, &file_o, &xpp, &xecfg, &ecb);
gettimeofday(&tv_end, NULL);
patch_time += 1000000 * (tv_end.tv_sec - tv_start.tv_sec) +
tv_end.tv_usec - tv_start.tv_usec;
num_get_patch++;
return state.ret;
}
static void free_patch(struct patch *p)
{
free(p->chunks);
free(p);
}
static int get_blob_sha1_internal(unsigned char *sha1, const char *base,
int baselen, const char *pathname,
unsigned mode, int stage);
static unsigned char blob_sha1[20];
static const char* blame_file;
static int get_blob_sha1(struct tree *t, const char *pathname,
unsigned char *sha1)
{
int i;
const char *pathspec[2];
blame_file = pathname;
pathspec[0] = pathname;
pathspec[1] = NULL;
memset(blob_sha1, 0, sizeof(blob_sha1));
read_tree_recursive(t, "", 0, 0, pathspec, get_blob_sha1_internal);
for (i = 0; i < 20; i++) {
if (blob_sha1[i] != 0)
break;
}
if (i == 20)
return -1;
memcpy(sha1, blob_sha1, 20);
return 0;
}
static int get_blob_sha1_internal(unsigned char *sha1, const char *base,
int baselen, const char *pathname,
unsigned mode, int stage)
{
if (S_ISDIR(mode))
return READ_TREE_RECURSIVE;
if (strncmp(blame_file, base, baselen) ||
strcmp(blame_file + baselen, pathname))
return -1;
memcpy(blob_sha1, sha1, 20);
return -1;
}
static void get_blob(struct commit *commit)
{
struct util_info *info = commit->object.util;
char type[20];
if (info->buf)
return;
info->buf = read_sha1_file(info->sha1, type, &info->size);
assert(!strcmp(type, blob_type));
}
/* For debugging only */
static void print_patch(struct patch *p)
{
int i;
printf("Num chunks: %d\n", p->num);
for (i = 0; i < p->num; i++) {
printf("%d,%d %d,%d\n", p->chunks[i].off1, p->chunks[i].len1,
p->chunks[i].off2, p->chunks[i].len2);
}
}
#if DEBUG
/* For debugging only */
static void print_map(struct commit *cmit, struct commit *other)
{
struct util_info *util = cmit->object.util;
struct util_info *util2 = other->object.util;
int i;
int max =
util->num_lines >
util2->num_lines ? util->num_lines : util2->num_lines;
int num;
for (i = 0; i < max; i++) {
printf("i: %d ", i);
num = -1;
if (i < util->num_lines) {
num = util->line_map[i];
printf("%d\t", num);
} else
printf("\t");
if (i < util2->num_lines) {
int num2 = util2->line_map[i];
printf("%d\t", num2);
if (num != -1 && num2 != num)
printf("---");
} else
printf("\t");
printf("\n");
}
}
#endif
// p is a patch from commit to other.
static void fill_line_map(struct commit *commit, struct commit *other,
struct patch *p)
{
struct util_info *util = commit->object.util;
struct util_info *util2 = other->object.util;
int *map = util->line_map;
int *map2 = util2->line_map;
int cur_chunk = 0;
int i1, i2;
if (p->num && DEBUG)
print_patch(p);
if (DEBUG)
printf("num lines 1: %d num lines 2: %d\n", util->num_lines,
util2->num_lines);
for (i1 = 0, i2 = 0; i1 < util->num_lines; i1++, i2++) {
struct chunk *chunk = NULL;
if (cur_chunk < p->num)
chunk = &p->chunks[cur_chunk];
if (chunk && chunk->off1 == i1) {
if (DEBUG && i2 != chunk->off2)
printf("i2: %d off2: %d\n", i2, chunk->off2);
assert(i2 == chunk->off2);
i1--;
i2--;
if (chunk->len1 > 0)
i1 += chunk->len1;
if (chunk->len2 > 0)
i2 += chunk->len2;
cur_chunk++;
} else {
if (i2 >= util2->num_lines)
break;
if (map[i1] != map2[i2] && map[i1] != -1) {
if (DEBUG)
printf("map: i1: %d %d %p i2: %d %d %p\n",
i1, map[i1],
i1 != -1 ? blame_lines[map[i1]] : NULL,
i2, map2[i2],
i2 != -1 ? blame_lines[map2[i2]] : NULL);
if (map2[i2] != -1 &&
blame_lines[map[i1]] &&
!blame_lines[map2[i2]])
map[i1] = map2[i2];
}
if (map[i1] == -1 && map2[i2] != -1)
map[i1] = map2[i2];
}
if (DEBUG > 1)
printf("l1: %d l2: %d i1: %d i2: %d\n",
map[i1], map2[i2], i1, i2);
}
}
static int map_line(struct commit *commit, int line)
{
struct util_info *info = commit->object.util;
assert(line >= 0 && line < info->num_lines);
return info->line_map[line];
}
static struct util_info* get_util(struct commit *commit)
{
struct util_info *util = commit->object.util;
if (util)
return util;
util = xmalloc(sizeof(struct util_info));
util->buf = NULL;
util->size = 0;
util->line_map = NULL;
util->num_lines = -1;
util->pathname = NULL;
commit->object.util = util;
return util;
}
static int fill_util_info(struct commit *commit)
{
struct util_info *util = commit->object.util;
assert(util);
assert(util->pathname);
if (get_blob_sha1(commit->tree, util->pathname, util->sha1))
return 1;
else
return 0;
}
static void alloc_line_map(struct commit *commit)
{
struct util_info *util = commit->object.util;
int i;
if (util->line_map)
return;
get_blob(commit);
util->num_lines = 0;
for (i = 0; i < util->size; i++) {
if (util->buf[i] == '\n')
util->num_lines++;
}
if(util->buf[util->size - 1] != '\n')
util->num_lines++;
util->line_map = xmalloc(sizeof(int) * util->num_lines);
for (i = 0; i < util->num_lines; i++)
util->line_map[i] = -1;
}
static void init_first_commit(struct commit* commit, const char* filename)
{
struct util_info* util = commit->object.util;
int i;
util->pathname = filename;
if (fill_util_info(commit))
die("fill_util_info failed");
alloc_line_map(commit);
util = commit->object.util;
for (i = 0; i < util->num_lines; i++)
util->line_map[i] = i;
}
static void process_commits(struct rev_info *rev, const char *path,
struct commit** initial)
{
int i;
struct util_info* util;
int lines_left;
int *blame_p;
int *new_lines;
int new_lines_len;
struct commit* commit = get_revision(rev);
assert(commit);
init_first_commit(commit, path);
util = commit->object.util;
num_blame_lines = util->num_lines;
blame_lines = xmalloc(sizeof(struct commit *) * num_blame_lines);
blame_contents = util->buf;
blame_len = util->size;
for (i = 0; i < num_blame_lines; i++)
blame_lines[i] = NULL;
lines_left = num_blame_lines;
blame_p = xmalloc(sizeof(int) * num_blame_lines);
new_lines = xmalloc(sizeof(int) * num_blame_lines);
do {
struct commit_list *parents;
int num_parents;
struct util_info *util;
if (DEBUG)
printf("\nProcessing commit: %d %s\n", num_commits,
sha1_to_hex(commit->object.sha1));
if (lines_left == 0)
return;
num_commits++;
memset(blame_p, 0, sizeof(int) * num_blame_lines);
new_lines_len = 0;
num_parents = 0;
for (parents = commit->parents;
parents != NULL; parents = parents->next)
num_parents++;
if(num_parents == 0)
*initial = commit;
if (fill_util_info(commit))
continue;
alloc_line_map(commit);
util = commit->object.util;
for (parents = commit->parents;
parents != NULL; parents = parents->next) {
struct commit *parent = parents->item;
struct patch *patch;
if (parse_commit(parent) < 0)
die("parse_commit error");
if (DEBUG)
printf("parent: %s\n",
sha1_to_hex(parent->object.sha1));
if (fill_util_info(parent)) {
num_parents--;
continue;
}
patch = get_patch(parent, commit);
alloc_line_map(parent);
fill_line_map(parent, commit, patch);
for (i = 0; i < patch->num; i++) {
int l;
for (l = 0; l < patch->chunks[i].len2; l++) {
int mapped_line =
map_line(commit, patch->chunks[i].off2 + l);
if (mapped_line != -1) {
blame_p[mapped_line]++;
if (blame_p[mapped_line] == num_parents)
new_lines[new_lines_len++] = mapped_line;
}
}
}
free_patch(patch);
}
if (DEBUG)
printf("parents: %d\n", num_parents);
for (i = 0; i < new_lines_len; i++) {
int mapped_line = new_lines[i];
if (blame_lines[mapped_line] == NULL) {
blame_lines[mapped_line] = commit;
lines_left--;
if (DEBUG)
printf("blame: mapped: %d i: %d\n",
mapped_line, i);
}
}
} while ((commit = get_revision(rev)) != NULL);
}
static int compare_tree_path(struct rev_info* revs,
struct commit* c1, struct commit* c2)
{
const char* paths[2];
struct util_info* util = c2->object.util;
paths[0] = util->pathname;
paths[1] = NULL;
diff_tree_setup_paths(get_pathspec(revs->prefix, paths));
return rev_compare_tree(c1->tree, c2->tree);
}
static int same_tree_as_empty_path(struct rev_info *revs, struct tree* t1,
const char* path)
{
const char* paths[2];
paths[0] = path;
paths[1] = NULL;
diff_tree_setup_paths(get_pathspec(revs->prefix, paths));
return rev_same_tree_as_empty(t1);
}
static const char* find_rename(struct commit* commit, struct commit* parent)
{
struct util_info* cutil = commit->object.util;
struct diff_options diff_opts;
const char *paths[1];
int i;
if (DEBUG) {
printf("find_rename commit: %s ",
sha1_to_hex(commit->object.sha1));
puts(sha1_to_hex(parent->object.sha1));
}
diff_setup(&diff_opts);
diff_opts.recursive = 1;
diff_opts.detect_rename = DIFF_DETECT_RENAME;
paths[0] = NULL;
diff_tree_setup_paths(paths);
if (diff_setup_done(&diff_opts) < 0)
die("diff_setup_done failed");
diff_tree_sha1(commit->tree->object.sha1, parent->tree->object.sha1,
"", &diff_opts);
diffcore_std(&diff_opts);
for (i = 0; i < diff_queued_diff.nr; i++) {
struct diff_filepair *p = diff_queued_diff.queue[i];
if (p->status == 'R' && !strcmp(p->one->path, cutil->pathname)) {
if (DEBUG)
printf("rename %s -> %s\n", p->one->path, p->two->path);
return p->two->path;
}
}
return 0;
}
static void simplify_commit(struct rev_info *revs, struct commit *commit)
{
struct commit_list **pp, *parent;
if (!commit->tree)
return;
if (!commit->parents) {
struct util_info* util = commit->object.util;
if (!same_tree_as_empty_path(revs, commit->tree,
util->pathname))
commit->object.flags |= TREECHANGE;
return;
}
pp = &commit->parents;
while ((parent = *pp) != NULL) {
struct commit *p = parent->item;
if (p->object.flags & UNINTERESTING) {
pp = &parent->next;
continue;
}
parse_commit(p);
switch (compare_tree_path(revs, p, commit)) {
case REV_TREE_SAME:
parent->next = NULL;
commit->parents = parent;
get_util(p)->pathname = get_util(commit)->pathname;
return;
case REV_TREE_NEW:
{
struct util_info* util = commit->object.util;
if (revs->remove_empty_trees &&
same_tree_as_empty_path(revs, p->tree,
util->pathname)) {
const char* new_name = find_rename(commit, p);
if (new_name) {
struct util_info* putil = get_util(p);
if (!putil->pathname)
putil->pathname = strdup(new_name);
} else {
*pp = parent->next;
continue;
}
}
}
/* fallthrough */
case REV_TREE_DIFFERENT:
pp = &parent->next;
if (!get_util(p)->pathname)
get_util(p)->pathname =
get_util(commit)->pathname;
continue;
}
die("bad tree compare for commit %s",
sha1_to_hex(commit->object.sha1));
}
commit->object.flags |= TREECHANGE;
}
struct commit_info
{
char* author;
char* author_mail;
unsigned long author_time;
char* author_tz;
};
static void get_commit_info(struct commit* commit, struct commit_info* ret)
{
int len;
char* tmp;
static char author_buf[1024];
tmp = strstr(commit->buffer, "\nauthor ") + 8;
len = strchr(tmp, '\n') - tmp;
ret->author = author_buf;
memcpy(ret->author, tmp, len);
tmp = ret->author;
tmp += len;
*tmp = 0;
while(*tmp != ' ')
tmp--;
ret->author_tz = tmp+1;
*tmp = 0;
while(*tmp != ' ')
tmp--;
ret->author_time = strtoul(tmp, NULL, 10);
*tmp = 0;
while(*tmp != ' ')
tmp--;
ret->author_mail = tmp + 1;
*tmp = 0;
}
static const char* format_time(unsigned long time, const char* tz_str)
{
static char time_buf[128];
time_t t = time;
int minutes, tz;
struct tm *tm;
tz = atoi(tz_str);
minutes = tz < 0 ? -tz : tz;
minutes = (minutes / 100)*60 + (minutes % 100);
minutes = tz < 0 ? -minutes : minutes;
t = time + minutes * 60;
tm = gmtime(&t);
strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
strcat(time_buf, tz_str);
return time_buf;
}
static void topo_setter(struct commit* c, void* data)
{
struct util_info* util = c->object.util;
util->topo_data = data;
}
static void* topo_getter(struct commit* c)
{
struct util_info* util = c->object.util;
return util->topo_data;
}
static int read_ancestry(const char *graft_file,
unsigned char **start_sha1)
{
FILE *fp = fopen(graft_file, "r");
char buf[1024];
if (!fp)
return -1;
while (fgets(buf, sizeof(buf), fp)) {
/* The format is just "Commit Parent1 Parent2 ...\n" */
int len = strlen(buf);
struct commit_graft *graft = read_graft_line(buf, len);
register_commit_graft(graft, 0);
if (!*start_sha1)
*start_sha1 = graft->sha1;
}
fclose(fp);
return 0;
}
int main(int argc, const char **argv)
{
int i;
struct commit *initial = NULL;
unsigned char sha1[20], *sha1_p = NULL;
const char *filename = NULL, *commit = NULL;
char filename_buf[256];
int sha1_len = 8;
int compability = 0;
int options = 1;
struct commit* start_commit;
const char* args[10];
struct rev_info rev;
struct commit_info ci;
const char *buf;
int max_digits;
int longest_file, longest_author;
int found_rename;
const char* prefix = setup_git_directory();
git_config(git_default_config);
for(i = 1; i < argc; i++) {
if(options) {
if(!strcmp(argv[i], "-h") ||
!strcmp(argv[i], "--help"))
usage(blame_usage);
else if(!strcmp(argv[i], "-l") ||
!strcmp(argv[i], "--long")) {
sha1_len = 40;
continue;
} else if(!strcmp(argv[i], "-c") ||
!strcmp(argv[i], "--compability")) {
compability = 1;
continue;
} else if(!strcmp(argv[i], "-S")) {
if (i + 1 < argc &&
!read_ancestry(argv[i + 1], &sha1_p)) {
compability = 1;
i++;
continue;
}
usage(blame_usage);
} else if(!strcmp(argv[i], "--")) {
options = 0;
continue;
} else if(argv[i][0] == '-')
usage(blame_usage);
else
options = 0;
}
if(!options) {
if(!filename)
filename = argv[i];
else if(!commit)
commit = argv[i];
else
usage(blame_usage);
}
}
if(!filename)
usage(blame_usage);
if (commit && sha1_p)
usage(blame_usage);
else if(!commit)
commit = "HEAD";
if(prefix)
sprintf(filename_buf, "%s%s", prefix, filename);
else
strcpy(filename_buf, filename);
filename = filename_buf;
if (!sha1_p) {
if (get_sha1(commit, sha1))
die("get_sha1 failed, commit '%s' not found", commit);
sha1_p = sha1;
}
start_commit = lookup_commit_reference(sha1_p);
get_util(start_commit)->pathname = filename;
if (fill_util_info(start_commit)) {
printf("%s not found in %s\n", filename, commit);
return 1;
}
init_revisions(&rev);
rev.remove_empty_trees = 1;
rev.topo_order = 1;
rev.prune_fn = simplify_commit;
rev.topo_setter = topo_setter;
rev.topo_getter = topo_getter;
rev.parents = 1;
rev.limited = 1;
commit_list_insert(start_commit, &rev.commits);
args[0] = filename;
args[1] = NULL;
diff_tree_setup_paths(args);
prepare_revision_walk(&rev);
process_commits(&rev, filename, &initial);
buf = blame_contents;
for (max_digits = 1, i = 10; i <= num_blame_lines + 1; max_digits++)
i *= 10;
longest_file = 0;
longest_author = 0;
found_rename = 0;
for (i = 0; i < num_blame_lines; i++) {
struct commit *c = blame_lines[i];
struct util_info* u;
if (!c)
c = initial;
u = c->object.util;
if (!found_rename && strcmp(filename, u->pathname))
found_rename = 1;
if (longest_file < strlen(u->pathname))
longest_file = strlen(u->pathname);
get_commit_info(c, &ci);
if (longest_author < strlen(ci.author))
longest_author = strlen(ci.author);
}
for (i = 0; i < num_blame_lines; i++) {
struct commit *c = blame_lines[i];
struct util_info* u;
if (!c)
c = initial;
u = c->object.util;
get_commit_info(c, &ci);
fwrite(sha1_to_hex(c->object.sha1), sha1_len, 1, stdout);
if(compability) {
printf("\t(%10s\t%10s\t%d)", ci.author,
format_time(ci.author_time, ci.author_tz), i+1);
} else {
if (found_rename)
printf(" %-*.*s", longest_file, longest_file,
u->pathname);
printf(" (%-*.*s %10s %*d) ",
longest_author, longest_author, ci.author,
format_time(ci.author_time, ci.author_tz),
max_digits, i+1);
}
if(i == num_blame_lines - 1) {
fwrite(buf, blame_len - (buf - blame_contents),
1, stdout);
if(blame_contents[blame_len-1] != '\n')
putc('\n', stdout);
} else {
char* next_buf = strchr(buf, '\n') + 1;
fwrite(buf, next_buf - buf, 1, stdout);
buf = next_buf;
}
}
if (DEBUG) {
printf("num get patch: %d\n", num_get_patch);
printf("num commits: %d\n", num_commits);
printf("patch time: %f\n", patch_time / 1000000.0);
printf("initial: %s\n", sha1_to_hex(initial->object.sha1));
}
return 0;
}