git-commit-vandalism/reflog-walk.c

370 lines
8.5 KiB
C
Raw Normal View History

#include "cache.h"
#include "commit.h"
#include "refs.h"
#include "diff.h"
#include "revision.h"
#include "string-list.h"
#include "reflog-walk.h"
struct complete_reflogs {
char *ref;
char *short_ref;
struct reflog_info {
struct object_id ooid, noid;
char *email;
timestamp_t timestamp;
int tz;
char *message;
} *items;
int nr, alloc;
};
static int read_one_reflog(struct object_id *ooid, struct object_id *noid,
const char *email, timestamp_t timestamp, int tz,
const char *message, void *cb_data)
{
struct complete_reflogs *array = cb_data;
struct reflog_info *item;
ALLOC_GROW(array->items, array->nr + 1, array->alloc);
item = array->items + array->nr;
oidcpy(&item->ooid, ooid);
oidcpy(&item->noid, noid);
item->email = xstrdup(email);
item->timestamp = timestamp;
item->tz = tz;
item->message = xstrdup(message);
array->nr++;
return 0;
}
static void free_complete_reflog(struct complete_reflogs *array)
{
int i;
if (!array)
return;
for (i = 0; i < array->nr; i++) {
free(array->items[i].email);
free(array->items[i].message);
}
free(array->items);
free(array->ref);
free(array->short_ref);
free(array);
}
static void complete_reflogs_clear(void *util, const char *str)
{
struct complete_reflogs *array = util;
free_complete_reflog(array);
}
static struct complete_reflogs *read_complete_reflog(const char *ref)
{
struct complete_reflogs *reflogs =
xcalloc(1, sizeof(struct complete_reflogs));
reflogs->ref = xstrdup(ref);
for_each_reflog_ent(ref, read_one_reflog, reflogs);
if (reflogs->nr == 0) {
const char *name;
void *name_to_free;
name = name_to_free = resolve_refdup(ref, RESOLVE_REF_READING,
NULL, NULL);
if (name) {
for_each_reflog_ent(name, read_one_reflog, reflogs);
free(name_to_free);
}
}
if (reflogs->nr == 0) {
char *refname = xstrfmt("refs/%s", ref);
for_each_reflog_ent(refname, read_one_reflog, reflogs);
if (reflogs->nr == 0) {
free(refname);
refname = xstrfmt("refs/heads/%s", ref);
for_each_reflog_ent(refname, read_one_reflog, reflogs);
}
free(refname);
}
return reflogs;
}
static int get_reflog_recno_by_time(struct complete_reflogs *array,
timestamp_t timestamp)
{
int i;
for (i = array->nr - 1; i >= 0; i--)
if (timestamp >= array->items[i].timestamp)
return i;
return -1;
}
struct commit_reflog {
int recno;
enum selector_type {
SELECTOR_NONE,
SELECTOR_INDEX,
SELECTOR_DATE
} selector;
struct complete_reflogs *reflogs;
};
struct reflog_walk_info {
reflog-walk: stop using fake parents The reflog-walk system works by putting a ref's tip into the pending queue, and then "traversing" the reflog by pretending that the parent of each commit is the previous reflog entry. This causes a number of user-visible oddities, as documented in t1414 (and the commit message which introduced it). We can fix all of them in one go by replacing the fake-reflog system with a much simpler one: just keeping a list of reflogs to show, and walking through them entry by entry. The implementation is fairly straight-forward, but there are a few items to note: 1. We obviously must skip calling add_parents_to_list() when we are traversing reflogs, since we do not want to walk the original parents at all. As a result, we must call try_to_simplify_commit() ourselves. There are other parts of add_parents_to_list() we skip, as well, but none of them should matter for a reflog traversal: - We do not allow UNINTERESTING commits, nor symmetric ranges (and we bail when these are used with "-g"). - Using --source makes no sense, since we aren't traversing. The reflog selector shows the same information with more detail. - Using --first-parent is still sensible, since you may want to see the first-parent diff for each entry. But since we're not traversing, we don't need to cull the parent list here. 2. Since we now just walk the reflog entries themselves, rather than starting with the ref tip, we now look at the "new" field of each entry rather than the "old" (i.e., we are showing entries, not faking parents). This removes all of the tricky logic around skipping past root commits. But note that we have no way to show an entry with the null sha1 in its "new" field (because such a commit obviously does not exist). Normally this would not happen, since we delete reflogs along with refs, but there is one special case. When we rename the currently checked out branch, we write two reflog entries into the HEAD log: one where the commit goes away, and another where it comes back. Prior to this commit, we show both entries with identical reflog messages. After this commit, we show only the "comes back" entry. See the update in t3200 which demonstrates this. Arguably either is fine, as the whole double-entry thing is a bit hacky in the first place. And until a recent fix, we truncated the traversal in such a case anyway, which was _definitely_ wrong. 3. We show individual reflogs in order, but choose which reflog to show at each stage based on which has the most recent timestamp. This interleaves the output from multiple reflogs based on date order, which is probably what you'd want with limiting like "-n 30". Note that the implementation aims for simplicity. It does a linear walk over the reflog queue for each commit it pulls, which may perform badly if you interleave an enormous number of reflogs. That seems like an unlikely use case; if we did want to handle it, we could probably keep a priority queue of reflogs, ordered by the timestamp of their current tip entry. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-07 11:14:07 +02:00
struct commit_reflog **logs;
size_t nr, alloc;
struct string_list complete_reflogs;
struct commit_reflog *last_commit_reflog;
};
void init_reflog_walk(struct reflog_walk_info **info)
{
CALLOC_ARRAY(*info, 1);
2017-07-07 10:39:50 +02:00
(*info)->complete_reflogs.strdup_strings = 1;
}
void reflog_walk_info_release(struct reflog_walk_info *info)
{
size_t i;
if (!info)
return;
for (i = 0; i < info->nr; i++)
free(info->logs[i]);
string_list_clear_func(&info->complete_reflogs,
complete_reflogs_clear);
free(info->logs);
free(info);
}
int add_reflog_for_walk(struct reflog_walk_info *info,
struct commit *commit, const char *name)
{
timestamp_t timestamp = 0;
int recno = -1;
struct string_list_item *item;
struct complete_reflogs *reflogs;
char *branch, *at = strchr(name, '@');
struct commit_reflog *commit_reflog;
enum selector_type selector = SELECTOR_NONE;
if (commit->object.flags & UNINTERESTING)
die("cannot walk reflogs for %s", name);
branch = xstrdup(name);
if (at && at[1] == '{') {
char *ep;
branch[at - name] = '\0';
recno = strtoul(at + 2, &ep, 10);
if (*ep != '}') {
recno = -1;
timestamp = approxidate(at + 2);
selector = SELECTOR_DATE;
}
else
selector = SELECTOR_INDEX;
} else
recno = 0;
item = string_list_lookup(&info->complete_reflogs, branch);
if (item)
reflogs = item->util;
else {
if (*branch == '\0') {
free(branch);
branch = resolve_refdup("HEAD", 0, NULL, NULL);
if (!branch)
die("no current branch");
}
reflogs = read_complete_reflog(branch);
if (!reflogs || reflogs->nr == 0) {
char *b;
int ret = dwim_log(branch, strlen(branch),
NULL, &b);
if (ret > 1)
free(b);
else if (ret == 1) {
free_complete_reflog(reflogs);
free(branch);
branch = b;
reflogs = read_complete_reflog(branch);
}
}
if (!reflogs || reflogs->nr == 0) {
free_complete_reflog(reflogs);
free(branch);
return -1;
}
string_list_insert(&info->complete_reflogs, branch)->util
= reflogs;
}
free(branch);
CALLOC_ARRAY(commit_reflog, 1);
if (recno < 0) {
commit_reflog->recno = get_reflog_recno_by_time(reflogs, timestamp);
if (commit_reflog->recno < 0) {
free(commit_reflog);
return -1;
}
} else
commit_reflog->recno = reflogs->nr - recno - 1;
commit_reflog->selector = selector;
commit_reflog->reflogs = reflogs;
reflog-walk: stop using fake parents The reflog-walk system works by putting a ref's tip into the pending queue, and then "traversing" the reflog by pretending that the parent of each commit is the previous reflog entry. This causes a number of user-visible oddities, as documented in t1414 (and the commit message which introduced it). We can fix all of them in one go by replacing the fake-reflog system with a much simpler one: just keeping a list of reflogs to show, and walking through them entry by entry. The implementation is fairly straight-forward, but there are a few items to note: 1. We obviously must skip calling add_parents_to_list() when we are traversing reflogs, since we do not want to walk the original parents at all. As a result, we must call try_to_simplify_commit() ourselves. There are other parts of add_parents_to_list() we skip, as well, but none of them should matter for a reflog traversal: - We do not allow UNINTERESTING commits, nor symmetric ranges (and we bail when these are used with "-g"). - Using --source makes no sense, since we aren't traversing. The reflog selector shows the same information with more detail. - Using --first-parent is still sensible, since you may want to see the first-parent diff for each entry. But since we're not traversing, we don't need to cull the parent list here. 2. Since we now just walk the reflog entries themselves, rather than starting with the ref tip, we now look at the "new" field of each entry rather than the "old" (i.e., we are showing entries, not faking parents). This removes all of the tricky logic around skipping past root commits. But note that we have no way to show an entry with the null sha1 in its "new" field (because such a commit obviously does not exist). Normally this would not happen, since we delete reflogs along with refs, but there is one special case. When we rename the currently checked out branch, we write two reflog entries into the HEAD log: one where the commit goes away, and another where it comes back. Prior to this commit, we show both entries with identical reflog messages. After this commit, we show only the "comes back" entry. See the update in t3200 which demonstrates this. Arguably either is fine, as the whole double-entry thing is a bit hacky in the first place. And until a recent fix, we truncated the traversal in such a case anyway, which was _definitely_ wrong. 3. We show individual reflogs in order, but choose which reflog to show at each stage based on which has the most recent timestamp. This interleaves the output from multiple reflogs based on date order, which is probably what you'd want with limiting like "-n 30". Note that the implementation aims for simplicity. It does a linear walk over the reflog queue for each commit it pulls, which may perform badly if you interleave an enormous number of reflogs. That seems like an unlikely use case; if we did want to handle it, we could probably keep a priority queue of reflogs, ordered by the timestamp of their current tip entry. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-07 11:14:07 +02:00
ALLOC_GROW(info->logs, info->nr + 1, info->alloc);
info->logs[info->nr++] = commit_reflog;
reflog-walk: stop using fake parents The reflog-walk system works by putting a ref's tip into the pending queue, and then "traversing" the reflog by pretending that the parent of each commit is the previous reflog entry. This causes a number of user-visible oddities, as documented in t1414 (and the commit message which introduced it). We can fix all of them in one go by replacing the fake-reflog system with a much simpler one: just keeping a list of reflogs to show, and walking through them entry by entry. The implementation is fairly straight-forward, but there are a few items to note: 1. We obviously must skip calling add_parents_to_list() when we are traversing reflogs, since we do not want to walk the original parents at all. As a result, we must call try_to_simplify_commit() ourselves. There are other parts of add_parents_to_list() we skip, as well, but none of them should matter for a reflog traversal: - We do not allow UNINTERESTING commits, nor symmetric ranges (and we bail when these are used with "-g"). - Using --source makes no sense, since we aren't traversing. The reflog selector shows the same information with more detail. - Using --first-parent is still sensible, since you may want to see the first-parent diff for each entry. But since we're not traversing, we don't need to cull the parent list here. 2. Since we now just walk the reflog entries themselves, rather than starting with the ref tip, we now look at the "new" field of each entry rather than the "old" (i.e., we are showing entries, not faking parents). This removes all of the tricky logic around skipping past root commits. But note that we have no way to show an entry with the null sha1 in its "new" field (because such a commit obviously does not exist). Normally this would not happen, since we delete reflogs along with refs, but there is one special case. When we rename the currently checked out branch, we write two reflog entries into the HEAD log: one where the commit goes away, and another where it comes back. Prior to this commit, we show both entries with identical reflog messages. After this commit, we show only the "comes back" entry. See the update in t3200 which demonstrates this. Arguably either is fine, as the whole double-entry thing is a bit hacky in the first place. And until a recent fix, we truncated the traversal in such a case anyway, which was _definitely_ wrong. 3. We show individual reflogs in order, but choose which reflog to show at each stage based on which has the most recent timestamp. This interleaves the output from multiple reflogs based on date order, which is probably what you'd want with limiting like "-n 30". Note that the implementation aims for simplicity. It does a linear walk over the reflog queue for each commit it pulls, which may perform badly if you interleave an enormous number of reflogs. That seems like an unlikely use case; if we did want to handle it, we could probably keep a priority queue of reflogs, ordered by the timestamp of their current tip entry. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-07 11:14:07 +02:00
return 0;
}
void get_reflog_selector(struct strbuf *sb,
struct reflog_walk_info *reflog_info,
convert "enum date_mode" into a struct In preparation for adding date modes that may carry extra information beyond the mode itself, this patch converts the date_mode enum into a struct. Most of the conversion is fairly straightforward; we pass the struct as a pointer and dereference the type field where necessary. Locations that declare a date_mode can use a "{}" constructor. However, the tricky case is where we use the enum labels as constants, like: show_date(t, tz, DATE_NORMAL); Ideally we could say: show_date(t, tz, &{ DATE_NORMAL }); but of course C does not allow that. Likewise, we cannot cast the constant to a struct, because we need to pass an actual address. Our options are basically: 1. Manually add a "struct date_mode d = { DATE_NORMAL }" definition to each caller, and pass "&d". This makes the callers uglier, because they sometimes do not even have their own scope (e.g., they are inside a switch statement). 2. Provide a pre-made global "date_normal" struct that can be passed by address. We'd also need "date_rfc2822", "date_iso8601", and so forth. But at least the ugliness is defined in one place. 3. Provide a wrapper that generates the correct struct on the fly. The big downside is that we end up pointing to a single global, which makes our wrapper non-reentrant. But show_date is already not reentrant, so it does not matter. This patch implements 3, along with a minor macro to keep the size of the callers sane. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-25 18:55:02 +02:00
const struct date_mode *dmode, int force_date,
int shorten)
{
struct commit_reflog *commit_reflog = reflog_info->last_commit_reflog;
struct reflog_info *info;
const char *printed_ref;
if (!commit_reflog)
return;
if (shorten) {
if (!commit_reflog->reflogs->short_ref)
commit_reflog->reflogs->short_ref
= shorten_unambiguous_ref(commit_reflog->reflogs->ref, 0);
printed_ref = commit_reflog->reflogs->short_ref;
} else {
printed_ref = commit_reflog->reflogs->ref;
}
strbuf_addf(sb, "%s@{", printed_ref);
if (commit_reflog->selector == SELECTOR_DATE ||
(commit_reflog->selector == SELECTOR_NONE && force_date)) {
info = &commit_reflog->reflogs->items[commit_reflog->recno+1];
strbuf_addstr(sb, show_date(info->timestamp, info->tz, dmode));
} else {
strbuf_addf(sb, "%d", commit_reflog->reflogs->nr
- 2 - commit_reflog->recno);
}
strbuf_addch(sb, '}');
}
void get_reflog_message(struct strbuf *sb,
struct reflog_walk_info *reflog_info)
{
struct commit_reflog *commit_reflog = reflog_info->last_commit_reflog;
struct reflog_info *info;
size_t len;
if (!commit_reflog)
return;
info = &commit_reflog->reflogs->items[commit_reflog->recno+1];
len = strlen(info->message);
if (len > 0)
len--; /* strip away trailing newline */
strbuf_add(sb, info->message, len);
}
const char *get_reflog_ident(struct reflog_walk_info *reflog_info)
{
struct commit_reflog *commit_reflog = reflog_info->last_commit_reflog;
struct reflog_info *info;
if (!commit_reflog)
return NULL;
info = &commit_reflog->reflogs->items[commit_reflog->recno+1];
return info->email;
}
timestamp_t get_reflog_timestamp(struct reflog_walk_info *reflog_info)
{
struct commit_reflog *commit_reflog = reflog_info->last_commit_reflog;
struct reflog_info *info;
if (!commit_reflog)
return 0;
info = &commit_reflog->reflogs->items[commit_reflog->recno+1];
return info->timestamp;
}
void show_reflog_message(struct reflog_walk_info *reflog_info, int oneline,
convert "enum date_mode" into a struct In preparation for adding date modes that may carry extra information beyond the mode itself, this patch converts the date_mode enum into a struct. Most of the conversion is fairly straightforward; we pass the struct as a pointer and dereference the type field where necessary. Locations that declare a date_mode can use a "{}" constructor. However, the tricky case is where we use the enum labels as constants, like: show_date(t, tz, DATE_NORMAL); Ideally we could say: show_date(t, tz, &{ DATE_NORMAL }); but of course C does not allow that. Likewise, we cannot cast the constant to a struct, because we need to pass an actual address. Our options are basically: 1. Manually add a "struct date_mode d = { DATE_NORMAL }" definition to each caller, and pass "&d". This makes the callers uglier, because they sometimes do not even have their own scope (e.g., they are inside a switch statement). 2. Provide a pre-made global "date_normal" struct that can be passed by address. We'd also need "date_rfc2822", "date_iso8601", and so forth. But at least the ugliness is defined in one place. 3. Provide a wrapper that generates the correct struct on the fly. The big downside is that we end up pointing to a single global, which makes our wrapper non-reentrant. But show_date is already not reentrant, so it does not matter. This patch implements 3, along with a minor macro to keep the size of the callers sane. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-25 18:55:02 +02:00
const struct date_mode *dmode, int force_date)
{
if (reflog_info && reflog_info->last_commit_reflog) {
struct commit_reflog *commit_reflog = reflog_info->last_commit_reflog;
struct reflog_info *info;
struct strbuf selector = STRBUF_INIT;
info = &commit_reflog->reflogs->items[commit_reflog->recno+1];
get_reflog_selector(&selector, reflog_info, dmode, force_date, 0);
if (oneline) {
printf("%s: %s", selector.buf, info->message);
}
else {
printf("Reflog: %s (%s)\nReflog message: %s",
selector.buf, info->email, info->message);
}
strbuf_release(&selector);
}
}
int reflog_walk_empty(struct reflog_walk_info *info)
{
reflog-walk: stop using fake parents The reflog-walk system works by putting a ref's tip into the pending queue, and then "traversing" the reflog by pretending that the parent of each commit is the previous reflog entry. This causes a number of user-visible oddities, as documented in t1414 (and the commit message which introduced it). We can fix all of them in one go by replacing the fake-reflog system with a much simpler one: just keeping a list of reflogs to show, and walking through them entry by entry. The implementation is fairly straight-forward, but there are a few items to note: 1. We obviously must skip calling add_parents_to_list() when we are traversing reflogs, since we do not want to walk the original parents at all. As a result, we must call try_to_simplify_commit() ourselves. There are other parts of add_parents_to_list() we skip, as well, but none of them should matter for a reflog traversal: - We do not allow UNINTERESTING commits, nor symmetric ranges (and we bail when these are used with "-g"). - Using --source makes no sense, since we aren't traversing. The reflog selector shows the same information with more detail. - Using --first-parent is still sensible, since you may want to see the first-parent diff for each entry. But since we're not traversing, we don't need to cull the parent list here. 2. Since we now just walk the reflog entries themselves, rather than starting with the ref tip, we now look at the "new" field of each entry rather than the "old" (i.e., we are showing entries, not faking parents). This removes all of the tricky logic around skipping past root commits. But note that we have no way to show an entry with the null sha1 in its "new" field (because such a commit obviously does not exist). Normally this would not happen, since we delete reflogs along with refs, but there is one special case. When we rename the currently checked out branch, we write two reflog entries into the HEAD log: one where the commit goes away, and another where it comes back. Prior to this commit, we show both entries with identical reflog messages. After this commit, we show only the "comes back" entry. See the update in t3200 which demonstrates this. Arguably either is fine, as the whole double-entry thing is a bit hacky in the first place. And until a recent fix, we truncated the traversal in such a case anyway, which was _definitely_ wrong. 3. We show individual reflogs in order, but choose which reflog to show at each stage based on which has the most recent timestamp. This interleaves the output from multiple reflogs based on date order, which is probably what you'd want with limiting like "-n 30". Note that the implementation aims for simplicity. It does a linear walk over the reflog queue for each commit it pulls, which may perform badly if you interleave an enormous number of reflogs. That seems like an unlikely use case; if we did want to handle it, we could probably keep a priority queue of reflogs, ordered by the timestamp of their current tip entry. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-07 11:14:07 +02:00
return !info || !info->nr;
}
static struct commit *next_reflog_commit(struct commit_reflog *log)
{
for (; log->recno >= 0; log->recno--) {
struct reflog_info *entry = &log->reflogs->items[log->recno];
struct object *obj = parse_object(the_repository,
&entry->noid);
reflog-walk: stop using fake parents The reflog-walk system works by putting a ref's tip into the pending queue, and then "traversing" the reflog by pretending that the parent of each commit is the previous reflog entry. This causes a number of user-visible oddities, as documented in t1414 (and the commit message which introduced it). We can fix all of them in one go by replacing the fake-reflog system with a much simpler one: just keeping a list of reflogs to show, and walking through them entry by entry. The implementation is fairly straight-forward, but there are a few items to note: 1. We obviously must skip calling add_parents_to_list() when we are traversing reflogs, since we do not want to walk the original parents at all. As a result, we must call try_to_simplify_commit() ourselves. There are other parts of add_parents_to_list() we skip, as well, but none of them should matter for a reflog traversal: - We do not allow UNINTERESTING commits, nor symmetric ranges (and we bail when these are used with "-g"). - Using --source makes no sense, since we aren't traversing. The reflog selector shows the same information with more detail. - Using --first-parent is still sensible, since you may want to see the first-parent diff for each entry. But since we're not traversing, we don't need to cull the parent list here. 2. Since we now just walk the reflog entries themselves, rather than starting with the ref tip, we now look at the "new" field of each entry rather than the "old" (i.e., we are showing entries, not faking parents). This removes all of the tricky logic around skipping past root commits. But note that we have no way to show an entry with the null sha1 in its "new" field (because such a commit obviously does not exist). Normally this would not happen, since we delete reflogs along with refs, but there is one special case. When we rename the currently checked out branch, we write two reflog entries into the HEAD log: one where the commit goes away, and another where it comes back. Prior to this commit, we show both entries with identical reflog messages. After this commit, we show only the "comes back" entry. See the update in t3200 which demonstrates this. Arguably either is fine, as the whole double-entry thing is a bit hacky in the first place. And until a recent fix, we truncated the traversal in such a case anyway, which was _definitely_ wrong. 3. We show individual reflogs in order, but choose which reflog to show at each stage based on which has the most recent timestamp. This interleaves the output from multiple reflogs based on date order, which is probably what you'd want with limiting like "-n 30". Note that the implementation aims for simplicity. It does a linear walk over the reflog queue for each commit it pulls, which may perform badly if you interleave an enormous number of reflogs. That seems like an unlikely use case; if we did want to handle it, we could probably keep a priority queue of reflogs, ordered by the timestamp of their current tip entry. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-07 11:14:07 +02:00
if (obj && obj->type == OBJ_COMMIT)
return (struct commit *)obj;
}
return NULL;
}
static timestamp_t log_timestamp(struct commit_reflog *log)
{
return log->reflogs->items[log->recno].timestamp;
}
struct commit *next_reflog_entry(struct reflog_walk_info *walk)
{
struct commit_reflog *best = NULL;
struct commit *best_commit = NULL;
size_t i;
for (i = 0; i < walk->nr; i++) {
struct commit_reflog *log = walk->logs[i];
struct commit *commit = next_reflog_commit(log);
if (!commit)
continue;
if (!best || log_timestamp(log) > log_timestamp(best)) {
best = log;
best_commit = commit;
}
}
if (best) {
best->recno--;
walk->last_commit_reflog = best;
return best_commit;
}
return NULL;
}