git-commit-vandalism/builtin/reflog.c

400 lines
11 KiB
C
Raw Normal View History

#include "builtin.h"
#include "config.h"
#include "revision.h"
#include "reachable.h"
#include "worktree.h"
#include "reflog.h"
static const char reflog_exists_usage[] =
N_("git reflog exists <ref>");
static timestamp_t default_reflog_expire;
static timestamp_t default_reflog_expire_unreachable;
struct worktree_reflogs {
struct worktree *worktree;
struct string_list reflogs;
};
static int collect_reflog(const char *ref, const struct object_id *oid, int unused, void *cb_data)
{
struct worktree_reflogs *cb = cb_data;
struct worktree *worktree = cb->worktree;
struct strbuf newref = STRBUF_INIT;
/*
* Avoid collecting the same shared ref multiple times because
* they are available via all worktrees.
*/
if (!worktree->is_current && ref_type(ref) == REF_TYPE_NORMAL)
return 0;
strbuf_worktree_ref(worktree, &newref, ref);
string_list_append_nodup(&cb->reflogs, strbuf_detach(&newref, NULL));
return 0;
}
static struct reflog_expire_cfg {
struct reflog_expire_cfg *next;
timestamp_t expire_total;
timestamp_t expire_unreachable;
char pattern[FLEX_ARRAY];
} *reflog_expire_cfg, **reflog_expire_cfg_tail;
static struct reflog_expire_cfg *find_cfg_ent(const char *pattern, size_t len)
{
struct reflog_expire_cfg *ent;
if (!reflog_expire_cfg_tail)
reflog_expire_cfg_tail = &reflog_expire_cfg;
for (ent = reflog_expire_cfg; ent; ent = ent->next)
if (!strncmp(ent->pattern, pattern, len) &&
ent->pattern[len] == '\0')
return ent;
FLEX_ALLOC_MEM(ent, pattern, pattern, len);
*reflog_expire_cfg_tail = ent;
reflog_expire_cfg_tail = &(ent->next);
return ent;
}
/* expiry timer slot */
#define EXPIRE_TOTAL 01
#define EXPIRE_UNREACH 02
static int reflog_expire_config(const char *var, const char *value, void *cb)
{
const char *pattern, *key;
size_t pattern_len;
timestamp_t expire;
int slot;
struct reflog_expire_cfg *ent;
if (parse_config_key(var, "gc", &pattern, &pattern_len, &key) < 0)
return git_default_config(var, value, cb);
if (!strcmp(key, "reflogexpire")) {
slot = EXPIRE_TOTAL;
if (git_config_expiry_date(&expire, var, value))
return -1;
} else if (!strcmp(key, "reflogexpireunreachable")) {
slot = EXPIRE_UNREACH;
if (git_config_expiry_date(&expire, var, value))
return -1;
} else
return git_default_config(var, value, cb);
if (!pattern) {
switch (slot) {
case EXPIRE_TOTAL:
default_reflog_expire = expire;
break;
case EXPIRE_UNREACH:
default_reflog_expire_unreachable = expire;
break;
}
return 0;
}
ent = find_cfg_ent(pattern, pattern_len);
if (!ent)
return -1;
switch (slot) {
case EXPIRE_TOTAL:
ent->expire_total = expire;
break;
case EXPIRE_UNREACH:
ent->expire_unreachable = expire;
break;
}
return 0;
}
static void set_reflog_expiry_param(struct cmd_reflog_expire_cb *cb, const char *ref)
{
struct reflog_expire_cfg *ent;
if (cb->explicit_expiry == (EXPIRE_TOTAL|EXPIRE_UNREACH))
return; /* both given explicitly -- nothing to tweak */
for (ent = reflog_expire_cfg; ent; ent = ent->next) {
if (!wildmatch(ent->pattern, ref, 0)) {
if (!(cb->explicit_expiry & EXPIRE_TOTAL))
cb->expire_total = ent->expire_total;
if (!(cb->explicit_expiry & EXPIRE_UNREACH))
cb->expire_unreachable = ent->expire_unreachable;
return;
}
}
/*
* If unconfigured, make stash never expire
*/
if (!strcmp(ref, "refs/stash")) {
if (!(cb->explicit_expiry & EXPIRE_TOTAL))
cb->expire_total = 0;
if (!(cb->explicit_expiry & EXPIRE_UNREACH))
cb->expire_unreachable = 0;
return;
}
/* Nothing matched -- use the default value */
if (!(cb->explicit_expiry & EXPIRE_TOTAL))
cb->expire_total = default_reflog_expire;
if (!(cb->explicit_expiry & EXPIRE_UNREACH))
cb->expire_unreachable = default_reflog_expire_unreachable;
}
static const char * reflog_expire_usage[] = {
N_("git reflog expire [--expire=<time>] "
"[--expire-unreachable=<time>] "
"[--rewrite] [--updateref] [--stale-fix] [--dry-run | -n] "
"[--verbose] [--all] <refs>..."),
NULL
};
static int expire_unreachable_callback(const struct option *opt,
const char *arg,
int unset)
{
struct cmd_reflog_expire_cb *cmd = opt->value;
if (parse_expiry_date(arg, &cmd->expire_unreachable))
die(_("invalid timestamp '%s' given to '--%s'"),
arg, opt->long_name);
cmd->explicit_expiry |= EXPIRE_UNREACH;
return 0;
}
static int expire_total_callback(const struct option *opt,
const char *arg,
int unset)
{
struct cmd_reflog_expire_cb *cmd = opt->value;
if (parse_expiry_date(arg, &cmd->expire_total))
die(_("invalid timestamp '%s' given to '--%s'"),
arg, opt->long_name);
cmd->explicit_expiry |= EXPIRE_TOTAL;
return 0;
}
static int cmd_reflog_expire(int argc, const char **argv, const char *prefix)
{
struct cmd_reflog_expire_cb cmd = { 0 };
timestamp_t now = time(NULL);
int i, status, do_all, all_worktrees = 1;
unsigned int flags = 0;
int verbose = 0;
reflog_expiry_should_prune_fn *should_prune_fn = should_expire_reflog_ent;
const struct option options[] = {
OPT_BIT(0, "dry-run", &flags, N_("do not actually prune any entries"),
EXPIRE_REFLOGS_DRY_RUN),
OPT_BIT(0, "rewrite", &flags,
N_("rewrite the old SHA1 with the new SHA1 of the entry that now precedes it"),
EXPIRE_REFLOGS_REWRITE),
OPT_BIT(0, "updateref", &flags,
N_("update the reference to the value of the top reflog entry"),
EXPIRE_REFLOGS_UPDATE_REF),
OPT_BOOL(0, "verbose", &verbose, N_("print extra information on screen")),
OPT_CALLBACK_F(0, "expire", &cmd, N_("timestamp"),
N_("prune entries older than the specified time"),
PARSE_OPT_NONEG,
expire_total_callback),
OPT_CALLBACK_F(0, "expire-unreachable", &cmd, N_("timestamp"),
N_("prune entries older than <time> that are not reachable from the current tip of the branch"),
PARSE_OPT_NONEG,
expire_unreachable_callback),
OPT_BOOL(0, "stale-fix", &cmd.stalefix,
N_("prune any reflog entries that point to broken commits")),
OPT_BOOL(0, "all", &do_all, N_("process the reflogs of all references")),
OPT_BOOL(1, "single-worktree", &all_worktrees,
N_("limits processing to reflogs from the current worktree only")),
OPT_END()
};
default_reflog_expire_unreachable = now - 30 * 24 * 3600;
default_reflog_expire = now - 90 * 24 * 3600;
git_config(reflog_expire_config, NULL);
save_commit_buffer = 0;
do_all = status = 0;
cmd.explicit_expiry = 0;
cmd.expire_total = default_reflog_expire;
cmd.expire_unreachable = default_reflog_expire_unreachable;
argc = parse_options(argc, argv, prefix, options, reflog_expire_usage, 0);
if (verbose)
should_prune_fn = should_expire_reflog_ent_verbose;
/*
* We can trust the commits and objects reachable from refs
* even in older repository. We cannot trust what's reachable
* from reflog if the repository was pruned with older git.
*/
if (cmd.stalefix) {
struct rev_info revs;
repo_init_revisions(the_repository, &revs, prefix);
revs.do_not_die_on_missing_tree = 1;
revs.ignore_missing = 1;
revs.ignore_missing_links = 1;
if (verbose)
printf(_("Marking reachable objects..."));
mark_reachable_objects(&revs, 0, 0, NULL);
release_revisions(&revs);
if (verbose)
putchar('\n');
}
if (do_all) {
struct worktree_reflogs collected = {
.reflogs = STRING_LIST_INIT_DUP,
};
struct string_list_item *item;
struct worktree **worktrees, **p;
worktrees = get_worktrees();
for (p = worktrees; *p; p++) {
if (!all_worktrees && !(*p)->is_current)
continue;
collected.worktree = *p;
refs_for_each_reflog(get_worktree_ref_store(*p),
collect_reflog, &collected);
}
free_worktrees(worktrees);
for_each_string_list_item(item, &collected.reflogs) {
struct expire_reflog_policy_cb cb = {
.cmd = cmd,
.dry_run = !!(flags & EXPIRE_REFLOGS_DRY_RUN),
};
reflog expire: don't lock reflogs using previously seen OID During reflog expiry, the cmd_reflog_expire() function first iterates over all reflogs in logs/*, and then one-by-one acquires the lock for each one and expires it. This behavior has been with us since this command was implemented in 4264dc15e1 ("git reflog expire", 2006-12-19). Change this to stop calling lock_ref_oid_basic() with the OID we saw when we looped over the logs, instead have it pass the OID it managed to lock. This mostly mitigates a race condition where e.g. "git gc" will fail in a concurrently updated repository because the branch moved since "git reflog expire --all" was started. I.e. with: error: cannot lock ref '<refname>': ref '<refname>' is at <OID-A> but expected <OID-B> This behavior of passing in an "oid" was needed for an edge-case that I've untangled in this and preceding commits though, namely that we needed this OID because we'd: 1. Lookup the reflog name/OID via dwim_log() 2. With that OID, lock the reflog 3. Later in builtin/reflog.c we use the OID we looked as input to lookup_commit_reference_gently(), assured that it's equal to the OID we got from dwim_log(). We can be sure that this change is safe to make because between dwim_log (step #1) and lock_ref_oid_basic (step #2) there was no other logic relevant to the OID or expiry run in the cmd_reflog_expire() caller. We can thus treat that code as a black box, before and after this change it would get an OID that's been locked, the only difference is that now we mostly won't be failing to get the lock due to the TOCTOU race[0]. That failure was purely an implementation detail in how the "current OID" was looked up, it was divorced from the locking mechanism. What do we mean with "mostly"? It mostly mitigates it because we'll still run into cases where the ref is locked and being updated as we want to expire it, and other git processes wanting to update the refs will in turn race with us as we expire the reflog. That remaining race can in turn be mitigated with the core.filesRefLockTimeout setting, see 4ff0f01cb7 ("refs: retry acquiring reference locks for 100ms", 2017-08-21). In practice if that value is high enough we'll probably never have ref updates or reflog expiry failing, since the clients involved will retry for far longer than the time any of those operations could take. See [1] for an initial report of how this impacted "git gc" and a large discussion about this change in early 2019. In particular patch looked good to Michael Haggerty, see his[2]. That message seems to not have made it to the ML archive, its content is quoted in full in my [3]. I'm leaving behind now-unused code the refs API etc. that takes the now-NULL "unused_oid" argument, and other code that can be simplified now that we never have on OID in that context, that'll be cleaned up in subsequent commits, but for now let's narrowly focus on fixing the "git gc" issue. As the modified assert() shows we always pass a NULL oid to reflog_expire() now. Unfortunately this sort of probabilistic contention is hard to turn into a test. I've tested this by running the following three subshells in concurrent terminals: ( rm -rf /tmp/git && git init /tmp/git && while true do head -c 10 /dev/urandom | hexdump >/tmp/git/out && git -C /tmp/git add out && git -C /tmp/git commit -m"out" done ) ( rm -rf /tmp/git-clone && git clone file:///tmp/git /tmp/git-clone && while git -C /tmp/git-clone pull do date done ) ( while git -C /tmp/git-clone reflog expire --all do date done ) Before this change the "reflog expire" would fail really quickly with the "but expected" error noted above. After this change both the "pull" and "reflog expire" will run for a while, but eventually fail because I get unlucky with core.filesRefLockTimeout (the "reflog expire" is in a really tight loop). As noted above that can in turn be mitigated with higher values of core.filesRefLockTimeout than the 100ms default. As noted in the commentary added in the preceding commit there's also the case of branches being racily deleted, that can be tested by adding this to the above: ( while git -C /tmp/git-clone branch topic master && git -C /tmp/git-clone branch -D topic do date done ) With core.filesRefLockTimeout set to 10 seconds (it can probably be a lot lower) I managed to run all four of these concurrently for about an hour, and accumulated ~125k commits, auto-gc's and all, and didn't have a single failure. The loops visibly stall while waiting for the lock, but that's expected and desired behavior. 0. https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use 1. https://lore.kernel.org/git/87tvg7brlm.fsf@evledraar.gmail.com/ 2. http://lore.kernel.org/git/b870a17d-2103-41b8-3cbc-7389d5fff33a@alum.mit.edu 3. https://lore.kernel.org/git/87pnqkco8v.fsf@evledraar.gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-08-23 13:36:10 +02:00
set_reflog_expiry_param(&cb.cmd, item->string);
status |= reflog_expire(item->string, flags,
reflog_expiry_prepare,
should_prune_fn,
reflog_expiry_cleanup,
&cb);
}
string_list_clear(&collected.reflogs, 0);
}
for (i = 0; i < argc; i++) {
char *ref;
struct expire_reflog_policy_cb cb = { .cmd = cmd };
reflog expire: don't lock reflogs using previously seen OID During reflog expiry, the cmd_reflog_expire() function first iterates over all reflogs in logs/*, and then one-by-one acquires the lock for each one and expires it. This behavior has been with us since this command was implemented in 4264dc15e1 ("git reflog expire", 2006-12-19). Change this to stop calling lock_ref_oid_basic() with the OID we saw when we looped over the logs, instead have it pass the OID it managed to lock. This mostly mitigates a race condition where e.g. "git gc" will fail in a concurrently updated repository because the branch moved since "git reflog expire --all" was started. I.e. with: error: cannot lock ref '<refname>': ref '<refname>' is at <OID-A> but expected <OID-B> This behavior of passing in an "oid" was needed for an edge-case that I've untangled in this and preceding commits though, namely that we needed this OID because we'd: 1. Lookup the reflog name/OID via dwim_log() 2. With that OID, lock the reflog 3. Later in builtin/reflog.c we use the OID we looked as input to lookup_commit_reference_gently(), assured that it's equal to the OID we got from dwim_log(). We can be sure that this change is safe to make because between dwim_log (step #1) and lock_ref_oid_basic (step #2) there was no other logic relevant to the OID or expiry run in the cmd_reflog_expire() caller. We can thus treat that code as a black box, before and after this change it would get an OID that's been locked, the only difference is that now we mostly won't be failing to get the lock due to the TOCTOU race[0]. That failure was purely an implementation detail in how the "current OID" was looked up, it was divorced from the locking mechanism. What do we mean with "mostly"? It mostly mitigates it because we'll still run into cases where the ref is locked and being updated as we want to expire it, and other git processes wanting to update the refs will in turn race with us as we expire the reflog. That remaining race can in turn be mitigated with the core.filesRefLockTimeout setting, see 4ff0f01cb7 ("refs: retry acquiring reference locks for 100ms", 2017-08-21). In practice if that value is high enough we'll probably never have ref updates or reflog expiry failing, since the clients involved will retry for far longer than the time any of those operations could take. See [1] for an initial report of how this impacted "git gc" and a large discussion about this change in early 2019. In particular patch looked good to Michael Haggerty, see his[2]. That message seems to not have made it to the ML archive, its content is quoted in full in my [3]. I'm leaving behind now-unused code the refs API etc. that takes the now-NULL "unused_oid" argument, and other code that can be simplified now that we never have on OID in that context, that'll be cleaned up in subsequent commits, but for now let's narrowly focus on fixing the "git gc" issue. As the modified assert() shows we always pass a NULL oid to reflog_expire() now. Unfortunately this sort of probabilistic contention is hard to turn into a test. I've tested this by running the following three subshells in concurrent terminals: ( rm -rf /tmp/git && git init /tmp/git && while true do head -c 10 /dev/urandom | hexdump >/tmp/git/out && git -C /tmp/git add out && git -C /tmp/git commit -m"out" done ) ( rm -rf /tmp/git-clone && git clone file:///tmp/git /tmp/git-clone && while git -C /tmp/git-clone pull do date done ) ( while git -C /tmp/git-clone reflog expire --all do date done ) Before this change the "reflog expire" would fail really quickly with the "but expected" error noted above. After this change both the "pull" and "reflog expire" will run for a while, but eventually fail because I get unlucky with core.filesRefLockTimeout (the "reflog expire" is in a really tight loop). As noted above that can in turn be mitigated with higher values of core.filesRefLockTimeout than the 100ms default. As noted in the commentary added in the preceding commit there's also the case of branches being racily deleted, that can be tested by adding this to the above: ( while git -C /tmp/git-clone branch topic master && git -C /tmp/git-clone branch -D topic do date done ) With core.filesRefLockTimeout set to 10 seconds (it can probably be a lot lower) I managed to run all four of these concurrently for about an hour, and accumulated ~125k commits, auto-gc's and all, and didn't have a single failure. The loops visibly stall while waiting for the lock, but that's expected and desired behavior. 0. https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use 1. https://lore.kernel.org/git/87tvg7brlm.fsf@evledraar.gmail.com/ 2. http://lore.kernel.org/git/b870a17d-2103-41b8-3cbc-7389d5fff33a@alum.mit.edu 3. https://lore.kernel.org/git/87pnqkco8v.fsf@evledraar.gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-08-23 13:36:10 +02:00
if (!dwim_log(argv[i], strlen(argv[i]), NULL, &ref)) {
status |= error(_("%s points nowhere!"), argv[i]);
continue;
}
set_reflog_expiry_param(&cb.cmd, ref);
status |= reflog_expire(ref, flags,
reflog_expiry_prepare,
should_prune_fn,
reflog_expiry_cleanup,
&cb);
free(ref);
}
return status;
}
static const char * reflog_delete_usage[] = {
N_("git reflog delete [--rewrite] [--updateref] "
"[--dry-run | -n] [--verbose] <refs>..."),
NULL
};
static int cmd_reflog_delete(int argc, const char **argv, const char *prefix)
{
int i, status = 0;
unsigned int flags = 0;
int verbose = 0;
const struct option options[] = {
OPT_BIT(0, "dry-run", &flags, N_("do not actually prune any entries"),
EXPIRE_REFLOGS_DRY_RUN),
OPT_BIT(0, "rewrite", &flags,
N_("rewrite the old SHA1 with the new SHA1 of the entry that now precedes it"),
EXPIRE_REFLOGS_REWRITE),
OPT_BIT(0, "updateref", &flags,
N_("update the reference to the value of the top reflog entry"),
EXPIRE_REFLOGS_UPDATE_REF),
OPT_BOOL(0, "verbose", &verbose, N_("print extra information on screen")),
OPT_END()
};
argc = parse_options(argc, argv, prefix, options, reflog_delete_usage, 0);
if (argc < 1)
return error(_("no reflog specified to delete"));
for (i = 0; i < argc; i++)
status |= reflog_delete(argv[i], flags, verbose);
return status;
}
static int cmd_reflog_exists(int argc, const char **argv, const char *prefix)
{
int i, start = 0;
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
if (!strcmp(arg, "--")) {
i++;
break;
}
else if (arg[0] == '-')
usage(_(reflog_exists_usage));
else
break;
}
start = i;
if (argc - start != 1)
usage(_(reflog_exists_usage));
if (check_refname_format(argv[start], REFNAME_ALLOW_ONELEVEL))
die(_("invalid ref format: %s"), argv[start]);
return !reflog_exists(argv[start]);
}
/*
* main "reflog"
*/
static const char reflog_usage[] =
"git reflog [ show | expire | delete | exists ]";
int cmd_reflog(int argc, const char **argv, const char *prefix)
{
if (argc > 1 && !strcmp(argv[1], "-h"))
usage(_(reflog_usage));
/* With no command, we default to showing it. */
if (argc < 2 || *argv[1] == '-')
return cmd_log_reflog(argc, argv, prefix);
if (!strcmp(argv[1], "show"))
return cmd_log_reflog(argc - 1, argv + 1, prefix);
if (!strcmp(argv[1], "expire"))
return cmd_reflog_expire(argc - 1, argv + 1, prefix);
if (!strcmp(argv[1], "delete"))
return cmd_reflog_delete(argc - 1, argv + 1, prefix);
if (!strcmp(argv[1], "exists"))
return cmd_reflog_exists(argc - 1, argv + 1, prefix);
return cmd_log_reflog(argc, argv, prefix);
}