/* git-commit-vandalism/lockfile.c */

/*
* Copyright (c) 2005, Junio C Hamano
*/
#include "cache.h"
#include "lockfile.h"
/*
 * path = absolute or relative path name
 *
 * Remove the last path name element from path, leaving the "/" that
 * precedes it (if any). Trailing slashes after the last element are
 * removed along with it. If path is empty, has a single element, or
 * consists only of slashes (e.g. the root directory "/"), path becomes
 * the empty string.
 */
static void trim_last_path_component(struct strbuf *path)
{
	/*
	 * Index with size_t rather than int so that the length is not
	 * narrowed when it is copied out of the strbuf.
	 */
	size_t i = path->len;

	/* back up past trailing slashes, if any */
	while (i && path->buf[i - 1] == '/')
		i--;

	/*
	 * then go backwards until a slash, or the beginning of the
	 * string
	 */
	while (i && path->buf[i - 1] != '/')
		i--;

	strbuf_setlen(path, i);
}
/* We allow "recursive" symbolic links. Only within reason, though */
#define MAXDEPTH 5
/*
 * Best-effort symlink resolution.
 *
 * If path names a symlink, rewrite it to name the link's target,
 * repeating for up to MAXDEPTH links in a chain. The final target may
 * or may not exist. If path is not a symlink it is left untouched.
 *
 * Errors are not reported: when reading a link fails, resolution simply
 * stops, so path is either unmodified or names a later symlink in the
 * chain that began at the original path.
 */
static void resolve_symlink(struct strbuf *path)
{
	/* scratch buffer kept between calls; emptied before returning */
	static struct strbuf link = STRBUF_INIT;
	int hops;

	for (hops = 0; hops < MAXDEPTH; hops++) {
		if (strbuf_readlink(&link, path->buf, path->len) < 0)
			break;

		if (!is_absolute_path(link.buf)) {
			/*
			 * Relative target: it takes the place of the
			 * last element of path.
			 */
			trim_last_path_component(path);
		} else {
			/* Absolute target: it replaces path entirely. */
			strbuf_reset(path);
		}
		strbuf_addbuf(path, &link);
	}

	strbuf_reset(&link);
}
/* Make sure errno contains a meaningful value on error */
static int lock_file(struct lock_file *lk, const char *path, int flags)
{
struct strbuf filename = STRBUF_INIT;
strbuf_addstr(&filename, path);
if (!(flags & LOCK_NO_DEREF))
resolve_symlink(&filename);
strbuf_addstr(&filename, LOCK_SUFFIX);
tempfile: auto-allocate tempfiles on heap The previous commit taught the tempfile code to give up ownership over tempfiles that have been renamed or deleted. That makes it possible to use a stack variable like this: struct tempfile t; create_tempfile(&t, ...); ... if (!err) rename_tempfile(&t, ...); else delete_tempfile(&t); But doing it this way has a high potential for creating memory errors. The tempfile we pass to create_tempfile() ends up on a global linked list, and it's not safe for it to go out of scope until we've called one of those two deactivation functions. Imagine that we add an early return from the function that forgets to call delete_tempfile(). With a static or heap tempfile variable, the worst case is that the tempfile hangs around until the program exits (and some functions like setup_shallow_temporary rely on this intentionally, creating a tempfile and then leaving it for later cleanup). But with a stack variable as above, this is a serious memory error: the variable goes out of scope and may be filled with garbage by the time the tempfile code looks at it. Let's see if we can make it harder to get this wrong. Since many callers need to allocate arbitrary numbers of tempfiles, we can't rely on static storage as a general solution. So we need to turn to the heap. We could just ask all callers to pass us a heap variable, but that puts the burden on them to call free() at the right time. Instead, let's have the tempfile code handle the heap allocation _and_ the deallocation (when the tempfile is deactivated and removed from the list). This changes the return value of all of the creation functions. For the cleanup functions (delete and rename), we'll add one extra bit of safety: instead of taking a tempfile pointer, we'll take a pointer-to-pointer and set it to NULL after freeing the object. This makes it safe to double-call functions like delete_tempfile(), as the second call treats the NULL input as a noop. Several callsites follow this pattern. 
The resulting patch does have a fair bit of noise, as each caller needs to be converted to handle: 1. Storing a pointer instead of the struct itself. 2. Passing the pointer instead of taking the struct address. 3. Handling a "struct tempfile *" return instead of a file descriptor. We could play games to make this less noisy. For example, by defining the tempfile like this: struct tempfile { struct heap_allocated_part_of_tempfile { int fd; ...etc } *actual_data; } Callers would continue to have a "struct tempfile", and it would be "active" only when the inner pointer was non-NULL. But that just makes things more awkward in the long run. There aren't that many callers, so we can simply bite the bullet and adjust all of them. And the compiler makes it easy for us to find them all. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-05 14:15:08 +02:00
lk->tempfile = create_tempfile(filename.buf);
strbuf_release(&filename);
tempfile: auto-allocate tempfiles on heap The previous commit taught the tempfile code to give up ownership over tempfiles that have been renamed or deleted. That makes it possible to use a stack variable like this: struct tempfile t; create_tempfile(&t, ...); ... if (!err) rename_tempfile(&t, ...); else delete_tempfile(&t); But doing it this way has a high potential for creating memory errors. The tempfile we pass to create_tempfile() ends up on a global linked list, and it's not safe for it to go out of scope until we've called one of those two deactivation functions. Imagine that we add an early return from the function that forgets to call delete_tempfile(). With a static or heap tempfile variable, the worst case is that the tempfile hangs around until the program exits (and some functions like setup_shallow_temporary rely on this intentionally, creating a tempfile and then leaving it for later cleanup). But with a stack variable as above, this is a serious memory error: the variable goes out of scope and may be filled with garbage by the time the tempfile code looks at it. Let's see if we can make it harder to get this wrong. Since many callers need to allocate arbitrary numbers of tempfiles, we can't rely on static storage as a general solution. So we need to turn to the heap. We could just ask all callers to pass us a heap variable, but that puts the burden on them to call free() at the right time. Instead, let's have the tempfile code handle the heap allocation _and_ the deallocation (when the tempfile is deactivated and removed from the list). This changes the return value of all of the creation functions. For the cleanup functions (delete and rename), we'll add one extra bit of safety: instead of taking a tempfile pointer, we'll take a pointer-to-pointer and set it to NULL after freeing the object. This makes it safe to double-call functions like delete_tempfile(), as the second call treats the NULL input as a noop. Several callsites follow this pattern. 
The resulting patch does have a fair bit of noise, as each caller needs to be converted to handle: 1. Storing a pointer instead of the struct itself. 2. Passing the pointer instead of taking the struct address. 3. Handling a "struct tempfile *" return instead of a file descriptor. We could play games to make this less noisy. For example, by defining the tempfile like this: struct tempfile { struct heap_allocated_part_of_tempfile { int fd; ...etc } *actual_data; } Callers would continue to have a "struct tempfile", and it would be "active" only when the inner pointer was non-NULL. But that just makes things more awkward in the long run. There aren't that many callers, so we can simply bite the bullet and adjust all of them. And the compiler makes it easy for us to find them all. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-05 14:15:08 +02:00
return lk->tempfile ? lk->tempfile->fd : -1;
}
/*
 * Constants defining the gaps between attempts to lock a file in
 * lock_file_timeout(). The first backoff period is approximately
 * INITIAL_BACKOFF_MS milliseconds. The longest backoff period is
 * approximately (BACKOFF_MAX_MULTIPLIER * INITIAL_BACKOFF_MS)
 * milliseconds. "Approximately" because every wait is randomized to
 * between 0.75 and 1.25 times the nominal period.
 */
#define INITIAL_BACKOFF_MS 1L
#define BACKOFF_MAX_MULTIPLIER 1000
/*
 * Try to lock path, retrying with quadratic backoff while the lock is
 * held by someone else.
 *
 * timeout_ms == 0: make exactly one attempt.
 * timeout_ms  > 0: keep retrying until at least timeout_ms
 *                  milliseconds have been spent waiting.
 * timeout_ms  < 0: (conventionally -1) keep retrying indefinitely.
 *
 * Returns the lock file's descriptor on success and -1 on failure.
 * Only EEXIST from lock_file() triggers a retry; any other errno makes
 * the function give up immediately.
 */
static int lock_file_timeout(struct lock_file *lk, const char *path,
			     int flags, long timeout_ms)
{
	static int random_initialized = 0;
	long remaining_ms = 0;
	int multiplier = 1;
	int step = 1;

	if (timeout_ms == 0)
		return lock_file(lk, path, flags);

	/* seed the jitter source once per process */
	if (!random_initialized) {
		srand((unsigned int)getpid());
		random_initialized = 1;
	}

	if (timeout_ms > 0)
		remaining_ms = timeout_ms;

	for (;;) {
		long period_ms, wait_ms;
		int fd = lock_file(lk, path, flags);

		if (fd >= 0)
			return fd; /* got the lock */
		if (errno != EEXIST)
			return -1; /* failed for a reason other than "lock held" */
		if (timeout_ms > 0 && remaining_ms <= 0)
			return -1; /* out of time */

		period_ms = multiplier * INITIAL_BACKOFF_MS;

		/* wait somewhere between 0.75 and 1.25 times period_ms */
		wait_ms = (750 + rand() % 500) * period_ms / 1000;
		sleep_millisec(wait_ms);
		remaining_ms -= wait_ms;

		/*
		 * Walk the multiplier through the squares using the
		 * recurrence (n+1)^2 = n^2 + 2n + 1, until it hits the cap.
		 */
		multiplier += 2*step + 1;
		if (multiplier > BACKOFF_MAX_MULTIPLIER)
			multiplier = BACKOFF_MAX_MULTIPLIER;
		else
			step++;
	}
}
/*
 * Append to buf a message explaining that the lock file for path could
 * not be created.
 *
 * err is the errno value of the failure; its strerror() text is part of
 * the message. When err is EEXIST the message additionally suggests
 * that another git process may be running or may have crashed, and
 * tells the user to remove the file manually.
 */
void unable_to_lock_message(const char *path, int err, struct strbuf *buf)
{
	if (err != EEXIST) {
		strbuf_addf(buf, _("Unable to create '%s.lock': %s"),
			    absolute_path(path), strerror(err));
		return;
	}

	strbuf_addf(buf, _("Unable to create '%s.lock': %s.\n\n"
	    "Another git process seems to be running in this repository, e.g.\n"
	    "an editor opened by 'git commit'. Please make sure all processes\n"
	    "are terminated then try again. If it still fails, a git process\n"
	    "may have crashed in this repository earlier:\n"
	    "remove the file manually to continue."),
		    absolute_path(path), strerror(err));
}
NORETURN void unable_to_lock_die(const char *path, int err)
{
struct strbuf buf = STRBUF_INIT;
unable_to_lock_message(path, err, &buf);
die("%s", buf.buf);
}
/* This should return a meaningful errno on failure */
int hold_lock_file_for_update_timeout(struct lock_file *lk, const char *path,
int flags, long timeout_ms)
{
int fd = lock_file_timeout(lk, path, flags, timeout_ms);
lockfile: LOCK_REPORT_ON_ERROR The "libify sequencer" topic stopped passing the die_on_error option to hold_locked_index(), and this lost an error message from "git merge --ff-only $commit" when there are competing updates in progress. The command still exits with a non-zero status, but that is not of much help for an interactive user. The last thing the command says is "Updating $from..$to". We used to follow it with a big error message that makes it clear that "merge --ff-only" did not succeed. What is sad is that we should have noticed this regression while reviewing the change. It was clear that the update to the checkout_fast_forward() function made a failing hold_locked_index() silent, but the only caller of the checkout_fast_forward() function had this comment: if (checkout_fast_forward(from, to, 1)) - exit(128); /* the callee should have complained already */ + return -1; /* the callee should have complained already */ which clearly contradicted the assumption X-<. Add a new option LOCK_REPORT_ON_ERROR that can be passed instead of LOCK_DIE_ON_ERROR to the hold_lock*() family of functions and teach checkout_fast_forward() to use it to fix this regression. After going thourgh all calls to hold_lock*() family of functions that used to pass LOCK_DIE_ON_ERROR but were modified to pass 0 in the "libify sequencer" topic "git show --first-parent 2a4062a4a8", it appears that this is the only one that has become silent. Many others used to give detailed report that talked about "there may be competing Git process running" but with the series merged they now only give a single liner "Unable to lock ...", some of which may have to be tweaked further, but at least they say something, unlike the one this patch fixes. Reported-by: Robbie Iannucci <iannucci@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-07 19:56:26 +01:00
if (fd < 0) {
if (flags & LOCK_DIE_ON_ERROR)
unable_to_lock_die(path, errno);
if (flags & LOCK_REPORT_ON_ERROR) {
struct strbuf buf = STRBUF_INIT;
unable_to_lock_message(path, errno, &buf);
error("%s", buf.buf);
strbuf_release(&buf);
}
}
return fd;
}
char *get_locked_file_path(struct lock_file *lk)
{
struct strbuf ret = STRBUF_INIT;
tempfile: auto-allocate tempfiles on heap The previous commit taught the tempfile code to give up ownership over tempfiles that have been renamed or deleted. That makes it possible to use a stack variable like this: struct tempfile t; create_tempfile(&t, ...); ... if (!err) rename_tempfile(&t, ...); else delete_tempfile(&t); But doing it this way has a high potential for creating memory errors. The tempfile we pass to create_tempfile() ends up on a global linked list, and it's not safe for it to go out of scope until we've called one of those two deactivation functions. Imagine that we add an early return from the function that forgets to call delete_tempfile(). With a static or heap tempfile variable, the worst case is that the tempfile hangs around until the program exits (and some functions like setup_shallow_temporary rely on this intentionally, creating a tempfile and then leaving it for later cleanup). But with a stack variable as above, this is a serious memory error: the variable goes out of scope and may be filled with garbage by the time the tempfile code looks at it. Let's see if we can make it harder to get this wrong. Since many callers need to allocate arbitrary numbers of tempfiles, we can't rely on static storage as a general solution. So we need to turn to the heap. We could just ask all callers to pass us a heap variable, but that puts the burden on them to call free() at the right time. Instead, let's have the tempfile code handle the heap allocation _and_ the deallocation (when the tempfile is deactivated and removed from the list). This changes the return value of all of the creation functions. For the cleanup functions (delete and rename), we'll add one extra bit of safety: instead of taking a tempfile pointer, we'll take a pointer-to-pointer and set it to NULL after freeing the object. This makes it safe to double-call functions like delete_tempfile(), as the second call treats the NULL input as a noop. Several callsites follow this pattern. 
The resulting patch does have a fair bit of noise, as each caller needs to be converted to handle: 1. Storing a pointer instead of the struct itself. 2. Passing the pointer instead of taking the struct address. 3. Handling a "struct tempfile *" return instead of a file descriptor. We could play games to make this less noisy. For example, by defining the tempfile like this: struct tempfile { struct heap_allocated_part_of_tempfile { int fd; ...etc } *actual_data; } Callers would continue to have a "struct tempfile", and it would be "active" only when the inner pointer was non-NULL. But that just makes things more awkward in the long run. There aren't that many callers, so we can simply bite the bullet and adjust all of them. And the compiler makes it easy for us to find them all. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-05 14:15:08 +02:00
strbuf_addstr(&ret, get_tempfile_path(lk->tempfile));
if (ret.len <= LOCK_SUFFIX_LEN ||
strcmp(ret.buf + ret.len - LOCK_SUFFIX_LEN, LOCK_SUFFIX))
BUG("get_locked_file_path() called for malformed lock object");
/* remove ".lock": */
strbuf_setlen(&ret, ret.len - LOCK_SUFFIX_LEN);
return strbuf_detach(&ret, NULL);
}
/*
 * Commit lk to the path it locks: the destination is obtained from
 * get_locked_file_path() and passed to commit_lock_file_to().
 *
 * Returns 0 on success and -1 on failure. On failure, errno as left by
 * commit_lock_file_to() is preserved across the free() of the
 * destination path.
 */
int commit_lock_file(struct lock_file *lk)
{
	char *target = get_locked_file_path(lk);
	int status = commit_lock_file_to(lk, target) ? -1 : 0;
	int saved_errno = errno;

	free(target);
	if (status)
		errno = saved_errno; /* only the failure path guarantees errno */
	return status;
}