2005-07-31 21:17:43 +02:00
|
|
|
#include "cache.h"
|
|
|
|
#include "run-command.h"
|
2006-01-11 03:12:17 +01:00
|
|
|
#include "exec_cmd.h"
|
run-command: optionally kill children on exit
When we spawn a helper process, it should generally be done
and finish_command called before we exit. However, if we
exit abnormally due to an early return or a signal, the
helper may continue to run in our absence.
In the best case, this may simply be wasted CPU cycles or a
few stray messages on a terminal. But it could also mean a
process that the user thought was aborted continues to run
to completion (e.g., a push's pack-objects helper will
complete the push, even though you killed the push process).
This patch provides infrastructure for run-command to keep
track of PIDs to be killed, and clean them on signal
reception or exit, just as we do with tempfiles. PIDs can
be added in two ways:
1. If NO_PTHREADS is defined, async helper processes are
automatically marked. By definition this code must be
ready to die when the parent dies, since it may be
implemented as a thread of the parent process.
2. If the run-command caller specifies the "clean_on_exit"
option. This is not the default, as there are cases
where it is OK for the child to outlive us (e.g., when
spawning a pager).
PIDs are cleared from the kill-list automatically during
wait_or_whine, which is called from finish_command and
finish_async.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Clemens Buchacher <drizzd@aon.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-07 12:42:43 +01:00
|
|
|
#include "sigchain.h"
|
2011-09-13 23:58:25 +02:00
|
|
|
#include "argv-array.h"
|
run-command: add an asynchronous parallel child processor
This allows to run external commands in parallel with ordered output
on stderr.
If we run external commands in parallel, we cannot pipe the output directly
to our stdout/err, as it would get mixed up. So each process's output will
flow through a pipe, which we buffer. One subprocess can be directly
piped to our stdout/err for low-latency feedback to the user.
Example:
Let's assume we have 5 submodules A,B,C,D,E and each fetch takes a
different amount of time as the different submodules vary in size, then
the output of fetches in sequential order might look like this:
time -->
output: |---A---| |-B-| |-------C-------| |-D-| |-E-|
When we schedule these submodules into at most two parallel processes,
a schedule and sample output over time may look like this:
process 1: |---A---| |-D-| |-E-|
process 2: |-B-| |-------C-------|
output: |---A---|B|---C-------|DE
So A will be perceived as it would run normally in the single child
version. As B has finished by the time A is done, we can dump its whole
progress buffer on stderr, such that it looks like it finished in no
time. Once that is done, C is determined to be the visible child and
its progress will be reported in real time.
So this way of output is really good for human consumption, as it only
changes the timing, not the actual output.
For machine consumption the output needs to be prepared in the tasks,
by either having a prefix per line or per block to indicate whose tasks
output is displayed, because the output order may not follow the
original sequential ordering:
|----A----| |--B--| |-C-|
will be scheduled to be all parallel:
process 1: |----A----|
process 2: |--B--|
process 3: |-C-|
output: |----A----|CB
This happens because C finished before B did, so it will be queued for
output before B.
To detect when a child has finished executing, we check interleaved
with other actions (such as checking the liveliness of children or
starting new processes) whether the stderr pipe still exists. Once a
child closed its stderr stream, we assume it is terminating very soon,
and use `finish_command()` from the single external process execution
interface to collect the exit status.
By maintaining the strong assumption of stderr being open until the
very end of a child process, we can avoid other hassle such as an
implementation using `waitpid(-1)`, which is not implemented in Windows.
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-16 01:04:10 +01:00
|
|
|
#include "thread-utils.h"
|
|
|
|
#include "strbuf.h"
|
2005-07-31 21:17:43 +02:00
|
|
|
|
2014-08-19 21:10:48 +02:00
|
|
|
void child_process_init(struct child_process *child)
|
|
|
|
{
|
|
|
|
memset(child, 0, sizeof(*child));
|
|
|
|
argv_array_init(&child->args);
|
2014-10-19 13:13:55 +02:00
|
|
|
argv_array_init(&child->env_array);
|
2014-08-19 21:10:48 +02:00
|
|
|
}
|
|
|
|
|
2015-10-24 14:11:27 +02:00
|
|
|
void child_process_clear(struct child_process *child)
|
|
|
|
{
|
|
|
|
argv_array_clear(&child->args);
|
|
|
|
argv_array_clear(&child->env_array);
|
|
|
|
}
|
|
|
|
|
run-command: optionally kill children on exit
When we spawn a helper process, it should generally be done
and finish_command called before we exit. However, if we
exit abnormally due to an early return or a signal, the
helper may continue to run in our absence.
In the best case, this may simply be wasted CPU cycles or a
few stray messages on a terminal. But it could also mean a
process that the user thought was aborted continues to run
to completion (e.g., a push's pack-objects helper will
complete the push, even though you killed the push process).
This patch provides infrastructure for run-command to keep
track of PIDs to be killed, and clean them on signal
reception or exit, just as we do with tempfiles. PIDs can
be added in two ways:
1. If NO_PTHREADS is defined, async helper processes are
automatically marked. By definition this code must be
ready to die when the parent dies, since it may be
implemented as a thread of the parent process.
2. If the run-command caller specifies the "clean_on_exit"
option. This is not the default, as there are cases
where it is OK for the child to outlive us (e.g., when
spawning a pager).
PIDs are cleared from the kill-list automatically during
wait_or_whine, which is called from finish_command and
finish_async.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Clemens Buchacher <drizzd@aon.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-07 12:42:43 +01:00
|
|
|
/*
 * One entry in the singly linked list of child processes that should be
 * signalled when this process exits or dies from a signal.
 */
struct child_to_clean {
	pid_t pid;			/* child to signal during cleanup */
	struct child_to_clean *next;	/* next entry, or NULL at the tail */
};

/* Head of the cleanup list; NULL when no children are registered. */
static struct child_to_clean *children_to_clean;

/* Non-zero once the atexit and signal cleanup handlers are installed. */
static int installed_child_cleanup_handler;
|
|
|
|
|
pager: don't use unsafe functions in signal handlers
Since the commit a3da8821208d (pager: do wait_for_pager on signal
death), we call wait_for_pager() in the pager's signal handler. The
recent bug report revealed that this causes a deadlock in glibc at
aborting "git log" [*1*]. When this happens, git process is left
unterminated, and it can't be killed by SIGTERM but only by SIGKILL.
The problem is that wait_for_pager() function does more than waiting
for pager process's termination, but it does cleanups and printing
errors. Unfortunately, the functions that may be used in a signal
handler are very limited [*2*]. Particularly, malloc(), free() and the
variants can't be used in a signal handler because they take a mutex
internally in glibc. This was the cause of the deadlock above. Other
than the direct calls of malloc/free, many functions calling
malloc/free can't be used. strerror() is such one, either.
Also, using fflush() and printf() in a signal handler is unsafe,
although it seems to have worked so far. To be on the safe side, we
should avoid them, too.
This patch tries to reduce the calls of such functions in signal
handlers. wait_or_whine() takes a flag and avoids the unsafe
calls. Also, finish_command_in_signal() is introduced for the
same reason. There the free() calls are removed, and only waits for
the children without whining at errors.
[*1*] https://bugzilla.opensuse.org/show_bug.cgi?id=942297
[*2*] http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-04 11:35:57 +02:00
|
|
|
static void cleanup_children(int sig, int in_signal)
|
run-command: optionally kill children on exit
When we spawn a helper process, it should generally be done
and finish_command called before we exit. However, if we
exit abnormally due to an early return or a signal, the
helper may continue to run in our absence.
In the best case, this may simply be wasted CPU cycles or a
few stray messages on a terminal. But it could also mean a
process that the user thought was aborted continues to run
to completion (e.g., a push's pack-objects helper will
complete the push, even though you killed the push process).
This patch provides infrastructure for run-command to keep
track of PIDs to be killed, and clean them on signal
reception or input, just as we do with tempfiles. PIDs can
be added in two ways:
1. If NO_PTHREADS is defined, async helper processes are
automatically marked. By definition this code must be
ready to die when the parent dies, since it may be
implemented as a thread of the parent process.
2. If the run-command caller specifies the "clean_on_exit"
option. This is not the default, as there are cases
where it is OK for the child to outlive us (e.g., when
spawning a pager).
PIDs are cleared from the kill-list automatically during
wait_or_whine, which is called from finish_command and
finish_async.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Clemens Buchacher <drizzd@aon.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-07 12:42:43 +01:00
|
|
|
{
|
|
|
|
while (children_to_clean) {
|
|
|
|
struct child_to_clean *p = children_to_clean;
|
|
|
|
children_to_clean = p->next;
|
|
|
|
kill(p->pid, sig);
|
pager: don't use unsafe functions in signal handlers
Since the commit a3da8821208d (pager: do wait_for_pager on signal
death), we call wait_for_pager() in the pager's signal handler. The
recent bug report revealed that this causes a deadlock in glibc at
aborting "git log" [*1*]. When this happens, git process is left
unterminated, and it can't be killed by SIGTERM but only by SIGKILL.
The problem is that wait_for_pager() function does more than waiting
for pager process's termination, but it does cleanups and printing
errors. Unfortunately, the functions that may be used in a signal
handler are very limited [*2*]. Particularly, malloc(), free() and the
variants can't be used in a signal handler because they take a mutex
internally in glibc. This was the cause of the deadlock above. Other
than the direct calls of malloc/free, many functions calling
malloc/free can't be used. strerror() is such one, either.
Also the usage of fflush() and printf() in a signal handler is bad,
although it seems working so far. In a safer side, we should avoid
them, too.
This patch tries to reduce the calls of such functions in signal
handlers. wait_for_signal() takes a flag and avoids the unsafe
calls. Also, finish_command_in_signal() is introduced for the
same reason. There the free() calls are removed, and only waits for
the children without whining at errors.
[*1*] https://bugzilla.opensuse.org/show_bug.cgi?id=942297
[*2*] http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-04 11:35:57 +02:00
|
|
|
if (!in_signal)
|
|
|
|
free(p);
|
run-command: optionally kill children on exit
When we spawn a helper process, it should generally be done
and finish_command called before we exit. However, if we
exit abnormally due to an early return or a signal, the
helper may continue to run in our absence.
In the best case, this may simply be wasted CPU cycles or a
few stray messages on a terminal. But it could also mean a
process that the user thought was aborted continues to run
to completion (e.g., a push's pack-objects helper will
complete the push, even though you killed the push process).
This patch provides infrastructure for run-command to keep
track of PIDs to be killed, and clean them on signal
reception or input, just as we do with tempfiles. PIDs can
be added in two ways:
1. If NO_PTHREADS is defined, async helper processes are
automatically marked. By definition this code must be
ready to die when the parent dies, since it may be
implemented as a thread of the parent process.
2. If the run-command caller specifies the "clean_on_exit"
option. This is not the default, as there are cases
where it is OK for the child to outlive us (e.g., when
spawning a pager).
PIDs are cleared from the kill-list automatically during
wait_or_whine, which is called from finish_command and
finish_async.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Clemens Buchacher <drizzd@aon.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-07 12:42:43 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Signal handler: forward the received signal to all registered children
 * (in signal-safe mode), then pop this handler off the sigchain and
 * re-raise the signal.
 * NOTE(review): presumably sigchain_pop() reinstates the previously
 * installed disposition so that raise() takes the original action --
 * confirm against sigchain.h.
 */
static void cleanup_children_on_signal(int sig)
{
	cleanup_children(sig, 1);	/* 1: we are inside a signal handler */
	sigchain_pop(sig);
	raise(sig);
}
|
|
|
|
|
|
|
|
/*
 * atexit() hook: send SIGTERM to every registered child. We are not in a
 * signal handler here, so the list nodes are freed as well.
 */
static void cleanup_children_on_exit(void)
{
	cleanup_children(SIGTERM, 0);
}
|
|
|
|
|
|
|
|
static void mark_child_for_cleanup(pid_t pid)
|
|
|
|
{
|
|
|
|
struct child_to_clean *p = xmalloc(sizeof(*p));
|
|
|
|
p->pid = pid;
|
|
|
|
p->next = children_to_clean;
|
|
|
|
children_to_clean = p;
|
|
|
|
|
|
|
|
if (!installed_child_cleanup_handler) {
|
|
|
|
atexit(cleanup_children_on_exit);
|
|
|
|
sigchain_push_common(cleanup_children_on_signal);
|
|
|
|
installed_child_cleanup_handler = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void clear_child_for_cleanup(pid_t pid)
|
|
|
|
{
|
2012-09-11 16:32:47 +02:00
|
|
|
struct child_to_clean **pp;
|
run-command: optionally kill children on exit
When we spawn a helper process, it should generally be done
and finish_command called before we exit. However, if we
exit abnormally due to an early return or a signal, the
helper may continue to run in our absence.
In the best case, this may simply be wasted CPU cycles or a
few stray messages on a terminal. But it could also mean a
process that the user thought was aborted continues to run
to completion (e.g., a push's pack-objects helper will
complete the push, even though you killed the push process).
This patch provides infrastructure for run-command to keep
track of PIDs to be killed, and clean them on signal
reception or input, just as we do with tempfiles. PIDs can
be added in two ways:
1. If NO_PTHREADS is defined, async helper processes are
automatically marked. By definition this code must be
ready to die when the parent dies, since it may be
implemented as a thread of the parent process.
2. If the run-command caller specifies the "clean_on_exit"
option. This is not the default, as there are cases
where it is OK for the child to outlive us (e.g., when
spawning a pager).
PIDs are cleared from the kill-list automatically during
wait_or_whine, which is called from finish_command and
finish_async.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Clemens Buchacher <drizzd@aon.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-07 12:42:43 +01:00
|
|
|
|
2012-09-11 16:32:47 +02:00
|
|
|
for (pp = &children_to_clean; *pp; pp = &(*pp)->next) {
|
|
|
|
struct child_to_clean *clean_me = *pp;
|
|
|
|
|
|
|
|
if (clean_me->pid == pid) {
|
|
|
|
*pp = clean_me->next;
|
|
|
|
free(clean_me);
|
run-command: optionally kill children on exit
When we spawn a helper process, it should generally be done
and finish_command called before we exit. However, if we
exit abnormally due to an early return or a signal, the
helper may continue to run in our absence.
In the best case, this may simply be wasted CPU cycles or a
few stray messages on a terminal. But it could also mean a
process that the user thought was aborted continues to run
to completion (e.g., a push's pack-objects helper will
complete the push, even though you killed the push process).
This patch provides infrastructure for run-command to keep
track of PIDs to be killed, and clean them on signal
reception or input, just as we do with tempfiles. PIDs can
be added in two ways:
1. If NO_PTHREADS is defined, async helper processes are
automatically marked. By definition this code must be
ready to die when the parent dies, since it may be
implemented as a thread of the parent process.
2. If the run-command caller specifies the "clean_on_exit"
option. This is not the default, as there are cases
where it is OK for the child to outlive us (e.g., when
spawning a pager).
PIDs are cleared from the kill-list automatically during
wait_or_whine, which is called from finish_command and
finish_async.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Clemens Buchacher <drizzd@aon.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-07 12:42:43 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-03-12 19:37:28 +01:00
|
|
|
/* Close both descriptors of a pair (e.g. the two ends of a pipe). */
static inline void close_pair(int fd[2])
{
	int i;

	for (i = 0; i < 2; i++)
		close(fd[i]);
}
|
|
|
|
|
2013-05-02 21:26:08 +02:00
|
|
|
#ifndef GIT_WINDOWS_NATIVE
|
2007-03-12 19:37:55 +01:00
|
|
|
/*
 * Redirect descriptor `to` to /dev/null (opened read-write).
 * Dies with an error message if /dev/null cannot be opened or the
 * descriptor cannot be duplicated.
 */
static inline void dup_devnull(int to)
{
	int fd = open("/dev/null", O_RDWR);

	if (fd < 0)
		die_errno(_("open /dev/null failed"));

	/*
	 * If `to` was closed on entry, open() may have handed us `to`
	 * itself. dup2(fd, fd) would then be a no-op and the close() below
	 * would undo the redirection, so we are already done.
	 */
	if (fd == to)
		return;

	if (dup2(fd, to) < 0)
		die_errno(_("dup2(%d,%d) failed"), fd, to);
	close(fd);
}
|
Windows: avoid the "dup dance" when spawning a child process
When stdin, stdout, or stderr must be redirected for a child process that
on Windows is spawned using one of the spawn() functions of Microsoft's
C runtime, then there is no choice other than to
1. make a backup copy of fd 0,1,2 with dup
2. dup2 the redirection source fd into 0,1,2
3. spawn
4. dup2 the backup back into 0,1,2
5. close the backup copy and the redirection source
We used this idiom as well -- but we are not using the spawn() functions
anymore!
Instead, we have our own implementation. We had hardcoded that stdin,
stdout, and stderr of the child process were inherited from the parent's
fds 0, 1, and 2. But we can actually specify any fd.
With this patch, the fds to inherit are passed from start_command()'s
WIN32 section to our spawn implementation. This way, we can avoid the
backup copies of the fds.
The backup copies were a bug waiting to surface: The OS handles underlying
the dup()ed fds were inherited by the child process (but were not
associated with a file descriptor in the child). Consequently, the file or
pipe represented by the OS handle remained open even after the backup copy
was closed in the parent process until the child exited.
Since our implementation of pipe() creates non-inheritable OS handles, we
still dup() file descriptors in start_command() because dup() happens to
create inheritable duplicates. (A nice side effect is that the fd cleanup
in start_command is the same for Windows and Unix and remains unchanged.)
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-15 21:12:18 +01:00
|
|
|
#endif
|
2007-03-12 19:37:55 +01:00
|
|
|
|
2012-03-30 09:52:18 +02:00
|
|
|
static char *locate_in_PATH(const char *file)
|
|
|
|
{
|
|
|
|
const char *p = getenv("PATH");
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
|
|
|
|
if (!p || !*p)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
const char *end = strchrnul(p, ':');
|
|
|
|
|
|
|
|
strbuf_reset(&buf);
|
|
|
|
|
|
|
|
/* POSIX specifies an empty entry as the current directory. */
|
|
|
|
if (end != p) {
|
|
|
|
strbuf_add(&buf, p, end - p);
|
|
|
|
strbuf_addch(&buf, '/');
|
|
|
|
}
|
|
|
|
strbuf_addstr(&buf, file);
|
|
|
|
|
|
|
|
if (!access(buf.buf, F_OK))
|
|
|
|
return strbuf_detach(&buf, NULL);
|
|
|
|
|
|
|
|
if (!*end)
|
|
|
|
break;
|
|
|
|
p = end + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_release(&buf);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return 1 if `file` can be found in one of the $PATH directories,
 * 0 otherwise.
 */
static int exists_in_PATH(const char *file)
{
	char *r = locate_in_PATH(file);
	/*
	 * Record the answer before freeing: the value of a pointer becomes
	 * indeterminate once the memory it points to has been freed, so it
	 * must not even be compared afterwards.
	 */
	int found = r != NULL;

	free(r);
	return found;
}
|
|
|
|
|
|
|
|
/*
 * execvp() wrapper that reports more intuitive errno values.
 *
 * Returns -1 with errno set when the exec fails (on success the call
 * does not return).
 */
int sane_execvp(const char *file, char * const argv[])
{
	if (execvp(file, argv) == 0)
		return 0; /* cannot happen ;-) */

	/*
	 * Commands containing "/" are left alone, because execvp will not
	 * do $PATH lookups in that case.
	 */
	if (strchr(file, '/'))
		return -1;

	switch (errno) {
	case EACCES:
		/*
		 * When a command can't be found because one of the
		 * directories listed in $PATH is unsearchable, execvp
		 * reports EACCES, but careful usability testing (read:
		 * analysis of occasional bug reports) reveals that "No such
		 * file or directory" is more intuitive.
		 *
		 * Re-assigning EACCES looks like a no-op, but it protects
		 * against exists_in_PATH overwriting errno.
		 */
		errno = exists_in_PATH(file) ? EACCES : ENOENT;
		break;
	case ENOTDIR:
		errno = ENOENT;
		break;
	default:
		break;
	}

	return -1;
}
|
|
|
|
|
2009-12-30 11:53:16 +01:00
|
|
|
static const char **prepare_shell_cmd(const char **argv)
|
|
|
|
{
|
|
|
|
int argc, nargc = 0;
|
|
|
|
const char **nargv;
|
|
|
|
|
|
|
|
for (argc = 0; argv[argc]; argc++)
|
|
|
|
; /* just counting */
|
|
|
|
/* +1 for NULL, +3 for "sh -c" plus extra $0 */
|
|
|
|
nargv = xmalloc(sizeof(*nargv) * (argc + 1 + 3));
|
|
|
|
|
|
|
|
if (argc < 1)
|
|
|
|
die("BUG: shell command is empty");
|
|
|
|
|
2009-12-30 11:55:36 +01:00
|
|
|
if (strcspn(argv[0], "|&;<>()$`\\\"' \t\n*?[#~=%") != strlen(argv[0])) {
|
2013-05-02 21:26:08 +02:00
|
|
|
#ifndef GIT_WINDOWS_NATIVE
|
2012-03-31 03:33:21 +02:00
|
|
|
nargv[nargc++] = SHELL_PATH;
|
Do not use SHELL_PATH from build system in prepare_shell_cmd on Windows
The recent change to use SHELL_PATH instead of "sh" to spawn shell commands
is not suited for Windows:
- The default setting, "/bin/sh", does not work when git has to run the
shell because it is a POSIX style path, but not a proper Windows style
path.
- If it worked, it would hard-code a position in the files system where
the shell is expected, making git (more precisely, the POSIX toolset that
is needed alongside git) non-relocatable. But we cannot sacrifice
relocatability on Windows.
- Apart from that, even though the Makefile leaves SHELL_PATH set to
"/bin/sh" for the Windows builds, the build system passes a mangled path
to the compiler, and something like "D:/Src/msysgit/bin/sh" is used,
which is doubly bad because it points to where /bin/sh resolves to on
the system where git was built.
- Finally, the system's CreateProcess() function that is used under
mingw.c's hood does not work with forward slashes and cannot find the
shell.
Undo the earlier change on Windows.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-04-17 09:03:21 +02:00
|
|
|
#else
|
|
|
|
nargv[nargc++] = "sh";
|
|
|
|
#endif
|
2009-12-30 11:55:36 +01:00
|
|
|
nargv[nargc++] = "-c";
|
2009-12-30 11:53:16 +01:00
|
|
|
|
2009-12-30 11:55:36 +01:00
|
|
|
if (argc < 2)
|
|
|
|
nargv[nargc++] = argv[0];
|
|
|
|
else {
|
|
|
|
struct strbuf arg0 = STRBUF_INIT;
|
|
|
|
strbuf_addf(&arg0, "%s \"$@\"", argv[0]);
|
|
|
|
nargv[nargc++] = strbuf_detach(&arg0, NULL);
|
|
|
|
}
|
2009-12-30 11:53:16 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
for (argc = 0; argv[argc]; argc++)
|
|
|
|
nargv[nargc++] = argv[argc];
|
|
|
|
nargv[nargc] = NULL;
|
|
|
|
|
|
|
|
return nargv;
|
|
|
|
}
|
|
|
|
|
2013-05-02 21:26:08 +02:00
|
|
|
#ifndef GIT_WINDOWS_NATIVE
|
2009-12-30 11:53:16 +01:00
|
|
|
/*
 * Replace the current process with a shell running `argv`.
 *
 * Returns -1 only if the exec failed, with errno as set by sane_execvp().
 */
static int execv_shell_cmd(const char **argv)
{
	const char **nargv = prepare_shell_cmd(argv);
	int saved_errno;

	trace_argv_printf(nargv, "trace: exec:");
	sane_execvp(nargv[0], (char **)nargv);

	/*
	 * free() is allowed to modify errno on some platforms; keep the
	 * value sane_execvp() reported so the caller can inspect it.
	 */
	saved_errno = errno;
	free(nargv);
	errno = saved_errno;
	return -1;
}
|
|
|
|
#endif
|
|
|
|
|
2013-05-02 21:26:08 +02:00
|
|
|
#ifndef GIT_WINDOWS_NATIVE
|
2010-01-10 14:11:22 +01:00
|
|
|
/* Descriptor that notify_parent() writes to; -1 until one is assigned. */
static int child_notifier = -1;

/*
 * Tell the parent that the exec failed by writing a single NUL byte to
 * child_notifier.
 */
static void notify_parent(void)
{
	/*
	 * execvp failed.  If possible, we'd like to let start_command
	 * know, so failures like ENOENT can be handled right away; but
	 * otherwise, finish_command will still report the error.
	 */
	xwrite(child_notifier, "", 1);
}
|
2010-03-06 16:40:42 +01:00
|
|
|
#endif
|
2010-01-10 14:07:52 +01:00
|
|
|
|
|
|
|
/*
 * Set the close-on-exec flag on `fd`, preserving its other descriptor
 * flags. Best effort: if the flags cannot be read, nothing is changed.
 */
static inline void set_cloexec(int fd)
{
	int flags = fcntl(fd, F_GETFD);

	if (flags < 0)
		return;
	fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
}
|
|
|
|
|
pager: don't use unsafe functions in signal handlers
Since the commit a3da8821208d (pager: do wait_for_pager on signal
death), we call wait_for_pager() in the pager's signal handler. The
recent bug report revealed that this causes a deadlock in glibc at
aborting "git log" [*1*]. When this happens, git process is left
unterminated, and it can't be killed by SIGTERM but only by SIGKILL.
The problem is that wait_for_pager() function does more than waiting
for pager process's termination, but it does cleanups and printing
errors. Unfortunately, the functions that may be used in a signal
handler are very limited [*2*]. Particularly, malloc(), free() and the
variants can't be used in a signal handler because they take a mutex
internally in glibc. This was the cause of the deadlock above. Other
than the direct calls of malloc/free, many functions calling
malloc/free can't be used. strerror() is such one, either.
Also the usage of fflush() and printf() in a signal handler is bad,
although it seems working so far. In a safer side, we should avoid
them, too.
This patch tries to reduce the calls of such functions in signal
handlers. wait_or_whine() takes a flag and avoids the unsafe
calls. Also, finish_command_in_signal() is introduced for the
same reason. There the free() calls are removed, and only waits for
the children without whining at errors.
[*1*] https://bugzilla.opensuse.org/show_bug.cgi?id=942297
[*2*] http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-04 11:35:57 +02:00
|
|
|
/*
 * Wait for child `pid` to terminate and turn its wait status into a
 * return code; `argv0` is only used to name the child in messages.
 *
 * If `in_signal` is non-zero the function merely reaps the child and
 * returns 0: no error reporting and no cleanup-list bookkeeping is done
 * on that path.
 *
 * Otherwise the result is:
 *   - the child's exit status, if it exited normally;
 *   - 128 + signal number, if it was killed by a signal;
 *   - -1 if waitpid() failed, gave an unexpected answer, or the child
 *     exited with status 127.
 * errno is set on return: to the waitpid() error, to ENOENT for the
 * status-127 case, and to 0 in every other case.
 */
static int wait_or_whine(pid_t pid, const char *argv0, int in_signal)
{
	int status;
	int result = -1;
	int saved_errno = 0;
	pid_t reaped;

	/* Retry the wait for as long as it is merely interrupted. */
	do {
		reaped = waitpid(pid, &status, 0);
	} while (reaped < 0 && errno == EINTR);

	if (in_signal)
		return 0;

	if (reaped < 0) {
		saved_errno = errno;
		error("waitpid for %s failed: %s", argv0, strerror(errno));
	} else if (reaped != pid) {
		error("waitpid is confused (%s)", argv0);
	} else if (WIFSIGNALED(status)) {
		int sig = WTERMSIG(status);

		/* SIGINT, SIGQUIT and SIGPIPE deaths are not reported. */
		if (sig != SIGINT && sig != SIGQUIT && sig != SIGPIPE)
			error("%s died of signal %d", argv0, sig);
		/*
		 * Chosen so that the value & 0xff mimics the exit code
		 * that a POSIX shell would report for a program that
		 * died from this signal.
		 */
		result = sig + 128;
	} else if (WIFEXITED(status)) {
		result = WEXITSTATUS(status);
		/*
		 * Status 127 is the special exit code used when execvp
		 * failed; present it as a failure with errno ENOENT.
		 */
		if (result == 127) {
			result = -1;
			saved_errno = ENOENT;
		}
	} else {
		error("waitpid is confused (%s)", argv0);
	}

	clear_child_for_cleanup(pid);

	errno = saved_errno;
	return result;
}
|
|
|
|
|
2007-03-10 09:28:05 +01:00
|
|
|
int start_command(struct child_process *cmd)
|
2005-07-31 21:17:43 +02:00
|
|
|
{
|
2007-10-19 21:47:58 +02:00
|
|
|
int need_in, need_out, need_err;
|
|
|
|
int fdin[2], fdout[2], fderr[2];
|
2013-03-21 16:45:00 +01:00
|
|
|
int failed_errno;
|
2013-01-31 03:01:05 +01:00
|
|
|
char *str;
|
2007-03-10 09:28:08 +01:00
|
|
|
|
2014-05-15 10:33:26 +02:00
|
|
|
if (!cmd->argv)
|
|
|
|
cmd->argv = cmd->args.argv;
|
2014-10-19 13:13:55 +02:00
|
|
|
if (!cmd->env)
|
|
|
|
cmd->env = cmd->env_array.argv;
|
2014-05-15 10:33:26 +02:00
|
|
|
|
2008-02-21 23:42:56 +01:00
|
|
|
/*
|
|
|
|
* In case of errors we must keep the promise to close FDs
|
|
|
|
* that have been passed in via ->in and ->out.
|
|
|
|
*/
|
|
|
|
|
2007-03-12 19:37:55 +01:00
|
|
|
need_in = !cmd->no_stdin && cmd->in < 0;
|
2007-03-10 09:28:08 +01:00
|
|
|
if (need_in) {
|
2008-02-21 23:42:56 +01:00
|
|
|
if (pipe(fdin) < 0) {
|
run_command: report system call errors instead of returning error codes
The motivation for this change is that system call failures are serious
errors that should be reported to the user, but only few callers took the
burden to decode the error codes that the functions returned into error
messages.
If at all, then only an unspecific error message was given. A prominent
example is this:
$ git upload-pack . | :
fatal: unable to run 'git-upload-pack'
In this example, git-upload-pack, the external command invoked through the
git wrapper, dies due to SIGPIPE, but the git wrapper does not bother to
report the real cause. In fact, this very error message is copied to the
syslog if git-daemon's client aborts the connection early.
With this change, system call failures are reported immediately after the
failure and only a generic failure code is returned to the caller. In the
above example the error is now to the point:
$ git upload-pack . | :
error: git-upload-pack died of signal
Note that there is no error report if the invoked program terminated with
a non-zero exit code, because it is reasonable to expect that the invoked
program has already reported an error. (But many run_command call sites
nevertheless write a generic error message.)
There was one special return code that was used to identify the case where
run_command failed because the requested program could not be exec'd. This
special case is now treated like a system call failure with errno set to
ENOENT. No error is reported in this case, because the call site in git.c
expects this as a normal result. Therefore, the callers that carefully
decoded the return value still check for this condition.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-04 21:26:40 +02:00
|
|
|
failed_errno = errno;
|
2008-02-21 23:42:56 +01:00
|
|
|
if (cmd->out > 0)
|
|
|
|
close(cmd->out);
|
2013-01-31 03:01:05 +01:00
|
|
|
str = "standard input";
|
run_command: report system call errors instead of returning error codes
The motivation for this change is that system call failures are serious
errors that should be reported to the user, but only few callers took the
burden to decode the error codes that the functions returned into error
messages.
If at all, then only an unspecific error message was given. A prominent
example is this:
$ git upload-pack . | :
fatal: unable to run 'git-upload-pack'
In this example, git-upload-pack, the external command invoked through the
git wrapper, dies due to SIGPIPE, but the git wrapper does not bother to
report the real cause. In fact, this very error message is copied to the
syslog if git-daemon's client aborts the connection early.
With this change, system call failures are reported immediately after the
failure and only a generic failure code is returned to the caller. In the
above example the error is now to the point:
$ git upload-pack . | :
error: git-upload-pack died of signal
Note that there is no error report if the invoked program terminated with
a non-zero exit code, because it is reasonable to expect that the invoked
program has already reported an error. (But many run_command call sites
nevertheless write a generic error message.)
There was one special return code that was used to identify the case where
run_command failed because the requested program could not be exec'd. This
special case is now treated like a system call failure with errno set to
ENOENT. No error is reported in this case, because the call site in git.c
expects this as a normal result. Therefore, the callers that carefully
decoded the return value still check for this condition.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-04 21:26:40 +02:00
|
|
|
goto fail_pipe;
|
2008-02-21 23:42:56 +01:00
|
|
|
}
|
2007-03-10 09:28:08 +01:00
|
|
|
cmd->in = fdin[1];
|
|
|
|
}
|
|
|
|
|
2007-03-12 19:37:55 +01:00
|
|
|
need_out = !cmd->no_stdout
|
|
|
|
&& !cmd->stdout_to_stderr
|
|
|
|
&& cmd->out < 0;
|
2007-03-12 19:37:45 +01:00
|
|
|
if (need_out) {
|
|
|
|
if (pipe(fdout) < 0) {
|
run_command: report system call errors instead of returning error codes
The motivation for this change is that system call failures are serious
errors that should be reported to the user, but only few callers took the
burden to decode the error codes that the functions returned into error
messages.
If at all, then only an unspecific error message was given. A prominent
example is this:
$ git upload-pack . | :
fatal: unable to run 'git-upload-pack'
In this example, git-upload-pack, the external command invoked through the
git wrapper, dies due to SIGPIPE, but the git wrapper does not bother to
report the real cause. In fact, this very error message is copied to the
syslog if git-daemon's client aborts the connection early.
With this change, system call failures are reported immediately after the
failure and only a generic failure code is returned to the caller. In the
above example the error is now to the point:
$ git upload-pack . | :
error: git-upload-pack died of signal
Note that there is no error report if the invoked program terminated with
a non-zero exit code, because it is reasonable to expect that the invoked
program has already reported an error. (But many run_command call sites
nevertheless write a generic error message.)
There was one special return code that was used to identify the case where
run_command failed because the requested program could not be exec'd. This
special case is now treated like a system call failure with errno set to
ENOENT. No error is reported in this case, because the call site in git.c
expects this as a normal result. Therefore, the callers that carefully
decoded the return value still check for this condition.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-04 21:26:40 +02:00
|
|
|
failed_errno = errno;
|
2007-03-12 19:37:45 +01:00
|
|
|
if (need_in)
|
|
|
|
close_pair(fdin);
|
2008-02-21 23:42:56 +01:00
|
|
|
else if (cmd->in)
|
|
|
|
close(cmd->in);
|
2013-01-31 03:01:05 +01:00
|
|
|
str = "standard output";
|
run_command: report system call errors instead of returning error codes
The motivation for this change is that system call failures are serious
errors that should be reported to the user, but only few callers took the
burden to decode the error codes that the functions returned into error
messages.
If at all, then only an unspecific error message was given. A prominent
example is this:
$ git upload-pack . | :
fatal: unable to run 'git-upload-pack'
In this example, git-upload-pack, the external command invoked through the
git wrapper, dies due to SIGPIPE, but the git wrapper does not bother to
report the real cause. In fact, this very error message is copied to the
syslog if git-daemon's client aborts the connection early.
With this change, system call failures are reported immediately after the
failure and only a generic failure code is returned to the caller. In the
above example the error is now to the point:
$ git upload-pack . | :
error: git-upload-pack died of signal
Note that there is no error report if the invoked program terminated with
a non-zero exit code, because it is reasonable to expect that the invoked
program has already reported an error. (But many run_command call sites
nevertheless write a generic error message.)
There was one special return code that was used to identify the case where
run_command failed because the requested program could not be exec'd. This
special case is now treated like a system call failure with errno set to
ENOENT. No error is reported in this case, because the call site in git.c
expects this as a normal result. Therefore, the callers that carefully
decoded the return value still check for this condition.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-04 21:26:40 +02:00
|
|
|
goto fail_pipe;
|
2007-03-12 19:37:45 +01:00
|
|
|
}
|
|
|
|
cmd->out = fdout[0];
|
|
|
|
}
|
|
|
|
|
2007-11-11 08:29:37 +01:00
|
|
|
need_err = !cmd->no_stderr && cmd->err < 0;
|
2007-10-19 21:47:58 +02:00
|
|
|
if (need_err) {
|
|
|
|
if (pipe(fderr) < 0) {
|
run_command: report system call errors instead of returning error codes
The motivation for this change is that system call failures are serious
errors that should be reported to the user, but only few callers took the
burden to decode the error codes that the functions returned into error
messages.
If at all, then only an unspecific error message was given. A prominent
example is this:
$ git upload-pack . | :
fatal: unable to run 'git-upload-pack'
In this example, git-upload-pack, the external command invoked through the
git wrapper, dies due to SIGPIPE, but the git wrapper does not bother to
report the real cause. In fact, this very error message is copied to the
syslog if git-daemon's client aborts the connection early.
With this change, system call failures are reported immediately after the
failure and only a generic failure code is returned to the caller. In the
above example the error is now to the point:
$ git upload-pack . | :
error: git-upload-pack died of signal
Note that there is no error report if the invoked program terminated with
a non-zero exit code, because it is reasonable to expect that the invoked
program has already reported an error. (But many run_command call sites
nevertheless write a generic error message.)
There was one special return code that was used to identify the case where
run_command failed because the requested program could not be exec'd. This
special case is now treated like a system call failure with errno set to
ENOENT. No error is reported in this case, because the call site in git.c
expects this as a normal result. Therefore, the callers that carefully
decoded the return value still check for this condition.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-04 21:26:40 +02:00
|
|
|
failed_errno = errno;
|
2007-10-19 21:47:58 +02:00
|
|
|
if (need_in)
|
|
|
|
close_pair(fdin);
|
2008-02-21 23:42:56 +01:00
|
|
|
else if (cmd->in)
|
|
|
|
close(cmd->in);
|
2007-10-19 21:47:58 +02:00
|
|
|
if (need_out)
|
|
|
|
close_pair(fdout);
|
2008-02-21 23:42:56 +01:00
|
|
|
else if (cmd->out)
|
|
|
|
close(cmd->out);
|
2013-01-31 03:01:05 +01:00
|
|
|
str = "standard error";
|
run_command: report system call errors instead of returning error codes
The motivation for this change is that system call failures are serious
errors that should be reported to the user, but only few callers took the
burden to decode the error codes that the functions returned into error
messages.
If at all, then only an unspecific error message was given. A prominent
example is this:
$ git upload-pack . | :
fatal: unable to run 'git-upload-pack'
In this example, git-upload-pack, the external command invoked through the
git wrapper, dies due to SIGPIPE, but the git wrapper does not bother to
report the real cause. In fact, this very error message is copied to the
syslog if git-daemon's client aborts the connection early.
With this change, system call failures are reported immediately after the
failure and only a generic failure code is returned to the caller. In the
above example the error is now to the point:
$ git upload-pack . | :
error: git-upload-pack died of signal
Note that there is no error report if the invoked program terminated with
a non-zero exit code, because it is reasonable to expect that the invoked
program has already reported an error. (But many run_command call sites
nevertheless write a generic error message.)
There was one special return code that was used to identify the case where
run_command failed because the requested program could not be exec'd. This
special case is now treated like a system call failure with errno set to
ENOENT. No error is reported in this case, because the call site in git.c
expects this as a normal result. Therefore, the callers that carefully
decoded the return value still check for this condition.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-04 21:26:40 +02:00
|
|
|
fail_pipe:
|
2013-01-31 03:01:05 +01:00
|
|
|
error("cannot create %s pipe for %s: %s",
|
|
|
|
str, cmd->argv[0], strerror(failed_errno));
|
2015-10-24 14:11:27 +02:00
|
|
|
child_process_clear(cmd);
|
run_command: report system call errors instead of returning error codes
The motivation for this change is that system call failures are serious
errors that should be reported to the user, but only few callers took the
burden to decode the error codes that the functions returned into error
messages.
If at all, then only an unspecific error message was given. A prominent
example is this:
$ git upload-pack . | :
fatal: unable to run 'git-upload-pack'
In this example, git-upload-pack, the external command invoked through the
git wrapper, dies due to SIGPIPE, but the git wrapper does not bother to
report the real cause. In fact, this very error message is copied to the
syslog if git-daemon's client aborts the connection early.
With this change, system call failures are reported immediately after the
failure and only a generic failure code is returned to the caller. In the
above example the error is now to the point:
$ git upload-pack . | :
error: git-upload-pack died of signal
Note that there is no error report if the invoked program terminated with
a non-zero exit code, because it is reasonable to expect that the invoked
program has already reported an error. (But many run_command call sites
nevertheless write a generic error message.)
There was one special return code that was used to identify the case where
run_command failed because the requested program could not be exec'd. This
special case is now treated like a system call failure with errno set to
ENOENT. No error is reported in this case, because the call site in git.c
expects this as a normal result. Therefore, the callers that carefully
decoded the return value still check for this condition.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-04 21:26:40 +02:00
|
|
|
errno = failed_errno;
|
|
|
|
return -1;
|
2007-10-19 21:47:58 +02:00
|
|
|
}
|
|
|
|
cmd->err = fderr[0];
|
|
|
|
}
|
|
|
|
|
2008-07-07 15:41:34 +02:00
|
|
|
trace_argv_printf(cmd->argv, "trace: run_command:");
|
2011-02-04 09:41:58 +01:00
|
|
|
fflush(NULL);
|
2008-07-07 15:41:34 +02:00
|
|
|
|
2013-05-02 21:26:08 +02:00
|
|
|
#ifndef GIT_WINDOWS_NATIVE
|
2010-01-10 14:11:22 +01:00
|
|
|
{
|
|
|
|
int notify_pipe[2];
|
|
|
|
if (pipe(notify_pipe))
|
|
|
|
notify_pipe[0] = notify_pipe[1] = -1;
|
|
|
|
|
2007-03-10 09:28:05 +01:00
|
|
|
cmd->pid = fork();
|
2013-03-21 16:45:00 +01:00
|
|
|
failed_errno = errno;
|
2007-03-10 09:28:05 +01:00
|
|
|
if (!cmd->pid) {
|
2010-01-10 14:07:52 +01:00
|
|
|
/*
|
|
|
|
* Redirect the channel to write syscall error messages to
|
|
|
|
* before redirecting the process's stderr so that all die()
|
|
|
|
* in subsequent call paths use the parent's stderr.
|
|
|
|
*/
|
|
|
|
if (cmd->no_stderr || need_err) {
|
vreportf: report to arbitrary filehandles
The vreportf function always goes to stderr, but run-command
wants child errors to go to the parent's original stderr. To
solve this, commit a5487dd duplicates the stderr fd and
installs die and error handlers to direct the output
appropriately (which later turned into the vwritef
function). This has two downsides, though:
- we make multiple calls to write(), which contradicts the
"write at once" logic from d048a96 (print
warning/error/fatal messages in one shot, 2007-11-09).
- the custom handlers basically duplicate the normal
handlers. They're only a few lines of code, but we
should not have to repeat the magic "exit(128)", for
example.
We can solve the first by using fdopen() on the duplicated
descriptor. We can't pass this to vreportf, but we could
introduce a new vreportf_to to handle it.
However, to fix the second problem, we instead introduce a
new "set_error_handle" function, which lets the normal
vreportf calls output to a handle besides stderr. Thus we
can get rid of our custom handlers entirely, and just ask
the regular handlers to output to our new descriptor.
And as vwritef has no more callers, it can just go away.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-08-11 20:06:15 +02:00
|
|
|
int child_err = dup(2);
|
2010-01-10 14:07:52 +01:00
|
|
|
set_cloexec(child_err);
|
vreportf: report to arbitrary filehandles
The vreportf function always goes to stderr, but run-command
wants child errors to go to the parent's original stderr. To
solve this, commit a5487dd duplicates the stderr fd and
installs die and error handlers to direct the output
appropriately (which later turned into the vwritef
function). This has two downsides, though:
- we make multiple calls to write(), which contradicts the
"write at once" logic from d048a96 (print
warning/error/fatal messages in one shot, 2007-11-09).
- the custom handlers basically duplicate the normal
handlers. They're only a few lines of code, but we
should not have to repeat the magic "exit(128)", for
example.
We can solve the first by using fdopen() on the duplicated
descriptor. We can't pass this to vreportf, but we could
introduce a new vreportf_to to handle it.
However, to fix the second problem, we instead introduce a
new "set_error_handle" function, which lets the normal
vreportf calls output to a handle besides stderr. Thus we
can get rid of our custom handlers entirely, and just ask
the regular handlers to output to our new descriptor.
And as vwritef has no more callers, it can just go away.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-08-11 20:06:15 +02:00
|
|
|
set_error_handle(fdopen(child_err, "w"));
|
2010-01-10 14:07:52 +01:00
|
|
|
}
|
|
|
|
|
2010-01-10 14:11:22 +01:00
|
|
|
close(notify_pipe[0]);
|
|
|
|
set_cloexec(notify_pipe[1]);
|
|
|
|
child_notifier = notify_pipe[1];
|
|
|
|
atexit(notify_parent);
|
|
|
|
|
2007-03-12 19:37:55 +01:00
|
|
|
if (cmd->no_stdin)
|
|
|
|
dup_devnull(0);
|
|
|
|
else if (need_in) {
|
2007-03-10 09:28:08 +01:00
|
|
|
dup2(fdin[0], 0);
|
2007-03-12 19:37:28 +01:00
|
|
|
close_pair(fdin);
|
2007-03-10 09:28:08 +01:00
|
|
|
} else if (cmd->in) {
|
|
|
|
dup2(cmd->in, 0);
|
|
|
|
close(cmd->in);
|
2006-12-31 03:55:22 +01:00
|
|
|
}
|
2007-03-10 09:28:08 +01:00
|
|
|
|
2008-03-05 08:35:16 +01:00
|
|
|
if (cmd->no_stderr)
|
|
|
|
dup_devnull(2);
|
|
|
|
else if (need_err) {
|
|
|
|
dup2(fderr[1], 2);
|
|
|
|
close_pair(fderr);
|
2010-02-05 21:57:37 +01:00
|
|
|
} else if (cmd->err > 1) {
|
|
|
|
dup2(cmd->err, 2);
|
|
|
|
close(cmd->err);
|
2008-03-05 08:35:16 +01:00
|
|
|
}
|
|
|
|
|
2007-03-12 19:37:55 +01:00
|
|
|
if (cmd->no_stdout)
|
|
|
|
dup_devnull(1);
|
|
|
|
else if (cmd->stdout_to_stderr)
|
2006-12-31 03:55:19 +01:00
|
|
|
dup2(2, 1);
|
2007-03-12 19:37:45 +01:00
|
|
|
else if (need_out) {
|
|
|
|
dup2(fdout[1], 1);
|
|
|
|
close_pair(fdout);
|
|
|
|
} else if (cmd->out > 1) {
|
|
|
|
dup2(cmd->out, 1);
|
|
|
|
close(cmd->out);
|
|
|
|
}
|
|
|
|
|
2007-05-22 23:48:23 +02:00
|
|
|
if (cmd->dir && chdir(cmd->dir))
|
2009-06-27 17:58:46 +02:00
|
|
|
die_errno("exec '%s': cd to '%s' failed", cmd->argv[0],
|
|
|
|
cmd->dir);
|
2007-05-22 23:48:47 +02:00
|
|
|
if (cmd->env) {
|
2007-05-23 22:21:39 +02:00
|
|
|
for (; *cmd->env; cmd->env++) {
|
|
|
|
if (strchr(*cmd->env, '='))
|
2009-05-01 11:06:36 +02:00
|
|
|
putenv((char *)*cmd->env);
|
2007-05-23 22:21:39 +02:00
|
|
|
else
|
|
|
|
unsetenv(*cmd->env);
|
|
|
|
}
|
2007-05-22 23:48:47 +02:00
|
|
|
}
|
2013-10-31 10:25:45 +01:00
|
|
|
if (cmd->git_cmd)
|
2007-03-10 09:28:00 +01:00
|
|
|
execv_git_cmd(cmd->argv);
|
2013-10-31 10:25:45 +01:00
|
|
|
else if (cmd->use_shell)
|
2009-12-30 11:53:16 +01:00
|
|
|
execv_shell_cmd(cmd->argv);
|
2013-10-31 10:25:45 +01:00
|
|
|
else
|
2012-03-30 09:52:18 +02:00
|
|
|
sane_execvp(cmd->argv[0], (char *const*) cmd->argv);
|
notice error exit from pager
If the pager fails to run, git produces no output, e.g.:
$ GIT_PAGER=not-a-command git log
The error reporting fails for two reasons:
(1) start_command: There is a mechanism that detects errors during
execvp introduced in 2b541bf8 (start_command: detect execvp
failures early). The child writes one byte to a pipe only if
execvp fails. The parent waits for either EOF, when the
successful execvp automatically closes the pipe (see
FD_CLOEXEC in fcntl(1)), or it reads a single byte, in which
case it knows that the execvp failed. This mechanism is
incompatible with the workaround introduced in 35ce8622
(pager: Work around window resizing bug in 'less'), which
waits for input from the parent before the exec. Since both
the parent and the child are waiting for input from each
other, that would result in a deadlock. In order to avoid
that, the mechanism is disabled by closing the child_notifier
file descriptor.
(2) finish_command: The parent correctly detects the 127 exit
status from the child, but the error output goes nowhere,
since by that time it is already being redirected to the
child.
No simple solution for (1) comes to mind.
Number (2) can be solved by not sending error output to the pager.
Not redirecting error output to the pager can result in the pager
overwriting error output with standard output, however.
Since there is no reliable way to handle error reporting in the
parent, produce the output in the child instead.
Signed-off-by: Clemens Buchacher <drizzd@aon.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-08-01 19:59:21 +02:00
|
|
|
if (errno == ENOENT) {
|
|
|
|
if (!cmd->silent_exec_failure)
|
|
|
|
error("cannot run %s: %s", cmd->argv[0],
|
|
|
|
strerror(ENOENT));
|
2010-01-10 14:07:52 +01:00
|
|
|
exit(127);
|
notice error exit from pager
If the pager fails to run, git produces no output, e.g.:
$ GIT_PAGER=not-a-command git log
The error reporting fails for two reasons:
(1) start_command: There is a mechanism that detects errors during
execvp introduced in 2b541bf8 (start_command: detect execvp
failures early). The child writes one byte to a pipe only if
execvp fails. The parent waits for either EOF, when the
successful execvp automatically closes the pipe (see
FD_CLOEXEC in fcntl(1)), or it reads a single byte, in which
case it knows that the execvp failed. This mechanism is
incompatible with the workaround introduced in 35ce8622
(pager: Work around window resizing bug in 'less'), which
waits for input from the parent before the exec. Since both
the parent and the child are waiting for input from each
other, that would result in a deadlock. In order to avoid
that, the mechanism is disabled by closing the child_notifier
file descriptor.
(2) finish_command: The parent correctly detects the 127 exit
status from the child, but the error output goes nowhere,
since by that time it is already being redirected to the
child.
No simple solution for (1) comes to mind.
Number (2) can be solved by not sending error output to the pager.
Not redirecting error output to the pager can result in the pager
overwriting error output with standard output, however.
Since there is no reliable way to handle error reporting in the
parent, produce the output in the child instead.
Signed-off-by: Clemens Buchacher <drizzd@aon.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-08-01 19:59:21 +02:00
|
|
|
} else {
|
2010-01-10 14:07:52 +01:00
|
|
|
die_errno("cannot exec '%s'", cmd->argv[0]);
|
notice error exit from pager
If the pager fails to run, git produces no output, e.g.:
$ GIT_PAGER=not-a-command git log
The error reporting fails for two reasons:
(1) start_command: There is a mechanism that detects errors during
execvp introduced in 2b541bf8 (start_command: detect execvp
failures early). The child writes one byte to a pipe only if
execvp fails. The parent waits for either EOF, when the
successful execvp automatically closes the pipe (see
FD_CLOEXEC in fcntl(1)), or it reads a single byte, in which
case it knows that the execvp failed. This mechanism is
incompatible with the workaround introduced in 35ce8622
(pager: Work around window resizing bug in 'less'), which
waits for input from the parent before the exec. Since both
the parent and the child are waiting for input from each
other, that would result in a deadlock. In order to avoid
that, the mechanism is disabled by closing the child_notifier
file descriptor.
(2) finish_command: The parent correctly detects the 127 exit
status from the child, but the error output goes nowhere,
since by that time it is already being redirected to the
child.
No simple solution for (1) comes to mind.
Number (2) can be solved by not sending error output to the pager.
Not redirecting error output to the pager can result in the pager
overwriting error output with standard output, however.
Since there is no reliable way to handle error reporting in the
parent, produce the output in the child instead.
Signed-off-by: Clemens Buchacher <drizzd@aon.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-08-01 19:59:21 +02:00
|
|
|
}
|
2005-07-31 21:17:43 +02:00
|
|
|
}
|
run_command: report system call errors instead of returning error codes
The motivation for this change is that system call failures are serious
errors that should be reported to the user, but only few callers took the
burden to decode the error codes that the functions returned into error
messages.
If at all, then only an unspecific error message was given. A prominent
example is this:
$ git upload-pack . | :
fatal: unable to run 'git-upload-pack'
In this example, git-upload-pack, the external command invoked through the
git wrapper, dies due to SIGPIPE, but the git wrapper does not bother to
report the real cause. In fact, this very error message is copied to the
syslog if git-daemon's client aborts the connection early.
With this change, system call failures are reported immediately after the
failure and only a generic failure code is returned to the caller. In the
above example the error is now to the point:
$ git upload-pack . | :
error: git-upload-pack died of signal
Note that there is no error report if the invoked program terminated with
a non-zero exit code, because it is reasonable to expect that the invoked
program has already reported an error. (But many run_command call sites
nevertheless write a generic error message.)
There was one special return code that was used to identify the case where
run_command failed because the requested program could not be exec'd. This
special case is now treated like a system call failure with errno set to
ENOENT. No error is reported in this case, because the call site in git.c
expects this as a normal result. Therefore, the callers that carefully
decoded the return value still check for this condition.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-04 21:26:40 +02:00
|
|
|
if (cmd->pid < 0)
|
|
|
|
error("cannot fork() for %s: %s", cmd->argv[0],
|
2013-03-21 16:45:00 +01:00
|
|
|
strerror(errno));
|
run-command: optionally kill children on exit
When we spawn a helper process, it should generally be done
and finish_command called before we exit. However, if we
exit abnormally due to an early return or a signal, the
helper may continue to run in our absence.
In the best case, this may simply be wasted CPU cycles or a
few stray messages on a terminal. But it could also mean a
process that the user thought was aborted continues to run
to completion (e.g., a push's pack-objects helper will
complete the push, even though you killed the push process).
This patch provides infrastructure for run-command to keep
track of PIDs to be killed, and clean them on signal
reception or input, just as we do with tempfiles. PIDs can
be added in two ways:
1. If NO_PTHREADS is defined, async helper processes are
automatically marked. By definition this code must be
ready to die when the parent dies, since it may be
implemented as a thread of the parent process.
2. If the run-command caller specifies the "clean_on_exit"
option. This is not the default, as there are cases
where it is OK for the child to outlive us (e.g., when
spawning a pager).
PIDs are cleared from the kill-list automatically during
wait_or_whine, which is called from finish_command and
finish_async.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Clemens Buchacher <drizzd@aon.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-07 12:42:43 +01:00
|
|
|
else if (cmd->clean_on_exit)
|
|
|
|
mark_child_for_cleanup(cmd->pid);
|
2010-01-10 14:11:22 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Wait for child's execvp. If the execvp succeeds (or if fork()
|
|
|
|
* failed), EOF is seen immediately by the parent. Otherwise, the
|
|
|
|
* child process sends a single byte.
|
|
|
|
* Note that use of this infrastructure is completely advisory,
|
|
|
|
* therefore, we keep error checks minimal.
|
|
|
|
*/
|
|
|
|
close(notify_pipe[1]);
|
|
|
|
if (read(notify_pipe[0], &notify_pipe[1], 1) == 1) {
|
|
|
|
/*
|
|
|
|
* At this point we know that fork() succeeded, but execvp()
|
|
|
|
* failed. Errors have been reported to our stderr.
|
|
|
|
*/
|
pager: don't use unsafe functions in signal handlers
Since the commit a3da8821208d (pager: do wait_for_pager on signal
death), we call wait_for_pager() in the pager's signal handler. The
recent bug report revealed that this causes a deadlock in glibc at
aborting "git log" [*1*]. When this happens, git process is left
unterminated, and it can't be killed by SIGTERM but only by SIGKILL.
The problem is that wait_for_pager() function does more than waiting
for pager process's termination, but it does cleanups and printing
errors. Unfortunately, the functions that may be used in a signal
handler are very limited [*2*]. Particularly, malloc(), free() and the
variants can't be used in a signal handler because they take a mutex
internally in glibc. This was the cause of the deadlock above. Other
than the direct calls of malloc/free, many functions calling
malloc/free can't be used. strerror() is such one, either.
Also the usage of fflush() and printf() in a signal handler is bad,
although it seems working so far. In a safer side, we should avoid
them, too.
This patch tries to reduce the calls of such functions in signal
handlers. wait_for_signal() takes a flag and avoids the unsafe
calls. Also, finish_command_in_signal() is introduced for the
same reason. There the free() calls are removed, and only waits for
the children without whining at errors.
[*1*] https://bugzilla.opensuse.org/show_bug.cgi?id=942297
[*2*] http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-04 11:35:57 +02:00
|
|
|
wait_or_whine(cmd->pid, cmd->argv[0], 0);
|
2010-01-10 14:11:22 +01:00
|
|
|
failed_errno = errno;
|
|
|
|
cmd->pid = -1;
|
|
|
|
}
|
|
|
|
close(notify_pipe[0]);
|
|
|
|
}
|
2007-12-07 22:08:59 +01:00
|
|
|
#else
|
2009-09-16 10:20:17 +02:00
|
|
|
{
|
Windows: avoid the "dup dance" when spawning a child process
When stdin, stdout, or stderr must be redirected for a child process that
on Windows is spawned using one of the spawn() functions of Microsoft's
C runtime, then there is no choice other than to
1. make a backup copy of fd 0,1,2 with dup
2. dup2 the redirection source fd into 0,1,2
3. spawn
4. dup2 the backup back into 0,1,2
5. close the backup copy and the redirection source
We used this idiom as well -- but we are not using the spawn() functions
anymore!
Instead, we have our own implementation. We had hardcoded that stdin,
stdout, and stderr of the child process were inherited from the parent's
fds 0, 1, and 2. But we can actually specify any fd.
With this patch, the fds to inherit are passed from start_command()'s
WIN32 section to our spawn implementation. This way, we can avoid the
backup copies of the fds.
The backup copies were a bug waiting to surface: The OS handles underlying
the dup()ed fds were inherited by the child process (but were not
associated with a file descriptor in the child). Consequently, the file or
pipe represented by the OS handle remained open even after the backup copy
was closed in the parent process until the child exited.
Since our implementation of pipe() creates non-inheritable OS handles, we
still dup() file descriptors in start_command() because dup() happens to
create inheritable duplicates. (A nice side effect is that the fd cleanup
in start_command is the same for Windows and Unix and remains unchanged.)
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-15 21:12:18 +01:00
|
|
|
int fhin = 0, fhout = 1, fherr = 2;
|
2008-07-28 07:50:28 +02:00
|
|
|
const char **sargv = cmd->argv;
|
2007-12-07 22:08:59 +01:00
|
|
|
|
Windows: avoid the "dup dance" when spawning a child process
When stdin, stdout, or stderr must be redirected for a child process that
on Windows is spawned using one of the spawn() functions of Microsoft's
C runtime, then there is no choice other than to
1. make a backup copy of fd 0,1,2 with dup
2. dup2 the redirection source fd into 0,1,2
3. spawn
4. dup2 the backup back into 0,1,2
5. close the backup copy and the redirection source
We used this idiom as well -- but we are not using the spawn() functions
anymore!
Instead, we have our own implementation. We had hardcoded that stdin,
stdout, and stderr of the child process were inherited from the parent's
fds 0, 1, and 2. But we can actually specify any fd.
With this patch, the fds to inherit are passed from start_command()'s
WIN32 section to our spawn implementation. This way, we can avoid the
backup copies of the fds.
The backup copies were a bug waiting to surface: The OS handles underlying
the dup()ed fds were inherited by the child process (but were not
associated with a file descriptor in the child). Consequently, the file or
pipe represented by the OS handle remained open even after the backup copy
was closed in the parent process until the child exited.
Since our implementation of pipe() creates non-inheritable OS handles, we
still dup() file descriptors in start_command() because dup() happens to
create inheritable duplicates. (A nice side effect is that the fd cleanup
in start_command is the same for Windows and Unix and remains unchanged.)
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-15 21:12:18 +01:00
|
|
|
if (cmd->no_stdin)
|
|
|
|
fhin = open("/dev/null", O_RDWR);
|
|
|
|
else if (need_in)
|
|
|
|
fhin = dup(fdin[0]);
|
|
|
|
else if (cmd->in)
|
|
|
|
fhin = dup(cmd->in);
|
|
|
|
|
|
|
|
if (cmd->no_stderr)
|
|
|
|
fherr = open("/dev/null", O_RDWR);
|
|
|
|
else if (need_err)
|
|
|
|
fherr = dup(fderr[1]);
|
2010-02-06 06:08:53 +01:00
|
|
|
else if (cmd->err > 2)
|
|
|
|
fherr = dup(cmd->err);
|
Windows: avoid the "dup dance" when spawning a child process
When stdin, stdout, or stderr must be redirected for a child process that
on Windows is spawned using one of the spawn() functions of Microsoft's
C runtime, then there is no choice other than to
1. make a backup copy of fd 0,1,2 with dup
2. dup2 the redirection source fd into 0,1,2
3. spawn
4. dup2 the backup back into 0,1,2
5. close the backup copy and the redirection source
We used this idiom as well -- but we are not using the spawn() functions
anymore!
Instead, we have our own implementation. We had hardcoded that stdin,
stdout, and stderr of the child process were inherited from the parent's
fds 0, 1, and 2. But we can actually specify any fd.
With this patch, the fds to inherit are passed from start_command()'s
WIN32 section to our spawn implementation. This way, we can avoid the
backup copies of the fds.
The backup copies were a bug waiting to surface: The OS handles underlying
the dup()ed fds were inherited by the child process (but were not
associated with a file descriptor in the child). Consequently, the file or
pipe represented by the OS handle remained open even after the backup copy
was closed in the parent process until the child exited.
Since our implementation of pipe() creates non-inheritable OS handles, we
still dup() file descriptors in start_command() because dup() happens to
create inheritable duplicates. (A nice side effect is that the fd cleanup
in start_command is the same for Windows and Unix and remains unchanged.)
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-15 21:12:18 +01:00
|
|
|
|
|
|
|
if (cmd->no_stdout)
|
|
|
|
fhout = open("/dev/null", O_RDWR);
|
|
|
|
else if (cmd->stdout_to_stderr)
|
|
|
|
fhout = dup(fherr);
|
|
|
|
else if (need_out)
|
|
|
|
fhout = dup(fdout[1]);
|
|
|
|
else if (cmd->out > 1)
|
|
|
|
fhout = dup(cmd->out);
|
2007-12-07 22:08:59 +01:00
|
|
|
|
2013-10-31 10:25:45 +01:00
|
|
|
if (cmd->git_cmd)
|
2008-07-28 07:50:28 +02:00
|
|
|
cmd->argv = prepare_git_cmd(cmd->argv);
|
2013-10-31 10:25:45 +01:00
|
|
|
else if (cmd->use_shell)
|
2009-12-30 11:53:16 +01:00
|
|
|
cmd->argv = prepare_shell_cmd(cmd->argv);
|
2007-12-07 22:08:59 +01:00
|
|
|
|
2014-07-17 17:38:01 +02:00
|
|
|
cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, (char**) cmd->env,
|
|
|
|
cmd->dir, fhin, fhout, fherr);
|
run_command: report system call errors instead of returning error codes
The motivation for this change is that system call failures are serious
errors that should be reported to the user, but only few callers took the
burden to decode the error codes that the functions returned into error
messages.
If at all, then only an unspecific error message was given. A prominent
example is this:
$ git upload-pack . | :
fatal: unable to run 'git-upload-pack'
In this example, git-upload-pack, the external command invoked through the
git wrapper, dies due to SIGPIPE, but the git wrapper does not bother to
report the real cause. In fact, this very error message is copied to the
syslog if git-daemon's client aborts the connection early.
With this change, system call failures are reported immediately after the
failure and only a generic failure code is returned to the caller. In the
above example the error is now to the point:
$ git upload-pack . | :
error: git-upload-pack died of signal
Note that there is no error report if the invoked program terminated with
a non-zero exit code, because it is reasonable to expect that the invoked
program has already reported an error. (But many run_command call sites
nevertheless write a generic error message.)
There was one special return code that was used to identify the case where
run_command failed because the requested program could not be exec'd. This
special case is now treated like a system call failure with errno set to
ENOENT. No error is reported in this case, because the call site in git.c
expects this as a normal result. Therefore, the callers that carefully
decoded the return value still check for this condition.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-04 21:26:40 +02:00
|
|
|
failed_errno = errno;
|
2009-07-04 21:26:42 +02:00
|
|
|
if (cmd->pid < 0 && (!cmd->silent_exec_failure || errno != ENOENT))
|
run_command: report system call errors instead of returning error codes
The motivation for this change is that system call failures are serious
errors that should be reported to the user, but only few callers took the
burden to decode the error codes that the functions returned into error
messages.
If at all, then only an unspecific error message was given. A prominent
example is this:
$ git upload-pack . | :
fatal: unable to run 'git-upload-pack'
In this example, git-upload-pack, the external command invoked through the
git wrapper, dies due to SIGPIPE, but the git wrapper does not bother to
report the real cause. In fact, this very error message is copied to the
syslog if git-daemon's client aborts the connection early.
With this change, system call failures are reported immediately after the
failure and only a generic failure code is returned to the caller. In the
above example the error is now to the point:
$ git upload-pack . | :
error: git-upload-pack died of signal
Note that there is no error report if the invoked program terminated with
a non-zero exit code, because it is reasonable to expect that the invoked
program has already reported an error. (But many run_command call sites
nevertheless write a generic error message.)
There was one special return code that was used to identify the case where
run_command failed because the requested program could not be exec'd. This
special case is now treated like a system call failure with errno set to
ENOENT. No error is reported in this case, because the call site in git.c
expects this as a normal result. Therefore, the callers that carefully
decoded the return value still check for this condition.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-04 21:26:40 +02:00
|
|
|
error("cannot spawn %s: %s", cmd->argv[0], strerror(errno));
|
run-command: optionally kill children on exit
When we spawn a helper process, it should generally be done
and finish_command called before we exit. However, if we
exit abnormally due to an early return or a signal, the
helper may continue to run in our absence.
In the best case, this may simply be wasted CPU cycles or a
few stray messages on a terminal. But it could also mean a
process that the user thought was aborted continues to run
to completion (e.g., a push's pack-objects helper will
complete the push, even though you killed the push process).
This patch provides infrastructure for run-command to keep
track of PIDs to be killed, and clean them on signal
reception or input, just as we do with tempfiles. PIDs can
be added in two ways:
1. If NO_PTHREADS is defined, async helper processes are
automatically marked. By definition this code must be
ready to die when the parent dies, since it may be
implemented as a thread of the parent process.
2. If the run-command caller specifies the "clean_on_exit"
option. This is not the default, as there are cases
where it is OK for the child to outlive us (e.g., when
spawning a pager).
PIDs are cleared from the kill-list automatically during
wait_or_whine, which is called from finish_command and
finish_async.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Clemens Buchacher <drizzd@aon.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-07 12:42:43 +01:00
|
|
|
if (cmd->clean_on_exit && cmd->pid >= 0)
|
|
|
|
mark_child_for_cleanup(cmd->pid);
|
2007-12-07 22:08:59 +01:00
|
|
|
|
|
|
|
if (cmd->git_cmd)
|
2008-07-28 07:50:28 +02:00
|
|
|
free(cmd->argv);
|
2007-12-07 22:08:59 +01:00
|
|
|
|
2008-07-28 07:50:28 +02:00
|
|
|
cmd->argv = sargv;
|
Windows: avoid the "dup dance" when spawning a child process
When stdin, stdout, or stderr must be redirected for a child process that
on Windows is spawned using one of the spawn() functions of Microsoft's
C runtime, then there is no choice other than to
1. make a backup copy of fd 0,1,2 with dup
2. dup2 the redirection source fd into 0,1,2
3. spawn
4. dup2 the backup back into 0,1,2
5. close the backup copy and the redirection source
We used this idiom as well -- but we are not using the spawn() functions
anymore!
Instead, we have our own implementation. We had hardcoded that stdin,
stdout, and stderr of the child process were inherited from the parent's
fds 0, 1, and 2. But we can actually specify any fd.
With this patch, the fds to inherit are passed from start_command()'s
WIN32 section to our spawn implementation. This way, we can avoid the
backup copies of the fds.
The backup copies were a bug waiting to surface: The OS handles underlying
the dup()ed fds were inherited by the child process (but were not
associated with a file descriptor in the child). Consequently, the file or
pipe represented by the OS handle remained open even after the backup copy
was closed in the parent process until the child exited.
Since our implementation of pipe() creates non-inheritable OS handles, we
still dup() file descriptors in start_command() because dup() happens to
create inheritable duplicates. (A nice side effect is that the fd cleanup
in start_command is the same for Windows and Unix and remains unchanged.)
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-15 21:12:18 +01:00
|
|
|
if (fhin != 0)
|
|
|
|
close(fhin);
|
|
|
|
if (fhout != 1)
|
|
|
|
close(fhout);
|
|
|
|
if (fherr != 2)
|
|
|
|
close(fherr);
|
2009-09-16 10:20:17 +02:00
|
|
|
}
|
2007-12-07 22:08:59 +01:00
|
|
|
#endif
|
|
|
|
|
|
|
|
if (cmd->pid < 0) {
|
|
|
|
if (need_in)
|
|
|
|
close_pair(fdin);
|
|
|
|
else if (cmd->in)
|
|
|
|
close(cmd->in);
|
|
|
|
if (need_out)
|
|
|
|
close_pair(fdout);
|
|
|
|
else if (cmd->out)
|
|
|
|
close(cmd->out);
|
|
|
|
if (need_err)
|
|
|
|
close_pair(fderr);
|
2010-05-20 20:57:52 +02:00
|
|
|
else if (cmd->err)
|
|
|
|
close(cmd->err);
|
2015-10-24 14:11:27 +02:00
|
|
|
child_process_clear(cmd);
|
run_command: report system call errors instead of returning error codes
The motivation for this change is that system call failures are serious
errors that should be reported to the user, but only few callers took the
burden to decode the error codes that the functions returned into error
messages.
If at all, then only an unspecific error message was given. A prominent
example is this:
$ git upload-pack . | :
fatal: unable to run 'git-upload-pack'
In this example, git-upload-pack, the external command invoked through the
git wrapper, dies due to SIGPIPE, but the git wrapper does not bother to
report the real cause. In fact, this very error message is copied to the
syslog if git-daemon's client aborts the connection early.
With this change, system call failures are reported immediately after the
failure and only a generic failure code is returned to the caller. In the
above example the error is now to the point:
$ git upload-pack . | :
error: git-upload-pack died of signal
Note that there is no error report if the invoked program terminated with
a non-zero exit code, because it is reasonable to expect that the invoked
program has already reported an error. (But many run_command call sites
nevertheless write a generic error message.)
There was one special return code that was used to identify the case where
run_command failed because the requested program could not be exec'd. This
special case is now treated like a system call failure with errno set to
ENOENT. No error is reported in this case, because the call site in git.c
expects this as a normal result. Therefore, the callers that carefully
decoded the return value still check for this condition.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-04 21:26:40 +02:00
|
|
|
errno = failed_errno;
|
|
|
|
return -1;
|
2007-12-07 22:08:59 +01:00
|
|
|
}
|
2007-03-10 09:28:08 +01:00
|
|
|
|
|
|
|
if (need_in)
|
|
|
|
close(fdin[0]);
|
|
|
|
else if (cmd->in)
|
|
|
|
close(cmd->in);
|
|
|
|
|
2007-03-12 19:37:45 +01:00
|
|
|
if (need_out)
|
|
|
|
close(fdout[1]);
|
2008-02-21 23:42:56 +01:00
|
|
|
else if (cmd->out)
|
2007-03-12 19:37:45 +01:00
|
|
|
close(cmd->out);
|
|
|
|
|
2007-10-19 21:47:58 +02:00
|
|
|
if (need_err)
|
|
|
|
close(fderr[1]);
|
2010-02-05 21:57:37 +01:00
|
|
|
else if (cmd->err)
|
|
|
|
close(cmd->err);
|
2007-10-19 21:47:58 +02:00
|
|
|
|
2007-03-10 09:28:05 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-10-19 21:48:00 +02:00
|
|
|
int finish_command(struct child_process *cmd)
|
|
|
|
{
|
pager: don't use unsafe functions in signal handlers
Since the commit a3da8821208d (pager: do wait_for_pager on signal
death), we call wait_for_pager() in the pager's signal handler. The
recent bug report revealed that this causes a deadlock in glibc at
aborting "git log" [*1*]. When this happens, git process is left
unterminated, and it can't be killed by SIGTERM but only by SIGKILL.
The problem is that wait_for_pager() function does more than waiting
for pager process's termination, but it does cleanups and printing
errors. Unfortunately, the functions that may be used in a signal
handler are very limited [*2*]. Particularly, malloc(), free() and the
variants can't be used in a signal handler because they take a mutex
internally in glibc. This was the cause of the deadlock above. Other
than the direct calls of malloc/free, many functions calling
malloc/free can't be used. strerror() is such one, either.
Also the usage of fflush() and printf() in a signal handler is bad,
although it seems working so far. In a safer side, we should avoid
them, too.
This patch tries to reduce the calls of such functions in signal
handlers. wait_for_signal() takes a flag and avoids the unsafe
calls. Also, finish_command_in_signal() is introduced for the
same reason. There the free() calls are removed, and only waits for
the children without whining at errors.
[*1*] https://bugzilla.opensuse.org/show_bug.cgi?id=942297
[*2*] http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-04 11:35:57 +02:00
|
|
|
int ret = wait_or_whine(cmd->pid, cmd->argv[0], 0);
|
2015-10-24 14:11:27 +02:00
|
|
|
child_process_clear(cmd);
|
2014-05-15 10:33:26 +02:00
|
|
|
return ret;
|
2007-10-19 21:48:00 +02:00
|
|
|
}
|
|
|
|
|
pager: don't use unsafe functions in signal handlers
Since the commit a3da8821208d (pager: do wait_for_pager on signal
death), we call wait_for_pager() in the pager's signal handler. The
recent bug report revealed that this causes a deadlock in glibc at
aborting "git log" [*1*]. When this happens, git process is left
unterminated, and it can't be killed by SIGTERM but only by SIGKILL.
The problem is that wait_for_pager() function does more than waiting
for pager process's termination, but it does cleanups and printing
errors. Unfortunately, the functions that may be used in a signal
handler are very limited [*2*]. Particularly, malloc(), free() and the
variants can't be used in a signal handler because they take a mutex
internally in glibc. This was the cause of the deadlock above. Other
than the direct calls of malloc/free, many functions calling
malloc/free can't be used. strerror() is one such function, too.
Also, using fflush() and printf() in a signal handler is bad,
although it seems to work so far. To be on the safe side, we should avoid
them, too.
This patch tries to reduce the calls of such functions in signal
handlers. wait_or_whine() takes a flag and avoids the unsafe
calls. Also, finish_command_in_signal() is introduced for the
same reason. There the free() calls are removed, and it only waits for
the children without whining at errors.
[*1*] https://bugzilla.opensuse.org/show_bug.cgi?id=942297
[*2*] http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-04 11:35:57 +02:00
|
|
|
int finish_command_in_signal(struct child_process *cmd)
|
|
|
|
{
|
|
|
|
return wait_or_whine(cmd->pid, cmd->argv[0], 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-03-10 09:28:05 +01:00
|
|
|
int run_command(struct child_process *cmd)
|
|
|
|
{
|
2015-03-23 04:54:05 +01:00
|
|
|
int code;
|
|
|
|
|
|
|
|
if (cmd->out < 0 || cmd->err < 0)
|
|
|
|
die("BUG: run_command with a pipe can cause deadlock");
|
|
|
|
|
|
|
|
code = start_command(cmd);
|
2007-03-10 09:28:05 +01:00
|
|
|
if (code)
|
|
|
|
return code;
|
|
|
|
return finish_command(cmd);
|
|
|
|
}
|
|
|
|
|
2007-03-10 09:28:00 +01:00
|
|
|
/*
 * Convenience wrapper around run_command_v_opt_cd_env() that passes
 * NULL for both the directory and the environment arguments.
 */
int run_command_v_opt(const char **argv, int opt)
{
	const char *dir = NULL;
	const char *const *env = NULL;

	return run_command_v_opt_cd_env(argv, opt, dir, env);
}
|
|
|
|
|
2007-05-22 23:48:47 +02:00
|
|
|
int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env)
|
|
|
|
{
|
2014-08-19 21:11:43 +02:00
|
|
|
struct child_process cmd = CHILD_PROCESS_INIT;
|
|
|
|
cmd.argv = argv;
|
|
|
|
cmd.no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0;
|
|
|
|
cmd.git_cmd = opt & RUN_GIT_CMD ? 1 : 0;
|
|
|
|
cmd.stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0;
|
|
|
|
cmd.silent_exec_failure = opt & RUN_SILENT_EXEC_FAILURE ? 1 : 0;
|
|
|
|
cmd.use_shell = opt & RUN_USING_SHELL ? 1 : 0;
|
|
|
|
cmd.clean_on_exit = opt & RUN_CLEAN_ON_EXIT ? 1 : 0;
|
2007-05-22 23:48:47 +02:00
|
|
|
cmd.dir = dir;
|
|
|
|
cmd.env = env;
|
|
|
|
return run_command(&cmd);
|
|
|
|
}
|
2007-10-19 21:48:00 +02:00
|
|
|
|
2010-03-09 21:00:36 +01:00
|
|
|
#ifndef NO_PTHREADS
|
Dying in an async procedure should only exit the thread, not the process.
Async procedures are intended as helpers that perform a very restricted
task, and the caller usually has to manage them in a larger context.
Conceptually, the async procedure is not concerned with the "bigger
picture" in whose context it is run. When it dies, it is not supposed
to destroy this "bigger picture", but rather only its own limited view
of the world. On POSIX, the async procedure is run in its own process,
and exiting this process naturally had only these limited effects.
On Windows (or when ASYNC_AS_THREAD is set), calling die() exited the
whole process, destroying the caller (the "big picture") as well.
This fixes it to exit only the thread.
Without ASYNC_AS_THREAD, one particular effect of exiting the async
procedure process is that it automatically closes file descriptors, most
notably the writable end of the pipe that the async procedure writes to.
The async API already requires that the async procedure closes the pipe
ends when it exits normally. But for calls to die() no requirements are
imposed. In the non-threaded case the pipe ends are closed implicitly
by the exiting process, but in the threaded case, the die routine must
take care of closing them.
Now t5530-upload-pack-error.sh passes on Windows.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-03-06 16:40:43 +01:00
|
|
|
/* Thread that made the first start_async() call; in_async() compares against it. */
static pthread_t main_thread;
|
|
|
|
/* Non-zero once start_async() has recorded main_thread and created the keys. */
static int main_thread_set;
|
|
|
|
/* Per-thread slot holding the struct async of the running async thread (set in run_thread()). */
static pthread_key_t async_key;
|
run-command: use thread-aware die_is_recursing routine
If we die from an async thread, we do not actually exit the
program, but just kill the thread. This confuses the static
counter in usage.c's default die_is_recursing function; it
updates the counter once for the thread death, and then when
the main program calls die() itself, it erroneously thinks
we are recursing. The end result is that we print "recursion
detected in die handler" instead of the real error in such a
case (the easiest way to trigger this is having a remote
connection hang up while running a sideband demultiplexer).
This patch solves it by using a per-thread counter when the
async_die function is installed; we detect recursion in each
thread (including the main one), but they do not step on
each other's toes.
Other threaded code does not need to worry about this, as
they do not install specialized die handlers; they just let
a die() from a sub-thread take down the whole program.
Since we are overriding the default recursion-check
function, there is an interesting corner case that is not a
problem, but bears some explanation. Imagine the main thread
calls die(), and then in the die_routine starts an async
call. We will switch to using thread-local storage, which
starts at 0, for the main thread's counter, even though
the original counter was actually at 1. That's OK, though,
for two reasons:
1. It would miss only the first level of recursion, and
would still find recursive failures inside the async
helper.
2. We do not currently and are not likely to start doing
anything as heavyweight as starting an async routine
from within a die routine or helper function.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-16 21:50:07 +02:00
|
|
|
/* Per-thread marker used by async_die_is_recursing(); non-NULL after the first call in a thread. */
static pthread_key_t async_die_counter;
|
Dying in an async procedure should only exit the thread, not the process.
Async procedures are intended as helpers that perform a very restricted
task, and the caller usually has to manage them in a larger context.
Conceptually, the async procedure is not concerned with the "bigger
picture" in whose context it is run. When it dies, it is not supposed
to destroy this "bigger picture", but rather only its own limited view
of the world. On POSIX, the async procedure is run in its own process,
and exiting this process naturally had only these limited effects.
On Windows (or when ASYNC_AS_THREAD is set), calling die() exited the
whole process, destroying the caller (the "big picture") as well.
This fixes it to exit only the thread.
Without ASYNC_AS_THREAD, one particular effect of exiting the async
procedure process is that it automatically closes file descriptors, most
notably the writable end of the pipe that the async procedure writes to.
The async API already requires that the async procedure closes the pipe
ends when it exits normally. But for calls to die() no requirements are
imposed. In the non-threaded case the pipe ends are closed implicitly
by the exiting process, but in the threaded case, the die routine must
take care of closing them.
Now t5530-upload-pack-error.sh passes on Windows.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-03-06 16:40:43 +01:00
|
|
|
|
2010-03-06 16:40:42 +01:00
|
|
|
static void *run_thread(void *data)
|
2007-12-08 22:19:14 +01:00
|
|
|
{
|
|
|
|
struct async *async = data;
|
2010-03-09 21:00:36 +01:00
|
|
|
intptr_t ret;
|
Dying in an async procedure should only exit the thread, not the process.
Async procedures are intended as helpers that perform a very restricted
task, and the caller usually has to manage them in a larger context.
Conceptually, the async procedure is not concerned with the "bigger
picture" in whose context it is run. When it dies, it is not supposed
to destroy this "bigger picture", but rather only its own limit view
of the world. On POSIX, the async procedure is run in its own process,
and exiting this process naturally had only these limited effects.
On Windows (or when ASYNC_AS_THREAD is set), calling die() exited the
whole process, destroying the caller (the "big picture") as well.
This fixes it to exit only the thread.
Without ASYNC_AS_THREAD, one particular effect of exiting the async
procedure process is that it automatically closes file descriptors, most
notably the writable end of the pipe that the async procedure writes to.
The async API already requires that the async procedure closes the pipe
ends when it exits normally. But for calls to die() no requirements are
imposed. In the non-threaded case the pipe ends are closed implicitly
by the exiting process, but in the threaded case, the die routine must
take care of closing them.
Now t5530-upload-pack-error.sh passes on Windows.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-03-06 16:40:43 +01:00
|
|
|
|
|
|
|
pthread_setspecific(async_key, async);
|
2010-03-09 21:00:36 +01:00
|
|
|
ret = async->proc(async->proc_in, async->proc_out, async->data);
|
2010-03-06 16:40:42 +01:00
|
|
|
return (void *)ret;
|
2007-12-08 22:19:14 +01:00
|
|
|
}
|
Dying in an async procedure should only exit the thread, not the process.
Async procedures are intended as helpers that perform a very restricted
task, and the caller usually has to manage them in a larger context.
Conceptually, the async procedure is not concerned with the "bigger
picture" in whose context it is run. When it dies, it is not supposed
to destroy this "bigger picture", but rather only its own limited view
of the world. On POSIX, the async procedure is run in its own process,
and exiting this process naturally had only these limited effects.
On Windows (or when ASYNC_AS_THREAD is set), calling die() exited the
whole process, destroying the caller (the "big picture") as well.
This fixes it to exit only the thread.
Without ASYNC_AS_THREAD, one particular effect of exiting the async
procedure process is that it automatically closes file descriptors, most
notably the writable end of the pipe that the async procedure writes to.
The async API already requires that the async procedure closes the pipe
ends when it exits normally. But for calls to die() no requirements are
imposed. In the non-threaded case the pipe ends are closed implicitly
by the exiting process, but in the threaded case, the die routine must
take care of closing them.
Now t5530-upload-pack-error.sh passes on Windows.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-03-06 16:40:43 +01:00
|
|
|
|
|
|
|
static NORETURN void die_async(const char *err, va_list params)
|
|
|
|
{
|
|
|
|
vreportf("fatal: ", err, params);
|
|
|
|
|
2015-09-01 22:22:43 +02:00
|
|
|
if (in_async()) {
|
Dying in an async procedure should only exit the thread, not the process.
Async procedures are intended as helpers that perform a very restricted
task, and the caller usually has to manage them in a larger context.
Conceptually, the async procedure is not concerned with the "bigger
picture" in whose context it is run. When it dies, it is not supposed
to destroy this "bigger picture", but rather only its own limit view
of the world. On POSIX, the async procedure is run in its own process,
and exiting this process naturally had only these limited effects.
On Windows (or when ASYNC_AS_THREAD is set), calling die() exited the
whole process, destroying the caller (the "big picture") as well.
This fixes it to exit only the thread.
Without ASYNC_AS_THREAD, one particular effect of exiting the async
procedure process is that it automatically closes file descriptors, most
notably the writable end of the pipe that the async procedure writes to.
The async API already requires that the async procedure closes the pipe
ends when it exits normally. But for calls to die() no requirements are
imposed. In the non-threaded case the pipe ends are closed implicitly
by the exiting process, but in the threaded case, the die routine must
take care of closing them.
Now t5530-upload-pack-error.sh passes on Windows.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-03-06 16:40:43 +01:00
|
|
|
struct async *async = pthread_getspecific(async_key);
|
|
|
|
if (async->proc_in >= 0)
|
|
|
|
close(async->proc_in);
|
|
|
|
if (async->proc_out >= 0)
|
|
|
|
close(async->proc_out);
|
|
|
|
pthread_exit((void *)128);
|
|
|
|
}
|
|
|
|
|
|
|
|
exit(128);
|
2007-12-08 22:19:14 +01:00
|
|
|
}
|
run-command: use thread-aware die_is_recursing routine
If we die from an async thread, we do not actually exit the
program, but just kill the thread. This confuses the static
counter in usage.c's default die_is_recursing function; it
updates the counter once for the thread death, and then when
the main program calls die() itself, it erroneously thinks
we are recursing. The end result is that we print "recursion
detected in die handler" instead of the real error in such a
case (the easiest way to trigger this is having a remote
connection hang up while running a sideband demultiplexer).
This patch solves it by using a per-thread counter when the
async_die function is installed; we detect recursion in each
thread (including the main one), but they do not step on
each other's toes.
Other threaded code does not need to worry about this, as
they do not install specialized die handlers; they just let
a die() from a sub-thread take down the whole program.
Since we are overriding the default recursion-check
function, there is an interesting corner case that is not a
problem, but bears some explanation. Imagine the main thread
calls die(), and then in the die_routine starts an async
call. We will switch to using thread-local storage, which
starts at 0, for the main thread's counter, even though
the original counter was actually at 1. That's OK, though,
for two reasons:
1. It would miss only the first level of recursion, and
would still find recursive failures inside the async
helper.
2. We do not currently and are not likely to start doing
anything as heavyweight as starting an async routine
from within a die routine or helper function.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-16 21:50:07 +02:00
|
|
|
|
|
|
|
static int async_die_is_recursing(void)
|
|
|
|
{
|
|
|
|
void *ret = pthread_getspecific(async_die_counter);
|
|
|
|
pthread_setspecific(async_die_counter, (void *)1);
|
|
|
|
return ret != NULL;
|
|
|
|
}
|
|
|
|
|
2015-09-01 22:22:43 +02:00
|
|
|
int in_async(void)
|
|
|
|
{
|
|
|
|
if (!main_thread_set)
|
|
|
|
return 0; /* no asyncs started yet */
|
|
|
|
return !pthread_equal(main_thread, pthread_self());
|
|
|
|
}
|
|
|
|
|
2014-10-18 14:31:15 +02:00
|
|
|
#else
|
|
|
|
|
|
|
|
/*
 * Handlers registered through git_atexit().  git_atexit_dispatch()
 * runs them in reverse registration order; git_atexit_clear() frees
 * and zeroes the list.
 */
static struct {
	void (**handlers)(void);	/* array of registered callbacks */
	size_t nr;			/* number of entries in use */
	size_t alloc;			/* allocation count maintained by ALLOC_GROW */
} git_atexit_hdlrs;
|
|
|
|
|
|
|
|
/* Non-zero once git_atexit_dispatch has been registered with the real atexit(). */
static int git_atexit_installed;
|
|
|
|
|
2014-11-10 22:17:00 +01:00
|
|
|
static void git_atexit_dispatch(void)
|
2014-10-18 14:31:15 +02:00
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
for (i=git_atexit_hdlrs.nr ; i ; i--)
|
|
|
|
git_atexit_hdlrs.handlers[i-1]();
|
|
|
|
}
|
|
|
|
|
2014-11-10 22:17:00 +01:00
|
|
|
static void git_atexit_clear(void)
|
2014-10-18 14:31:15 +02:00
|
|
|
{
|
|
|
|
free(git_atexit_hdlrs.handlers);
|
|
|
|
memset(&git_atexit_hdlrs, 0, sizeof(git_atexit_hdlrs));
|
|
|
|
git_atexit_installed = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#undef atexit
|
|
|
|
int git_atexit(void (*handler)(void))
|
|
|
|
{
|
|
|
|
ALLOC_GROW(git_atexit_hdlrs.handlers, git_atexit_hdlrs.nr + 1, git_atexit_hdlrs.alloc);
|
|
|
|
git_atexit_hdlrs.handlers[git_atexit_hdlrs.nr++] = handler;
|
|
|
|
if (!git_atexit_installed) {
|
|
|
|
if (atexit(&git_atexit_dispatch))
|
|
|
|
return -1;
|
|
|
|
git_atexit_installed = 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#define atexit git_atexit
|
|
|
|
|
2015-09-01 22:22:43 +02:00
|
|
|
/* Set to 1 in the forked child of start_async(); reported by in_async(). */
static int process_is_async;
|
|
|
|
int in_async(void)
|
|
|
|
{
|
|
|
|
return process_is_async;
|
|
|
|
}
|
|
|
|
|
2007-12-08 22:19:14 +01:00
|
|
|
#endif
|
|
|
|
|
2007-10-19 21:48:00 +02:00
|
|
|
int start_async(struct async *async)
|
|
|
|
{
|
2010-02-05 21:57:38 +01:00
|
|
|
int need_in, need_out;
|
|
|
|
int fdin[2], fdout[2];
|
|
|
|
int proc_in, proc_out;
|
2007-10-19 21:48:00 +02:00
|
|
|
|
2010-02-05 21:57:38 +01:00
|
|
|
need_in = async->in < 0;
|
|
|
|
if (need_in) {
|
|
|
|
if (pipe(fdin) < 0) {
|
|
|
|
if (async->out > 0)
|
|
|
|
close(async->out);
|
|
|
|
return error("cannot create pipe: %s", strerror(errno));
|
|
|
|
}
|
|
|
|
async->in = fdin[1];
|
|
|
|
}
|
|
|
|
|
|
|
|
need_out = async->out < 0;
|
|
|
|
if (need_out) {
|
|
|
|
if (pipe(fdout) < 0) {
|
|
|
|
if (need_in)
|
|
|
|
close_pair(fdin);
|
|
|
|
else if (async->in)
|
|
|
|
close(async->in);
|
|
|
|
return error("cannot create pipe: %s", strerror(errno));
|
|
|
|
}
|
|
|
|
async->out = fdout[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
if (need_in)
|
|
|
|
proc_in = fdin[0];
|
|
|
|
else if (async->in)
|
|
|
|
proc_in = async->in;
|
|
|
|
else
|
|
|
|
proc_in = -1;
|
|
|
|
|
|
|
|
if (need_out)
|
|
|
|
proc_out = fdout[1];
|
|
|
|
else if (async->out)
|
|
|
|
proc_out = async->out;
|
|
|
|
else
|
|
|
|
proc_out = -1;
|
2007-10-19 21:48:00 +02:00
|
|
|
|
2010-03-09 21:00:36 +01:00
|
|
|
#ifdef NO_PTHREADS
|
2008-08-04 02:30:03 +02:00
|
|
|
/* Flush stdio before fork() to avoid cloning buffers */
|
|
|
|
fflush(NULL);
|
|
|
|
|
2007-10-19 21:48:00 +02:00
|
|
|
async->pid = fork();
|
|
|
|
if (async->pid < 0) {
|
|
|
|
error("fork (async) failed: %s", strerror(errno));
|
2010-02-05 21:57:38 +01:00
|
|
|
goto error;
|
2007-10-19 21:48:00 +02:00
|
|
|
}
|
|
|
|
if (!async->pid) {
|
2010-02-05 21:57:38 +01:00
|
|
|
if (need_in)
|
|
|
|
close(fdin[1]);
|
|
|
|
if (need_out)
|
|
|
|
close(fdout[0]);
|
2014-10-18 14:31:15 +02:00
|
|
|
git_atexit_clear();
|
2015-09-01 22:22:43 +02:00
|
|
|
process_is_async = 1;
|
2010-02-05 21:57:38 +01:00
|
|
|
exit(!!async->proc(proc_in, proc_out, async->data));
|
2007-10-19 21:48:00 +02:00
|
|
|
}
|
2010-02-05 21:57:38 +01:00
|
|
|
|
run-command: optionally kill children on exit
When we spawn a helper process, it should generally be done
and finish_command called before we exit. However, if we
exit abnormally due to an early return or a signal, the
helper may continue to run in our absence.
In the best case, this may simply be wasted CPU cycles or a
few stray messages on a terminal. But it could also mean a
process that the user thought was aborted continues to run
to completion (e.g., a push's pack-objects helper will
complete the push, even though you killed the push process).
This patch provides infrastructure for run-command to keep
track of PIDs to be killed, and clean them on signal
reception or input, just as we do with tempfiles. PIDs can
be added in two ways:
1. If NO_PTHREADS is defined, async helper processes are
automatically marked. By definition this code must be
ready to die when the parent dies, since it may be
implemented as a thread of the parent process.
2. If the run-command caller specifies the "clean_on_exit"
option. This is not the default, as there are cases
where it is OK for the child to outlive us (e.g., when
spawning a pager).
PIDs are cleared from the kill-list automatically during
wait_or_whine, which is called from finish_command and
finish_async.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Clemens Buchacher <drizzd@aon.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-07 12:42:43 +01:00
|
|
|
mark_child_for_cleanup(async->pid);
|
|
|
|
|
2010-02-05 21:57:38 +01:00
|
|
|
if (need_in)
|
|
|
|
close(fdin[0]);
|
|
|
|
else if (async->in)
|
|
|
|
close(async->in);
|
|
|
|
|
|
|
|
if (need_out)
|
|
|
|
close(fdout[1]);
|
|
|
|
else if (async->out)
|
|
|
|
close(async->out);
|
2007-12-08 22:19:14 +01:00
|
|
|
#else
|
Dying in an async procedure should only exit the thread, not the process.
Async procedures are intended as helpers that perform a very restricted
task, and the caller usually has to manage them in a larger context.
Conceptually, the async procedure is not concerned with the "bigger
picture" in whose context it is run. When it dies, it is not supposed
to destroy this "bigger picture", but rather only its own limit view
of the world. On POSIX, the async procedure is run in its own process,
and exiting this process naturally had only these limited effects.
On Windows (or when ASYNC_AS_THREAD is set), calling die() exited the
whole process, destroying the caller (the "big picture") as well.
This fixes it to exit only the thread.
Without ASYNC_AS_THREAD, one particular effect of exiting the async
procedure process is that it automatically closes file descriptors, most
notably the writable end of the pipe that the async procedure writes to.
The async API already requires that the async procedure closes the pipe
ends when it exits normally. But for calls to die() no requirements are
imposed. In the non-threaded case the pipe ends are closed implicitly
by the exiting process, but in the threaded case, the die routine must
take care of closing them.
Now t5530-upload-pack-error.sh passes on Windows.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-03-06 16:40:43 +01:00
|
|
|
if (!main_thread_set) {
|
|
|
|
/*
|
|
|
|
* We assume that the first time that start_async is called
|
|
|
|
* it is from the main thread.
|
|
|
|
*/
|
|
|
|
main_thread_set = 1;
|
|
|
|
main_thread = pthread_self();
|
|
|
|
pthread_key_create(&async_key, NULL);
|
run-command: use thread-aware die_is_recursing routine
If we die from an async thread, we do not actually exit the
program, but just kill the thread. This confuses the static
counter in usage.c's default die_is_recursing function; it
updates the counter once for the thread death, and then when
the main program calls die() itself, it erroneously thinks
we are recursing. The end result is that we print "recursion
detected in die handler" instead of the real error in such a
case (the easiest way to trigger this is having a remote
connection hang up while running a sideband demultiplexer).
This patch solves it by using a per-thread counter when the
async_die function is installed; we detect recursion in each
thread (including the main one), but they do not step on
each other's toes.
Other threaded code does not need to worry about this, as
they do not install specialized die handlers; they just let
a die() from a sub-thread take down the whole program.
Since we are overriding the default recursion-check
function, there is an interesting corner case that is not a
problem, but bears some explanation. Imagine the main thread
calls die(), and then in the die_routine starts an async
call. We will switch to using thread-local storage, which
starts at 0, for the main thread's counter, even though
the original counter was actually at 1. That's OK, though,
for two reasons:
1. It would miss only the first level of recursion, and
would still find recursive failures inside the async
helper.
2. We do not currently and are not likely to start doing
anything as heavyweight as starting an async routine
from within a die routine or helper function.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-16 21:50:07 +02:00
|
|
|
pthread_key_create(&async_die_counter, NULL);
|
Dying in an async procedure should only exit the thread, not the process.
Async procedures are intended as helpers that perform a very restricted
task, and the caller usually has to manage them in a larger context.
Conceptually, the async procedure is not concerned with the "bigger
picture" in whose context it is run. When it dies, it is not supposed
to destroy this "bigger picture", but rather only its own limit view
of the world. On POSIX, the async procedure is run in its own process,
and exiting this process naturally had only these limited effects.
On Windows (or when ASYNC_AS_THREAD is set), calling die() exited the
whole process, destroying the caller (the "big picture") as well.
This fixes it to exit only the thread.
Without ASYNC_AS_THREAD, one particular effect of exiting the async
procedure process is that it automatically closes file descriptors, most
notably the writable end of the pipe that the async procedure writes to.
The async API already requires that the async procedure closes the pipe
ends when it exits normally. But for calls to die() no requirements are
imposed. In the non-threaded case the pipe ends are closed implicitly
by the exiting process, but in the threaded case, the die routine must
take care of closing them.
Now t5530-upload-pack-error.sh passes on Windows.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-03-06 16:40:43 +01:00
|
|
|
set_die_routine(die_async);
|
run-command: use thread-aware die_is_recursing routine
If we die from an async thread, we do not actually exit the
program, but just kill the thread. This confuses the static
counter in usage.c's default die_is_recursing function; it
updates the counter once for the thread death, and then when
the main program calls die() itself, it erroneously thinks
we are recursing. The end result is that we print "recursion
detected in die handler" instead of the real error in such a
case (the easiest way to trigger this is having a remote
connection hang up while running a sideband demultiplexer).
This patch solves it by using a per-thread counter when the
async_die function is installed; we detect recursion in each
thread (including the main one), but they do not step on
each other's toes.
Other threaded code does not need to worry about this, as
they do not install specialized die handlers; they just let
a die() from a sub-thread take down the whole program.
Since we are overriding the default recursion-check
function, there is an interesting corner case that is not a
problem, but bears some explanation. Imagine the main thread
calls die(), and then in the die_routine starts an async
call. We will switch to using thread-local storage, which
starts at 0, for the main thread's counter, even though
the original counter was actually at 1. That's OK, though,
for two reasons:
1. It would miss only the first level of recursion, and
would still find recursive failures inside the async
helper.
2. We do not currently and are not likely to start doing
anything as heavyweight as starting an async routine
from within a die routine or helper function.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-16 21:50:07 +02:00
|
|
|
set_die_is_recursing_routine(async_die_is_recursing);
|
Dying in an async procedure should only exit the thread, not the process.
Async procedures are intended as helpers that perform a very restricted
task, and the caller usually has to manage them in a larger context.
Conceptually, the async procedure is not concerned with the "bigger
picture" in whose context it is run. When it dies, it is not supposed
to destroy this "bigger picture", but rather only its own limit view
of the world. On POSIX, the async procedure is run in its own process,
and exiting this process naturally had only these limited effects.
On Windows (or when ASYNC_AS_THREAD is set), calling die() exited the
whole process, destroying the caller (the "big picture") as well.
This fixes it to exit only the thread.
Without ASYNC_AS_THREAD, one particular effect of exiting the async
procedure process is that it automatically closes file descriptors, most
notably the writable end of the pipe that the async procedure writes to.
The async API already requires that the async procedure closes the pipe
ends when it exits normally. But for calls to die() no requirements are
imposed. In the non-threaded case the pipe ends are closed implicitly
by the exiting process, but in the threaded case, the die routine must
take care of closing them.
Now t5530-upload-pack-error.sh passes on Windows.
Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-03-06 16:40:43 +01:00
|
|
|
}
|
|
|
|
|
2010-03-06 16:40:42 +01:00
|
|
|
if (proc_in >= 0)
|
|
|
|
set_cloexec(proc_in);
|
|
|
|
if (proc_out >= 0)
|
|
|
|
set_cloexec(proc_out);
|
2010-02-05 21:57:38 +01:00
|
|
|
async->proc_in = proc_in;
|
|
|
|
async->proc_out = proc_out;
|
2010-03-06 16:40:42 +01:00
|
|
|
{
|
|
|
|
int err = pthread_create(&async->tid, NULL, run_thread, async);
|
|
|
|
if (err) {
|
|
|
|
error("cannot create thread: %s", strerror(err));
|
|
|
|
goto error;
|
|
|
|
}
|
2007-12-08 22:19:14 +01:00
|
|
|
}
|
|
|
|
#endif
|
2007-10-19 21:48:00 +02:00
|
|
|
return 0;
|
2010-02-05 21:57:38 +01:00
|
|
|
|
|
|
|
error:
|
|
|
|
if (need_in)
|
|
|
|
close_pair(fdin);
|
|
|
|
else if (async->in)
|
|
|
|
close(async->in);
|
|
|
|
|
|
|
|
if (need_out)
|
|
|
|
close_pair(fdout);
|
|
|
|
else if (async->out)
|
|
|
|
close(async->out);
|
|
|
|
return -1;
|
2007-10-19 21:48:00 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int finish_async(struct async *async)
|
|
|
|
{
|
2010-03-09 21:00:36 +01:00
|
|
|
#ifdef NO_PTHREADS
|
pager: don't use unsafe functions in signal handlers
Since the commit a3da8821208d (pager: do wait_for_pager on signal
death), we call wait_for_pager() in the pager's signal handler. The
recent bug report revealed that this causes a deadlock in glibc at
aborting "git log" [*1*]. When this happens, git process is left
unterminated, and it can't be killed by SIGTERM but only by SIGKILL.
The problem is that wait_for_pager() function does more than waiting
for pager process's termination, but it does cleanups and printing
errors. Unfortunately, the functions that may be used in a signal
handler are very limited [*2*]. Particularly, malloc(), free() and the
variants can't be used in a signal handler because they take a mutex
internally in glibc. This was the cause of the deadlock above. Other
than the direct calls of malloc/free, many functions calling
malloc/free can't be used. strerror() is such one, either.
Also the usage of fflush() and printf() in a signal handler is bad,
although it seems working so far. In a safer side, we should avoid
them, too.
This patch tries to reduce the calls of such functions in signal
handlers. wait_for_signal() takes a flag and avoids the unsafe
calls. Also, finish_command_in_signal() is introduced for the
same reason. There the free() calls are removed, and only waits for
the children without whining at errors.
[*1*] https://bugzilla.opensuse.org/show_bug.cgi?id=942297
[*2*] http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-04 11:35:57 +02:00
|
|
|
return wait_or_whine(async->pid, "child process", 0);
|
2007-12-08 22:19:14 +01:00
|
|
|
#else
|
2010-03-06 16:40:42 +01:00
|
|
|
void *ret = (void *)(intptr_t)(-1);
|
|
|
|
|
|
|
|
if (pthread_join(async->tid, &ret))
|
|
|
|
error("pthread_join failed");
|
|
|
|
return (int)(intptr_t)ret;
|
2007-12-08 22:19:14 +01:00
|
|
|
#endif
|
2007-10-19 21:48:00 +02:00
|
|
|
}
|
2009-01-16 20:09:59 +01:00
|
|
|
|
2014-11-30 09:24:27 +01:00
|
|
|
const char *find_hook(const char *name)
|
2013-01-13 06:17:02 +01:00
|
|
|
{
|
2015-08-10 11:37:45 +02:00
|
|
|
static struct strbuf path = STRBUF_INIT;
|
2013-01-13 06:17:02 +01:00
|
|
|
|
2015-08-10 11:37:45 +02:00
|
|
|
strbuf_reset(&path);
|
|
|
|
strbuf_git_path(&path, "hooks/%s", name);
|
|
|
|
if (access(path.buf, X_OK) < 0)
|
|
|
|
return NULL;
|
|
|
|
return path.buf;
|
2013-01-13 06:17:02 +01:00
|
|
|
}
|
|
|
|
|
2014-03-18 11:00:53 +01:00
|
|
|
int run_hook_ve(const char *const *env, const char *name, va_list args)
|
2009-01-16 20:09:59 +01:00
|
|
|
{
|
2014-08-19 21:09:35 +02:00
|
|
|
struct child_process hook = CHILD_PROCESS_INIT;
|
2014-03-18 11:00:53 +01:00
|
|
|
const char *p;
|
2009-01-16 20:09:59 +01:00
|
|
|
|
2013-01-13 06:17:02 +01:00
|
|
|
p = find_hook(name);
|
|
|
|
if (!p)
|
2009-01-16 20:10:01 +01:00
|
|
|
return 0;
|
|
|
|
|
2014-07-16 23:57:47 +02:00
|
|
|
argv_array_push(&hook.args, p);
|
|
|
|
while ((p = va_arg(args, const char *)))
|
|
|
|
argv_array_push(&hook.args, p);
|
2014-03-18 11:00:53 +01:00
|
|
|
hook.env = env;
|
2009-01-16 20:09:59 +01:00
|
|
|
hook.no_stdin = 1;
|
|
|
|
hook.stdout_to_stderr = 1;
|
|
|
|
|
2014-07-16 23:57:47 +02:00
|
|
|
return run_command(&hook);
|
2009-01-16 20:09:59 +01:00
|
|
|
}
|
2014-03-18 11:00:53 +01:00
|
|
|
|
|
|
|
/*
 * Variadic front end for run_hook_ve(): the arguments after name are
 * forwarded as a va_list (run_hook_ve() reads them up to a NULL).
 * Returns run_hook_ve()'s result.
 */
int run_hook_le(const char *const *env, const char *name, ...)
{
	va_list ap;
	int status;

	va_start(ap, name);
	status = run_hook_ve(env, name, ap);
	va_end(ap);

	return status;
}
|
run-command: introduce capture_command helper
Something as simple as reading the stdout from a command
turns out to be rather hard to do right. Doing:
cmd.out = -1;
run_command(&cmd);
strbuf_read(&buf, cmd.out, 0);
can result in deadlock if the child process produces a large
amount of output. What happens is:
1. The parent spawns the child with its stdout connected
to a pipe, of which the parent is the sole reader.
2. The parent calls wait(), blocking until the child exits.
3. The child writes to stdout. If it writes more data than
the OS pipe buffer can hold, the write() call will
block.
This is a deadlock; the parent is waiting for the child to
exit, and the child is waiting for the parent to call
read().
So we might try instead:
start_command(&cmd);
strbuf_read(&buf, cmd.out, 0);
finish_command(&cmd);
But that is not quite right either. We are examining cmd.out
and running finish_command whether start_command succeeded
or not, which is wrong. Moreover, these snippets do not do
any error handling. If our read() fails, we must make sure
to still call finish_command (to reap the child process).
And both snippets failed to close the cmd.out descriptor,
which they must do (provided start_command succeeded).
Let's introduce a run-command helper that can make this a
bit simpler for callers to get right.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-23 04:53:43 +01:00
|
|
|
|
|
|
|
int capture_command(struct child_process *cmd, struct strbuf *buf, size_t hint)
|
|
|
|
{
|
|
|
|
cmd->out = -1;
|
|
|
|
if (start_command(cmd) < 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (strbuf_read(buf, cmd->out, hint) < 0) {
|
|
|
|
close(cmd->out);
|
|
|
|
finish_command(cmd); /* throw away exit code */
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
close(cmd->out);
|
|
|
|
return finish_command(cmd);
|
|
|
|
}
|
run-command: add an asynchronous parallel child processor
This allows running external commands in parallel with ordered output
on stderr.
If we run external commands in parallel we cannot pipe the output directly
to our stdout/err as it would get mixed up. So each process's output will
flow through a pipe, which we buffer. One subprocess can be directly
piped to our stdout/err for low-latency feedback to the user.
Example:
Let's assume we have 5 submodules A,B,C,D,E and each fetch takes a
different amount of time as the different submodules vary in size, then
the output of fetches in sequential order might look like this:
time -->
output: |---A---| |-B-| |-------C-------| |-D-| |-E-|
When we schedule these submodules into maximal two parallel processes,
a schedule and sample output over time may look like this:
process 1: |---A---| |-D-| |-E-|
process 2: |-B-| |-------C-------|
output: |---A---|B|---C-------|DE
So A will be perceived as it would run normally in the single child
version. As B has finished by the time A is done, we can dump its whole
progress buffer on stderr, such that it looks like it finished in no
time. Once that is done, C is determined to be the visible child and
its progress will be reported in real time.
So this way of output is really good for human consumption, as it only
changes the timing, not the actual output.
For machine consumption the output needs to be prepared in the tasks,
by either having a prefix per line or per block to indicate which task's
output is displayed, because the output order may not follow the
original sequential ordering:
|----A----| |--B--| |-C-|
will be scheduled to be all parallel:
process 1: |----A----|
process 2: |--B--|
process 3: |-C-|
output: |----A----|CB
This happens because C finished before B did, so it will be queued for
output before B.
To detect when a child has finished executing, we check interleaved
with other actions (such as checking the liveliness of children or
starting new processes) whether the stderr pipe still exists. Once a
child closed its stderr stream, we assume it is terminating very soon,
and use `finish_command()` from the single external process execution
interface to collect the exit status.
By maintaining the strong assumption of stderr being open until the
very end of a child process, we can avoid other hassle such as an
implementation using `waitpid(-1)`, which is not implemented in Windows.
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-16 01:04:10 +01:00
|
|
|
|
|
|
|
/*
 * State of one slot in parallel_processes.children.
 *
 * GIT_CP_FREE must be zero: the slots are allocated with xcalloc() and are
 * never explicitly set to GIT_CP_FREE before their first use.
 */
enum child_state {
	/* No task is assigned to the slot; it may be handed a new one. */
	GIT_CP_FREE = 0,
	/* A child was started and its stderr pipe is still being read. */
	GIT_CP_WORKING,
	/* The child's stderr hit end-of-file; it still has to be reaped. */
	GIT_CP_WAIT_CLEANUP,
};
|
|
|
|
|
|
|
|
struct parallel_processes {
|
|
|
|
void *data;
|
|
|
|
|
|
|
|
int max_processes;
|
|
|
|
int nr_processes;
|
|
|
|
|
|
|
|
get_next_task_fn get_next_task;
|
|
|
|
start_failure_fn start_failure;
|
|
|
|
task_finished_fn task_finished;
|
|
|
|
|
|
|
|
struct {
|
|
|
|
enum child_state state;
|
|
|
|
struct child_process process;
|
|
|
|
struct strbuf err;
|
|
|
|
void *data;
|
|
|
|
} *children;
|
|
|
|
/*
|
|
|
|
* The struct pollfd is logically part of *children,
|
|
|
|
* but the system call expects it as its own array.
|
|
|
|
*/
|
|
|
|
struct pollfd *pfd;
|
|
|
|
|
|
|
|
unsigned shutdown : 1;
|
|
|
|
|
|
|
|
int output_owner;
|
|
|
|
struct strbuf buffered_output; /* of finished children */
|
|
|
|
};
|
|
|
|
|
|
|
|
static int default_start_failure(struct child_process *cp,
|
|
|
|
struct strbuf *err,
|
|
|
|
void *pp_cb,
|
|
|
|
void *pp_task_cb)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
strbuf_addstr(err, "Starting a child failed:");
|
|
|
|
for (i = 0; cp->argv[i]; i++)
|
|
|
|
strbuf_addf(err, " %s", cp->argv[i]);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int default_task_finished(int result,
|
|
|
|
struct child_process *cp,
|
|
|
|
struct strbuf *err,
|
|
|
|
void *pp_cb,
|
|
|
|
void *pp_task_cb)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!result)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
strbuf_addf(err, "A child failed with return code %d:", result);
|
|
|
|
for (i = 0; cp->argv[i]; i++)
|
|
|
|
strbuf_addf(err, " %s", cp->argv[i]);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void kill_children(struct parallel_processes *pp, int signo)
|
|
|
|
{
|
|
|
|
int i, n = pp->max_processes;
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++)
|
|
|
|
if (pp->children[i].state == GIT_CP_WORKING)
|
|
|
|
kill(pp->children[i].process.pid, signo);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * The parallel_processes instance the signal handler below operates on;
 * set by pp_init().
 */
static struct parallel_processes *pp_for_signal;

/*
 * Signal handler installed by pp_init(): forwards `signo` to the working
 * children, removes this handler from the sigchain and raises the signal
 * again so that the next handler in line gets to run.
 */
static void handle_children_on_signal(int signo)
{
	struct parallel_processes *current = pp_for_signal;

	kill_children(current, signo);
	sigchain_pop(signo);
	raise(signo);
}
|
|
|
|
|
|
|
|
static void pp_init(struct parallel_processes *pp,
|
|
|
|
int n,
|
|
|
|
get_next_task_fn get_next_task,
|
|
|
|
start_failure_fn start_failure,
|
|
|
|
task_finished_fn task_finished,
|
|
|
|
void *data)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (n < 1)
|
|
|
|
n = online_cpus();
|
|
|
|
|
|
|
|
pp->max_processes = n;
|
|
|
|
|
|
|
|
trace_printf("run_processes_parallel: preparing to run up to %d tasks", n);
|
|
|
|
|
|
|
|
pp->data = data;
|
|
|
|
if (!get_next_task)
|
|
|
|
die("BUG: you need to specify a get_next_task function");
|
|
|
|
pp->get_next_task = get_next_task;
|
|
|
|
|
|
|
|
pp->start_failure = start_failure ? start_failure : default_start_failure;
|
|
|
|
pp->task_finished = task_finished ? task_finished : default_task_finished;
|
|
|
|
|
|
|
|
pp->nr_processes = 0;
|
|
|
|
pp->output_owner = 0;
|
|
|
|
pp->shutdown = 0;
|
|
|
|
pp->children = xcalloc(n, sizeof(*pp->children));
|
|
|
|
pp->pfd = xcalloc(n, sizeof(*pp->pfd));
|
|
|
|
strbuf_init(&pp->buffered_output, 0);
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
strbuf_init(&pp->children[i].err, 0);
|
|
|
|
child_process_init(&pp->children[i].process);
|
|
|
|
pp->pfd[i].events = POLLIN | POLLHUP;
|
|
|
|
pp->pfd[i].fd = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
pp_for_signal = pp;
|
|
|
|
sigchain_push_common(handle_children_on_signal);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void pp_cleanup(struct parallel_processes *pp)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
trace_printf("run_processes_parallel: done");
|
|
|
|
for (i = 0; i < pp->max_processes; i++) {
|
|
|
|
strbuf_release(&pp->children[i].err);
|
|
|
|
child_process_clear(&pp->children[i].process);
|
|
|
|
}
|
|
|
|
|
|
|
|
free(pp->children);
|
|
|
|
free(pp->pfd);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When get_next_task added messages to the buffer in its last
|
|
|
|
* iteration, the buffered output is non empty.
|
|
|
|
*/
|
|
|
|
fputs(pp->buffered_output.buf, stderr);
|
|
|
|
strbuf_release(&pp->buffered_output);
|
|
|
|
|
|
|
|
sigchain_pop_common();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* returns
|
|
|
|
* 0 if a new task was started.
|
|
|
|
* 1 if no new jobs was started (get_next_task ran out of work, non critical
|
|
|
|
* problem with starting a new command)
|
|
|
|
* <0 no new job was started, user wishes to shutdown early. Use negative code
|
|
|
|
* to signal the children.
|
|
|
|
*/
|
|
|
|
static int pp_start_one(struct parallel_processes *pp)
|
|
|
|
{
|
|
|
|
int i, code;
|
|
|
|
|
|
|
|
for (i = 0; i < pp->max_processes; i++)
|
|
|
|
if (pp->children[i].state == GIT_CP_FREE)
|
|
|
|
break;
|
|
|
|
if (i == pp->max_processes)
|
|
|
|
die("BUG: bookkeeping is hard");
|
|
|
|
|
|
|
|
code = pp->get_next_task(&pp->children[i].process,
|
|
|
|
&pp->children[i].err,
|
|
|
|
pp->data,
|
|
|
|
&pp->children[i].data);
|
|
|
|
if (!code) {
|
|
|
|
strbuf_addbuf(&pp->buffered_output, &pp->children[i].err);
|
|
|
|
strbuf_reset(&pp->children[i].err);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
pp->children[i].process.err = -1;
|
|
|
|
pp->children[i].process.stdout_to_stderr = 1;
|
|
|
|
pp->children[i].process.no_stdin = 1;
|
|
|
|
|
|
|
|
if (start_command(&pp->children[i].process)) {
|
|
|
|
code = pp->start_failure(&pp->children[i].process,
|
|
|
|
&pp->children[i].err,
|
|
|
|
pp->data,
|
|
|
|
&pp->children[i].data);
|
|
|
|
strbuf_addbuf(&pp->buffered_output, &pp->children[i].err);
|
|
|
|
strbuf_reset(&pp->children[i].err);
|
|
|
|
if (code)
|
|
|
|
pp->shutdown = 1;
|
|
|
|
return code;
|
|
|
|
}
|
|
|
|
|
|
|
|
pp->nr_processes++;
|
|
|
|
pp->children[i].state = GIT_CP_WORKING;
|
|
|
|
pp->pfd[i].fd = pp->children[i].process.err;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void pp_buffer_stderr(struct parallel_processes *pp, int output_timeout)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
while ((i = poll(pp->pfd, pp->max_processes, output_timeout)) < 0) {
|
|
|
|
if (errno == EINTR)
|
|
|
|
continue;
|
|
|
|
pp_cleanup(pp);
|
|
|
|
die_errno("poll");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Buffer output from all pipes. */
|
|
|
|
for (i = 0; i < pp->max_processes; i++) {
|
|
|
|
if (pp->children[i].state == GIT_CP_WORKING &&
|
|
|
|
pp->pfd[i].revents & (POLLIN | POLLHUP)) {
|
|
|
|
int n = strbuf_read_once(&pp->children[i].err,
|
|
|
|
pp->children[i].process.err, 0);
|
|
|
|
if (n == 0) {
|
|
|
|
close(pp->children[i].process.err);
|
|
|
|
pp->children[i].state = GIT_CP_WAIT_CLEANUP;
|
|
|
|
} else if (n < 0)
|
|
|
|
if (errno != EAGAIN)
|
|
|
|
die_errno("read");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void pp_output(struct parallel_processes *pp)
|
|
|
|
{
|
|
|
|
int i = pp->output_owner;
|
|
|
|
if (pp->children[i].state == GIT_CP_WORKING &&
|
|
|
|
pp->children[i].err.len) {
|
|
|
|
fputs(pp->children[i].err.buf, stderr);
|
|
|
|
strbuf_reset(&pp->children[i].err);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int pp_collect_finished(struct parallel_processes *pp)
|
|
|
|
{
|
|
|
|
int i, code;
|
|
|
|
int n = pp->max_processes;
|
|
|
|
int result = 0;
|
|
|
|
|
|
|
|
while (pp->nr_processes > 0) {
|
|
|
|
for (i = 0; i < pp->max_processes; i++)
|
|
|
|
if (pp->children[i].state == GIT_CP_WAIT_CLEANUP)
|
|
|
|
break;
|
|
|
|
if (i == pp->max_processes)
|
|
|
|
break;
|
|
|
|
|
|
|
|
code = finish_command(&pp->children[i].process);
|
|
|
|
|
|
|
|
code = pp->task_finished(code, &pp->children[i].process,
|
|
|
|
&pp->children[i].err, pp->data,
|
|
|
|
&pp->children[i].data);
|
|
|
|
|
|
|
|
if (code)
|
|
|
|
result = code;
|
|
|
|
if (code < 0)
|
|
|
|
break;
|
|
|
|
|
|
|
|
pp->nr_processes--;
|
|
|
|
pp->children[i].state = GIT_CP_FREE;
|
|
|
|
pp->pfd[i].fd = -1;
|
|
|
|
child_process_init(&pp->children[i].process);
|
|
|
|
|
|
|
|
if (i != pp->output_owner) {
|
|
|
|
strbuf_addbuf(&pp->buffered_output, &pp->children[i].err);
|
|
|
|
strbuf_reset(&pp->children[i].err);
|
|
|
|
} else {
|
|
|
|
fputs(pp->children[i].err.buf, stderr);
|
|
|
|
strbuf_reset(&pp->children[i].err);
|
|
|
|
|
|
|
|
/* Output all other finished child processes */
|
|
|
|
fputs(pp->buffered_output.buf, stderr);
|
|
|
|
strbuf_reset(&pp->buffered_output);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Pick next process to output live.
|
|
|
|
* NEEDSWORK:
|
|
|
|
* For now we pick it randomly by doing a round
|
|
|
|
* robin. Later we may want to pick the one with
|
|
|
|
* the most output or the longest or shortest
|
|
|
|
* running process time.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < n; i++)
|
|
|
|
if (pp->children[(pp->output_owner + i) % n].state == GIT_CP_WORKING)
|
|
|
|
break;
|
|
|
|
pp->output_owner = (pp->output_owner + i) % n;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
int run_processes_parallel(int n,
|
|
|
|
get_next_task_fn get_next_task,
|
|
|
|
start_failure_fn start_failure,
|
|
|
|
task_finished_fn task_finished,
|
|
|
|
void *pp_cb)
|
|
|
|
{
|
|
|
|
int i, code;
|
|
|
|
int output_timeout = 100;
|
|
|
|
int spawn_cap = 4;
|
|
|
|
struct parallel_processes pp;
|
|
|
|
|
|
|
|
pp_init(&pp, n, get_next_task, start_failure, task_finished, pp_cb);
|
|
|
|
while (1) {
|
|
|
|
for (i = 0;
|
|
|
|
i < spawn_cap && !pp.shutdown &&
|
|
|
|
pp.nr_processes < pp.max_processes;
|
|
|
|
i++) {
|
|
|
|
code = pp_start_one(&pp);
|
|
|
|
if (!code)
|
|
|
|
continue;
|
|
|
|
if (code < 0) {
|
|
|
|
pp.shutdown = 1;
|
|
|
|
kill_children(&pp, -code);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!pp.nr_processes)
|
|
|
|
break;
|
|
|
|
pp_buffer_stderr(&pp, output_timeout);
|
|
|
|
pp_output(&pp);
|
|
|
|
code = pp_collect_finished(&pp);
|
|
|
|
if (code) {
|
|
|
|
pp.shutdown = 1;
|
|
|
|
if (code < 0)
|
|
|
|
kill_children(&pp, -code);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pp_cleanup(&pp);
|
|
|
|
return 0;
|
|
|
|
}
|