git-commit-vandalism/daemon.c

1411 lines
32 KiB
C
Raw Normal View History

#include "cache.h"
#include "pkt-line.h"
#include "run-command.h"
#include "strbuf.h"
#include "string-list.h"
#ifndef HOST_NAME_MAX
#define HOST_NAME_MAX 256
#endif
#ifdef NO_INITGROUPS
#define initgroups(x, y) (0) /* nothing */
#endif
/* Non-zero: logreport() sends messages to syslog(3) instead of stderr. */
static int log_syslog;
/* Zero: loginfo() drops its messages. */
static int verbose;
/* NOTE(review): consumer is outside this section — presumably SO_REUSEADDR. */
static int reuseaddr;
/* Zero: daemon_error() replaces every message with a generic denial. */
static int informative_errors;

static const char daemon_usage[] =
"git daemon [--verbose] [--syslog] [--export-all]\n"
" [--timeout=<n>] [--init-timeout=<n>] [--max-connections=<n>]\n"
" [--strict-paths] [--base-path=<path>] [--base-path-relaxed]\n"
" [--user-path | --user-path=<path>]\n"
" [--interpolated-path=<path>]\n"
" [--reuseaddr] [--pid-file=<file>]\n"
" [--(enable|disable|allow-override|forbid-override)=<service>]\n"
" [--access-hook=<path>]\n"
" [--inetd | [--listen=<host_or_ipaddr>] [--port=<n>]\n"
" [--detach] [--user=<user> [--group=<group>]]\n"
" [<directory>...]";

/* List of acceptable pathname prefixes (NULL-terminated; see path_ok()) */
static const char **ok_paths;
/*
 * Passed to enter_repo(); when set, path_ok() also requires an exact
 * whitelist match and refuses everything if there is no whitelist.
 */
static int strict_paths;

/* If this is set, git-daemon-export-ok is not required */
static int export_all_trees;

/* Take all paths relative to this one if non-NULL */
static const char *base_path;
/* Template expanded by path_ok() when the client sent extended arguments */
static const char *interpolated_path;
/* Retry the request without base_path if the prefixed lookup fails */
static int base_path_relaxed;

/* If defined, ~user notation is allowed and the string is inserted
 * after ~user/. E.g. a request to git://host/~alice/frotz would
 * go to /home/alice/pub_git/frotz with --user-path=pub_git.
 */
static const char *user_path;

/* Timeout, and initial timeout */
static unsigned int timeout;
static unsigned int init_timeout;
/*
 * What is known about the virtual host a client asked for.
 * Set up by hostinfo_init() and freed by hostinfo_clear().
 */
struct hostinfo {
/* host part of the client's "host=" argument, after canonicalize_client() */
struct strbuf hostname;
/* filled in lazily by lookup_hostname() */
struct strbuf canon_hostname;
/* filled in lazily by lookup_hostname() */
struct strbuf ip_address;
/* port part of the client's "host=" argument, after sanitize_client() */
struct strbuf tcp_port;
/* set once lookup_hostname() has run; cleared when hostname changes */
unsigned int hostname_lookup_done:1;
/* set by parse_host_arg() when the request carried extra arguments */
unsigned int saw_extended_args:1;
};
static void lookup_hostname(struct hostinfo *hi);
static const char *get_canon_hostname(struct hostinfo *hi)
{
lookup_hostname(hi);
return hi->canon_hostname.buf;
}
static const char *get_ip_address(struct hostinfo *hi)
{
lookup_hostname(hi);
return hi->ip_address.buf;
}
/*
 * Emit one log record. With log_syslog set the message is formatted
 * into a fixed 1024-byte buffer (longer messages are truncated by
 * vsnprintf) and passed to syslog() at "priority"; otherwise it is
 * written to stderr prefixed with "[pid] " and followed by a newline.
 */
static void logreport(int priority, const char *err, va_list params)
{
	char msg[1024];

	if (!log_syslog) {
		/*
		 * Since stderr is set to buffered mode, the
		 * logging of different processes will not overlap
		 * unless they overflow the (rather big) buffers.
		 */
		fprintf(stderr, "[%"PRIuMAX"] ", (uintmax_t)getpid());
		vfprintf(stderr, err, params);
		fputc('\n', stderr);
		fflush(stderr);
		return;
	}

	vsnprintf(msg, sizeof(msg), err, params);
	syslog(priority, "%s", msg);
}
/* printf-style wrapper that always logs at LOG_ERR via logreport(). */
__attribute__((format (printf, 1, 2)))
static void logerror(const char *err, ...)
{
	va_list ap;

	va_start(ap, err);
	logreport(LOG_ERR, err, ap);
	va_end(ap);
}
/*
 * printf-style wrapper that logs at LOG_INFO via logreport(), but only
 * when the "verbose" flag is set; otherwise the call does nothing.
 */
__attribute__((format (printf, 1, 2)))
static void loginfo(const char *err, ...)
{
	va_list ap;

	if (verbose) {
		va_start(ap, err);
		logreport(LOG_INFO, err, ap);
		va_end(ap);
	}
}
/*
 * Fatal-error reporter: log the formatted message at LOG_ERR through
 * logreport(), then terminate the process with exit status 1.
 */
static void NORETURN daemon_die(const char *err, va_list params)
{
logreport(LOG_ERR, err, params);
exit(1);
}
struct expand_path_context {
const char *directory;
struct hostinfo *hostinfo;
};
/*
 * Placeholder callback used with strbuf_expand() from path_ok().
 *
 * Recognised placeholders and what is appended to "sb":
 *   H  - client-supplied host name      CH - canonical host name
 *   IP - resolved IP address            P  - client-supplied port
 *   D  - requested directory
 *
 * Returns the number of placeholder characters consumed (1 or 2), or 0
 * when "placeholder" does not start with a recognised sequence.
 */
static size_t expand_path(struct strbuf *sb, const char *placeholder, void *ctx)
{
	struct expand_path_context *context = ctx;
	struct hostinfo *hi = context->hostinfo;
	const char first = placeholder[0];

	if (first == 'H') {
		strbuf_addbuf(sb, &hi->hostname);
		return 1;
	}
	if (first == 'C' && placeholder[1] == 'H') {
		strbuf_addstr(sb, get_canon_hostname(hi));
		return 2;
	}
	if (first == 'I' && placeholder[1] == 'P') {
		strbuf_addstr(sb, get_ip_address(hi));
		return 2;
	}
	if (first == 'P') {
		strbuf_addbuf(sb, &hi->tcp_port);
		return 1;
	}
	if (first == 'D') {
		strbuf_addstr(sb, context->directory);
		return 1;
	}
	return 0;
}
static const char *path_ok(const char *directory, struct hostinfo *hi)
{
static char rpath[PATH_MAX];
static char interp_path[PATH_MAX];
size_t rlen;
const char *path;
const char *dir;
dir = directory;
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
if (daemon_avoid_alias(dir)) {
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
logerror("'%s': aliased", dir);
return NULL;
}
if (*dir == '~') {
if (!user_path) {
logerror("'%s': User-path not allowed", dir);
return NULL;
}
if (*user_path) {
/* Got either "~alice" or "~alice/foo";
* rewrite them to "~alice/%s" or
* "~alice/%s/foo".
*/
int namlen, restlen = strlen(dir);
const char *slash = strchr(dir, '/');
if (!slash)
slash = dir + restlen;
namlen = slash - dir;
restlen -= namlen;
loginfo("userpath <%s>, request <%s>, namlen %d, restlen %d, slash <%s>", user_path, dir, namlen, restlen, slash);
rlen = snprintf(rpath, sizeof(rpath), "%.*s/%s%.*s",
namlen, dir, user_path, restlen, slash);
if (rlen >= sizeof(rpath)) {
logerror("user-path too large: %s", rpath);
return NULL;
}
dir = rpath;
}
}
else if (interpolated_path && hi->saw_extended_args) {
struct strbuf expanded_path = STRBUF_INIT;
struct expand_path_context context;
context.directory = directory;
context.hostinfo = hi;
if (*dir != '/') {
/* Allow only absolute */
logerror("'%s': Non-absolute path denied (interpolated-path active)", dir);
return NULL;
}
strbuf_expand(&expanded_path, interpolated_path,
expand_path, &context);
rlen = strlcpy(interp_path, expanded_path.buf,
sizeof(interp_path));
if (rlen >= sizeof(interp_path)) {
logerror("interpolated path too large: %s",
interp_path);
return NULL;
}
strbuf_release(&expanded_path);
loginfo("Interpolated dir '%s'", interp_path);
dir = interp_path;
}
else if (base_path) {
if (*dir != '/') {
/* Allow only absolute */
logerror("'%s': Non-absolute path denied (base-path active)", dir);
return NULL;
}
rlen = snprintf(rpath, sizeof(rpath), "%s%s", base_path, dir);
if (rlen >= sizeof(rpath)) {
logerror("base-path too large: %s", rpath);
return NULL;
}
dir = rpath;
}
path = enter_repo(dir, strict_paths);
if (!path && base_path && base_path_relaxed) {
/*
* if we fail and base_path_relaxed is enabled, try without
* prefixing the base path
*/
dir = directory;
path = enter_repo(dir, strict_paths);
}
if (!path) {
logerror("'%s' does not appear to be a git repository", dir);
return NULL;
}
if ( ok_paths && *ok_paths ) {
add an extra level of indirection to main() There are certain startup tasks that we expect every git process to do. In some cases this is just to improve the quality of the program (e.g., setting up gettext()). In others it is a requirement for using certain functions in libgit.a (e.g., system_path() expects that you have called git_extract_argv0_path()). Most commands are builtins and are covered by the git.c version of main(). However, there are still a few external commands that use their own main(). Each of these has to remember to include the correct startup sequence, and we are not always consistent. Rather than just fix the inconsistencies, let's make this harder to get wrong by providing a common main() that can run this standard startup. We basically have two options to do this: - the compat/mingw.h file already does something like this by adding a #define that replaces the definition of main with a wrapper that calls mingw_startup(). The upside is that the code in each program doesn't need to be changed at all; it's rewritten on the fly by the preprocessor. The downside is that it may make debugging of the startup sequence a bit more confusing, as the preprocessor is quietly inserting new code. - the builtin functions are all of the form cmd_foo(), and git.c's main() calls them. This is much more explicit, which may make things more obvious to somebody reading the code. It's also more flexible (because of course we have to figure out _which_ cmd_foo() to call). The downside is that each of the builtins must define cmd_foo(), instead of just main(). This patch chooses the latter option, preferring the more explicit approach, even though it is more invasive. We introduce a new file common-main.c, with the "real" main. It expects to call cmd_main() from whatever other objects it is linked against. We link common-main.o against anything that links against libgit.a, since we know that such programs will need to do this setup. 
Note that common-main.o can't actually go inside libgit.a, as the linker would not pick up its main() function automatically (it has no callers). The rest of the patch is just adjusting all of the various external programs (mostly in t/helper) to use cmd_main(). I've provided a global declaration for cmd_main(), which means that all of the programs also need to match its signature. In particular, many functions need to switch to "const char **" instead of "char **" for argv. This effect ripples out to a few other variables and functions, as well. This makes the patch even more invasive, but the end result is much better. We should be treating argv strings as const anyway, and now all programs conform to the same signature (which also matches the way builtins are defined). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-01 07:58:58 +02:00
const char **pp;
int pathlen = strlen(path);
/* The validation is done on the paths after enter_repo
* appends optional {.git,.git/.git} and friends, but
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
* it does not use getcwd(). So if your /pub is
* a symlink to /mnt/pub, you can whitelist /pub and
* do not have to say /mnt/pub.
* Do not say /pub/.
*/
for ( pp = ok_paths ; *pp ; pp++ ) {
int len = strlen(*pp);
if (len <= pathlen &&
!memcmp(*pp, path, len) &&
(path[len] == '\0' ||
(!strict_paths && path[len] == '/')))
return path;
}
}
else {
/* be backwards compatible */
if (!strict_paths)
return path;
}
logerror("'%s': not in whitelist", path);
return NULL; /* Fallthrough. Deny by default */
}
typedef int (*daemon_service_fn)(void);
struct daemon_service {
const char *name;
const char *config_name;
daemon_service_fn fn;
int enabled;
int overridable;
};
/*
 * Send an "ERR <msg>: <dir>" packet on descriptor 1. Unless
 * informative_errors is set, "msg" is replaced by a generic denial so
 * the client cannot tell why the request failed. Always returns -1.
 */
static int daemon_error(const char *dir, const char *msg)
{
	const char *shown = informative_errors
		? msg
		: "access denied or repository not exported";

	packet_write(1, "ERR %s: %s", shown, dir);
	return -1;
}
/* Command used as the access hook; run_service() skips the hook when NULL. */
static const char *access_hook;

/*
 * Run the access hook for a request.
 *
 * The hook is started through the shell with these arguments, in order:
 * service name, resolved path, client-supplied host name, canonical host
 * name, IP address and TCP port. Its standard output is captured.
 *
 * Returns 0 when the hook ran and every step succeeded. Otherwise the
 * first line of the captured output (or "service rejected" when there
 * is none) is sent to the client through daemon_error(), errno is set
 * to EACCES, and -1 is returned.
 */
static int run_access_hook(struct daemon_service *service, const char *dir,
			   const char *path, struct hostinfo *hi)
{
	struct child_process child = CHILD_PROCESS_INIT;
	struct strbuf buf = STRBUF_INIT;
	const char *argv[8];
	int argc = 0;
	int failed = 0;
	char *newline;

	argv[argc++] = access_hook;
	argv[argc++] = service->name;
	argv[argc++] = path;
	argv[argc++] = hi->hostname.buf;
	argv[argc++] = get_canon_hostname(hi);
	argv[argc++] = get_ip_address(hi);
	argv[argc++] = hi->tcp_port.buf;
	argv[argc] = NULL;

	child.use_shell = 1;
	child.argv = argv;
	child.no_stdin = 1;
	child.no_stderr = 1;
	child.out = -1;

	if (start_command(&child)) {
		logerror("daemon access hook '%s' failed to start",
			 access_hook);
		goto error_return;
	}

	if (strbuf_read(&buf, child.out, 0) < 0) {
		logerror("failed to read from pipe to daemon access hook '%s'",
			 access_hook);
		/* do not report partial output to the client */
		strbuf_reset(&buf);
		failed = 1;
	}
	if (close(child.out) < 0) {
		logerror("failed to close pipe to daemon access hook '%s'",
			 access_hook);
		failed = 1;
	}
	if (finish_command(&child))
		failed = 1;

	if (failed)
		goto error_return;

	strbuf_release(&buf);
	return 0;

error_return:
	strbuf_ltrim(&buf);
	if (!buf.len)
		strbuf_addstr(&buf, "service rejected");
	/* only the first line of the hook's output is reported */
	newline = strchr(buf.buf, '\n');
	if (newline)
		*newline = '\0';
	errno = EACCES;
	daemon_error(dir, buf.buf);
	strbuf_release(&buf);
	return -1;
}
static int run_service(const char *dir, struct daemon_service *service,
struct hostinfo *hi)
{
const char *path;
int enabled = service->enabled;
struct strbuf var = STRBUF_INIT;
loginfo("Request %s for '%s'", service->name, dir);
if (!enabled && !service->overridable) {
logerror("'%s': service not enabled.", service->name);
errno = EACCES;
return daemon_error(dir, "service not enabled");
}
if (!(path = path_ok(dir, hi)))
return daemon_error(dir, "no such repository");
2005-09-27 17:49:40 +02:00
/*
* Security on the cheap.
*
* We want a readable HEAD, usable "objects" directory, and
* a "git-daemon-export-ok" flag that says that the other side
* is ok with us doing this.
*
* path_ok() uses enter_repo() and does whitelist checking.
* We only need to make sure the repository is exported.
*/
if (!export_all_trees && access("git-daemon-export-ok", F_OK)) {
logerror("'%s': repository not exported.", path);
errno = EACCES;
return daemon_error(dir, "repository not exported");
}
if (service->overridable) {
strbuf_addf(&var, "daemon.%s", service->config_name);
git_config_get_bool(var.buf, &enabled);
strbuf_release(&var);
}
if (!enabled) {
logerror("'%s': service not enabled for '%s'",
service->name, path);
errno = EACCES;
return daemon_error(dir, "service not enabled");
}
/*
* Optionally, a hook can choose to deny access to the
* repository depending on the phase of the moon.
*/
if (access_hook && run_access_hook(service, dir, path, hi))
return -1;
/*
* We'll ignore SIGTERM from now on, we have a
* good client.
*/
signal(SIGTERM, SIG_IGN);
return service->fn();
}
/*
 * Read "fd" line by line until end of input and pass each line to
 * logerror(). The descriptor is closed before returning, including
 * when it cannot be wrapped in a stdio stream.
 */
static void copy_to_log(int fd)
{
	struct strbuf line = STRBUF_INIT;
	FILE *stream = fdopen(fd, "r");

	if (!stream) {
		logerror("fdopen of error channel failed");
		close(fd);
		return;
	}

	for (;;) {
		if (strbuf_getline_lf(&line, stream) == EOF)
			break;
		logerror("%s", line.buf);
		strbuf_setlen(&line, 0);
	}

	strbuf_release(&line);
	fclose(stream);
}
static int run_service_command(const char **argv)
{
struct child_process cld = CHILD_PROCESS_INIT;
cld.argv = argv;
cld.git_cmd = 1;
cld.err = -1;
if (start_command(&cld))
return -1;
close(0);
close(1);
copy_to_log(cld.err);
return finish_command(&cld);
}
/*
 * Service handler for "upload-pack": passes the daemon's timeout as a
 * "--timeout=<n>" argument, together with "--strict" and ".", to
 * run_service_command().
 */
static int upload_pack(void)
{
	/* Timeout as string */
	char timeout_buf[64];
	const char *argv[] = { "upload-pack", "--strict", NULL, ".", NULL };

	snprintf(timeout_buf, sizeof(timeout_buf), "--timeout=%u", timeout);
	argv[2] = timeout_buf;

	return run_service_command(argv);
}
/*
 * Service handler for "upload-archive": hands the fixed argument vector
 * { "upload-archive", "." } to run_service_command().
 */
static int upload_archive(void)
{
static const char *argv[] = { "upload-archive", ".", NULL };
return run_service_command(argv);
}
/*
 * Service handler for "receive-pack": hands the fixed argument vector
 * { "receive-pack", "." } to run_service_command().
 */
static int receive_pack(void)
{
static const char *argv[] = { "receive-pack", ".", NULL };
return run_service_command(argv);
}
/*
 * Services this daemon can run. Columns follow struct daemon_service:
 * name, config_name, handler, enabled, overridable. As initialised here
 * only upload-pack starts out enabled, and all three are overridable.
 */
static struct daemon_service daemon_service[] = {
{ "upload-archive", "uploadarch", upload_archive, 0, 1 },
{ "upload-pack", "uploadpack", upload_pack, 1, 1 },
{ "receive-pack", "receivepack", receive_pack, 0, 1 },
};
static void enable_service(const char *name, int ena)
{
int i;
for (i = 0; i < ARRAY_SIZE(daemon_service); i++) {
if (!strcmp(daemon_service[i].name, name)) {
daemon_service[i].enabled = ena;
return;
}
}
die("No such service %s", name);
}
static void make_service_overridable(const char *name, int ena)
{
int i;
for (i = 0; i < ARRAY_SIZE(daemon_service); i++) {
if (!strcmp(daemon_service[i].name, name)) {
daemon_service[i].overridable = ena;
return;
}
}
die("No such service %s", name);
}
/*
 * Split "hostport" in place into host and port.
 *
 * "[host]" and "[host]:port" are accepted in bracketed form; anything
 * else after the closing bracket, or a missing bracket, is fatal.
 * Without brackets the string is cut at its last ':'.
 *
 * On return *host points at the host text and *port at the port text,
 * or NULL when no port was given. Both point into "hostport", which is
 * modified (separators are overwritten with NUL).
 */
static void parse_host_and_port(char *hostport, char **host,
	char **port)
{
	char *sep;

	if (*hostport != '[') {
		*host = hostport;
		sep = strrchr(hostport, ':');
		if (sep)
			*sep++ = '\0';
		*port = sep;
		return;
	}

	sep = strchr(hostport, ']');
	if (!sep)
		die("Invalid request ('[' without ']')");
	*sep = '\0';
	*host = hostport + 1;

	switch (sep[1]) {
	case '\0':
		*port = NULL;
		break;
	case ':':
		*port = sep + 2;
		break;
	default:
		die("Garbage after end of host part");
	}
}
/*
 * Append a client-supplied string to "out" in a form that is safe to
 * splice into a filesystem path: slashes are dropped, a dot is dropped
 * when it would start the buffer or follow another dot, and dots left
 * at the end of the buffer are removed. The client therefore cannot
 * escape our base path via ".." traversal.
 */
static void sanitize_client(struct strbuf *out, const char *in)
{
	const char *p;

	for (p = in; *p; p++) {
		const char c = *p;

		if (c == '/')
			continue;
		if (c == '.') {
			/* no leading dot, no run of dots */
			if (!out->len || out->buf[out->len - 1] == '.')
				continue;
		}
		strbuf_addch(out, c);
	}

	/* no trailing dots either */
	while (out->len && out->buf[out->len - 1] == '.')
		strbuf_setlen(out, out->len - 1);
}
/*
 * Like sanitize_client, but we also perform any canonicalization
 * to make life easier on the admin: after sanitizing "in" into "out",
 * strbuf_tolower() is applied to "out".
 */
static void canonicalize_client(struct strbuf *out, const char *in)
{
sanitize_client(out, in);
strbuf_tolower(out);
}
/*
daemon: Strictly parse the "extra arg" part of the command Since 1.4.4.5 (49ba83fb67 "Add virtualization support to git-daemon") git daemon enters an infinite loop and never terminates if a client hides any extra arguments in the initial request line which is not exactly "\0host=blah\0". Since that change, a client must never insert additional extra arguments, or attempt to use any argument other than "host=", as any daemon will get stuck parsing the request line and will never complete the request. Since the client can't tell if the daemon is patched or not, it is not possible to know if additional extra args might actually be able to be safely requested. If we ever need to extend the git daemon protocol to support a new feature, we may have to do something like this to the exchange: # If both support git:// v2 # C: 000cgit://v2 S: 0010ok host user C: 0018host git.kernel.org C: 0027git-upload-pack /pub/linux-2.6.git S: ...git-upload-pack header... # If client supports git:// v2, server does not: # C: 000cgit://v2 S: <EOF> C: 003bgit-upload-pack /pub/linux-2.6.git\0host=git.kernel.org\0 S: ...git-upload-pack header... This requires the client to create two TCP connections to talk to an older git daemon, however all daemons since the introduction of daemon.c will safely reject the unknown "git://v2" command request, so the client can quite easily determine the server supports an older protocol. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-06-05 03:33:32 +02:00
* Read the host as supplied by the client connection.
*/
static void parse_host_arg(struct hostinfo *hi, char *extra_args, int buflen)
{
char *val;
int vallen;
char *end = extra_args + buflen;
daemon: Strictly parse the "extra arg" part of the command Since 1.4.4.5 (49ba83fb67 "Add virtualization support to git-daemon") git daemon enters an infinite loop and never terminates if a client hides any extra arguments in the initial request line which is not exactly "\0host=blah\0". Since that change, a client must never insert additional extra arguments, or attempt to use any argument other than "host=", as any daemon will get stuck parsing the request line and will never complete the request. Since the client can't tell if the daemon is patched or not, it is not possible to know if additional extra args might actually be able to be safely requested. If we ever need to extend the git daemon protocol to support a new feature, we may have to do something like this to the exchange: # If both support git:// v2 # C: 000cgit://v2 S: 0010ok host user C: 0018host git.kernel.org C: 0027git-upload-pack /pub/linux-2.6.git S: ...git-upload-pack header... # If client supports git:// v2, server does not: # C: 000cgit://v2 S: <EOF> C: 003bgit-upload-pack /pub/linux-2.6.git\0host=git.kernel.org\0 S: ...git-upload-pack header... This requires the client to create two TCP connections to talk to an older git daemon, however all daemons since the introduction of daemon.c will safely reject the unknown "git://v2" command request, so the client can quite easily determine the server supports an older protocol. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-06-05 03:33:32 +02:00
if (extra_args < end && *extra_args) {
hi->saw_extended_args = 1;
if (strncasecmp("host=", extra_args, 5) == 0) {
val = extra_args + 5;
vallen = strlen(val) + 1;
if (*val) {
/* Split <host>:<port> at colon. */
char *host;
char *port;
parse_host_and_port(val, &host, &port);
if (port)
sanitize_client(&hi->tcp_port, port);
canonicalize_client(&hi->hostname, host);
hi->hostname_lookup_done = 0;
}
/* On to the next one */
extra_args = val + vallen;
}
daemon: Strictly parse the "extra arg" part of the command Since 1.4.4.5 (49ba83fb67 "Add virtualization support to git-daemon") git daemon enters an infinite loop and never terminates if a client hides any extra arguments in the initial request line which is not exactly "\0host=blah\0". Since that change, a client must never insert additional extra arguments, or attempt to use any argument other than "host=", as any daemon will get stuck parsing the request line and will never complete the request. Since the client can't tell if the daemon is patched or not, it is not possible to know if additional extra args might actually be able to be safely requested. If we ever need to extend the git daemon protocol to support a new feature, we may have to do something like this to the exchange: # If both support git:// v2 # C: 000cgit://v2 S: 0010ok host user C: 0018host git.kernel.org C: 0027git-upload-pack /pub/linux-2.6.git S: ...git-upload-pack header... # If client supports git:// v2, server does not: # C: 000cgit://v2 S: <EOF> C: 003bgit-upload-pack /pub/linux-2.6.git\0host=git.kernel.org\0 S: ...git-upload-pack header... This requires the client to create two TCP connections to talk to an older git daemon, however all daemons since the introduction of daemon.c will safely reject the unknown "git://v2" command request, so the client can quite easily determine the server supports an older protocol. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-06-05 03:33:32 +02:00
if (extra_args < end && *extra_args)
die("Invalid request");
}
}
/*
 * Locate canonical hostname and its IP address.
 *
 * Does nothing if the lookup has already been done or if the client
 * supplied no host name. On a successful lookup hi->ip_address and
 * hi->canon_hostname are appended to; on failure they are left as they
 * were. Either way the lookup is marked done so it is not repeated.
 */
static void lookup_hostname(struct hostinfo *hi)
{
	if (!hi->hostname_lookup_done && hi->hostname.len) {
#ifndef NO_IPV6
		struct addrinfo hints;
		struct addrinfo *ai;
		int gai;
		static char addrbuf[HOST_NAME_MAX + 1];

		memset(&hints, 0, sizeof(hints));
		hints.ai_flags = AI_CANONNAME;

		gai = getaddrinfo(hi->hostname.buf, NULL, &hints, &ai);
		if (!gai) {
			const void *addr = NULL;

			/*
			 * hints.ai_family is left unspecified, so the first
			 * result may be IPv4 or IPv6. Pick the address
			 * member that matches the family instead of
			 * assuming a sockaddr_in.
			 */
			if (ai->ai_family == AF_INET) {
				struct sockaddr_in *sin = (void *)ai->ai_addr;
				addr = &sin->sin_addr;
			} else if (ai->ai_family == AF_INET6) {
				struct sockaddr_in6 *sin6 = (void *)ai->ai_addr;
				addr = &sin6->sin6_addr;
			}

			/* only record an address that was actually formatted */
			if (addr && inet_ntop(ai->ai_family, addr,
					      addrbuf, sizeof(addrbuf)))
				strbuf_addstr(&hi->ip_address, addrbuf);

			if (ai->ai_canonname)
				sanitize_client(&hi->canon_hostname,
						ai->ai_canonname);
			else
				strbuf_addbuf(&hi->canon_hostname,
					      &hi->ip_address);

			freeaddrinfo(ai);
		}
#else
		struct hostent *hent;
		struct sockaddr_in sa;
		char **ap;
		static char addrbuf[HOST_NAME_MAX + 1];

		hent = gethostbyname(hi->hostname.buf);
		if (hent) {
			ap = hent->h_addr_list;

			sanitize_client(&hi->canon_hostname, hent->h_name);

			/*
			 * Copy the address only if there is one and it fits
			 * the IPv4 address field it is copied into.
			 */
			if (*ap && hent->h_length > 0 &&
			    (size_t)hent->h_length <= sizeof(sa.sin_addr)) {
				memset(&sa, 0, sizeof sa);
				sa.sin_family = hent->h_addrtype;
				sa.sin_port = htons(0);
				memcpy(&sa.sin_addr, *ap, hent->h_length);

				if (inet_ntop(hent->h_addrtype, &sa.sin_addr,
					      addrbuf, sizeof(addrbuf)))
					strbuf_addstr(&hi->ip_address, addrbuf);
			}
		}
#endif
		hi->hostname_lookup_done = 1;
	}
}
/*
 * Put "hi" into its empty state: every byte is zeroed (clearing both
 * flag bits) and each of the four string buffers is initialised.
 */
static void hostinfo_init(struct hostinfo *hi)
{
	memset(hi, 0, sizeof(struct hostinfo));

	strbuf_init(&hi->tcp_port, 0);
	strbuf_init(&hi->ip_address, 0);
	strbuf_init(&hi->canon_hostname, 0);
	strbuf_init(&hi->hostname, 0);
}
/* Release the four string buffers held by "hi". */
static void hostinfo_clear(struct hostinfo *hi)
{
	strbuf_release(&hi->tcp_port);
	strbuf_release(&hi->ip_address);
	strbuf_release(&hi->canon_hostname);
	strbuf_release(&hi->hostname);
}
/*
 * Turn on SO_KEEPALIVE for "sockfd". A failure is logged unless errno
 * is ENOTSOCK, i.e. the descriptor is simply not a socket.
 */
static void set_keep_alive(int sockfd)
{
	int enable = 1;
	int rc = setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE,
			    &enable, sizeof(enable));

	if (rc < 0 && errno != ENOTSOCK)
		logerror("unable to set SO_KEEPALIVE on socket: %s",
			 strerror(errno));
}
static int execute(void)
{
pkt-line: provide a LARGE_PACKET_MAX static buffer Most of the callers of packet_read_line just read into a static 1000-byte buffer (callers which handle arbitrary binary data already use LARGE_PACKET_MAX). This works fine in practice, because: 1. The only variable-sized data in these lines is a ref name, and refs tend to be a lot shorter than 1000 characters. 2. When sending ref lines, git-core always limits itself to 1000 byte packets. However, the only limit given in the protocol specification in Documentation/technical/protocol-common.txt is LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in pack-protocol.txt, and then only describing what we write, not as a specific limit for readers. This patch lets us bump the 1000-byte limit to LARGE_PACKET_MAX. Even though git-core will never write a packet where this makes a difference, there are two good reasons to do this: 1. Other git implementations may have followed protocol-common.txt and used a larger maximum size. We don't bump into it in practice because it would involve very long ref names. 2. We may want to increase the 1000-byte limit one day. Since packets are transferred before any capabilities, it's difficult to do this in a backwards-compatible way. But if we bump the size of buffer the readers can handle, eventually older versions of git will be obsolete enough that we can justify bumping the writers, as well. We don't have plans to do this anytime soon, but there is no reason not to start the clock ticking now. Just bumping all of the reading bufs to LARGE_PACKET_MAX would waste memory. Instead, since most readers just read into a temporary buffer anyway, let's provide a single static buffer that all callers can use. We can further wrap this detail away by having the packet_read_line wrapper just use the buffer transparently and return a pointer to the static storage. That covers most of the cases, and the remaining ones already read into their own LARGE_PACKET_MAX buffers. 
Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-20 21:02:57 +01:00
char *line = packet_buffer;
int pktlen, len, i;
char *addr = getenv("REMOTE_ADDR"), *port = getenv("REMOTE_PORT");
struct hostinfo hi;
hostinfo_init(&hi);
if (addr)
loginfo("Connection from %s:%s", addr, port);
set_keep_alive(0);
alarm(init_timeout ? init_timeout : timeout);
pkt-line: share buffer/descriptor reading implementation The packet_read function reads from a descriptor. The packet_get_line function is similar, but reads from an in-memory buffer, and uses a completely separate implementation. This patch teaches the generic packet_read function to accept either source, and we can do away with packet_get_line's implementation. There are two other differences to account for between the old and new functions. The first is that we used to read into a strbuf, but now read into a fixed size buffer. The only two callers are fine with that, and in fact it simplifies their code, since they can use the same static-buffer interface as the rest of the packet_read_line callers (and we provide a similar convenience wrapper for reading from a buffer rather than a descriptor). This is technically an externally-visible behavior change in that we used to accept arbitrary sized packets up to 65532 bytes, and now cap out at LARGE_PACKET_MAX, 65520. In practice this doesn't matter, as we use it only for parsing smart-http headers (of which there is exactly one defined, and it is small and fixed-size). And any extension headers would be breaking the protocol to go over LARGE_PACKET_MAX anyway. The other difference is that packet_get_line would return on error rather than dying. However, both callers of packet_get_line are actually improved by dying. The first caller does its own error checking, but we can drop that; as a result, we'll actually get more specific reporting about protocol breakage when packet_read dies internally. The only downside is that packet_read will not print the smart-http URL that failed, but that's not a big deal; anybody not debugging can already see the remote's URL already, and anybody debugging would want to run with GIT_CURL_VERBOSE anyway to see way more information. 
The second caller, which is just trying to skip past any extra smart-http headers (of which there are none defined, but which we allow to keep room for future expansion), did not error check at all. As a result, it would treat an error just like a flush packet. The resulting mess would generally cause an error later in get_remote_heads, but now we get error reporting much closer to the source of the problem. Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-23 23:31:34 +01:00
pktlen = packet_read(0, NULL, NULL, packet_buffer, sizeof(packet_buffer), 0);
alarm(0);
len = strlen(line);
if (pktlen != len)
loginfo("Extended attributes (%d bytes) exist <%.*s>",
(int) pktlen - len,
(int) pktlen - len, line + len + 1);
if (len && line[len-1] == '\n') {
line[--len] = 0;
pktlen--;
}
if (len != pktlen)
parse_host_arg(&hi, line + len + 1, pktlen - len - 1);
for (i = 0; i < ARRAY_SIZE(daemon_service); i++) {
struct daemon_service *s = &(daemon_service[i]);
const char *arg;
if (skip_prefix(line, "git-", &arg) &&
skip_prefix(arg, s->name, &arg) &&
*arg++ == ' ') {
/*
* Note: The directory here is probably context sensitive,
* and might depend on the actual service being performed.
*/
int rc = run_service(arg, s, &hi);
hostinfo_clear(&hi);
return rc;
}
}
hostinfo_clear(&hi);
logerror("Protocol error: '%s'", line);
return -1;
}
/*
 * Compare the network addresses stored in two sockaddr_storage objects;
 * port numbers are not looked at.
 *
 * Returns the difference of the address families when they differ, the
 * memcmp() ordering of the raw address bytes for AF_INET (and AF_INET6
 * unless NO_IPV6 is defined), and 0 for every other family.
 */
static int addrcmp(const struct sockaddr_storage *s1,
		   const struct sockaddr_storage *s2)
{
	const struct sockaddr *lhs = (const struct sockaddr *)s1;
	const struct sockaddr *rhs = (const struct sockaddr *)s2;

	if (lhs->sa_family != rhs->sa_family)
		return lhs->sa_family - rhs->sa_family;

	switch (lhs->sa_family) {
	case AF_INET: {
		const struct sockaddr_in *a = (const struct sockaddr_in *)s1;
		const struct sockaddr_in *b = (const struct sockaddr_in *)s2;

		return memcmp(&a->sin_addr, &b->sin_addr,
			      sizeof(struct in_addr));
	}
#ifndef NO_IPV6
	case AF_INET6: {
		const struct sockaddr_in6 *a = (const struct sockaddr_in6 *)s1;
		const struct sockaddr_in6 *b = (const struct sockaddr_in6 *)s2;

		return memcmp(&a->sin6_addr, &b->sin6_addr,
			      sizeof(struct in6_addr));
	}
#endif
	default:
		return 0;
	}
}
/*
 * Limit on concurrently tracked children; handle() skips the limit check
 * entirely when this is 0.
 */
static int max_connections = 32;
/* Number of entries on the firstborn list (bumped in add_child(), dropped in check_dead_children()). */
static unsigned int live_children;
/*
 * Singly linked list of spawned children.  add_child() inserts a new entry
 * directly in front of the first existing entry with the same address.
 */
static struct child {
struct child *next;
struct child_process cld;
struct sockaddr_storage address;
} *firstborn;
/*
 * Record a freshly started child process together with the peer address
 * it is serving.  The new entry is linked in just before the first
 * existing entry that has the same address, or at the tail of the list
 * when no such entry exists.
 */
static void add_child(struct child_process *cld, struct sockaddr *addr, socklen_t addrlen)
{
	struct child *entry = xcalloc(1, sizeof(*entry));
	struct child **slot = &firstborn;

	live_children++;
	memcpy(&entry->cld, cld, sizeof(*cld));
	memcpy(&entry->address, addr, addrlen);

	/* advance until we hit a sibling from the same address (or the end) */
	while (*slot && addrcmp(&(*slot)->address, &entry->address))
		slot = &(*slot)->next;

	entry->next = *slot;
	*slot = entry;
}
/*
 * Called once the number of connections has reached "max_connections".
 *
 * Walk the child list looking for two neighbouring entries that share an
 * address and send SIGTERM to the first of the pair (add_child() puts the
 * newest connection from an address in front of the older ones).  Nothing
 * happens when no address has more than one connection.
 */
static void kill_some_child(void)
{
	const struct child *cur = firstborn;

	while (cur && cur->next) {
		if (!addrcmp(&cur->address, &cur->next->address)) {
			kill(cur->cld.pid, SIGTERM);
			return;
		}
		cur = cur->next;
	}
}
/*
 * Reap every child on the list that has already exited, without blocking.
 *
 * Each reaped child is logged, unlinked from the list, has its
 * child_process resources released and is freed; live_children is
 * decremented accordingly.  Children that are still running, or for
 * which waitpid() reports an error, stay on the list.
 */
static void check_dead_children(void)
{
	struct child **cradle = &firstborn;
	struct child *blanket;

	while ((blanket = *cradle)) {
		int status;
		pid_t pid = waitpid(blanket->cld.pid, &status, WNOHANG);

		/*
		 * waitpid() returns 0 while the child is still running and
		 * -1 on error; any positive value is the pid that was reaped.
		 */
		if (pid <= 0) {
			cradle = &blanket->next;
			continue;
		}

		loginfo("[%"PRIuMAX"] Disconnected%s", (uintmax_t)pid,
			status ? " (with error)" : "");

		/* remove the child */
		*cradle = blanket->next;
		live_children--;
		child_process_clear(&blanket->cld);
		free(blanket);
	}
}
/*
 * Argument vector shared by all spawned children: handle() points
 * cld.argv at it, and cmd_main() fills it with argv[0], "--serve" and the
 * original command-line arguments.
 */
static struct argv_array cld_argv = ARGV_ARRAY_INIT;
/*
 * Spawn a child process to serve one accepted connection.
 *
 * incoming: connected socket; it becomes the child's stdin, and a
 *           duplicate of it becomes the child's stdout.
 * addr/addrlen: peer address as filled in by accept(); exported to the
 *           child as REMOTE_ADDR / REMOTE_PORT for AF_INET (and AF_INET6
 *           unless NO_IPV6 is defined) and stored with the child entry.
 *
 * When the connection limit is reached, one duplicate-address child is
 * asked to terminate first; if that does not free a slot the connection
 * is dropped.
 */
static void handle(int incoming, struct sockaddr *addr, socklen_t addrlen)
{
	struct child_process cld = CHILD_PROCESS_INIT;
	int outgoing;

	if (max_connections && live_children >= max_connections) {
		kill_some_child();
		sleep(1);  /* give it some time to die */
		check_dead_children();
		if (live_children >= max_connections) {
			close(incoming);
			logerror("Too many children, dropping connection");
			return;
		}
	}

	/*
	 * A negative "out" would ask start_command() to create a pipe
	 * instead of using the socket, so a failed dup() must not be
	 * passed along.  Do this before touching cld.env_array so that
	 * bailing out here leaves nothing to clean up.
	 */
	outgoing = dup(incoming);
	if (outgoing < 0) {
		logerror("unable to duplicate connection descriptor: %s",
			 strerror(errno));
		close(incoming);
		return;
	}

	if (addr->sa_family == AF_INET) {
		char buf[128] = "";
		struct sockaddr_in *sin_addr = (void *) addr;
		inet_ntop(addr->sa_family, &sin_addr->sin_addr, buf, sizeof(buf));
		argv_array_pushf(&cld.env_array, "REMOTE_ADDR=%s", buf);
		argv_array_pushf(&cld.env_array, "REMOTE_PORT=%d",
				 ntohs(sin_addr->sin_port));
#ifndef NO_IPV6
	} else if (addr->sa_family == AF_INET6) {
		char buf[128] = "";
		struct sockaddr_in6 *sin6_addr = (void *) addr;
		inet_ntop(AF_INET6, &sin6_addr->sin6_addr, buf, sizeof(buf));
		/* IPv6 addresses are exported in bracketed form */
		argv_array_pushf(&cld.env_array, "REMOTE_ADDR=[%s]", buf);
		argv_array_pushf(&cld.env_array, "REMOTE_PORT=%d",
				 ntohs(sin6_addr->sin6_port));
#endif
	}

	cld.argv = cld_argv.argv;
	cld.in = incoming;
	cld.out = outgoing;

	if (start_command(&cld))
		logerror("unable to fork");
	else
		add_child(&cld, addr, addrlen);
}
/*
 * SIGCHLD handler.  It deliberately does no work of its own: the mere
 * delivery of the signal interrupts blocking system calls.  The handler
 * only reinstalls itself, which SysV signal semantics require.
 */
static void child_handler(int signo)
{
	signal(SIGCHLD, child_handler);
}
/*
 * Enable SO_REUSEADDR on sockfd when the reuseaddr flag is set.
 *
 * Returns 0 when the flag is off, otherwise whatever setsockopt()
 * returns.
 */
static int set_reuse_addr(int sockfd)
{
	int enable = 1;

	if (reuseaddr)
		return setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR,
				  &enable, sizeof(enable));
	return 0;
}
/*
 * Growable array of listening socket descriptors; setup_named_sock()
 * appends to it via ALLOC_GROW().
 */
struct socketlist {
int *list; /* the descriptors */
size_t nr; /* number of descriptors stored in list */
size_t alloc; /* allocation bookkeeping for ALLOC_GROW() */
};
/*
 * Render the address held in a socket address structure as text.
 *
 * family: AF_INET, or AF_INET6 unless NO_IPV6 is defined.
 * sin:    the socket address to render.
 * len:    length of *sin; kept for interface compatibility only.  It
 *         must not be used as the size of the output buffer: a
 *         sockaddr_in6 is shorter than the longest textual IPv6
 *         address, so doing so made inet_ntop() fail for long addresses.
 *
 * Returns a pointer to a static buffer that the next call overwrites, or
 * the string "<unknown>" for an unsupported family or when the
 * conversion fails.
 */
static const char *ip2str(int family, struct sockaddr *sin, socklen_t len)
{
#ifdef NO_IPV6
	static char ip[INET_ADDRSTRLEN];
#else
	static char ip[INET6_ADDRSTRLEN];
#endif
	const void *raw;

	(void)len;

	switch (family) {
#ifndef NO_IPV6
	case AF_INET6:
		raw = &((struct sockaddr_in6 *)sin)->sin6_addr;
		break;
#endif
	case AF_INET:
		raw = &((struct sockaddr_in *)sin)->sin_addr;
		break;
	default:
		return "<unknown>";
	}

	if (!inet_ntop(family, raw, ip, sizeof(ip)))
		return "<unknown>";
	return ip;
}
2005-09-29 02:26:44 +02:00
#ifndef NO_IPV6
/*
 * Create listening TCP sockets for every address getaddrinfo() reports
 * for listen_addr (NULL requests the passive/wildcard addresses) on
 * listen_port, and append them to socklist.
 *
 * Failures on an individual address are logged and skipped.  Returns the
 * number of sockets added; 0 also covers a failed name lookup.
 */
static int setup_named_sock(char *listen_addr, int listen_port, struct socketlist *socklist)
{
	int socknum = 0;
	char pbuf[NI_MAXSERV];
	struct addrinfo hints, *ai0, *ai;
	int gai;
	long flags;

	xsnprintf(pbuf, sizeof(pbuf), "%d", listen_port);
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_protocol = IPPROTO_TCP;
	hints.ai_flags = AI_PASSIVE;

	gai = getaddrinfo(listen_addr, pbuf, &hints, &ai0);
	if (gai) {
		/* listen_addr may be NULL, which must not be fed to %s */
		logerror("getaddrinfo() for %s failed: %s",
			 listen_addr ? listen_addr : "<any>",
			 gai_strerror(gai));
		return 0;
	}

	for (ai = ai0; ai; ai = ai->ai_next) {
		int sockfd;

		sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
		if (sockfd < 0)
			continue;
		if (sockfd >= FD_SETSIZE) {
			logerror("Socket descriptor too large");
			close(sockfd);
			continue;
		}

#ifdef IPV6_V6ONLY
		if (ai->ai_family == AF_INET6) {
			int on = 1;
			setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY,
				   &on, sizeof(on));
			/* Note: error is not fatal */
		}
#endif

		if (set_reuse_addr(sockfd)) {
			logerror("Could not set SO_REUSEADDR: %s", strerror(errno));
			close(sockfd);
			continue;
		}

		set_keep_alive(sockfd);

		if (bind(sockfd, ai->ai_addr, ai->ai_addrlen) < 0) {
			logerror("Could not bind to %s: %s",
				 ip2str(ai->ai_family, ai->ai_addr, ai->ai_addrlen),
				 strerror(errno));
			close(sockfd);
			continue;	/* not fatal */
		}
		if (listen(sockfd, 5) < 0) {
			logerror("Could not listen to %s: %s",
				 ip2str(ai->ai_family, ai->ai_addr, ai->ai_addrlen),
				 strerror(errno));
			close(sockfd);
			continue;	/* not fatal */
		}

		/* mark the descriptor close-on-exec */
		flags = fcntl(sockfd, F_GETFD, 0);
		if (flags >= 0)
			fcntl(sockfd, F_SETFD, flags | FD_CLOEXEC);

		ALLOC_GROW(socklist->list, socklist->nr + 1, socklist->alloc);
		socklist->list[socklist->nr++] = sockfd;
		socknum++;
	}

	freeaddrinfo(ai0);

	return socknum;
}
#else /* NO_IPV6 */
static int setup_named_sock(char *listen_addr, int listen_port, struct socketlist *socklist)
2005-09-29 02:26:44 +02:00
{
struct sockaddr_in sin;
int sockfd;
long flags;
2005-09-29 02:26:44 +02:00
memset(&sin, 0, sizeof sin);
sin.sin_family = AF_INET;
sin.sin_port = htons(listen_port);
if (listen_addr) {
/* Well, host better be an IP address here. */
if (inet_pton(AF_INET, listen_addr, &sin.sin_addr.s_addr) <= 0)
return 0;
} else {
sin.sin_addr.s_addr = htonl(INADDR_ANY);
}
2005-09-29 02:26:44 +02:00
sockfd = socket(AF_INET, SOCK_STREAM, 0);
if (sockfd < 0)
return 0;
if (set_reuse_addr(sockfd)) {
logerror("Could not set SO_REUSEADDR: %s", strerror(errno));
close(sockfd);
return 0;
}
set_keep_alive(sockfd);
2005-09-29 02:26:44 +02:00
if ( bind(sockfd, (struct sockaddr *)&sin, sizeof sin) < 0 ) {
logerror("Could not bind to %s: %s",
ip2str(AF_INET, (struct sockaddr *)&sin, sizeof(sin)),
strerror(errno));
2005-09-29 02:26:44 +02:00
close(sockfd);
return 0;
}
if (listen(sockfd, 5) < 0) {
logerror("Could not listen to %s: %s",
ip2str(AF_INET, (struct sockaddr *)&sin, sizeof(sin)),
strerror(errno));
close(sockfd);
return 0;
}
flags = fcntl(sockfd, F_GETFD, 0);
if (flags >= 0)
fcntl(sockfd, F_SETFD, flags | FD_CLOEXEC);
ALLOC_GROW(socklist->list, socklist->nr + 1, socklist->alloc);
socklist->list[socklist->nr++] = sockfd;
return 1;
2005-09-29 02:26:44 +02:00
}
#endif
/*
 * Open the listening sockets on listen_port and collect them in socklist.
 *
 * With an empty listen_addr list a single setup_named_sock(NULL, ...)
 * call is made; otherwise each listed host is set up in turn and an
 * error is logged for every host that yields no socket.
 */
static void socksetup(struct string_list *listen_addr, int listen_port, struct socketlist *socklist)
{
	int i;

	if (!listen_addr->nr) {
		setup_named_sock(NULL, listen_port, socklist);
		return;
	}

	for (i = 0; i < listen_addr->nr; i++) {
		char *host = listen_addr->items[i].string;

		if (!setup_named_sock(host, listen_port, socklist))
			logerror("unable to allocate any listen sockets for host %s on port %u",
				 host, listen_port);
	}
}
static int service_loop(struct socketlist *socklist)
2005-09-29 02:26:44 +02:00
{
struct pollfd *pfd;
int i;
pfd = xcalloc(socklist->nr, sizeof(struct pollfd));
2005-09-29 02:26:44 +02:00
for (i = 0; i < socklist->nr; i++) {
pfd[i].fd = socklist->list[i];
2005-09-29 02:26:44 +02:00
pfd[i].events = POLLIN;
}
signal(SIGCHLD, child_handler);
for (;;) {
int i;
2005-09-29 02:26:44 +02:00
check_dead_children();
if (poll(pfd, socklist->nr, -1) < 0) {
if (errno != EINTR) {
logerror("Poll failed, resuming: %s",
strerror(errno));
sleep(1);
}
continue;
}
for (i = 0; i < socklist->nr; i++) {
2005-09-29 02:26:44 +02:00
if (pfd[i].revents & POLLIN) {
union {
struct sockaddr sa;
struct sockaddr_in sai;
#ifndef NO_IPV6
struct sockaddr_in6 sai6;
#endif
} ss;
socklen_t sslen = sizeof(ss);
int incoming = accept(pfd[i].fd, &ss.sa, &sslen);
if (incoming < 0) {
switch (errno) {
case EAGAIN:
case EINTR:
case ECONNABORTED:
continue;
default:
die_errno("accept returned");
}
}
handle(incoming, &ss.sa, sslen);
}
}
}
}
#ifdef NO_POSIX_GOODIES

/* Only ever used as an opaque pointer in this configuration. */
struct credentials;

/* No-op in this configuration. */
static void drop_privileges(struct credentials *cred)
{
	/* nothing */
}

/* Always dies: --user is not available in this configuration. */
static struct credentials *prepare_credentials(const char *user_name,
					       const char *group_name)
{
	die("--user not supported on this platform");
}

#else

/* Identity to switch to once the listening sockets are open. */
struct credentials {
	struct passwd *pass;
	gid_t gid;
};

/*
 * Switch to the user and group described by cred; a NULL cred means
 * "leave the process identity alone".  Dies when any step fails.
 */
static void drop_privileges(struct credentials *cred)
{
	if (!cred)
		return;

	if (initgroups(cred->pass->pw_name, cred->gid) ||
	    setgid (cred->gid) ||
	    setuid(cred->pass->pw_uid))
		die("cannot drop privileges");
}

/*
 * Look up user_name and, when given, group_name.  Without a group name
 * the user's primary group is used.  Dies when a lookup fails.
 *
 * Returns a pointer to static storage.
 */
static struct credentials *prepare_credentials(const char *user_name,
					       const char *group_name)
{
	static struct credentials c;
	struct group *grp;

	c.pass = getpwnam(user_name);
	if (!c.pass)
		die("user not found - %s", user_name);

	if (!group_name) {
		c.gid = c.pass->pw_gid;
		return &c;
	}

	grp = getgrnam(group_name);
	if (!grp)
		die("group not found - %s", group_name);
	c.gid = grp->gr_gid;

	return &c;
}

#endif
static int serve(struct string_list *listen_addr, int listen_port,
struct credentials *cred)
2005-09-29 02:26:44 +02:00
{
struct socketlist socklist = { NULL, 0, 0 };
2005-10-21 08:21:50 +02:00
socksetup(listen_addr, listen_port, &socklist);
if (socklist.nr == 0)
die("unable to allocate any listen sockets on port %u",
listen_port);
2005-10-21 08:21:50 +02:00
drop_privileges(cred);
loginfo("Ready to rumble");
return service_loop(&socklist);
2005-10-21 08:21:50 +02:00
}
2005-09-29 02:26:44 +02:00
add an extra level of indirection to main() There are certain startup tasks that we expect every git process to do. In some cases this is just to improve the quality of the program (e.g., setting up gettext()). In others it is a requirement for using certain functions in libgit.a (e.g., system_path() expects that you have called git_extract_argv0_path()). Most commands are builtins and are covered by the git.c version of main(). However, there are still a few external commands that use their own main(). Each of these has to remember to include the correct startup sequence, and we are not always consistent. Rather than just fix the inconsistencies, let's make this harder to get wrong by providing a common main() that can run this standard startup. We basically have two options to do this: - the compat/mingw.h file already does something like this by adding a #define that replaces the definition of main with a wrapper that calls mingw_startup(). The upside is that the code in each program doesn't need to be changed at all; it's rewritten on the fly by the preprocessor. The downside is that it may make debugging of the startup sequence a bit more confusing, as the preprocessor is quietly inserting new code. - the builtin functions are all of the form cmd_foo(), and git.c's main() calls them. This is much more explicit, which may make things more obvious to somebody reading the code. It's also more flexible (because of course we have to figure out _which_ cmd_foo() to call). The downside is that each of the builtins must define cmd_foo(), instead of just main(). This patch chooses the latter option, preferring the more explicit approach, even though it is more invasive. We introduce a new file common-main.c, with the "real" main. It expects to call cmd_main() from whatever other objects it is linked against. We link common-main.o against anything that links against libgit.a, since we know that such programs will need to do this setup. 
Note that common-main.o can't actually go inside libgit.a, as the linker would not pick up its main() function automatically (it has no callers). The rest of the patch is just adjusting all of the various external programs (mostly in t/helper) to use cmd_main(). I've provided a global declaration for cmd_main(), which means that all of the programs also need to match its signature. In particular, many functions need to switch to "const char **" instead of "char **" for argv. This effect ripples out to a few other variables and functions, as well. This makes the patch even more invasive, but the end result is much better. We should be treating argv strings as const anyway, and now all programs conform to the same signature (which also matches the way builtins are defined). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-01 07:58:58 +02:00
int cmd_main(int argc, const char **argv)
{
int listen_port = 0;
struct string_list listen_addr = STRING_LIST_INIT_NODUP;
int serve_mode = 0, inetd_mode = 0;
const char *pid_file = NULL, *user_name = NULL, *group_name = NULL;
int detach = 0;
struct credentials *cred = NULL;
int i;
for (i = 1; i < argc; i++) {
add an extra level of indirection to main() There are certain startup tasks that we expect every git process to do. In some cases this is just to improve the quality of the program (e.g., setting up gettext()). In others it is a requirement for using certain functions in libgit.a (e.g., system_path() expects that you have called git_extract_argv0_path()). Most commands are builtins and are covered by the git.c version of main(). However, there are still a few external commands that use their own main(). Each of these has to remember to include the correct startup sequence, and we are not always consistent. Rather than just fix the inconsistencies, let's make this harder to get wrong by providing a common main() that can run this standard startup. We basically have two options to do this: - the compat/mingw.h file already does something like this by adding a #define that replaces the definition of main with a wrapper that calls mingw_startup(). The upside is that the code in each program doesn't need to be changed at all; it's rewritten on the fly by the preprocessor. The downside is that it may make debugging of the startup sequence a bit more confusing, as the preprocessor is quietly inserting new code. - the builtin functions are all of the form cmd_foo(), and git.c's main() calls them. This is much more explicit, which may make things more obvious to somebody reading the code. It's also more flexible (because of course we have to figure out _which_ cmd_foo() to call). The downside is that each of the builtins must define cmd_foo(), instead of just main(). This patch chooses the latter option, preferring the more explicit approach, even though it is more invasive. We introduce a new file common-main.c, with the "real" main. It expects to call cmd_main() from whatever other objects it is linked against. We link common-main.o against anything that links against libgit.a, since we know that such programs will need to do this setup. 
Note that common-main.o can't actually go inside libgit.a, as the linker would not pick up its main() function automatically (it has no callers). The rest of the patch is just adjusting all of the various external programs (mostly in t/helper) to use cmd_main(). I've provided a global declaration for cmd_main(), which means that all of the programs also need to match its signature. In particular, many functions need to switch to "const char **" instead of "char **" for argv. This effect ripples out to a few other variables and functions, as well. This makes the patch even more invasive, but the end result is much better. We should be treating argv strings as const anyway, and now all programs conform to the same signature (which also matches the way builtins are defined). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-01 07:58:58 +02:00
const char *arg = argv[i];
const char *v;
if (skip_prefix(arg, "--listen=", &v)) {
string_list_append(&listen_addr, xstrdup_tolower(v));
continue;
}
if (skip_prefix(arg, "--port=", &v)) {
char *end;
unsigned long n;
n = strtoul(v, &end, 0);
if (*v && !*end) {
listen_port = n;
continue;
}
}
if (!strcmp(arg, "--serve")) {
serve_mode = 1;
continue;
}
if (!strcmp(arg, "--inetd")) {
inetd_mode = 1;
log_syslog = 1;
continue;
}
if (!strcmp(arg, "--verbose")) {
verbose = 1;
continue;
}
if (!strcmp(arg, "--syslog")) {
log_syslog = 1;
continue;
}
if (!strcmp(arg, "--export-all")) {
export_all_trees = 1;
continue;
}
if (skip_prefix(arg, "--access-hook=", &v)) {
access_hook = v;
continue;
}
if (skip_prefix(arg, "--timeout=", &v)) {
timeout = atoi(v);
continue;
}
if (skip_prefix(arg, "--init-timeout=", &v)) {
init_timeout = atoi(v);
continue;
}
if (skip_prefix(arg, "--max-connections=", &v)) {
max_connections = atoi(v);
if (max_connections < 0)
max_connections = 0; /* unlimited */
continue;
}
if (!strcmp(arg, "--strict-paths")) {
strict_paths = 1;
continue;
}
if (skip_prefix(arg, "--base-path=", &v)) {
base_path = v;
continue;
}
if (!strcmp(arg, "--base-path-relaxed")) {
base_path_relaxed = 1;
continue;
}
if (skip_prefix(arg, "--interpolated-path=", &v)) {
interpolated_path = v;
continue;
}
if (!strcmp(arg, "--reuseaddr")) {
reuseaddr = 1;
continue;
}
if (!strcmp(arg, "--user-path")) {
user_path = "";
continue;
}
if (skip_prefix(arg, "--user-path=", &v)) {
user_path = v;
continue;
}
if (skip_prefix(arg, "--pid-file=", &v)) {
pid_file = v;
continue;
}
if (!strcmp(arg, "--detach")) {
detach = 1;
log_syslog = 1;
continue;
}
if (skip_prefix(arg, "--user=", &v)) {
user_name = v;
continue;
}
if (skip_prefix(arg, "--group=", &v)) {
group_name = v;
continue;
}
if (skip_prefix(arg, "--enable=", &v)) {
enable_service(v, 1);
continue;
}
if (skip_prefix(arg, "--disable=", &v)) {
enable_service(v, 0);
continue;
}
if (skip_prefix(arg, "--allow-override=", &v)) {
make_service_overridable(v, 1);
continue;
}
if (skip_prefix(arg, "--forbid-override=", &v)) {
make_service_overridable(v, 0);
continue;
}
if (!strcmp(arg, "--informative-errors")) {
informative_errors = 1;
continue;
}
if (!strcmp(arg, "--no-informative-errors")) {
informative_errors = 0;
continue;
}
if (!strcmp(arg, "--")) {
ok_paths = &argv[i+1];
break;
} else if (arg[0] != '-') {
ok_paths = &argv[i];
break;
}
usage(daemon_usage);
}
if (log_syslog) {
openlog("git-daemon", LOG_PID, LOG_DAEMON);
set_die_routine(daemon_die);
} else
/* avoid splitting a message in the middle */
setvbuf(stderr, NULL, _IOFBF, 4096);
if (inetd_mode && (detach || group_name || user_name))
die("--detach, --user and --group are incompatible with --inetd");
if (inetd_mode && (listen_port || (listen_addr.nr > 0)))
die("--listen= and --port= are incompatible with --inetd");
else if (listen_port == 0)
listen_port = DEFAULT_GIT_PORT;
if (group_name && !user_name)
die("--group supplied without --user");
if (user_name)
cred = prepare_credentials(user_name, group_name);
if (strict_paths && (!ok_paths || !*ok_paths))
die("option --strict-paths requires a whitelist");
if (base_path && !is_directory(base_path))
die("base-path '%s' does not exist or is not a directory",
base_path);
if (inetd_mode) {
if (!freopen("/dev/null", "w", stderr))
die_errno("failed to redirect stderr to /dev/null");
}
if (inetd_mode || serve_mode)
return execute();
if (detach) {
if (daemonize())
die("--detach not supported on this platform");
}
if (pid_file)
write_file(pid_file, "%"PRIuMAX, (uintmax_t) getpid());
/* prepare argv for serving-processes */
argv_array_push(&cld_argv, argv[0]); /* git-daemon */
argv_array_push(&cld_argv, "--serve");
for (i = 1; i < argc; ++i)
argv_array_push(&cld_argv, argv[i]);
return serve(&listen_addr, listen_port, cred);
}