git-commit-vandalism/shell.c

233 lines
5.4 KiB
C
Raw Normal View History

#include "cache.h"
#include "quote.h"
#include "exec-cmd.h"
#include "strbuf.h"
#include "run-command.h"
#include "alias.h"
#include "prompt.h"
#define COMMAND_DIR "git-shell-commands"
#define HELP_COMMAND COMMAND_DIR "/help"
2013-03-09 23:00:11 +01:00
#define NOLOGIN_COMMAND COMMAND_DIR "/no-interactive-login"
static int do_generic_cmd(const char *me, char *arg)
{
const char *my_argv[4];
setup_path();
shell: disallow repo names beginning with dash When a remote server uses git-shell, the client side will connect to it like: ssh server "git-upload-pack 'foo.git'" and we literally exec ("git-upload-pack", "foo.git"). In early versions of upload-pack and receive-pack, we took a repository argument and nothing else. But over time they learned to accept dashed options. If the user passes a repository name that starts with a dash, the results are confusing at best (we complain of a bogus option instead of a non-existent repository) and malicious at worst (the user can start an interactive pager via "--help"). We could pass "--" to the sub-process to make sure the user's argument is interpreted as a branch name. I.e.: git-upload-pack -- -foo.git But adding "--" automatically would make us inconsistent with a normal shell (i.e., when git-shell is not in use), where "-foo.git" would still be an error. For that case, the client would have to specify the "--", but they can't do so reliably, as existing versions of git-shell do not allow more than a single argument. The simplest thing is to simply disallow "-" at the start of the repo name argument. This hasn't worked either with or without git-shell since version 1.0.0, and nobody has complained. Note that this patch just applies to do_generic_cmd(), which runs upload-pack, receive-pack, and upload-archive. There are two other types of commands that git-shell runs: - do_cvs_cmd(), but this already restricts the argument to be the literal string "server" - admin-provided commands in the git-shell-commands directory. We'll pass along arbitrary arguments there, so these commands could have similar problems. But these commands might actually understand dashed arguments, so we cannot just block them here. It's up to the writer of the commands to make sure they are safe. With great power comes great responsibility. Reported-by: Timo Schmid <tschmid@ernw.de> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-04-29 14:36:44 +02:00
if (!arg || !(arg = sq_dequote(arg)) || *arg == '-')
die("bad argument");
if (!skip_prefix(me, "git-", &me))
die("bad command");
my_argv[0] = me;
my_argv[1] = arg;
my_argv[2] = NULL;
return execv_git_cmd(my_argv);
}
static int is_valid_cmd_name(const char *cmd)
{
/* Test command contains no . or / characters */
return cmd[strcspn(cmd, "./")] == '\0';
}
static char *make_cmd(const char *prog)
{
return xstrfmt("%s/%s", COMMAND_DIR, prog);
}
static void cd_to_homedir(void)
{
const char *home = getenv("HOME");
if (!home)
die("could not determine user's home directory; HOME is unset");
if (chdir(home) == -1)
die("could not chdir to user's home directory");
}
shell: limit size of interactive commands When git-shell is run in interactive mode (which must be enabled by creating $HOME/git-shell-commands), it reads commands from stdin, one per line, and executes them. We read the commands with git_read_line_interactively(), which uses a strbuf under the hood. That means we'll accept an input of arbitrary size (limited only by how much heap we can allocate). That creates two problems: - the rest of the code is not prepared to handle large inputs. The most serious issue here is that split_cmdline() uses "int" for most of its types, which can lead to integer overflow and out-of-bounds array reads and writes. But even with that fixed, we assume that we can feed the command name to snprintf() (via xstrfmt()), which is stuck for historical reasons using "int", and causes it to fail (and even trigger a BUG() call). - since the point of git-shell is to take input from untrusted or semi-trusted clients, it's a mild denial-of-service. We'll allocate as many bytes as the client sends us (actually twice as many, since we immediately duplicate the buffer). We can fix both by just limiting the amount of per-command input we're willing to receive. We should also fix split_cmdline(), of course, which is an accident waiting to happen, but that can come on top. Most calls to split_cmdline(), including the other one in git-shell, are OK because they are reading from an OS-provided argv, which is limited in practice. This patch should eliminate the immediate vulnerabilities. I picked 4MB as an arbitrary limit. It's big enough that nobody should ever run into it in practice (since the point is to run the commands via exec, we're subject to OS limits which are typically much lower). But it's small enough that allocating it isn't that big a deal. The code is mostly just swapping out fgets() for the strbuf call, but we have to add a few niceties like flushing and trimming line endings. We could simplify things further by putting the buffer on the stack, but 4MB is probably a bit much there. Note that we'll _always_ allocate 4MB, which for normal, non-malicious requests is more than we would before this patch. But on the other hand, other git programs are happy to use 96MB for a delta cache. And since we'd never touch most of those pages, on a lazy-allocating OS like Linux they won't even get allocated to actual RAM. The ideal would be a version of strbuf_getline() that accepted a maximum value. But for a minimal vulnerability fix, let's keep things localized and simple. We can always refactor further on top. The included test fails in an obvious way with ASan or UBSan (which notice the integer overflow and out-of-bounds reads). Without them, it fails in a less obvious way: we may segfault, or we may try to xstrfmt() a long string, leading to a BUG(). Either way, it fails reliably before this patch, and passes with it. Note that we don't need an EXPENSIVE prereq on it. It does take 10-15s to fail before this patch, but with the new limit, we fail almost immediately (and the perl process generating 2GB of data exits via SIGPIPE). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-09-29 00:52:48 +02:00
#define MAX_INTERACTIVE_COMMAND (4*1024*1024)
static void run_shell(void)
{
int done = 0;
struct child_process help_cmd = CHILD_PROCESS_INIT;
2013-03-09 23:00:11 +01:00
if (!access(NOLOGIN_COMMAND, F_OK)) {
/* Interactive login disabled. */
struct child_process nologin_cmd = CHILD_PROCESS_INIT;
2013-03-09 23:00:11 +01:00
int status;
strvec_push(&nologin_cmd.args, NOLOGIN_COMMAND);
status = run_command(&nologin_cmd);
2013-03-09 23:00:11 +01:00
if (status < 0)
exit(127);
exit(status);
}
/* Print help if enabled */
help_cmd.silent_exec_failure = 1;
strvec_push(&help_cmd.args, HELP_COMMAND);
run_command(&help_cmd);
do {
const char *prog;
char *full_cmd;
char *rawargs;
shell: limit size of interactive commands When git-shell is run in interactive mode (which must be enabled by creating $HOME/git-shell-commands), it reads commands from stdin, one per line, and executes them. We read the commands with git_read_line_interactively(), which uses a strbuf under the hood. That means we'll accept an input of arbitrary size (limited only by how much heap we can allocate). That creates two problems: - the rest of the code is not prepared to handle large inputs. The most serious issue here is that split_cmdline() uses "int" for most of its types, which can lead to integer overflow and out-of-bounds array reads and writes. But even with that fixed, we assume that we can feed the command name to snprintf() (via xstrfmt()), which is stuck for historical reasons using "int", and causes it to fail (and even trigger a BUG() call). - since the point of git-shell is to take input from untrusted or semi-trusted clients, it's a mild denial-of-service. We'll allocate as many bytes as the client sends us (actually twice as many, since we immediately duplicate the buffer). We can fix both by just limiting the amount of per-command input we're willing to receive. We should also fix split_cmdline(), of course, which is an accident waiting to happen, but that can come on top. Most calls to split_cmdline(), including the other one in git-shell, are OK because they are reading from an OS-provided argv, which is limited in practice. This patch should eliminate the immediate vulnerabilities. I picked 4MB as an arbitrary limit. It's big enough that nobody should ever run into it in practice (since the point is to run the commands via exec, we're subject to OS limits which are typically much lower). But it's small enough that allocating it isn't that big a deal. The code is mostly just swapping out fgets() for the strbuf call, but we have to add a few niceties like flushing and trimming line endings. We could simplify things further by putting the buffer on the stack, but 4MB is probably a bit much there. Note that we'll _always_ allocate 4MB, which for normal, non-malicious requests is more than we would before this patch. But on the other hand, other git programs are happy to use 96MB for a delta cache. And since we'd never touch most of those pages, on a lazy-allocating OS like Linux they won't even get allocated to actual RAM. The ideal would be a version of strbuf_getline() that accepted a maximum value. But for a minimal vulnerability fix, let's keep things localized and simple. We can always refactor further on top. The included test fails in an obvious way with ASan or UBSan (which notice the integer overflow and out-of-bounds reads). Without them, it fails in a less obvious way: we may segfault, or we may try to xstrfmt() a long string, leading to a BUG(). Either way, it fails reliably before this patch, and passes with it. Note that we don't need an EXPENSIVE prereq on it. It does take 10-15s to fail before this patch, but with the new limit, we fail almost immediately (and the perl process generating 2GB of data exits via SIGPIPE). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-09-29 00:52:48 +02:00
size_t len;
char *split_args;
const char **argv;
int code;
int count;
fprintf(stderr, "git> ");
shell: limit size of interactive commands When git-shell is run in interactive mode (which must be enabled by creating $HOME/git-shell-commands), it reads commands from stdin, one per line, and executes them. We read the commands with git_read_line_interactively(), which uses a strbuf under the hood. That means we'll accept an input of arbitrary size (limited only by how much heap we can allocate). That creates two problems: - the rest of the code is not prepared to handle large inputs. The most serious issue here is that split_cmdline() uses "int" for most of its types, which can lead to integer overflow and out-of-bounds array reads and writes. But even with that fixed, we assume that we can feed the command name to snprintf() (via xstrfmt()), which is stuck for historical reasons using "int", and causes it to fail (and even trigger a BUG() call). - since the point of git-shell is to take input from untrusted or semi-trusted clients, it's a mild denial-of-service. We'll allocate as many bytes as the client sends us (actually twice as many, since we immediately duplicate the buffer). We can fix both by just limiting the amount of per-command input we're willing to receive. We should also fix split_cmdline(), of course, which is an accident waiting to happen, but that can come on top. Most calls to split_cmdline(), including the other one in git-shell, are OK because they are reading from an OS-provided argv, which is limited in practice. This patch should eliminate the immediate vulnerabilities. I picked 4MB as an arbitrary limit. It's big enough that nobody should ever run into it in practice (since the point is to run the commands via exec, we're subject to OS limits which are typically much lower). But it's small enough that allocating it isn't that big a deal. The code is mostly just swapping out fgets() for the strbuf call, but we have to add a few niceties like flushing and trimming line endings. We could simplify things further by putting the buffer on the stack, but 4MB is probably a bit much there. Note that we'll _always_ allocate 4MB, which for normal, non-malicious requests is more than we would before this patch. But on the other hand, other git programs are happy to use 96MB for a delta cache. And since we'd never touch most of those pages, on a lazy-allocating OS like Linux they won't even get allocated to actual RAM. The ideal would be a version of strbuf_getline() that accepted a maximum value. But for a minimal vulnerability fix, let's keep things localized and simple. We can always refactor further on top. The included test fails in an obvious way with ASan or UBSan (which notice the integer overflow and out-of-bounds reads). Without them, it fails in a less obvious way: we may segfault, or we may try to xstrfmt() a long string, leading to a BUG(). Either way, it fails reliably before this patch, and passes with it. Note that we don't need an EXPENSIVE prereq on it. It does take 10-15s to fail before this patch, but with the new limit, we fail almost immediately (and the perl process generating 2GB of data exits via SIGPIPE). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-09-29 00:52:48 +02:00
/*
* Avoid using a strbuf or git_read_line_interactively() here.
* We don't want to allocate arbitrary amounts of memory on
* behalf of a possibly untrusted client, and we're subject to
* OS limits on command length anyway.
*/
fflush(stdout);
rawargs = xmalloc(MAX_INTERACTIVE_COMMAND);
if (!fgets(rawargs, MAX_INTERACTIVE_COMMAND, stdin)) {
fprintf(stderr, "\n");
shell: limit size of interactive commands When git-shell is run in interactive mode (which must be enabled by creating $HOME/git-shell-commands), it reads commands from stdin, one per line, and executes them. We read the commands with git_read_line_interactively(), which uses a strbuf under the hood. That means we'll accept an input of arbitrary size (limited only by how much heap we can allocate). That creates two problems: - the rest of the code is not prepared to handle large inputs. The most serious issue here is that split_cmdline() uses "int" for most of its types, which can lead to integer overflow and out-of-bounds array reads and writes. But even with that fixed, we assume that we can feed the command name to snprintf() (via xstrfmt()), which is stuck for historical reasons using "int", and causes it to fail (and even trigger a BUG() call). - since the point of git-shell is to take input from untrusted or semi-trusted clients, it's a mild denial-of-service. We'll allocate as many bytes as the client sends us (actually twice as many, since we immediately duplicate the buffer). We can fix both by just limiting the amount of per-command input we're willing to receive. We should also fix split_cmdline(), of course, which is an accident waiting to happen, but that can come on top. Most calls to split_cmdline(), including the other one in git-shell, are OK because they are reading from an OS-provided argv, which is limited in practice. This patch should eliminate the immediate vulnerabilities. I picked 4MB as an arbitrary limit. It's big enough that nobody should ever run into it in practice (since the point is to run the commands via exec, we're subject to OS limits which are typically much lower). But it's small enough that allocating it isn't that big a deal. The code is mostly just swapping out fgets() for the strbuf call, but we have to add a few niceties like flushing and trimming line endings. We could simplify things further by putting the buffer on the stack, but 4MB is probably a bit much there. Note that we'll _always_ allocate 4MB, which for normal, non-malicious requests is more than we would before this patch. But on the other hand, other git programs are happy to use 96MB for a delta cache. And since we'd never touch most of those pages, on a lazy-allocating OS like Linux they won't even get allocated to actual RAM. The ideal would be a version of strbuf_getline() that accepted a maximum value. But for a minimal vulnerability fix, let's keep things localized and simple. We can always refactor further on top. The included test fails in an obvious way with ASan or UBSan (which notice the integer overflow and out-of-bounds reads). Without them, it fails in a less obvious way: we may segfault, or we may try to xstrfmt() a long string, leading to a BUG(). Either way, it fails reliably before this patch, and passes with it. Note that we don't need an EXPENSIVE prereq on it. It does take 10-15s to fail before this patch, but with the new limit, we fail almost immediately (and the perl process generating 2GB of data exits via SIGPIPE). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-09-29 00:52:48 +02:00
free(rawargs);
break;
}
shell: limit size of interactive commands When git-shell is run in interactive mode (which must be enabled by creating $HOME/git-shell-commands), it reads commands from stdin, one per line, and executes them. We read the commands with git_read_line_interactively(), which uses a strbuf under the hood. That means we'll accept an input of arbitrary size (limited only by how much heap we can allocate). That creates two problems: - the rest of the code is not prepared to handle large inputs. The most serious issue here is that split_cmdline() uses "int" for most of its types, which can lead to integer overflow and out-of-bounds array reads and writes. But even with that fixed, we assume that we can feed the command name to snprintf() (via xstrfmt()), which is stuck for historical reasons using "int", and causes it to fail (and even trigger a BUG() call). - since the point of git-shell is to take input from untrusted or semi-trusted clients, it's a mild denial-of-service. We'll allocate as many bytes as the client sends us (actually twice as many, since we immediately duplicate the buffer). We can fix both by just limiting the amount of per-command input we're willing to receive. We should also fix split_cmdline(), of course, which is an accident waiting to happen, but that can come on top. Most calls to split_cmdline(), including the other one in git-shell, are OK because they are reading from an OS-provided argv, which is limited in practice. This patch should eliminate the immediate vulnerabilities. I picked 4MB as an arbitrary limit. It's big enough that nobody should ever run into it in practice (since the point is to run the commands via exec, we're subject to OS limits which are typically much lower). But it's small enough that allocating it isn't that big a deal. The code is mostly just swapping out fgets() for the strbuf call, but we have to add a few niceties like flushing and trimming line endings. We could simplify things further by putting the buffer on the stack, but 4MB is probably a bit much there. Note that we'll _always_ allocate 4MB, which for normal, non-malicious requests is more than we would before this patch. But on the other hand, other git programs are happy to use 96MB for a delta cache. And since we'd never touch most of those pages, on a lazy-allocating OS like Linux they won't even get allocated to actual RAM. The ideal would be a version of strbuf_getline() that accepted a maximum value. But for a minimal vulnerability fix, let's keep things localized and simple. We can always refactor further on top. The included test fails in an obvious way with ASan or UBSan (which notice the integer overflow and out-of-bounds reads). Without them, it fails in a less obvious way: we may segfault, or we may try to xstrfmt() a long string, leading to a BUG(). Either way, it fails reliably before this patch, and passes with it. Note that we don't need an EXPENSIVE prereq on it. It does take 10-15s to fail before this patch, but with the new limit, we fail almost immediately (and the perl process generating 2GB of data exits via SIGPIPE). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-09-29 00:52:48 +02:00
len = strlen(rawargs);
/*
* If we truncated due to our input buffer size, reject the
* command. That's better than running bogus input, and
* there's a good chance it's just malicious garbage anyway.
*/
if (len >= MAX_INTERACTIVE_COMMAND - 1)
die("invalid command format: input too long");
if (len > 0 && rawargs[len - 1] == '\n') {
if (--len > 0 && rawargs[len - 1] == '\r')
--len;
rawargs[len] = '\0';
}
split_args = xstrdup(rawargs);
count = split_cmdline(split_args, &argv);
if (count < 0) {
fprintf(stderr, "invalid command format '%s': %s\n", rawargs,
split_cmdline_strerror(count));
free(split_args);
free(rawargs);
continue;
}
prog = argv[0];
if (!strcmp(prog, "")) {
} else if (!strcmp(prog, "quit") || !strcmp(prog, "logout") ||
!strcmp(prog, "exit") || !strcmp(prog, "bye")) {
done = 1;
} else if (is_valid_cmd_name(prog)) {
struct child_process cmd = CHILD_PROCESS_INIT;
full_cmd = make_cmd(prog);
argv[0] = full_cmd;
cmd.silent_exec_failure = 1;
strvec_pushv(&cmd.args, argv);
code = run_command(&cmd);
if (code == -1 && errno == ENOENT) {
fprintf(stderr, "unrecognized command '%s'\n", prog);
}
free(full_cmd);
} else {
fprintf(stderr, "invalid command format '%s'\n", prog);
}
free(argv);
free(rawargs);
} while (!done);
}
static struct commands {
const char *name;
int (*exec)(const char *me, char *arg);
} cmd_list[] = {
{ "git-receive-pack", do_generic_cmd },
{ "git-upload-pack", do_generic_cmd },
{ "git-upload-archive", do_generic_cmd },
{ NULL },
};
add an extra level of indirection to main() There are certain startup tasks that we expect every git process to do. In some cases this is just to improve the quality of the program (e.g., setting up gettext()). In others it is a requirement for using certain functions in libgit.a (e.g., system_path() expects that you have called git_extract_argv0_path()). Most commands are builtins and are covered by the git.c version of main(). However, there are still a few external commands that use their own main(). Each of these has to remember to include the correct startup sequence, and we are not always consistent. Rather than just fix the inconsistencies, let's make this harder to get wrong by providing a common main() that can run this standard startup. We basically have two options to do this: - the compat/mingw.h file already does something like this by adding a #define that replaces the definition of main with a wrapper that calls mingw_startup(). The upside is that the code in each program doesn't need to be changed at all; it's rewritten on the fly by the preprocessor. The downside is that it may make debugging of the startup sequence a bit more confusing, as the preprocessor is quietly inserting new code. - the builtin functions are all of the form cmd_foo(), and git.c's main() calls them. This is much more explicit, which may make things more obvious to somebody reading the code. It's also more flexible (because of course we have to figure out _which_ cmd_foo() to call). The downside is that each of the builtins must define cmd_foo(), instead of just main(). This patch chooses the latter option, preferring the more explicit approach, even though it is more invasive. We introduce a new file common-main.c, with the "real" main. It expects to call cmd_main() from whatever other objects it is linked against. We link common-main.o against anything that links against libgit.a, since we know that such programs will need to do this setup. Note that common-main.o can't actually go inside libgit.a, as the linker would not pick up its main() function automatically (it has no callers). The rest of the patch is just adjusting all of the various external programs (mostly in t/helper) to use cmd_main(). I've provided a global declaration for cmd_main(), which means that all of the programs also need to match its signature. In particular, many functions need to switch to "const char **" instead of "char **" for argv. This effect ripples out to a few other variables and functions, as well. This makes the patch even more invasive, but the end result is much better. We should be treating argv strings as const anyway, and now all programs conform to the same signature (which also matches the way builtins are defined). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-01 07:58:58 +02:00
int cmd_main(int argc, const char **argv)
{
char *prog;
const char **user_argv;
struct commands *cmd;
int count;
/*
* Special hack to pretend to be a CVS server
*/
if (argc == 2 && !strcmp(argv[1], "cvs server")) {
argv--;
} else if (argc == 1) {
/* Allow the user to run an interactive shell */
cd_to_homedir();
if (access(COMMAND_DIR, R_OK | X_OK) == -1) {
die("Interactive git shell is not enabled.\n"
"hint: ~/" COMMAND_DIR " should exist "
"and have read and execute access.");
}
run_shell();
exit(0);
} else if (argc != 3 || strcmp(argv[1], "-c")) {
/*
* We do not accept any other modes except "-c" followed by
* "cmd arg", where "cmd" is a very limited subset of git
* commands or a command in the COMMAND_DIR
*/
die("Run with no arguments or with -c cmd");
}
prog = xstrdup(argv[2]);
if (!strncmp(prog, "git", 3) && isspace(prog[3]))
/* Accept "git foo" as if the caller said "git-foo". */
prog[3] = '-';
for (cmd = cmd_list ; cmd->name ; cmd++) {
int len = strlen(cmd->name);
char *arg;
if (strncmp(cmd->name, prog, len))
continue;
arg = NULL;
switch (prog[len]) {
case '\0':
arg = NULL;
break;
case ' ':
arg = prog + len + 1;
break;
default:
continue;
}
return cmd->exec(cmd->name, arg);
}
cd_to_homedir();
count = split_cmdline(prog, &user_argv);
if (count >= 0) {
if (is_valid_cmd_name(user_argv[0])) {
prog = make_cmd(user_argv[0]);
user_argv[0] = prog;
execv(user_argv[0], (char *const *) user_argv);
}
free(prog);
free(user_argv);
die("unrecognized command '%s'", argv[2]);
} else {
free(prog);
die("invalid command format '%s': %s", argv[2],
split_cmdline_strerror(count));
}
}