Merge branch 'jk/http-backend-deadlock'

Communication between the HTTP server and http_backend process can
lead to a dead-lock when relaying a large ref negotiation request.
Diagnose the situation better, and mitigate it by reading such a
request first into core (to a reasonable limit).

* jk/http-backend-deadlock:
  http-backend: spool ref negotiation requests to buffer
  t5551: factor out tag creation
  http-backend: fix die recursion with custom handler
This commit is contained in:
Junio C Hamano 2015-06-01 12:45:09 -07:00
commit 777e75b605
3 changed files with 139 additions and 29 deletions

View File

@ -255,6 +255,15 @@ The GIT_HTTP_EXPORT_ALL environmental variable may be passed to
'git-http-backend' to bypass the check for the "git-daemon-export-ok" 'git-http-backend' to bypass the check for the "git-daemon-export-ok"
file in each repository before allowing export of that repository. file in each repository before allowing export of that repository.
The `GIT_HTTP_MAX_REQUEST_BUFFER` environment variable (or the
`http.maxRequestBuffer` config variable) may be set to change the
largest ref negotiation request that git will handle during a fetch; any
fetch requiring a larger buffer will not succeed. This value should not
normally need to be changed, but may be helpful if you are fetching from
a repository with an extremely large number of refs. The value can be
specified with a unit (e.g., `100M` for 100 megabytes). The default is
10 megabytes.
The backend process sets GIT_COMMITTER_NAME to '$REMOTE_USER' and The backend process sets GIT_COMMITTER_NAME to '$REMOTE_USER' and
GIT_COMMITTER_EMAIL to '$\{REMOTE_USER}@http.$\{REMOTE_ADDR\}', GIT_COMMITTER_EMAIL to '$\{REMOTE_USER}@http.$\{REMOTE_ADDR\}',
ensuring that any reflogs created by 'git-receive-pack' contain some ensuring that any reflogs created by 'git-receive-pack' contain some

View File

@ -13,18 +13,20 @@ static const char content_type[] = "Content-Type";
static const char content_length[] = "Content-Length"; static const char content_length[] = "Content-Length";
static const char last_modified[] = "Last-Modified"; static const char last_modified[] = "Last-Modified";
static int getanyfile = 1; static int getanyfile = 1;
static unsigned long max_request_buffer = 10 * 1024 * 1024;
static struct string_list *query_params; static struct string_list *query_params;
struct rpc_service { struct rpc_service {
const char *name; const char *name;
const char *config_name; const char *config_name;
unsigned buffer_input : 1;
signed enabled : 2; signed enabled : 2;
}; };
static struct rpc_service rpc_service[] = { static struct rpc_service rpc_service[] = {
{ "upload-pack", "uploadpack", 1 }, { "upload-pack", "uploadpack", 1, 1 },
{ "receive-pack", "receivepack", -1 }, { "receive-pack", "receivepack", 0, -1 },
}; };
static struct string_list *get_parameters(void) static struct string_list *get_parameters(void)
@ -225,6 +227,7 @@ static void http_config(void)
struct strbuf var = STRBUF_INIT; struct strbuf var = STRBUF_INIT;
git_config_get_bool("http.getanyfile", &getanyfile); git_config_get_bool("http.getanyfile", &getanyfile);
git_config_get_ulong("http.maxrequestbuffer", &max_request_buffer);
for (i = 0; i < ARRAY_SIZE(rpc_service); i++) { for (i = 0; i < ARRAY_SIZE(rpc_service); i++) {
struct rpc_service *svc = &rpc_service[i]; struct rpc_service *svc = &rpc_service[i];
@ -266,9 +269,52 @@ static struct rpc_service *select_service(const char *name)
return svc; return svc;
} }
static void inflate_request(const char *prog_name, int out) /*
* This is basically strbuf_read(), except that if we
* hit max_request_buffer we die (we'd rather reject a
* maliciously large request than chew up infinite memory).
*/
static ssize_t read_request(int fd, unsigned char **out)
{
size_t len = 0, alloc = 8192;
unsigned char *buf = xmalloc(alloc);
if (max_request_buffer < alloc)
max_request_buffer = alloc;
while (1) {
ssize_t cnt;
cnt = read_in_full(fd, buf + len, alloc - len);
if (cnt < 0) {
free(buf);
return -1;
}
/* partial read from read_in_full means we hit EOF */
len += cnt;
if (len < alloc) {
*out = buf;
return len;
}
/* otherwise, grow and try again (if we can) */
if (alloc == max_request_buffer)
die("request was larger than our maximum size (%lu);"
" try setting GIT_HTTP_MAX_REQUEST_BUFFER",
max_request_buffer);
alloc = alloc_nr(alloc);
if (alloc > max_request_buffer)
alloc = max_request_buffer;
REALLOC_ARRAY(buf, alloc);
}
}
static void inflate_request(const char *prog_name, int out, int buffer_input)
{ {
git_zstream stream; git_zstream stream;
unsigned char *full_request = NULL;
unsigned char in_buf[8192]; unsigned char in_buf[8192];
unsigned char out_buf[8192]; unsigned char out_buf[8192];
unsigned long cnt = 0; unsigned long cnt = 0;
@ -277,11 +323,21 @@ static void inflate_request(const char *prog_name, int out)
git_inflate_init_gzip_only(&stream); git_inflate_init_gzip_only(&stream);
while (1) { while (1) {
ssize_t n = xread(0, in_buf, sizeof(in_buf)); ssize_t n;
if (buffer_input) {
if (full_request)
n = 0; /* nothing left to read */
else
n = read_request(0, &full_request);
stream.next_in = full_request;
} else {
n = xread(0, in_buf, sizeof(in_buf));
stream.next_in = in_buf;
}
if (n <= 0) if (n <= 0)
die("request ended in the middle of the gzip stream"); die("request ended in the middle of the gzip stream");
stream.next_in = in_buf;
stream.avail_in = n; stream.avail_in = n;
while (0 < stream.avail_in) { while (0 < stream.avail_in) {
@ -307,9 +363,22 @@ static void inflate_request(const char *prog_name, int out)
done: done:
git_inflate_end(&stream); git_inflate_end(&stream);
close(out); close(out);
free(full_request);
} }
static void run_service(const char **argv) static void copy_request(const char *prog_name, int out)
{
unsigned char *buf;
ssize_t n = read_request(0, &buf);
if (n < 0)
die_errno("error reading request body");
if (write_in_full(out, buf, n) != n)
die("%s aborted reading request", prog_name);
close(out);
free(buf);
}
static void run_service(const char **argv, int buffer_input)
{ {
const char *encoding = getenv("HTTP_CONTENT_ENCODING"); const char *encoding = getenv("HTTP_CONTENT_ENCODING");
const char *user = getenv("REMOTE_USER"); const char *user = getenv("REMOTE_USER");
@ -334,7 +403,7 @@ static void run_service(const char **argv)
"GIT_COMMITTER_EMAIL=%s@http.%s", user, host); "GIT_COMMITTER_EMAIL=%s@http.%s", user, host);
cld.argv = argv; cld.argv = argv;
if (gzipped_request) if (buffer_input || gzipped_request)
cld.in = -1; cld.in = -1;
cld.git_cmd = 1; cld.git_cmd = 1;
if (start_command(&cld)) if (start_command(&cld))
@ -342,7 +411,9 @@ static void run_service(const char **argv)
close(1); close(1);
if (gzipped_request) if (gzipped_request)
inflate_request(argv[0], cld.in); inflate_request(argv[0], cld.in, buffer_input);
else if (buffer_input)
copy_request(argv[0], cld.in);
else else
close(0); close(0);
@ -392,7 +463,7 @@ static void get_info_refs(char *arg)
packet_flush(1); packet_flush(1);
argv[0] = svc->name; argv[0] = svc->name;
run_service(argv); run_service(argv, 0);
} else { } else {
select_getanyfile(); select_getanyfile();
@ -496,25 +567,28 @@ static void service_rpc(char *service_name)
end_headers(); end_headers();
argv[0] = svc->name; argv[0] = svc->name;
run_service(argv); run_service(argv, svc->buffer_input);
strbuf_release(&buf); strbuf_release(&buf);
} }
static int dead;
static NORETURN void die_webcgi(const char *err, va_list params) static NORETURN void die_webcgi(const char *err, va_list params)
{ {
static int dead; if (dead <= 1) {
vreportf("fatal: ", err, params);
if (!dead) {
dead = 1;
http_status(500, "Internal Server Error"); http_status(500, "Internal Server Error");
hdr_nocache(); hdr_nocache();
end_headers(); end_headers();
vreportf("fatal: ", err, params);
} }
exit(0); /* we successfully reported a failure ;-) */ exit(0); /* we successfully reported a failure ;-) */
} }
static int die_webcgi_recursing(void)
{
return dead++ > 1;
}
static char* getdir(void) static char* getdir(void)
{ {
struct strbuf buf = STRBUF_INIT; struct strbuf buf = STRBUF_INIT;
@ -569,6 +643,7 @@ int main(int argc, char **argv)
git_extract_argv0_path(argv[0]); git_extract_argv0_path(argv[0]);
set_die_routine(die_webcgi); set_die_routine(die_webcgi);
set_die_is_recursing_routine(die_webcgi_recursing);
if (!method) if (!method)
die("No REQUEST_METHOD from server"); die("No REQUEST_METHOD from server");
@ -619,6 +694,9 @@ int main(int argc, char **argv)
not_found("Repository not exported: '%s'", dir); not_found("Repository not exported: '%s'", dir);
http_config(); http_config();
max_request_buffer = git_env_ulong("GIT_HTTP_MAX_REQUEST_BUFFER",
max_request_buffer);
cmd->imp(cmd_arg); cmd->imp(cmd_arg);
return 0; return 0;
} }

View File

@ -218,27 +218,35 @@ test_expect_success 'transfer.hiderefs works over smart-http' '
git -C hidden.git rev-parse --verify b git -C hidden.git rev-parse --verify b
' '
test_expect_success 'create 2,000 tags in the repo' ' # create an arbitrary number of tags, numbered from tag-$1 to tag-$2
( create_tags () {
cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" && rm -f marks &&
for i in $(test_seq 2000) for i in $(test_seq "$1" "$2")
do do
echo "commit refs/heads/too-many-refs" # don't use here-doc, because it requires a process
echo "mark :$i" # per loop iteration
echo "committer git <git@example.com> $i +0000" echo "commit refs/heads/too-many-refs-$1" &&
echo "data 0" echo "mark :$i" &&
echo "M 644 inline bla.txt" echo "committer git <git@example.com> $i +0000" &&
echo "data 4" echo "data 0" &&
echo "bla" echo "M 644 inline bla.txt" &&
echo "data 4" &&
echo "bla" &&
# make every commit dangling by always # make every commit dangling by always
# rewinding the branch after each commit # rewinding the branch after each commit
echo "reset refs/heads/too-many-refs" echo "reset refs/heads/too-many-refs-$1" &&
echo "from :1" echo "from :$1"
done | git fast-import --export-marks=marks && done | git fast-import --export-marks=marks &&
# now assign tags to all the dangling commits we created above # now assign tags to all the dangling commits we created above
tag=$(perl -e "print \"bla\" x 30") && tag=$(perl -e "print \"bla\" x 30") &&
sed -e "s|^:\([^ ]*\) \(.*\)$|\2 refs/tags/$tag-\1|" <marks >>packed-refs sed -e "s|^:\([^ ]*\) \(.*\)$|\2 refs/tags/$tag-\1|" <marks >>packed-refs
}
test_expect_success 'create 2,000 tags in the repo' '
(
cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
create_tags 1 2000
) )
' '
@ -259,5 +267,20 @@ test_expect_success 'large fetch-pack requests can be split across POSTs' '
test_line_count = 2 posts test_line_count = 2 posts
' '
test_expect_success EXPENSIVE 'http can handle enormous ref negotiation' '
(
cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
create_tags 2001 50000
) &&
git -C too-many-refs fetch -q --tags &&
(
cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
create_tags 50001 100000
) &&
git -C too-many-refs fetch -q --tags &&
git -C too-many-refs for-each-ref refs/tags >tags &&
test_line_count = 100000 tags
'
stop_httpd stop_httpd
test_done test_done