2009-08-05 07:01:56 +02:00
|
|
|
#include "cache.h"
|
2017-06-14 20:07:36 +02:00
|
|
|
#include "config.h"
|
2009-08-05 07:01:56 +02:00
|
|
|
#include "remote.h"
|
2018-03-14 19:31:45 +01:00
|
|
|
#include "connect.h"
|
2009-08-05 07:01:56 +02:00
|
|
|
#include "strbuf.h"
|
|
|
|
#include "walker.h"
|
|
|
|
#include "http.h"
|
2018-04-10 23:26:18 +02:00
|
|
|
#include "exec-cmd.h"
|
2009-10-31 01:47:30 +01:00
|
|
|
#include "run-command.h"
|
2009-10-31 01:47:40 +01:00
|
|
|
#include "pkt-line.h"
|
2013-08-03 00:14:50 +02:00
|
|
|
#include "string-list.h"
|
2009-10-31 01:47:41 +01:00
|
|
|
#include "sideband.h"
|
2020-07-28 22:23:39 +02:00
|
|
|
#include "strvec.h"
|
http: hoist credential request out of handle_curl_result
When we are handling a curl response code in http_request or
in the remote-curl RPC code, we use the handle_curl_result
helper to translate curl's response into an easy-to-use
code. When we see an HTTP 401, we do one of two things:
1. If we already had a filled-in credential, we mark it as
rejected, and then return HTTP_NOAUTH to indicate to
the caller that we failed.
2. If we didn't, then we ask for a new credential and tell
the caller HTTP_REAUTH to indicate that they may want
to try again.
Rejecting in the first case makes sense; it is the natural
result of the request we just made. However, prompting for
more credentials in the second step does not always make
sense. We do not know for sure that the caller is going to
make a second request, nor are we sure that it will be
to the same URL. Logically, the prompt belongs not to the
request we just finished, but to the request we are (maybe)
about to make.
In practice, it is very hard to trigger any bad behavior.
Currently, if we make a second request, it will always be to
the same URL (even in the face of redirects, because curl
handles the redirects internally). And we almost always
retry on HTTP_REAUTH these days. The one exception is if we
are streaming a large RPC request to the server (e.g., a
pushed packfile), in which case we cannot restart. It's
extremely unlikely to see a 401 response at this stage,
though, as we would typically have seen it when we sent a
probe request, before streaming the data.
This patch drops the automatic prompt out of case 2, and
instead requires the caller to do it. This is a few extra
lines of code, and the bug it fixes is unlikely to come up
in practice. But it is conceptually cleaner, and paves the
way for better handling of credentials across redirects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
2013-09-28 10:31:45 +02:00
|
|
|
#include "credential.h"
|
2020-03-30 16:03:46 +02:00
|
|
|
#include "oid-array.h"
|
2015-08-19 17:26:46 +02:00
|
|
|
#include "send-pack.h"
|
2018-03-14 19:31:45 +01:00
|
|
|
#include "protocol.h"
|
2018-02-19 20:50:14 +01:00
|
|
|
#include "quote.h"
|
2019-03-04 16:33:46 +01:00
|
|
|
#include "transport.h"
|
2009-08-05 07:01:56 +02:00
|
|
|
|
2009-10-31 01:47:26 +01:00
|
|
|
/* Remote handle for this helper; presumably assigned during start-up outside this excerpt — TODO confirm. */
static struct remote *remote;
|
2013-09-28 10:35:25 +02:00
|
|
|
/* always ends with a trailing slash */
|
|
|
|
/* URL handed to http_fetch_ref(); passed through transport_anonymize_url() before being shown in error messages. */
static struct strbuf url = STRBUF_INIT;
|
2009-10-31 01:47:26 +01:00
|
|
|
|
2009-10-31 01:47:29 +01:00
|
|
|
struct options {
|
|
|
|
int verbosity;
|
|
|
|
unsigned long depth;
|
2016-06-12 12:53:59 +02:00
|
|
|
char *deepen_since;
|
2016-06-12 12:54:04 +02:00
|
|
|
struct string_list deepen_not;
|
2017-03-22 23:22:00 +01:00
|
|
|
struct string_list push_options;
|
2017-12-08 16:58:44 +01:00
|
|
|
char *filter;
|
2009-10-31 01:47:29 +01:00
|
|
|
unsigned progress : 1,
|
2013-07-21 10:18:05 +02:00
|
|
|
check_self_contained_and_connected : 1,
|
2013-12-05 14:02:50 +01:00
|
|
|
cloning : 1,
|
|
|
|
update_shallow : 1,
|
2009-10-31 01:47:30 +01:00
|
|
|
followtags : 1,
|
2009-10-31 01:47:41 +01:00
|
|
|
dry_run : 1,
|
2014-09-15 23:59:00 +02:00
|
|
|
thin : 1,
|
2015-08-19 17:26:46 +02:00
|
|
|
/* One of the SEND_PACK_PUSH_CERT_* constants. */
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server..
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 12:54:09 +02:00
|
|
|
push_cert : 2,
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-12-05 17:58:49 +01:00
|
|
|
deepen_relative : 1,
|
2020-08-17 21:48:18 +02:00
|
|
|
|
|
|
|
/* see documentation of corresponding flag in fetch-pack.h */
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-12-05 17:58:49 +01:00
|
|
|
from_promisor : 1,
|
2020-08-17 21:48:18 +02:00
|
|
|
|
2022-03-28 16:02:07 +02:00
|
|
|
refetch : 1,
|
2020-05-25 21:59:04 +02:00
|
|
|
atomic : 1,
|
2020-10-03 14:10:45 +02:00
|
|
|
object_format : 1,
|
|
|
|
force_if_includes : 1;
|
2020-05-25 21:59:04 +02:00
|
|
|
const struct git_hash_algo *hash_algo;
|
2009-10-31 01:47:29 +01:00
|
|
|
};
|
|
|
|
/* Current option values; written by set_option() and by the ref-listing parsers (hash_algo). */
static struct options options;
|
2013-08-03 00:14:50 +02:00
|
|
|
/* One "--force-with-lease=<value>" string per "cas" option received by set_option(). */
static struct string_list cas_options = STRING_LIST_INIT_DUP;
|
2009-10-31 01:47:29 +01:00
|
|
|
|
|
|
|
static int set_option(const char *name, const char *value)
|
|
|
|
{
|
|
|
|
if (!strcmp(name, "verbosity")) {
|
|
|
|
char *end;
|
|
|
|
int v = strtol(value, &end, 10);
|
|
|
|
if (value == end || *end)
|
|
|
|
return -1;
|
|
|
|
options.verbosity = v;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
else if (!strcmp(name, "progress")) {
|
|
|
|
if (!strcmp(value, "true"))
|
|
|
|
options.progress = 1;
|
|
|
|
else if (!strcmp(value, "false"))
|
|
|
|
options.progress = 0;
|
|
|
|
else
|
|
|
|
return -1;
|
2009-10-31 01:47:42 +01:00
|
|
|
return 0;
|
2009-10-31 01:47:29 +01:00
|
|
|
}
|
|
|
|
else if (!strcmp(name, "depth")) {
|
|
|
|
char *end;
|
|
|
|
unsigned long v = strtoul(value, &end, 10);
|
|
|
|
if (value == end || *end)
|
|
|
|
return -1;
|
|
|
|
options.depth = v;
|
2009-10-31 01:47:42 +01:00
|
|
|
return 0;
|
2009-10-31 01:47:29 +01:00
|
|
|
}
|
2016-06-12 12:53:59 +02:00
|
|
|
else if (!strcmp(name, "deepen-since")) {
|
|
|
|
options.deepen_since = xstrdup(value);
|
|
|
|
return 0;
|
|
|
|
}
|
2016-06-12 12:54:04 +02:00
|
|
|
else if (!strcmp(name, "deepen-not")) {
|
|
|
|
string_list_append(&options.deepen_not, value);
|
|
|
|
return 0;
|
|
|
|
}
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server..
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 12:54:09 +02:00
|
|
|
else if (!strcmp(name, "deepen-relative")) {
|
|
|
|
if (!strcmp(value, "true"))
|
|
|
|
options.deepen_relative = 1;
|
|
|
|
else if (!strcmp(value, "false"))
|
|
|
|
options.deepen_relative = 0;
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
return 0;
|
|
|
|
}
|
2009-10-31 01:47:29 +01:00
|
|
|
else if (!strcmp(name, "followtags")) {
|
|
|
|
if (!strcmp(value, "true"))
|
|
|
|
options.followtags = 1;
|
|
|
|
else if (!strcmp(value, "false"))
|
|
|
|
options.followtags = 0;
|
|
|
|
else
|
|
|
|
return -1;
|
2009-10-31 01:47:42 +01:00
|
|
|
return 0;
|
2009-10-31 01:47:29 +01:00
|
|
|
}
|
2009-10-31 01:47:30 +01:00
|
|
|
else if (!strcmp(name, "dry-run")) {
|
|
|
|
if (!strcmp(value, "true"))
|
|
|
|
options.dry_run = 1;
|
|
|
|
else if (!strcmp(value, "false"))
|
|
|
|
options.dry_run = 0;
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
return 0;
|
|
|
|
}
|
2013-07-21 10:18:05 +02:00
|
|
|
else if (!strcmp(name, "check-connectivity")) {
|
|
|
|
if (!strcmp(value, "true"))
|
|
|
|
options.check_self_contained_and_connected = 1;
|
|
|
|
else if (!strcmp(value, "false"))
|
|
|
|
options.check_self_contained_and_connected = 0;
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
return 0;
|
|
|
|
}
|
2013-08-03 00:14:50 +02:00
|
|
|
else if (!strcmp(name, "cas")) {
|
|
|
|
struct strbuf val = STRBUF_INIT;
|
remote-curl: make --force-with-lease work with non-ASCII ref names
When we invoke a remote transport helper and pass an option with an
argument, we quote the argument as a C-style string if necessary. This
is the case for the cas option, which implements the --force-with-lease
command-line flag, when we're passing a non-ASCII refname.
However, the remote curl helper isn't designed to parse such an
argument, meaning that if we try to use --force-with-lease with an HTTP
push and a non-ASCII refname, we get an error like this:
error: cannot parse expected object name '0000000000000000000000000000000000000000"'
Note the double quote, which get_oid has reminded us is not valid in an
hex object ID.
Even if we had been able to parse it, we would send the wrong data to
the server: we'd send an escaped ref, which would not behave as the user
wanted and might accidentally result in updating or deleting a ref we
hadn't intended.
Since we need to expect a quoted C-style string here, just check if the
first argument is a double quote, and if so, unquote it. Note that if
the refname contains a double quote, then we will have double-quoted it
already, so there is no ambiguity.
We test for this case only in the smart protocol, since the DAV-based
protocol is not capable of handling this capability. We use UTF-8
because this is nicer in our tests and friendlier to Windows, but the
code should work for all non-ASCII refs.
While we're at it, since the name of the option is now well established
and isn't going to change, let's inline it instead of using the #define
constant.
Reported-by: Frej Bjon <frej.bjon@nemit.fi>
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-07-21 03:15:11 +02:00
|
|
|
strbuf_addstr(&val, "--force-with-lease=");
|
|
|
|
if (*value != '"')
|
|
|
|
strbuf_addstr(&val, value);
|
|
|
|
else if (unquote_c_style(&val, value, NULL))
|
|
|
|
return -1;
|
2013-08-03 00:14:50 +02:00
|
|
|
string_list_append(&cas_options, val.buf);
|
|
|
|
strbuf_release(&val);
|
|
|
|
return 0;
|
2020-10-03 14:10:45 +02:00
|
|
|
} else if (!strcmp(name, TRANS_OPT_FORCE_IF_INCLUDES)) {
|
|
|
|
if (!strcmp(value, "true"))
|
|
|
|
options.force_if_includes = 1;
|
|
|
|
else if (!strcmp(value, "false"))
|
|
|
|
options.force_if_includes = 0;
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
return 0;
|
2013-12-05 14:02:50 +01:00
|
|
|
} else if (!strcmp(name, "cloning")) {
|
|
|
|
if (!strcmp(value, "true"))
|
|
|
|
options.cloning = 1;
|
|
|
|
else if (!strcmp(value, "false"))
|
|
|
|
options.cloning = 0;
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
return 0;
|
|
|
|
} else if (!strcmp(name, "update-shallow")) {
|
|
|
|
if (!strcmp(value, "true"))
|
|
|
|
options.update_shallow = 1;
|
|
|
|
else if (!strcmp(value, "false"))
|
|
|
|
options.update_shallow = 0;
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
return 0;
|
2014-09-15 23:59:00 +02:00
|
|
|
} else if (!strcmp(name, "pushcert")) {
|
|
|
|
if (!strcmp(value, "true"))
|
2015-08-19 17:26:46 +02:00
|
|
|
options.push_cert = SEND_PACK_PUSH_CERT_ALWAYS;
|
2014-09-15 23:59:00 +02:00
|
|
|
else if (!strcmp(value, "false"))
|
2015-08-19 17:26:46 +02:00
|
|
|
options.push_cert = SEND_PACK_PUSH_CERT_NEVER;
|
|
|
|
else if (!strcmp(value, "if-asked"))
|
|
|
|
options.push_cert = SEND_PACK_PUSH_CERT_IF_ASKED;
|
2014-09-15 23:59:00 +02:00
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
return 0;
|
remote-curl: pass on atomic capability to remote side
When pushing more than one reference with the --atomic option, the
server is supposed to perform a single atomic transaction to update the
references, leaving them either all to succeed or all to fail. This
works fine when pushing locally or over SSH, but when pushing over HTTP,
we fail to pass the atomic capability to the remote side. In fact, we
have not reported this capability to any remote helpers during the life
of the feature.
Now normally, things happen to work nevertheless, since we actually
check for most types of failures, such as non-fast-forward updates, on
the client side, and just abort the entire attempt. However, if the
server side reports a problem, such as the inability to lock a ref, the
transaction isn't atomic, because we haven't passed the appropriate
capability over and the remote side has no way of knowing that we wanted
atomic behavior.
Fix this by passing the option from the transport code through to remote
helpers, and from the HTTP remote helper down to send-pack. With this
change, we can detect if the server side rejects the push and report
back appropriately. Note the difference in the messages: the remote
side reports "atomic transaction failed", while our own checking rejects
pushes with the message "atomic push failed".
Document the atomic option in the remote helper documentation, so other
implementers can implement it if they like.
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-10-17 01:45:34 +02:00
|
|
|
} else if (!strcmp(name, "atomic")) {
|
|
|
|
if (!strcmp(value, "true"))
|
|
|
|
options.atomic = 1;
|
|
|
|
else if (!strcmp(value, "false"))
|
|
|
|
options.atomic = 0;
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
return 0;
|
2017-03-22 23:22:00 +01:00
|
|
|
} else if (!strcmp(name, "push-option")) {
|
2018-02-19 20:50:14 +01:00
|
|
|
if (*value != '"')
|
|
|
|
string_list_append(&options.push_options, value);
|
|
|
|
else {
|
|
|
|
struct strbuf unquoted = STRBUF_INIT;
|
|
|
|
if (unquote_c_style(&unquoted, value, NULL) < 0)
|
2019-03-06 00:20:40 +01:00
|
|
|
die(_("invalid quoting in push-option value: '%s'"), value);
|
2018-02-19 20:50:14 +01:00
|
|
|
string_list_append_nodup(&options.push_options,
|
|
|
|
strbuf_detach(&unquoted, NULL));
|
|
|
|
}
|
2017-03-22 23:22:00 +01:00
|
|
|
return 0;
|
2016-02-03 05:09:14 +01:00
|
|
|
} else if (!strcmp(name, "family")) {
|
|
|
|
if (!strcmp(value, "ipv4"))
|
|
|
|
git_curl_ipresolve = CURL_IPRESOLVE_V4;
|
|
|
|
else if (!strcmp(value, "ipv6"))
|
|
|
|
git_curl_ipresolve = CURL_IPRESOLVE_V6;
|
|
|
|
else if (!strcmp(value, "all"))
|
|
|
|
git_curl_ipresolve = CURL_IPRESOLVE_WHATEVER;
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
return 0;
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-12-05 17:58:49 +01:00
|
|
|
} else if (!strcmp(name, "from-promisor")) {
|
|
|
|
options.from_promisor = 1;
|
|
|
|
return 0;
|
2022-03-28 16:02:07 +02:00
|
|
|
} else if (!strcmp(name, "refetch")) {
|
|
|
|
options.refetch = 1;
|
|
|
|
return 0;
|
2017-12-08 16:58:44 +01:00
|
|
|
} else if (!strcmp(name, "filter")) {
|
2018-09-05 19:03:07 +02:00
|
|
|
options.filter = xstrdup(value);
|
2017-12-08 16:58:44 +01:00
|
|
|
return 0;
|
2020-05-25 21:59:04 +02:00
|
|
|
} else if (!strcmp(name, "object-format")) {
|
|
|
|
int algo;
|
|
|
|
options.object_format = 1;
|
|
|
|
if (strcmp(value, "true")) {
|
|
|
|
algo = hash_algo_by_name(value);
|
|
|
|
if (algo == GIT_HASH_UNKNOWN)
|
|
|
|
die("unknown object format '%s'", value);
|
|
|
|
options.hash_algo = &hash_algos[algo];
|
|
|
|
}
|
|
|
|
return 0;
|
2013-12-05 14:02:50 +01:00
|
|
|
} else {
|
2009-10-31 01:47:29 +01:00
|
|
|
return 1 /* unsupported */;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-10-31 01:47:40 +01:00
|
|
|
struct discovery {
|
2018-03-15 18:31:36 +01:00
|
|
|
char *service;
|
2009-10-31 01:47:40 +01:00
|
|
|
char *buf_alloc;
|
|
|
|
char *buf;
|
|
|
|
size_t len;
|
remote-curl: always parse incoming refs
When remote-curl receives a list of refs from a server, it
keeps the whole buffer intact. When we get a "list" command,
we feed the result to get_remote_heads, and when we get a
"fetch" or "push" command, we feed it to fetch-pack or
send-pack, respectively.
If the HTTP response from the server is truncated for any
reason, we will get an incomplete ref advertisement. If we
then feed this incomplete list to fetch-pack, one of a few
things may happen:
1. If the truncation is in a packet header, fetch-pack
will notice the bogus line and complain.
2. If the truncation is inside a packet, fetch-pack will
keep waiting for us to send the rest of the packet,
which we never will.
3. If the truncation is at a packet boundary, fetch-pack
will keep waiting for us to send the next packet, which
we never will.
As a result, fetch-pack hangs, waiting for input. However,
remote-curl believes it has sent all of the advertisement,
and therefore waits for fetch-pack to speak. The two
processes end up in a deadlock.
We do notice the broken ref list if we feed it to
get_remote_heads. So if git asks the helper to do a "list"
followed by a "fetch", we are safe; we'll abort during the
list operation, which parses the refs.
This patch teaches remote-curl to always parse and save the
incoming ref list when we read the ref advertisement from a
server. That means that we will always verify and abort
before even running fetch-pack (or send-pack) when reading a
corrupted list, even if we do not run the "list" command
explicitly.
Since we save the result, in the common case of running
"list" then "fetch", we do not do any extra parsing at all.
In the case of just a "fetch", we do an extra round of
parsing, but only once.
Note also that the "fetch" case will now also initialize
server_capabilities from the remote (in remote-curl; we
already would do so inside fetch-pack). Doing "list+fetch"
already does this. It doesn't actually matter now, but the
new behavior is arguably more correct, should remote-curl
ever start caring about the server's capability list.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-20 21:07:19 +01:00
|
|
|
struct ref *refs;
|
2017-03-31 03:40:00 +02:00
|
|
|
struct oid_array shallow;
|
2018-03-15 18:31:37 +01:00
|
|
|
enum protocol_version version;
|
2009-10-31 01:47:40 +01:00
|
|
|
unsigned proto_git : 1;
|
|
|
|
};
|
|
|
|
/* Cached discovery; free_discovery() resets this to NULL when the cached entry itself is freed. */
static struct discovery *last_discovery;
|
|
|
|
|
2013-02-20 21:07:11 +01:00
|
|
|
static struct ref *parse_git_refs(struct discovery *heads, int for_push)
|
|
|
|
{
|
|
|
|
struct ref *list = NULL;
|
2018-03-14 19:31:45 +01:00
|
|
|
struct packet_reader reader;
|
|
|
|
|
|
|
|
packet_reader_init(&reader, -1, heads->buf, heads->len,
|
|
|
|
PACKET_READ_CHOMP_NEWLINE |
|
pack-protocol.txt: accept error packets in any context
In the Git pack protocol definition, an error packet may appear only in
a certain context. However, servers can face a runtime error (e.g. I/O
error) at an arbitrary timing. This patch changes the protocol to allow
an error packet to be sent instead of any packet.
Without this protocol spec change, when a server cannot process a
request, there's no way to tell that to a client. Since the server
cannot produce a valid response, it would be forced to cut a connection
without telling why. With this protocol spec change, the server can be
more gentle in this situation. An old client may see these error packets
as an unexpected packet, but this is not worse than having an unexpected
EOF.
Following this protocol spec change, the error packet handling code is
moved to pkt-line.c. Implementation wise, this implementation uses
pkt-line to communicate with a subprocess. Since this is not a part of
Git protocol, it's possible that a packet that is not supposed to be an
error packet is mistakenly parsed as an error packet. This error packet
handling is enabled only for the Git pack protocol parsing code
considering this.
Signed-off-by: Masaya Suzuki <masayasuzuki@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-12-29 22:19:15 +01:00
|
|
|
PACKET_READ_GENTLE_ON_EOF |
|
|
|
|
PACKET_READ_DIE_ON_ERR_PACKET);
|
2018-03-14 19:31:45 +01:00
|
|
|
|
2018-03-15 18:31:37 +01:00
|
|
|
heads->version = discover_version(&reader);
|
|
|
|
switch (heads->version) {
|
2018-03-14 19:31:47 +01:00
|
|
|
case protocol_v2:
|
2018-03-15 18:31:41 +01:00
|
|
|
/*
|
|
|
|
* Do nothing. This isn't a list of refs but rather a
|
|
|
|
* capability advertisement. Client would have run
|
|
|
|
* 'stateless-connect' so we'll dump this capability listing
|
|
|
|
* and let them request the refs themselves.
|
|
|
|
*/
|
2018-03-14 19:31:47 +01:00
|
|
|
break;
|
2018-03-14 19:31:45 +01:00
|
|
|
case protocol_v1:
|
|
|
|
case protocol_v0:
|
|
|
|
get_remote_heads(&reader, &list, for_push ? REF_NORMAL : 0,
|
|
|
|
NULL, &heads->shallow);
|
2020-05-25 21:59:04 +02:00
|
|
|
options.hash_algo = reader.hash_algo;
|
2018-03-14 19:31:45 +01:00
|
|
|
break;
|
|
|
|
case protocol_unknown_version:
|
|
|
|
BUG("unknown protocol version");
|
|
|
|
}
|
|
|
|
|
2013-02-20 21:07:11 +01:00
|
|
|
return list;
|
|
|
|
}
|
|
|
|
|
2020-06-19 19:55:51 +02:00
|
|
|
static const struct git_hash_algo *detect_hash_algo(struct discovery *heads)
|
|
|
|
{
|
|
|
|
const char *p = memchr(heads->buf, '\t', heads->len);
|
|
|
|
int algo;
|
|
|
|
if (!p)
|
|
|
|
return the_hash_algo;
|
|
|
|
|
|
|
|
algo = hash_algo_by_length((p - heads->buf) / 2);
|
|
|
|
if (algo == GIT_HASH_UNKNOWN)
|
|
|
|
return NULL;
|
|
|
|
return &hash_algos[algo];
|
|
|
|
}
|
|
|
|
|
2013-02-20 21:07:11 +01:00
|
|
|
static struct ref *parse_info_refs(struct discovery *heads)
|
|
|
|
{
|
|
|
|
char *data, *start, *mid;
|
|
|
|
char *ref_name;
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
struct ref *refs = NULL;
|
|
|
|
struct ref *ref = NULL;
|
|
|
|
struct ref *last_ref = NULL;
|
|
|
|
|
2020-06-19 19:55:51 +02:00
|
|
|
options.hash_algo = detect_hash_algo(heads);
|
|
|
|
if (!options.hash_algo)
|
|
|
|
die("%sinfo/refs not valid: could not determine hash algorithm; "
|
|
|
|
"is this a git repository?",
|
|
|
|
transport_anonymize_url(url.buf));
|
|
|
|
|
2013-02-20 21:07:11 +01:00
|
|
|
data = heads->buf;
|
|
|
|
start = NULL;
|
|
|
|
mid = data;
|
|
|
|
while (i < heads->len) {
|
|
|
|
if (!start) {
|
|
|
|
start = &data[i];
|
|
|
|
}
|
|
|
|
if (data[i] == '\t')
|
|
|
|
mid = &data[i];
|
|
|
|
if (data[i] == '\n') {
|
2020-06-19 19:55:51 +02:00
|
|
|
if (mid - start != options.hash_algo->hexsz)
|
2019-03-06 00:20:40 +01:00
|
|
|
die(_("%sinfo/refs not valid: is this a git repository?"),
|
2019-03-04 16:33:46 +01:00
|
|
|
transport_anonymize_url(url.buf));
|
2013-02-20 21:07:11 +01:00
|
|
|
data[i] = 0;
|
|
|
|
ref_name = mid + 1;
|
2015-09-24 23:08:09 +02:00
|
|
|
ref = alloc_ref(ref_name);
|
2020-06-19 19:55:51 +02:00
|
|
|
get_oid_hex_algop(start, &ref->old_oid, options.hash_algo);
|
2013-02-20 21:07:11 +01:00
|
|
|
if (!refs)
|
|
|
|
refs = ref;
|
|
|
|
if (last_ref)
|
|
|
|
last_ref->next = ref;
|
|
|
|
last_ref = ref;
|
|
|
|
start = NULL;
|
|
|
|
}
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
|
|
|
|
ref = alloc_ref("HEAD");
|
2013-09-28 10:35:25 +02:00
|
|
|
if (!http_fetch_ref(url.buf, ref) &&
|
2013-02-20 21:07:11 +01:00
|
|
|
!resolve_remote_symref(ref, refs)) {
|
|
|
|
ref->next = refs;
|
|
|
|
refs = ref;
|
|
|
|
} else {
|
|
|
|
free(ref);
|
|
|
|
}
|
|
|
|
|
|
|
|
return refs;
|
|
|
|
}
|
|
|
|
|
2009-10-31 01:47:40 +01:00
|
|
|
static void free_discovery(struct discovery *d)
|
|
|
|
{
|
|
|
|
if (d) {
|
|
|
|
if (d == last_discovery)
|
|
|
|
last_discovery = NULL;
|
2017-03-26 18:01:37 +02:00
|
|
|
free(d->shallow.oid);
|
2009-10-31 01:47:40 +01:00
|
|
|
free(d->buf_alloc);
|
remote-curl: always parse incoming refs
When remote-curl receives a list of refs from a server, it
keeps the whole buffer intact. When we get a "list" command,
we feed the result to get_remote_heads, and when we get a
"fetch" or "push" command, we feed it to fetch-pack or
send-pack, respectively.
If the HTTP response from the server is truncated for any
reason, we will get an incomplete ref advertisement. If we
then feed this incomplete list to fetch-pack, one of a few
things may happen:
1. If the truncation is in a packet header, fetch-pack
will notice the bogus line and complain.
2. If the truncation is inside a packet, fetch-pack will
keep waiting for us to send the rest of the packet,
which we never will.
3. If the truncation is at a packet boundary, fetch-pack
will keep waiting for us to send the next packet, which
we never will.
As a result, fetch-pack hangs, waiting for input. However,
remote-curl believes it has sent all of the advertisement,
and therefore waits for fetch-pack to speak. The two
processes end up in a deadlock.
We do notice the broken ref list if we feed it to
get_remote_heads. So if git asks the helper to do a "list"
followed by a "fetch", we are safe; we'll abort during the
list operation, which parses the refs.
This patch teaches remote-curl to always parse and save the
incoming ref list when we read the ref advertisement from a
server. That means that we will always verify and abort
before even running fetch-pack (or send-pack) when reading a
corrupted list, even if we do not run the "list" command
explicitly.
Since we save the result, in the common case of running
"list" then "fetch", we do not do any extra parsing at all.
In the case of just a "fetch", we do an extra round of
parsing, but only once.
Note also that the "fetch" case will now also initialize
server_capabilities from the remote (in remote-curl; we
already would do so inside fetch-pack). Doing "list+fetch"
already does this. It doesn't actually matter now, but the
new behavior is arguably more correct, should remote-curl
ever start caring about the server's capability list.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-20 21:07:19 +01:00
|
|
|
free_refs(d->refs);
|
2018-03-15 18:31:36 +01:00
|
|
|
free(d->service);
|
2009-10-31 01:47:40 +01:00
|
|
|
free(d);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-05-22 11:30:29 +02:00
|
|
|
static int show_http_message(struct strbuf *type, struct strbuf *charset,
|
|
|
|
struct strbuf *msg)
|
remote-curl: show server content on http errors
If an http request to a remote git server fails, we show
only the http response code, or sometimes a custom message
for particular codes. This gives the server no opportunity
to offer a more detailed explanation of the reason for the
failure, or to give extra advice.
This patch teaches remote-curl to record and display the
body content of a failed http response. We only display such
responses when the content-type is advertised as text/plain,
as it is the most likely to look presentable on the user's
terminal (and it is hoped to be a good indication that the
message is intended for git clients, and not for a web
browser).
Each line of the new output is prepended with "remote:".
Example output may look like this (assuming the server is
configured to display such a helpful message):
$ GIT_SMART_HTTP=0 git clone https://example.com/some/repo.git
Cloning into 'repo'...
remote: Sorry, fetching via dumb http is forbidden.
remote: Please upgrade your git client to v1.6.6 or greater
remote: and make sure that smart-http is enabled.
error: The requested URL returned error: 403 while accessing http://localhost:5001/some/repo.git/info/refs
fatal: HTTP request failed
For the sake of simplicity, we only record and display these
errors during the initial fetch of the ref list, as that is
the initial contact with the server and where the most
common, interesting errors happen (and there is already
precedent, as that is the only place we currently massage
http error codes into more helpful messages).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-06 00:17:23 +02:00
|
|
|
{
|
|
|
|
const char *p, *eol;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We only show text/plain parts, as other types are likely
|
|
|
|
* to be ugly to look at on the user's terminal.
|
|
|
|
*/
|
2014-05-22 11:29:47 +02:00
|
|
|
if (strcmp(type->buf, "text/plain"))
|
remote-curl: show server content on http errors
If an http request to a remote git server fails, we show
only the http response code, or sometimes a custom message
for particular codes. This gives the server no opportunity
to offer a more detailed explanation of the reason for the
failure, or to give extra advice.
This patch teaches remote-curl to record and display the
body content of a failed http response. We only display such
responses when the content-type is advertised as text/plain,
as it is the most likely to look presentable on the user's
terminal (and it is hoped to be a good indication that the
message is intended for git clients, and not for a web
browser).
Each line of the new output is prepended with "remote:".
Example output may look like this (assuming the server is
configured to display such a helpful message):
$ GIT_SMART_HTTP=0 git clone https://example.com/some/repo.git
Cloning into 'repo'...
remote: Sorry, fetching via dumb http is forbidden.
remote: Please upgrade your git client to v1.6.6 or greater
remote: and make sure that smart-http is enabled.
error: The requested URL returned error: 403 while accessing http://localhost:5001/some/repo.git/info/refs
fatal: HTTP request failed
For the sake of simplicity, we only record and display these
errors during the initial fetch of the ref list, as that is
the initial contact with the server and where the most
common, interesting errors happen (and there is already
precedent, as that is the only place we currently massage
http error codes into more helpful messages).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-06 00:17:23 +02:00
|
|
|
return -1;
|
2014-05-22 11:30:29 +02:00
|
|
|
if (charset->len)
|
|
|
|
strbuf_reencode(msg, charset->buf, get_log_output_encoding());
|
remote-curl: show server content on http errors
If an http request to a remote git server fails, we show
only the http response code, or sometimes a custom message
for particular codes. This gives the server no opportunity
to offer a more detailed explanation of the reason for the
failure, or to give extra advice.
This patch teaches remote-curl to record and display the
body content of a failed http response. We only display such
responses when the content-type is advertised as text/plain,
as it is the most likely to look presentable on the user's
terminal (and it is hoped to be a good indication that the
message is intended for git clients, and not for a web
browser).
Each line of the new output is prepended with "remote:".
Example output may look like this (assuming the server is
configured to display such a helpful message):
$ GIT_SMART_HTTP=0 git clone https://example.com/some/repo.git
Cloning into 'repo'...
remote: Sorry, fetching via dumb http is forbidden.
remote: Please upgrade your git client to v1.6.6 or greater
remote: and make sure that smart-http is enabled.
error: The requested URL returned error: 403 while accessing http://localhost:5001/some/repo.git/info/refs
fatal: HTTP request failed
For the sake of simplicity, we only record and display these
errors during the initial fetch of the ref list, as that is
the initial contact with the server and where the most
common, interesting errors happen (and there is already
precedent, as that is the only place we currently massage
http error codes into more helpful messages).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-06 00:17:23 +02:00
|
|
|
|
|
|
|
strbuf_trim(msg);
|
|
|
|
if (!msg->len)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
p = msg->buf;
|
|
|
|
do {
|
|
|
|
eol = strchrnul(p, '\n');
|
|
|
|
fprintf(stderr, "remote: %.*s\n", (int)(eol - p), p);
|
|
|
|
p = eol + 1;
|
|
|
|
} while(*eol);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-03-15 18:31:39 +01:00
|
|
|
static int get_protocol_http_header(enum protocol_version version,
|
|
|
|
struct strbuf *header)
|
|
|
|
{
|
|
|
|
if (version > 0) {
|
|
|
|
strbuf_addf(header, GIT_PROTOCOL_HEADER ": version=%d",
|
|
|
|
version);
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
remote-curl: refactor smart-http discovery
After making initial contact with an http server, we have to decide if
the server supports smart-http, and if so, which version. Our rules are
a bit inconsistent:
1. For v0, we require that the content-type indicates a smart-http
response. We also require the response to look vaguely like a
pkt-line starting with "#". If one of those does not match, we fall
back to dumb-http.
But according to our http protocol spec[1]:
Dumb servers MUST NOT return a return type starting with
`application/x-git-`.
If we see the expected content-type, we should consider it
smart-http. At that point we can parse the pkt-line for real, and
complain if it is not syntactically valid.
2. For v2, we do not actually check the content-type. Our v2 protocol
spec says[2]:
When using the http:// or https:// transport a client makes a
"smart" info/refs request as described in `http-protocol.txt`[...]
and the http spec is clear that for a smart-http response[3]:
The Content-Type MUST be `application/x-$servicename-advertisement`.
So it is required according to the spec.
These inconsistencies were easy to miss because of the way the original
code was written as an inline conditional. Let's pull it out into its
own function for readability, and improve a few things:
- we now predicate the smart/dumb decision entirely on the presence of
the correct content-type
- we do a real pkt-line parse before deciding how to proceed (and die
if it isn't valid)
- use skip_prefix() for comparing service strings, instead of
constructing expected output in a strbuf; this avoids dealing with
memory cleanup
Note that this _is_ tightening what the client will allow. It's all
according to the spec, but it's possible that other implementations
might violate these. However, violating these particular rules seems
like an odd choice for a server to make.
[1] Documentation/technical/http-protocol.txt, l. 166-167
[2] Documentation/technical/protocol-v2.txt, l. 63-64
[3] Documentation/technical/http-protocol.txt, l. 247
Helped-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-06 20:18:48 +01:00
|
|
|
static void check_smart_http(struct discovery *d, const char *service,
|
|
|
|
struct strbuf *type)
|
|
|
|
{
|
|
|
|
const char *p;
|
|
|
|
struct packet_reader reader;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we don't see x-$service-advertisement, then it's not smart-http.
|
|
|
|
* But once we do, we commit to it and assume any other protocol
|
|
|
|
* violations are hard errors.
|
|
|
|
*/
|
|
|
|
if (!skip_prefix(type->buf, "application/x-", &p) ||
|
|
|
|
!skip_prefix(p, service, &p) ||
|
|
|
|
strcmp(p, "-advertisement"))
|
|
|
|
return;
|
|
|
|
|
|
|
|
packet_reader_init(&reader, -1, d->buf, d->len,
|
|
|
|
PACKET_READ_CHOMP_NEWLINE |
|
|
|
|
PACKET_READ_DIE_ON_ERR_PACKET);
|
|
|
|
if (packet_reader_read(&reader) != PACKET_READ_NORMAL)
|
2019-04-16 12:28:05 +02:00
|
|
|
die(_("invalid server response; expected service, got flush packet"));
|
remote-curl: refactor smart-http discovery
After making initial contact with an http server, we have to decide if
the server supports smart-http, and if so, which version. Our rules are
a bit inconsistent:
1. For v0, we require that the content-type indicates a smart-http
response. We also require the response to look vaguely like a
pkt-line starting with "#". If one of those does not match, we fall
back to dumb-http.
But according to our http protocol spec[1]:
Dumb servers MUST NOT return a return type starting with
`application/x-git-`.
If we see the expected content-type, we should consider it
smart-http. At that point we can parse the pkt-line for real, and
complain if it is not syntactically valid.
2. For v2, we do not actually check the content-type. Our v2 protocol
spec says[2]:
When using the http:// or https:// transport a client makes a
"smart" info/refs request as described in `http-protocol.txt`[...]
and the http spec is clear that for a smart-http response[3]:
The Content-Type MUST be `application/x-$servicename-advertisement`.
So it is required according to the spec.
These inconsistencies were easy to miss because of the way the original
code was written as an inline conditional. Let's pull it out into its
own function for readability, and improve a few things:
- we now predicate the smart/dumb decision entirely on the presence of
the correct content-type
- we do a real pkt-line parse before deciding how to proceed (and die
if it isn't valid)
- use skip_prefix() for comparing service strings, instead of
constructing expected output in a strbuf; this avoids dealing with
memory cleanup
Note that this _is_ tightening what the client will allow. It's all
according to the spec, but it's possible that other implementations
might violate these. However, violating these particular rules seems
like an odd choice for a server to make.
[1] Documentation/technical/http-protocol.txt, l. 166-167
[2] Documentation/technical/protocol-v2.txt, l. 63-64
[3] Documentation/technical/http-protocol.txt, l. 247
Helped-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-06 20:18:48 +01:00
|
|
|
|
|
|
|
if (skip_prefix(reader.line, "# service=", &p) && !strcmp(p, service)) {
|
|
|
|
/*
|
|
|
|
* The header can include additional metadata lines, up
|
|
|
|
* until a packet flush marker. Ignore these now, but
|
|
|
|
* in the future we might start to scan them.
|
|
|
|
*/
|
|
|
|
for (;;) {
|
|
|
|
packet_reader_read(&reader);
|
|
|
|
if (reader.pktlen <= 0) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* v0 smart http; callers expect us to soak up the
|
|
|
|
* service and header packets
|
|
|
|
*/
|
|
|
|
d->buf = reader.src_buffer;
|
|
|
|
d->len = reader.src_len;
|
|
|
|
d->proto_git = 1;
|
|
|
|
|
2019-02-06 20:18:58 +01:00
|
|
|
} else if (!strcmp(reader.line, "version 2")) {
|
remote-curl: refactor smart-http discovery
After making initial contact with an http server, we have to decide if
the server supports smart-http, and if so, which version. Our rules are
a bit inconsistent:
1. For v0, we require that the content-type indicates a smart-http
response. We also require the response to look vaguely like a
pkt-line starting with "#". If one of those does not match, we fall
back to dumb-http.
But according to our http protocol spec[1]:
Dumb servers MUST NOT return a return type starting with
`application/x-git-`.
If we see the expected content-type, we should consider it
smart-http. At that point we can parse the pkt-line for real, and
complain if it is not syntactically valid.
2. For v2, we do not actually check the content-type. Our v2 protocol
spec says[2]:
When using the http:// or https:// transport a client makes a
"smart" info/refs request as described in `http-protocol.txt`[...]
and the http spec is clear that for a smart-http response[3]:
The Content-Type MUST be `application/x-$servicename-advertisement`.
So it is required according to the spec.
These inconsistencies were easy to miss because of the way the original
code was written as an inline conditional. Let's pull it out into its
own function for readability, and improve a few things:
- we now predicate the smart/dumb decision entirely on the presence of
the correct content-type
- we do a real pkt-line parse before deciding how to proceed (and die
if it isn't valid)
- use skip_prefix() for comparing service strings, instead of
constructing expected output in a strbuf; this avoids dealing with
memory cleanup
Note that this _is_ tightening what the client will allow. It's all
according to the spec, but it's possible that other implementations
might violate these. However, violating these particular rules seems
like an odd choice for a server to make.
[1] Documentation/technical/http-protocol.txt, l. 166-167
[2] Documentation/technical/protocol-v2.txt, l. 63-64
[3] Documentation/technical/http-protocol.txt, l. 247
Helped-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-06 20:18:48 +01:00
|
|
|
/*
|
|
|
|
* v2 smart http; do not consume version packet, which will
|
|
|
|
* be handled elsewhere.
|
|
|
|
*/
|
|
|
|
d->proto_git = 1;
|
|
|
|
|
|
|
|
} else {
|
2019-04-16 12:28:05 +02:00
|
|
|
die(_("invalid server response; got '%s'"), reader.line);
|
remote-curl: refactor smart-http discovery
After making initial contact with an http server, we have to decide if
the server supports smart-http, and if so, which version. Our rules are
a bit inconsistent:
1. For v0, we require that the content-type indicates a smart-http
response. We also require the response to look vaguely like a
pkt-line starting with "#". If one of those does not match, we fall
back to dumb-http.
But according to our http protocol spec[1]:
Dumb servers MUST NOT return a return type starting with
`application/x-git-`.
If we see the expected content-type, we should consider it
smart-http. At that point we can parse the pkt-line for real, and
complain if it is not syntactically valid.
2. For v2, we do not actually check the content-type. Our v2 protocol
spec says[2]:
When using the http:// or https:// transport a client makes a
"smart" info/refs request as described in `http-protocol.txt`[...]
and the http spec is clear that for a smart-http response[3]:
The Content-Type MUST be `application/x-$servicename-advertisement`.
So it is required according to the spec.
These inconsistencies were easy to miss because of the way the original
code was written as an inline conditional. Let's pull it out into its
own function for readability, and improve a few things:
- we now predicate the smart/dumb decision entirely on the presence of
the correct content-type
- we do a real pkt-line parse before deciding how to proceed (and die
if it isn't valid)
- use skip_prefix() for comparing service strings, instead of
constructing expected output in a strbuf; this avoids dealing with
memory cleanup
Note that this _is_ tightening what the client will allow. It's all
according to the spec, but it's possible that other implementations
might violate these. However, violating these particular rules seems
like an odd choice for a server to make.
[1] Documentation/technical/http-protocol.txt, l. 166-167
[2] Documentation/technical/protocol-v2.txt, l. 63-64
[3] Documentation/technical/http-protocol.txt, l. 247
Helped-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-06 20:18:48 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-08-31 22:11:31 +02:00
|
|
|
static struct discovery *discover_refs(const char *service, int for_push)
|
2009-08-05 07:01:56 +02:00
|
|
|
{
|
2013-01-31 22:02:07 +01:00
|
|
|
struct strbuf type = STRBUF_INIT;
|
2014-05-22 11:30:29 +02:00
|
|
|
struct strbuf charset = STRBUF_INIT;
|
2009-08-05 07:01:56 +02:00
|
|
|
struct strbuf buffer = STRBUF_INIT;
|
2013-09-28 10:35:10 +02:00
|
|
|
struct strbuf refs_url = STRBUF_INIT;
|
remote-curl: rewrite base url from info/refs redirects
For efficiency and security reasons, an earlier commit in
this series taught http_get_* to re-write the base url based
on redirections we saw while making a specific request.
This commit wires that option into the info/refs request,
meaning that a redirect from
http://example.com/foo.git/info/refs
to
https://example.com/bar.git/info/refs
will behave as if "https://example.com/bar.git" had been
provided to git in the first place.
The tests bear some explanation. We introduce two new
hierarchies into the httpd test config:
1. Requests to /smart-redir-limited will work only for the
initial info/refs request, but not any subsequent
requests. As a result, we can confirm whether the
client is re-rooting its requests after the initial
contact, since otherwise it will fail (it will ask for
"repo.git/git-upload-pack", which is not redirected).
2. Requests to smart-redir-auth will redirect, and require
auth after the redirection. Since we are using the
redirected base for further requests, we also update
the credential struct, in order not to mislead the user
(or credential helpers) about which credential is
needed. We can therefore check the GIT_ASKPASS prompts
to make sure we are prompting for the new location.
Because we have neither multiple servers nor https
support in our test setup, we can only redirect between
paths, meaning we need to turn on
credential.useHttpPath to see the difference.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
2013-09-28 10:35:35 +02:00
|
|
|
struct strbuf effective_url = STRBUF_INIT;
|
2018-03-15 18:31:39 +01:00
|
|
|
struct strbuf protocol_header = STRBUF_INIT;
|
|
|
|
struct string_list extra_headers = STRING_LIST_INIT_DUP;
|
2009-10-31 01:47:40 +01:00
|
|
|
struct discovery *last = last_discovery;
|
2012-09-20 19:00:22 +02:00
|
|
|
int http_ret, maybe_smart = 0;
|
2016-12-06 19:24:38 +01:00
|
|
|
struct http_get_options http_options;
|
2018-03-15 18:31:42 +01:00
|
|
|
enum protocol_version version = get_protocol_version_config();
|
2009-08-05 07:01:56 +02:00
|
|
|
|
2009-10-31 01:47:40 +01:00
|
|
|
if (last && !strcmp(service, last->service))
|
|
|
|
return last;
|
|
|
|
free_discovery(last);
|
2009-08-05 07:01:56 +02:00
|
|
|
|
2013-09-28 10:35:25 +02:00
|
|
|
strbuf_addf(&refs_url, "%sinfo/refs", url.buf);
|
2013-11-30 21:55:40 +01:00
|
|
|
if ((starts_with(url.buf, "http://") || starts_with(url.buf, "https://")) &&
|
2012-09-20 23:30:58 +02:00
|
|
|
git_env_bool("GIT_SMART_HTTP", 1)) {
|
2012-09-20 19:00:22 +02:00
|
|
|
maybe_smart = 1;
|
2013-09-28 10:35:25 +02:00
|
|
|
if (!strchr(url.buf, '?'))
|
2013-09-28 10:35:10 +02:00
|
|
|
strbuf_addch(&refs_url, '?');
|
2009-10-31 01:47:40 +01:00
|
|
|
else
|
2013-09-28 10:35:10 +02:00
|
|
|
strbuf_addch(&refs_url, '&');
|
|
|
|
strbuf_addf(&refs_url, "service=%s", service);
|
2009-10-31 01:47:40 +01:00
|
|
|
}
|
2009-08-05 07:01:56 +02:00
|
|
|
|
2018-03-15 18:31:42 +01:00
|
|
|
/*
|
|
|
|
* NEEDSWORK: If we are trying to use protocol v2 and we are planning
|
|
|
|
* to perform a push, then fallback to v0 since the client doesn't know
|
|
|
|
* how to push yet using v2.
|
|
|
|
*/
|
|
|
|
if (version == protocol_v2 && !strcmp("git-receive-pack", service))
|
|
|
|
version = protocol_v0;
|
|
|
|
|
2018-03-15 18:31:39 +01:00
|
|
|
/* Add the extra Git-Protocol header */
|
2018-03-15 18:31:42 +01:00
|
|
|
if (get_protocol_http_header(version, &protocol_header))
|
2018-03-15 18:31:39 +01:00
|
|
|
string_list_append(&extra_headers, protocol_header.buf);
|
|
|
|
|
2016-12-06 19:24:38 +01:00
|
|
|
memset(&http_options, 0, sizeof(http_options));
|
|
|
|
http_options.content_type = &type;
|
|
|
|
http_options.charset = &charset;
|
|
|
|
http_options.effective_url = &effective_url;
|
|
|
|
http_options.base_url = &url;
|
2018-03-15 18:31:39 +01:00
|
|
|
http_options.extra_headers = &extra_headers;
|
http: make redirects more obvious
We instruct curl to always follow HTTP redirects. This is
convenient, but it creates opportunities for malicious
servers to create confusing situations. For instance,
imagine Alice is a git user with access to a private
repository on Bob's server. Mallory runs her own server and
wants to access objects from Bob's repository.
Mallory may try a few tricks that involve asking Alice to
clone from her, build on top, and then push the result:
1. Mallory may simply redirect all fetch requests to Bob's
server. Git will transparently follow those redirects
and fetch Bob's history, which Alice may believe she
got from Mallory. The subsequent push seems like it is
just feeding Mallory back her own objects, but is
actually leaking Bob's objects. There is nothing in
git's output to indicate that Bob's repository was
involved at all.
The downside (for Mallory) of this attack is that Alice
will have received Bob's entire repository, and is
likely to notice that when building on top of it.
2. If Mallory happens to know the sha1 of some object X in
Bob's repository, she can instead build her own history
that references that object. She then runs a dumb http
server, and Alice's client will fetch each object
individually. When it asks for X, Mallory redirects her
to Bob's server. The end result is that Alice obtains
objects from Bob, but they may be buried deep in
history. Alice is less likely to notice.
Both of these attacks are fairly hard to pull off. There's a
social component in getting Mallory to convince Alice to
work with her. Alice may be prompted for credentials in
accessing Bob's repository (but not always, if she is using
a credential helper that caches). Attack (1) requires a
certain amount of obliviousness on Alice's part while making
a new commit. Attack (2) requires that Mallory knows a sha1
in Bob's repository, that Bob's server supports dumb http,
and that the object in question is loose on Bob's server.
But we can probably make things a bit more obvious without
any loss of functionality. This patch does two things to
that end.
First, when we encounter a whole-repo redirect during the
initial ref discovery, we now inform the user on stderr,
making attack (1) much more obvious.
Second, the decision to follow redirects is now
configurable. The truly paranoid can set the new
http.followRedirects to false to avoid any redirection
entirely. But for a more practical default, we will disallow
redirects only after the initial ref discovery. This is
enough to thwart attacks similar to (2), while still
allowing the common use of redirects at the repository
level. Since c93c92f30 (http: update base URLs when we see
redirects, 2013-09-28) we re-root all further requests from
the redirect destination, which should generally mean that
no further redirection is necessary.
As an escape hatch, in case there really is a server that
needs to redirect individual requests, the user can set
http.followRedirects to "true" (and this can be done on a
per-server basis via http.*.followRedirects config).
Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-06 19:24:41 +01:00
|
|
|
http_options.initial_request = 1;
|
2016-12-06 19:24:38 +01:00
|
|
|
http_options.no_cache = 1;
|
2013-09-28 10:31:23 +02:00
|
|
|
|
2016-12-06 19:24:38 +01:00
|
|
|
http_ret = http_get_strbuf(refs_url.buf, &buffer, &http_options);
|
2009-08-05 07:01:56 +02:00
|
|
|
switch (http_ret) {
|
|
|
|
case HTTP_OK:
|
|
|
|
break;
|
|
|
|
case HTTP_MISSING_TARGET:
|
2014-05-22 11:30:29 +02:00
|
|
|
show_http_message(&type, &charset, &buffer);
|
2019-03-06 00:20:40 +01:00
|
|
|
die(_("repository '%s' not found"),
|
2019-03-04 16:33:46 +01:00
|
|
|
transport_anonymize_url(url.buf));
|
2010-04-02 00:14:35 +02:00
|
|
|
case HTTP_NOAUTH:
|
2014-05-22 11:30:29 +02:00
|
|
|
show_http_message(&type, &charset, &buffer);
|
2019-03-06 00:20:40 +01:00
|
|
|
die(_("Authentication failed for '%s'"),
|
2019-03-04 16:33:46 +01:00
|
|
|
transport_anonymize_url(url.buf));
|
2021-09-24 12:08:20 +02:00
|
|
|
case HTTP_NOMATCHPUBLICKEY:
|
|
|
|
show_http_message(&type, &charset, &buffer);
|
|
|
|
die(_("unable to access '%s' with http.pinnedPubkey configuration: %s"),
|
|
|
|
transport_anonymize_url(url.buf), curl_errorstr);
|
2009-08-05 07:01:56 +02:00
|
|
|
default:
|
2014-05-22 11:30:29 +02:00
|
|
|
show_http_message(&type, &charset, &buffer);
|
2019-03-06 00:20:40 +01:00
|
|
|
die(_("unable to access '%s': %s"),
|
2019-03-04 16:33:46 +01:00
|
|
|
transport_anonymize_url(url.buf), curl_errorstr);
|
2009-08-05 07:01:56 +02:00
|
|
|
}
|
|
|
|
|
2019-03-04 16:33:46 +01:00
|
|
|
if (options.verbosity && !starts_with(refs_url.buf, url.buf)) {
|
|
|
|
char *u = transport_anonymize_url(url.buf);
|
|
|
|
warning(_("redirecting to %s"), u);
|
|
|
|
free(u);
|
|
|
|
}
|
http: make redirects more obvious
We instruct curl to always follow HTTP redirects. This is
convenient, but it creates opportunities for malicious
servers to create confusing situations. For instance,
imagine Alice is a git user with access to a private
repository on Bob's server. Mallory runs her own server and
wants to access objects from Bob's repository.
Mallory may try a few tricks that involve asking Alice to
clone from her, build on top, and then push the result:
1. Mallory may simply redirect all fetch requests to Bob's
server. Git will transparently follow those redirects
and fetch Bob's history, which Alice may believe she
got from Mallory. The subsequent push seems like it is
just feeding Mallory back her own objects, but is
actually leaking Bob's objects. There is nothing in
git's output to indicate that Bob's repository was
involved at all.
The downside (for Mallory) of this attack is that Alice
will have received Bob's entire repository, and is
likely to notice that when building on top of it.
2. If Mallory happens to know the sha1 of some object X in
Bob's repository, she can instead build her own history
that references that object. She then runs a dumb http
server, and Alice's client will fetch each object
individually. When it asks for X, Mallory redirects her
to Bob's server. The end result is that Alice obtains
objects from Bob, but they may be buried deep in
history. Alice is less likely to notice.
Both of these attacks are fairly hard to pull off. There's a
social component in getting Mallory to convince Alice to
work with her. Alice may be prompted for credentials in
accessing Bob's repository (but not always, if she is using
a credential helper that caches). Attack (1) requires a
certain amount of obliviousness on Alice's part while making
a new commit. Attack (2) requires that Mallory knows a sha1
in Bob's repository, that Bob's server supports dumb http,
and that the object in question is loose on Bob's server.
But we can probably make things a bit more obvious without
any loss of functionality. This patch does two things to
that end.
First, when we encounter a whole-repo redirect during the
initial ref discovery, we now inform the user on stderr,
making attack (1) much more obvious.
Second, the decision to follow redirects is now
configurable. The truly paranoid can set the new
http.followRedirects to false to avoid any redirection
entirely. But for a more practical default, we will disallow
redirects only after the initial ref discovery. This is
enough to thwart attacks similar to (2), while still
allowing the common use of redirects at the repository
level. Since c93c92f30 (http: update base URLs when we see
redirects, 2013-09-28) we re-root all further requests from
the redirect destination, which should generally mean that
no further redirection is necessary.
As an escape hatch, in case there really is a server that
needs to redirect individual requests, the user can set
http.followRedirects to "true" (and this can be done on a
per-server basis via http.*.followRedirects config).
Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-06 19:24:41 +01:00
|
|
|
|
2009-10-31 01:47:40 +01:00
|
|
|
last= xcalloc(1, sizeof(*last_discovery));
|
2018-03-15 18:31:36 +01:00
|
|
|
last->service = xstrdup(service);
|
2009-10-31 01:47:40 +01:00
|
|
|
last->buf_alloc = strbuf_detach(&buffer, &last->len);
|
|
|
|
last->buf = last->buf_alloc;
|
|
|
|
|
remote-curl: refactor smart-http discovery
After making initial contact with an http server, we have to decide if
the server supports smart-http, and if so, which version. Our rules are
a bit inconsistent:
1. For v0, we require that the content-type indicates a smart-http
response. We also require the response to look vaguely like a
pkt-line starting with "#". If one of those does not match, we fall
back to dumb-http.
But according to our http protocol spec[1]:
Dumb servers MUST NOT return a return type starting with
`application/x-git-`.
If we see the expected content-type, we should consider it
smart-http. At that point we can parse the pkt-line for real, and
complain if it is not syntactically valid.
2. For v2, we do not actually check the content-type. Our v2 protocol
spec says[2]:
When using the http:// or https:// transport a client makes a
"smart" info/refs request as described in `http-protocol.txt`[...]
and the http spec is clear that for a smart-http response[3]:
The Content-Type MUST be `application/x-$servicename-advertisement`.
So it is required according to the spec.
These inconsistencies were easy to miss because of the way the original
code was written as an inline conditional. Let's pull it out into its
own function for readability, and improve a few things:
- we now predicate the smart/dumb decision entirely on the presence of
the correct content-type
- we do a real pkt-line parse before deciding how to proceed (and die
if it isn't valid)
- use skip_prefix() for comparing service strings, instead of
constructing expected output in a strbuf; this avoids dealing with
memory cleanup
Note that this _is_ tightening what the client will allow. It's all
according to the spec, but it's possible that other implementations
might violate these. However, violating these particular rules seems
like an odd choice for a server to make.
[1] Documentation/technical/http-protocol.txt, l. 166-167
[2] Documentation/technical/protocol-v2.txt, l. 63-64
[3] Documentation/technical/http-protocol.txt, l. 247
Helped-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-06 20:18:48 +01:00
|
|
|
if (maybe_smart)
|
|
|
|
check_smart_http(last, service, &type);
|
2009-10-31 01:47:40 +01:00
|
|
|
|
remote-curl: always parse incoming refs
When remote-curl receives a list of refs from a server, it
keeps the whole buffer intact. When we get a "list" command,
we feed the result to get_remote_heads, and when we get a
"fetch" or "push" command, we feed it to fetch-pack or
send-pack, respectively.
If the HTTP response from the server is truncated for any
reason, we will get an incomplete ref advertisement. If we
then feed this incomplete list to fetch-pack, one of a few
things may happen:
1. If the truncation is in a packet header, fetch-pack
will notice the bogus line and complain.
2. If the truncation is inside a packet, fetch-pack will
keep waiting for us to send the rest of the packet,
which we never will.
3. If the truncation is at a packet boundary, fetch-pack
will keep waiting for us to send the next packet, which
we never will.
As a result, fetch-pack hangs, waiting for input. However,
remote-curl believes it has sent all of the advertisement,
and therefore waits for fetch-pack to speak. The two
processes end up in a deadlock.
We do notice the broken ref list if we feed it to
get_remote_heads. So if git asks the helper to do a "list"
followed by a "fetch", we are safe; we'll abort during the
list operation, which parses the refs.
This patch teaches remote-curl to always parse and save the
incoming ref list when we read the ref advertisement from a
server. That means that we will always verify and abort
before even running fetch-pack (or send-pack) when reading a
corrupted list, even if we do not run the "list" command
explicitly.
Since we save the result, in the common case of running
"list" then "fetch", we do not do any extra parsing at all.
In the case of just a "fetch", we do an extra round of
parsing, but only once.
Note also that the "fetch" case will now also initialize
server_capabilities from the remote (in remote-curl; we
already would do so inside fetch-pack). Doing "list+fetch"
already does this. It doesn't actually matter now, but the
new behavior is arguably more correct, should remote-curl
ever start caring about the server's capability list.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-20 21:07:19 +01:00
|
|
|
if (last->proto_git)
|
|
|
|
last->refs = parse_git_refs(last, for_push);
|
|
|
|
else
|
|
|
|
last->refs = parse_info_refs(last);
|
|
|
|
|
2013-09-28 10:35:10 +02:00
|
|
|
strbuf_release(&refs_url);
|
2013-01-31 22:02:07 +01:00
|
|
|
strbuf_release(&type);
|
2014-05-22 11:30:29 +02:00
|
|
|
strbuf_release(&charset);
|
remote-curl: rewrite base url from info/refs redirects
For efficiency and security reasons, an earlier commit in
this series taught http_get_* to re-write the base url based
on redirections we saw while making a specific request.
This commit wires that option into the info/refs request,
meaning that a redirect from
http://example.com/foo.git/info/refs
to
https://example.com/bar.git/info/refs
will behave as if "https://example.com/bar.git" had been
provided to git in the first place.
The tests bear some explanation. We introduce two new
hierarchies into the httpd test config:
1. Requests to /smart-redir-limited will work only for the
initial info/refs request, but not any subsequent
requests. As a result, we can confirm whether the
client is re-rooting its requests after the initial
contact, since otherwise it will fail (it will ask for
"repo.git/git-upload-pack", which is not redirected).
2. Requests to smart-redir-auth will redirect, and require
auth after the redirection. Since we are using the
redirected base for further requests, we also update
the credential struct, in order not to mislead the user
(or credential helpers) about which credential is
needed. We can therefore check the GIT_ASKPASS prompts
to make sure we are prompting for the new location.
Because we have neither multiple servers nor https
support in our test setup, we can only redirect between
paths, meaning we need to turn on
credential.useHttpPath to see the difference.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
2013-09-28 10:35:35 +02:00
|
|
|
strbuf_release(&effective_url);
|
2009-10-31 01:47:40 +01:00
|
|
|
strbuf_release(&buffer);
|
2018-03-15 18:31:39 +01:00
|
|
|
strbuf_release(&protocol_header);
|
|
|
|
string_list_clear(&extra_headers, 0);
|
2009-10-31 01:47:40 +01:00
|
|
|
last_discovery = last;
|
|
|
|
return last;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ref *get_refs(int for_push)
|
|
|
|
{
|
|
|
|
struct discovery *heads;
|
|
|
|
|
|
|
|
if (for_push)
|
remote-curl: always parse incoming refs
When remote-curl receives a list of refs from a server, it
keeps the whole buffer intact. When we get a "list" command,
we feed the result to get_remote_heads, and when we get a
"fetch" or "push" command, we feed it to fetch-pack or
send-pack, respectively.
If the HTTP response from the server is truncated for any
reason, we will get an incomplete ref advertisement. If we
then feed this incomplete list to fetch-pack, one of a few
things may happen:
1. If the truncation is in a packet header, fetch-pack
will notice the bogus line and complain.
2. If the truncation is inside a packet, fetch-pack will
keep waiting for us to send the rest of the packet,
which we never will.
3. If the truncation is at a packet boundary, fetch-pack
will keep waiting for us to send the next packet, which
we never will.
As a result, fetch-pack hangs, waiting for input. However,
remote-curl believes it has sent all of the advertisement,
and therefore waits for fetch-pack to speak. The two
processes end up in a deadlock.
We do notice the broken ref list if we feed it to
get_remote_heads. So if git asks the helper to do a "list"
followed by a "fetch", we are safe; we'll abort during the
list operation, which parses the refs.
This patch teaches remote-curl to always parse and save the
incoming ref list when we read the ref advertisement from a
server. That means that we will always verify and abort
before even running fetch-pack (or send-pack) when reading a
corrupted list, even if we do not run the "list" command
explicitly.
Since we save the result, in the common case of running
"list" then "fetch", we do not do any extra parsing at all.
In the case of just a "fetch", we do an extra round of
parsing, but only once.
Note also that the "fetch" case will now also initialize
server_capabilities from the remote (in remote-curl; we
already would do so inside fetch-pack). Doing "list+fetch"
already does this. It doesn't actually matter now, but the
new behavior is arguably more correct, should remote-curl
ever start caring about the server's capability list.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-20 21:07:19 +01:00
|
|
|
heads = discover_refs("git-receive-pack", for_push);
|
2009-10-31 01:47:40 +01:00
|
|
|
else
|
remote-curl: always parse incoming refs
When remote-curl receives a list of refs from a server, it
keeps the whole buffer intact. When we get a "list" command,
we feed the result to get_remote_heads, and when we get a
"fetch" or "push" command, we feed it to fetch-pack or
send-pack, respectively.
If the HTTP response from the server is truncated for any
reason, we will get an incomplete ref advertisement. If we
then feed this incomplete list to fetch-pack, one of a few
things may happen:
1. If the truncation is in a packet header, fetch-pack
will notice the bogus line and complain.
2. If the truncation is inside a packet, fetch-pack will
keep waiting for us to send the rest of the packet,
which we never will.
3. If the truncation is at a packet boundary, fetch-pack
will keep waiting for us to send the next packet, which
we never will.
As a result, fetch-pack hangs, waiting for input. However,
remote-curl believes it has sent all of the advertisement,
and therefore waits for fetch-pack to speak. The two
processes end up in a deadlock.
We do notice the broken ref list if we feed it to
get_remote_heads. So if git asks the helper to do a "list"
followed by a "fetch", we are safe; we'll abort during the
list operation, which parses the refs.
This patch teaches remote-curl to always parse and save the
incoming ref list when we read the ref advertisement from a
server. That means that we will always verify and abort
before even running fetch-pack (or send-pack) when reading a
corrupted list, even if we do not run the "list" command
explicitly.
Since we save the result, in the common case of running
"list" then "fetch", we do not do any extra parsing at all.
In the case of just a "fetch", we do an extra round of
parsing, but only once.
Note also that the "fetch" case will now also initialize
server_capabilities from the remote (in remote-curl; we
already would do so inside fetch-pack). Doing "list+fetch"
already does this. It doesn't actually matter now, but the
new behavior is arguably more correct, should remote-curl
ever start caring about the server's capability list.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-20 21:07:19 +01:00
|
|
|
heads = discover_refs("git-upload-pack", for_push);
|
2009-10-31 01:47:40 +01:00
|
|
|
|
remote-curl: always parse incoming refs
When remote-curl receives a list of refs from a server, it
keeps the whole buffer intact. When we get a "list" command,
we feed the result to get_remote_heads, and when we get a
"fetch" or "push" command, we feed it to fetch-pack or
send-pack, respectively.
If the HTTP response from the server is truncated for any
reason, we will get an incomplete ref advertisement. If we
then feed this incomplete list to fetch-pack, one of a few
things may happen:
1. If the truncation is in a packet header, fetch-pack
will notice the bogus line and complain.
2. If the truncation is inside a packet, fetch-pack will
keep waiting for us to send the rest of the packet,
which we never will.
3. If the truncation is at a packet boundary, fetch-pack
will keep waiting for us to send the next packet, which
we never will.
As a result, fetch-pack hangs, waiting for input. However,
remote-curl believes it has sent all of the advertisement,
and therefore waits for fetch-pack to speak. The two
processes end up in a deadlock.
We do notice the broken ref list if we feed it to
get_remote_heads. So if git asks the helper to do a "list"
followed by a "fetch", we are safe; we'll abort during the
list operation, which parses the refs.
This patch teaches remote-curl to always parse and save the
incoming ref list when we read the ref advertisement from a
server. That means that we will always verify and abort
before even running fetch-pack (or send-pack) when reading a
corrupted list, even if we do not run the "list" command
explicitly.
Since we save the result, in the common case of running
"list" then "fetch", we do not do any extra parsing at all.
In the case of just a "fetch", we do an extra round of
parsing, but only once.
Note also that the "fetch" case will now also initialize
server_capabilities from the remote (in remote-curl; we
already would do so inside fetch-pack). Doing "list+fetch"
already does this. It doesn't actually matter now, but the
new behavior is arguably more correct, should remote-curl
ever start caring about the server's capability list.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-20 21:07:19 +01:00
|
|
|
return heads->refs;
|
2009-10-31 01:47:40 +01:00
|
|
|
}
|
|
|
|
|
2009-10-31 01:47:30 +01:00
|
|
|
static void output_refs(struct ref *refs)
|
|
|
|
{
|
|
|
|
struct ref *posn;
|
2020-05-25 21:59:04 +02:00
|
|
|
if (options.object_format && options.hash_algo) {
|
|
|
|
printf(":object-format %s\n", options.hash_algo->name);
|
2021-05-11 12:37:30 +02:00
|
|
|
repo_set_hash_algo(the_repository,
|
|
|
|
hash_algo_by_ptr(options.hash_algo));
|
2020-05-25 21:59:04 +02:00
|
|
|
}
|
2009-10-31 01:47:30 +01:00
|
|
|
for (posn = refs; posn; posn = posn->next) {
|
|
|
|
if (posn->symref)
|
|
|
|
printf("@%s %s\n", posn->symref, posn->name);
|
|
|
|
else
|
2020-06-19 19:55:54 +02:00
|
|
|
printf("%s %s\n", hash_to_hex_algop(posn->old_oid.hash,
|
|
|
|
options.hash_algo),
|
|
|
|
posn->name);
|
2009-10-31 01:47:30 +01:00
|
|
|
}
|
|
|
|
printf("\n");
|
|
|
|
fflush(stdout);
|
|
|
|
}
|
|
|
|
|
2009-10-31 01:47:41 +01:00
|
|
|
/*
 * State shared by the helpers that drive one smart-http RPC service.
 */
struct rpc_state {
	const char *service_name;
	/* URL that probe_rpc() hands to curl as the request URL. */
	char *service_url;

	/*
	 * Header lines.  probe_rpc() appends hdr_content_type and hdr_accept
	 * to the curl header list; the consumers of the other two are not
	 * visible in this part of the file.
	 */
	char *hdr_content_type;
	char *hdr_accept;
	char *hdr_accept_language;
	char *protocol_header;

	/*
	 * Request staging buffer: buf has room for alloc bytes, the first
	 * len of them are filled, and pos marks how far rpc_out() has
	 * already copied them out.
	 */
	char *buf;
	size_t alloc;
	size_t len;
	size_t pos;

	/* Descriptor that rpc_in() writes received response bytes to. */
	int in;
	/* Descriptor that rpc_read_from_out() reads pkt-lines from. */
	int out;
	/* Set to 1 by rpc_in() once it has forwarded a non-empty chunk. */
	int any_written;

	/* NOTE(review): consumer is outside this section of the file. */
	unsigned gzip_request : 1;
	/*
	 * While set, rpc_ioctl() may rewind pos to restart the upload;
	 * rpc_out() clears it as soon as the buffer has been drained.
	 */
	unsigned initial_buffer : 1;

	/*
	 * When set, every pkt-line stored into buf is prefixed with its
	 * 4-character length header, followed by the payload.
	 */
	unsigned write_line_lengths : 1;

	/*
	 * Owned by rpc_out(); must start out as 0.  Set while a flush packet
	 * has been read but its length header (if write_line_lengths is set)
	 * and the EOF that follows have not yet been handed to libcurl.  A
	 * flush ends a request, so nothing more may be read until that flush
	 * has been sent in full.
	 */
	unsigned flush_read_but_not_sent : 1;
};
|
|
|
|
|
2022-07-11 07:58:54 +02:00
|
|
|
/* Zero-initializer; presumably intended for struct rpc_state declarations. */
#define RPC_STATE_INIT { 0 }
|
|
|
|
|
2019-02-21 21:24:40 +01:00
|
|
|
/*
|
|
|
|
* Appends the result of reading from rpc->out to the string represented by
|
|
|
|
* rpc->buf and rpc->len if there is enough space. Returns 1 if there was
|
|
|
|
* enough space, 0 otherwise.
|
|
|
|
*
|
2019-02-21 21:24:41 +01:00
|
|
|
* If rpc->write_line_lengths is true, appends the line length as a 4-byte
|
|
|
|
* hexadecimal string before appending the result described above.
|
|
|
|
*
|
|
|
|
* Writes the total number of bytes appended into appended.
|
2019-02-21 21:24:40 +01:00
|
|
|
*/
|
2019-02-21 21:24:41 +01:00
|
|
|
static int rpc_read_from_out(struct rpc_state *rpc, int options,
|
|
|
|
size_t *appended,
|
|
|
|
enum packet_read_status *status) {
|
|
|
|
size_t left;
|
|
|
|
char *buf;
|
|
|
|
int pktlen_raw;
|
|
|
|
|
|
|
|
if (rpc->write_line_lengths) {
|
|
|
|
left = rpc->alloc - rpc->len - 4;
|
|
|
|
buf = rpc->buf + rpc->len + 4;
|
|
|
|
} else {
|
|
|
|
left = rpc->alloc - rpc->len;
|
|
|
|
buf = rpc->buf + rpc->len;
|
|
|
|
}
|
2019-02-21 21:24:40 +01:00
|
|
|
|
|
|
|
if (left < LARGE_PACKET_MAX)
|
|
|
|
return 0;
|
|
|
|
|
2019-02-21 21:24:41 +01:00
|
|
|
*status = packet_read_with_status(rpc->out, NULL, NULL, buf,
|
|
|
|
left, &pktlen_raw, options);
|
|
|
|
if (*status != PACKET_READ_EOF) {
|
|
|
|
*appended = pktlen_raw + (rpc->write_line_lengths ? 4 : 0);
|
|
|
|
rpc->len += *appended;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (rpc->write_line_lengths) {
|
|
|
|
switch (*status) {
|
|
|
|
case PACKET_READ_EOF:
|
|
|
|
if (!(options & PACKET_READ_GENTLE_ON_EOF))
|
2019-04-16 12:28:05 +02:00
|
|
|
die(_("shouldn't have EOF when not gentle on EOF"));
|
2019-02-21 21:24:41 +01:00
|
|
|
break;
|
|
|
|
case PACKET_READ_NORMAL:
|
|
|
|
set_packet_header(buf - 4, *appended);
|
|
|
|
break;
|
|
|
|
case PACKET_READ_DELIM:
|
|
|
|
memcpy(buf - 4, "0001", 4);
|
|
|
|
break;
|
|
|
|
case PACKET_READ_FLUSH:
|
|
|
|
memcpy(buf - 4, "0000", 4);
|
|
|
|
break;
|
2020-05-19 12:53:59 +02:00
|
|
|
case PACKET_READ_RESPONSE_END:
|
2021-07-09 04:27:22 +02:00
|
|
|
die(_("remote server sent unexpected response end packet"));
|
2019-02-21 21:24:41 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-21 21:24:40 +01:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2009-10-31 01:47:41 +01:00
|
|
|
static size_t rpc_out(void *ptr, size_t eltsize,
|
|
|
|
size_t nmemb, void *buffer_)
|
|
|
|
{
|
|
|
|
size_t max = eltsize * nmemb;
|
|
|
|
struct rpc_state *rpc = buffer_;
|
|
|
|
size_t avail = rpc->len - rpc->pos;
|
2019-02-21 21:24:41 +01:00
|
|
|
enum packet_read_status status;
|
2009-10-31 01:47:41 +01:00
|
|
|
|
|
|
|
if (!avail) {
|
2009-12-01 11:33:39 +01:00
|
|
|
rpc->initial_buffer = 0;
|
2019-02-21 21:24:40 +01:00
|
|
|
rpc->len = 0;
|
2009-10-31 01:47:41 +01:00
|
|
|
rpc->pos = 0;
|
2019-02-21 21:24:41 +01:00
|
|
|
if (!rpc->flush_read_but_not_sent) {
|
|
|
|
if (!rpc_read_from_out(rpc, 0, &avail, &status))
|
|
|
|
BUG("The entire rpc->buf should be larger than LARGE_PACKET_MAX");
|
|
|
|
if (status == PACKET_READ_FLUSH)
|
|
|
|
rpc->flush_read_but_not_sent = 1;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If flush_read_but_not_sent is true, we have already read one
|
|
|
|
* full request but have not fully sent it + EOF, which is why
|
|
|
|
* we need to refrain from reading.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
if (rpc->flush_read_but_not_sent) {
|
|
|
|
if (!avail) {
|
|
|
|
/*
|
|
|
|
* The line length either does not need to be sent at
|
|
|
|
* all or has already been completely sent. Now we can
|
|
|
|
* return 0, indicating EOF, meaning that the flush has
|
|
|
|
* been fully sent.
|
|
|
|
*/
|
|
|
|
rpc->flush_read_but_not_sent = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*
|
2020-05-18 17:47:18 +02:00
|
|
|
* If avail is non-zero, the line length for the flush still
|
2019-02-21 21:24:41 +01:00
|
|
|
* hasn't been fully sent. Proceed with sending the line
|
|
|
|
* length.
|
|
|
|
*/
|
2009-10-31 01:47:41 +01:00
|
|
|
}
|
|
|
|
|
2009-11-24 03:31:30 +01:00
|
|
|
if (max < avail)
|
2009-10-31 01:47:41 +01:00
|
|
|
avail = max;
|
|
|
|
memcpy(ptr, rpc->buf + rpc->pos, avail);
|
|
|
|
rpc->pos += avail;
|
|
|
|
return avail;
|
|
|
|
}
|
|
|
|
|
2010-01-12 07:30:36 +01:00
|
|
|
/*
 * ioctl callback for libcurl.  The only command acted upon is a request to
 * restart reading the upload data, which is possible only while the initial
 * buffer is still intact (rpc->initial_buffer set); in that case the read
 * position is rewound to the start.  Otherwise an error is reported and
 * CURLIOE_FAILRESTART is returned.
 *
 * CURLIOCMD_NOP succeeds trivially; any other command yields
 * CURLIOE_UNKNOWNCMD.
 */
static curlioerr rpc_ioctl(CURL *handle, int cmd, void *clientp)
{
	struct rpc_state *rpc = clientp;

	if (cmd == CURLIOCMD_NOP)
		return CURLIOE_OK;
	if (cmd != CURLIOCMD_RESTARTREAD)
		return CURLIOE_UNKNOWNCMD;

	if (!rpc->initial_buffer) {
		error(_("unable to rewind rpc post data - try increasing http.postBuffer"));
		return CURLIOE_FAILRESTART;
	}

	rpc->pos = 0;
	return CURLIOE_OK;
}
|
|
|
|
|
2020-05-19 12:53:58 +02:00
|
|
|
/*
 * Incremental pkt-line framing state kept by check_pktline() between calls.
 */
struct check_pktline_state {
	/* Characters of the 4-character length header collected so far. */
	char len_buf[4];
	/* How many characters of len_buf are currently filled in. */
	int len_filled;
	/* Payload bytes of the current packet that have not been seen yet. */
	int remaining;
};
|
|
|
|
|
|
|
|
static void check_pktline(struct check_pktline_state *state, const char *ptr, size_t size)
|
|
|
|
{
|
|
|
|
while (size) {
|
|
|
|
if (!state->remaining) {
|
|
|
|
int digits_remaining = 4 - state->len_filled;
|
|
|
|
if (digits_remaining > size)
|
|
|
|
digits_remaining = size;
|
|
|
|
memcpy(&state->len_buf[state->len_filled], ptr, digits_remaining);
|
|
|
|
state->len_filled += digits_remaining;
|
|
|
|
ptr += digits_remaining;
|
|
|
|
size -= digits_remaining;
|
|
|
|
|
|
|
|
if (state->len_filled == 4) {
|
|
|
|
state->remaining = packet_length(state->len_buf);
|
|
|
|
if (state->remaining < 0) {
|
|
|
|
die(_("remote-curl: bad line length character: %.4s"), state->len_buf);
|
2020-05-19 12:54:00 +02:00
|
|
|
} else if (state->remaining == 2) {
|
|
|
|
die(_("remote-curl: unexpected response end packet"));
|
2020-05-19 12:53:58 +02:00
|
|
|
} else if (state->remaining < 4) {
|
|
|
|
state->remaining = 0;
|
|
|
|
} else {
|
|
|
|
state->remaining -= 4;
|
|
|
|
}
|
|
|
|
state->len_filled = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (state->remaining) {
|
|
|
|
int remaining = state->remaining;
|
|
|
|
if (remaining > size)
|
|
|
|
remaining = size;
|
|
|
|
ptr += remaining;
|
|
|
|
size -= remaining;
|
|
|
|
state->remaining -= remaining;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-01-10 20:33:48 +01:00
|
|
|
struct rpc_in_data {
|
|
|
|
struct rpc_state *rpc;
|
2019-01-10 20:33:49 +01:00
|
|
|
struct active_request_slot *slot;
|
2020-05-19 12:53:58 +02:00
|
|
|
int check_pktline;
|
|
|
|
struct check_pktline_state pktline_state;
|
2019-01-10 20:33:48 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A callback for CURLOPT_WRITEFUNCTION. The return value is the bytes consumed
|
|
|
|
* from ptr.
|
|
|
|
*/
|
2011-05-03 17:47:27 +02:00
|
|
|
static size_t rpc_in(char *ptr, size_t eltsize,
|
2009-10-31 01:47:41 +01:00
|
|
|
size_t nmemb, void *buffer_)
|
|
|
|
{
|
|
|
|
size_t size = eltsize * nmemb;
|
2019-01-10 20:33:48 +01:00
|
|
|
struct rpc_in_data *data = buffer_;
|
2019-01-10 20:33:49 +01:00
|
|
|
long response_code;
|
|
|
|
|
|
|
|
if (curl_easy_getinfo(data->slot->curl, CURLINFO_RESPONSE_CODE,
|
|
|
|
&response_code) != CURLE_OK)
|
|
|
|
return size;
|
|
|
|
if (response_code >= 300)
|
|
|
|
return size;
|
remote-curl: don't hang when a server dies before any output
In the event that a HTTP server closes the connection after giving a
200 but before giving any packets, we don't want to hang forever
waiting for a response that will never come. Instead, we should die
immediately.
One case where this happens is when attempting to fetch a dangling
object by its object name. In this case, the server dies before
sending any data. Prior to this patch, fetch-pack would wait for
data from the server, and remote-curl would wait for fetch-pack,
causing a deadlock.
Despite this patch, there is other possible malformed input that could
cause the same deadlock (e.g. a half-finished pktline, or a pktline but
no trailing flush). There are a few possible solutions to this:
1. Allowing remote-curl to tell fetch-pack about the EOF (so that
fetch-pack could know that no more data is coming until it says
something else). This is tricky because an out-of-band signal would
be required, or the http response would have to be re-framed inside
another layer of pkt-line or something.
2. Make remote-curl understand some of the protocol. It turns out
that in addition to understanding pkt-line, it would need to watch for
ack/nak. This is somewhat fragile, as information about the protocol
would end up in two places. Also, pkt-lines which are already at the
length limit would need special handling.
Both of these solutions would require a fair amount of work, whereas
this hack is easy and solves at least some of the problem.
Still to do: it would be good to give a better error message
than "fatal: The remote end hung up unexpectedly".
Signed-off-by: David Turner <dturner@twosigma.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-11-18 21:30:49 +01:00
|
|
|
if (size)
|
2019-01-10 20:33:48 +01:00
|
|
|
data->rpc->any_written = 1;
|
2020-05-19 12:53:58 +02:00
|
|
|
if (data->check_pktline)
|
|
|
|
check_pktline(&data->pktline_state, ptr, size);
|
2019-01-10 20:33:48 +01:00
|
|
|
write_or_die(data->rpc->in, ptr, size);
|
2009-10-31 01:47:41 +01:00
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
2013-10-31 07:36:26 +01:00
|
|
|
static int run_slot(struct active_request_slot *slot,
|
|
|
|
struct slot_results *results)
|
2011-02-15 17:57:24 +01:00
|
|
|
{
|
http: prompt for credentials on failed POST
All of the smart-http GET requests go through the http_get_*
functions, which will prompt for credentials and retry if we
see an HTTP 401.
POST requests, however, do not go through any central point.
Moreover, it is difficult to retry in the general case; we
cannot assume the request body fits in memory or is even
seekable, and we don't know how much of it was consumed
during the attempt.
Most of the time, this is not a big deal; for both fetching
and pushing, we make a GET request before doing any POSTs,
so typically we figure out the credentials during the first
request, then reuse them during the POST. However, some
servers may allow a client to get the list of refs from
receive-pack without authentication, and then require
authentication when the client actually tries to POST the
pack.
This is not ideal, as the client may do a non-trivial amount
of work to generate the pack (e.g., delta-compressing
objects). However, for a long time it has been the
recommended example configuration in git-http-backend(1) for
setting up a repository with anonymous fetch and
authenticated push. This setup has always been broken
without putting a username into the URL. Prior to commit
986bbc0, it did work with a username in the URL, because git
would prompt for credentials before making any requests at
all. However, post-986bbc0, it is totally broken. Since it
has been advertised in the manpage for some time, we should
make sure it works.
Unfortunately, it is not as easy as simply calling post_rpc
again when it fails, due to the input issue mentioned above.
However, we can still make this specific case work by
retrying in two specific instances:
1. If the request is large (bigger than LARGE_PACKET_MAX),
we will first send a probe request with a single flush
packet. Since this request is static, we can freely
retry it.
2. If the request is small and we are not using gzip, then
we have the whole thing in-core, and we can freely
retry.
That means we will not retry in some instances, including:
1. If we are using gzip. However, we only do so when
calling git-upload-pack, so it does not apply to
pushes.
2. If we have a large request, the probe succeeds, but
then the real POST wants authentication. This is an
extremely unlikely configuration and not worth worrying
about.
While it might be nice to cover those instances, doing so
would be significantly more complex for very little
real-world gain. In the long run, we will be much better off
when curl learns to internally handle authentication as a
callback, and we can cleanly handle all cases that way.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-08-27 15:27:15 +02:00
|
|
|
int err;
|
2013-10-31 07:36:26 +01:00
|
|
|
struct slot_results results_buf;
|
2011-02-15 17:57:24 +01:00
|
|
|
|
2013-10-31 07:36:26 +01:00
|
|
|
if (!results)
|
|
|
|
results = &results_buf;
|
|
|
|
|
http: never use curl_easy_perform
We currently don't reuse http connections when fetching via
the smart-http protocol. This is bad because the TCP
handshake introduces latency, and especially because SSL
connection setup may be non-trivial.
We can fix it by consistently using curl's "multi"
interface. The reason is rather complicated:
Our http code has two ways of being used: queuing many
"slots" to be fetched in parallel, or fetching a single
request in a blocking manner. The parallel code is built on
curl's "multi" interface. Most of the single-request code
uses http_request, which is built on top of the parallel
code (we just feed it one slot, and wait until it finishes).
However, one could also accomplish the single-request scheme
by avoiding curl's multi interface entirely and just using
curl_easy_perform. This is simpler, and is used by post_rpc
in the smart-http protocol.
It does work to use the same curl handle in both contexts,
as long as it is not at the same time. However, internally
curl may not share all of the cached resources between both
contexts. In particular, a connection formed using the
"multi" code will go into a reuse pool connected to the
"multi" object. Further requests using the "easy" interface
will not be able to reuse that connection.
The smart http protocol does ref discovery via http_request,
which uses the "multi" interface, and then follows up with
the "easy" interface for its rpc calls. As a result, we make
two HTTP connections rather than reusing a single one.
We could teach the ref discovery to use the "easy"
interface. But it is only once we have done this discovery
that we know whether the protocol will be smart or dumb. If
it is dumb, then our further requests, which want to fetch
objects in parallel, will not be able to reuse the same
connection.
Instead, this patch switches post_rpc to build on the
parallel interface, which means that we use it consistently
everywhere. It's a little more complicated to use, but since
we have the infrastructure already, it doesn't add any code;
we can just factor out the relevant bits from http_request.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-02-18 11:34:20 +01:00
|
|
|
err = run_one_slot(slot, results);
|
2011-02-15 17:57:24 +01:00
|
|
|
|
http: prompt for credentials on failed POST
All of the smart-http GET requests go through the http_get_*
functions, which will prompt for credentials and retry if we
see an HTTP 401.
POST requests, however, do not go through any central point.
Moreover, it is difficult to retry in the general case; we
cannot assume the request body fits in memory or is even
seekable, and we don't know how much of it was consumed
during the attempt.
Most of the time, this is not a big deal; for both fetching
and pushing, we make a GET request before doing any POSTs,
so typically we figure out the credentials during the first
request, then reuse them during the POST. However, some
servers may allow a client to get the list of refs from
receive-pack without authentication, and then require
authentication when the client actually tries to POST the
pack.
This is not ideal, as the client may do a non-trivial amount
of work to generate the pack (e.g., delta-compressing
objects). However, for a long time it has been the
recommended example configuration in git-http-backend(1) for
setting up a repository with anonymous fetch and
authenticated push. This setup has always been broken
without putting a username into the URL. Prior to commit
986bbc0, it did work with a username in the URL, because git
would prompt for credentials before making any requests at
all. However, post-986bbc0, it is totally broken. Since it
has been advertised in the manpage for some time, we should
make sure it works.
Unfortunately, it is not as easy as simply calling post_rpc
again when it fails, due to the input issue mentioned above.
However, we can still make this specific case work by
retrying in two specific instances:
1. If the request is large (bigger than LARGE_PACKET_MAX),
we will first send a probe request with a single flush
packet. Since this request is static, we can freely
retry it.
2. If the request is small and we are not using gzip, then
we have the whole thing in-core, and we can freely
retry.
That means we will not retry in some instances, including:
1. If we are using gzip. However, we only do so when
calling git-upload-pack, so it does not apply to
pushes.
2. If we have a large request, the probe succeeds, but
then the real POST wants authentication. This is an
extremely unlikely configuration and not worth worrying
about.
While it might be nice to cover those instances, doing so
would be significantly more complex for very little
real-world gain. In the long run, we will be much better off
when curl learns to internally handle authentication as a
callback, and we can cleanly handle all cases that way.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-08-27 15:27:15 +02:00
|
|
|
if (err != HTTP_OK && err != HTTP_REAUTH) {
|
2016-02-14 02:39:34 +01:00
|
|
|
struct strbuf msg = STRBUF_INIT;
|
|
|
|
if (results->http_code && results->http_code != 200)
|
|
|
|
strbuf_addf(&msg, "HTTP %ld", results->http_code);
|
|
|
|
if (results->curl_result != CURLE_OK) {
|
|
|
|
if (msg.len)
|
|
|
|
strbuf_addch(&msg, ' ');
|
|
|
|
strbuf_addf(&msg, "curl %d", results->curl_result);
|
|
|
|
if (curl_errorstr[0]) {
|
|
|
|
strbuf_addch(&msg, ' ');
|
|
|
|
strbuf_addstr(&msg, curl_errorstr);
|
|
|
|
}
|
|
|
|
}
|
2019-03-06 00:20:40 +01:00
|
|
|
error(_("RPC failed; %s"), msg.buf);
|
2016-02-14 02:39:34 +01:00
|
|
|
strbuf_release(&msg);
|
2011-02-15 17:57:24 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2013-10-31 07:36:26 +01:00
|
|
|
static int probe_rpc(struct rpc_state *rpc, struct slot_results *results)
|
2011-02-15 17:57:24 +01:00
|
|
|
{
|
|
|
|
struct active_request_slot *slot;
|
2016-04-27 14:20:37 +02:00
|
|
|
struct curl_slist *headers = http_copy_default_headers();
|
2011-02-15 17:57:24 +01:00
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
slot = get_active_slot();
|
|
|
|
|
|
|
|
headers = curl_slist_append(headers, rpc->hdr_content_type);
|
|
|
|
headers = curl_slist_append(headers, rpc->hdr_accept);
|
|
|
|
|
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 0);
|
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_POST, 1);
|
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_URL, rpc->service_url);
|
2012-09-20 01:12:02 +02:00
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_ENCODING, NULL);
|
2011-02-15 17:57:24 +01:00
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, "0000");
|
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE, 4);
|
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, headers);
|
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
|
2021-07-30 19:59:46 +02:00
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &buf);
|
2011-02-15 17:57:24 +01:00
|
|
|
|
2013-10-31 07:36:26 +01:00
|
|
|
err = run_slot(slot, results);
|
2011-02-15 17:57:24 +01:00
|
|
|
|
|
|
|
curl_slist_free_all(headers);
|
|
|
|
strbuf_release(&buf);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2018-12-09 11:25:21 +01:00
|
|
|
static curl_off_t xcurl_off_t(size_t len)
|
|
|
|
{
|
2018-11-09 18:41:10 +01:00
|
|
|
uintmax_t size = len;
|
|
|
|
if (size > maximum_signed_value_of_type(curl_off_t))
|
2019-03-06 00:20:40 +01:00
|
|
|
die(_("cannot handle pushes this big"));
|
2018-11-09 18:41:10 +01:00
|
|
|
return (curl_off_t)size;
|
2017-04-11 20:13:57 +02:00
|
|
|
}
|
|
|
|
|
2019-02-21 21:24:41 +01:00
|
|
|
/*
|
|
|
|
* If flush_received is true, do not attempt to read any more; just use what's
|
|
|
|
* in rpc->buf.
|
|
|
|
*/
|
2020-05-19 12:53:58 +02:00
|
|
|
static int post_rpc(struct rpc_state *rpc, int stateless_connect, int flush_received)
|
2009-10-31 01:47:41 +01:00
|
|
|
{
|
|
|
|
struct active_request_slot *slot;
|
2016-04-27 14:20:37 +02:00
|
|
|
struct curl_slist *headers = http_copy_default_headers();
|
2009-10-31 01:47:43 +01:00
|
|
|
int use_gzip = rpc->gzip_request;
|
|
|
|
char *gzip_body = NULL;
|
2012-11-21 20:08:51 +01:00
|
|
|
size_t gzip_size = 0;
|
2011-02-15 17:57:24 +01:00
|
|
|
int err, large_request = 0;
|
2013-10-31 07:36:51 +01:00
|
|
|
int needs_100_continue = 0;
|
2019-01-10 20:33:48 +01:00
|
|
|
struct rpc_in_data rpc_in_data;
|
2009-10-31 01:47:41 +01:00
|
|
|
|
|
|
|
/* Try to load the entire request, if we can fit it into the
|
|
|
|
* allocated buffer space we can use HTTP/1.0 and avoid the
|
|
|
|
* chunked encoding mess.
|
|
|
|
*/
|
2019-02-21 21:24:41 +01:00
|
|
|
if (!flush_received) {
|
|
|
|
while (1) {
|
|
|
|
size_t n;
|
|
|
|
enum packet_read_status status;
|
|
|
|
|
|
|
|
if (!rpc_read_from_out(rpc, 0, &n, &status)) {
|
|
|
|
large_request = 1;
|
|
|
|
use_gzip = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (status == PACKET_READ_FLUSH)
|
|
|
|
break;
|
2009-10-31 01:47:41 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-02-15 17:57:24 +01:00
|
|
|
if (large_request) {
|
2013-10-31 07:36:51 +01:00
|
|
|
struct slot_results results;
|
|
|
|
|
http: prompt for credentials on failed POST
All of the smart-http GET requests go through the http_get_*
functions, which will prompt for credentials and retry if we
see an HTTP 401.
POST requests, however, do not go through any central point.
Moreover, it is difficult to retry in the general case; we
cannot assume the request body fits in memory or is even
seekable, and we don't know how much of it was consumed
during the attempt.
Most of the time, this is not a big deal; for both fetching
and pushing, we make a GET request before doing any POSTs,
so typically we figure out the credentials during the first
request, then reuse them during the POST. However, some
servers may allow a client to get the list of refs from
receive-pack without authentication, and then require
authentication when the client actually tries to POST the
pack.
This is not ideal, as the client may do a non-trivial amount
of work to generate the pack (e.g., delta-compressing
objects). However, for a long time it has been the
recommended example configuration in git-http-backend(1) for
setting up a repository with anonymous fetch and
authenticated push. This setup has always been broken
without putting a username into the URL. Prior to commit
986bbc0, it did work with a username in the URL, because git
would prompt for credentials before making any requests at
all. However, post-986bbc0, it is totally broken. Since it
has been advertised in the manpage for some time, we should
make sure it works.
Unfortunately, it is not as easy as simply calling post_rpc
again when it fails, due to the input issue mentioned above.
However, we can still make this specific case work by
retrying in two specific instances:
1. If the request is large (bigger than LARGE_PACKET_MAX),
we will first send a probe request with a single flush
packet. Since this request is static, we can freely
retry it.
2. If the request is small and we are not using gzip, then
we have the whole thing in-core, and we can freely
retry.
That means we will not retry in some instances, including:
1. If we are using gzip. However, we only do so when
calling git-upload-pack, so it does not apply to
pushes.
2. If we have a large request, the probe succeeds, but
then the real POST wants authentication. This is an
extremely unlikely configuration and not worth worrying
about.
While it might be nice to cover those instances, doing so
would be significantly more complex for very little
real-world gain. In the long run, we will be much better off
when curl learns to internally handle authentication as a
callback, and we can cleanly handle all cases that way.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-08-27 15:27:15 +02:00
|
|
|
do {
|
2013-10-31 07:36:51 +01:00
|
|
|
err = probe_rpc(rpc, &results);
|
http: hoist credential request out of handle_curl_result
When we are handling a curl response code in http_request or
in the remote-curl RPC code, we use the handle_curl_result
helper to translate curl's response into an easy-to-use
code. When we see an HTTP 401, we do one of two things:
1. If we already had a filled-in credential, we mark it as
rejected, and then return HTTP_NOAUTH to indicate to
the caller that we failed.
2. If we didn't, then we ask for a new credential and tell
the caller HTTP_REAUTH to indicate that they may want
to try again.
Rejecting in the first case makes sense; it is the natural
result of the request we just made. However, prompting for
more credentials in the second step does not always make
sense. We do not know for sure that the caller is going to
make a second request, and nor are we sure that it will be
to the same URL. Logically, the prompt belongs not to the
request we just finished, but to the request we are (maybe)
about to make.
In practice, it is very hard to trigger any bad behavior.
Currently, if we make a second request, it will always be to
the same URL (even in the face of redirects, because curl
handles the redirects internally). And we almost always
retry on HTTP_REAUTH these days. The one exception is if we
are streaming a large RPC request to the server (e.g., a
pushed packfile), in which case we cannot restart. It's
extremely unlikely to see a 401 response at this stage,
though, as we would typically have seen it when we sent a
probe request, before streaming the data.
This patch drops the automatic prompt out of case 2, and
instead requires the caller to do it. This is a few extra
lines of code, and the bug it fixes is unlikely to come up
in practice. But it is conceptually cleaner, and paves the
way for better handling of credentials across redirects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
2013-09-28 10:31:45 +02:00
|
|
|
if (err == HTTP_REAUTH)
|
|
|
|
credential_fill(&http_auth);
|
http: prompt for credentials on failed POST
All of the smart-http GET requests go through the http_get_*
functions, which will prompt for credentials and retry if we
see an HTTP 401.
POST requests, however, do not go through any central point.
Moreover, it is difficult to retry in the general case; we
cannot assume the request body fits in memory or is even
seekable, and we don't know how much of it was consumed
during the attempt.
Most of the time, this is not a big deal; for both fetching
and pushing, we make a GET request before doing any POSTs,
so typically we figure out the credentials during the first
request, then reuse them during the POST. However, some
servers may allow a client to get the list of refs from
receive-pack without authentication, and then require
authentication when the client actually tries to POST the
pack.
This is not ideal, as the client may do a non-trivial amount
of work to generate the pack (e.g., delta-compressing
objects). However, for a long time it has been the
recommended example configuration in git-http-backend(1) for
setting up a repository with anonymous fetch and
authenticated push. This setup has always been broken
without putting a username into the URL. Prior to commit
986bbc0, it did work with a username in the URL, because git
would prompt for credentials before making any requests at
all. However, post-986bbc0, it is totally broken. Since it
has been advertised in the manpage for some time, we should
make sure it works.
Unfortunately, it is not as easy as simply calling post_rpc
again when it fails, due to the input issue mentioned above.
However, we can still make this specific case work by
retrying in two specific instances:
1. If the request is large (bigger than LARGE_PACKET_MAX),
we will first send a probe request with a single flush
packet. Since this request is static, we can freely
retry it.
2. If the request is small and we are not using gzip, then
we have the whole thing in-core, and we can freely
retry.
That means we will not retry in some instances, including:
1. If we are using gzip. However, we only do so when
calling git-upload-pack, so it does not apply to
pushes.
2. If we have a large request, the probe succeeds, but
then the real POST wants authentication. This is an
extremely unlikely configuration and not worth worrying
about.
While it might be nice to cover those instances, doing so
would be significantly more complex for very little
real-world gain. In the long run, we will be much better off
when curl learns to internally handle authentication as a
callback, and we can cleanly handle all cases that way.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-08-27 15:27:15 +02:00
|
|
|
} while (err == HTTP_REAUTH);
|
|
|
|
if (err != HTTP_OK)
|
|
|
|
return -1;
|
2013-10-31 07:36:51 +01:00
|
|
|
|
|
|
|
if (results.auth_avail & CURLAUTH_GSSNEGOTIATE)
|
|
|
|
needs_100_continue = 1;
|
2011-02-15 17:57:24 +01:00
|
|
|
}
|
|
|
|
|
remote-curl: do not call run_slot repeatedly
Commit b81401c (http: prompt for credentials on failed POST)
taught post_rpc to call run_slot in a loop in order to retry
a request after asking the user for credentials. However,
after a call to run_slot we will have called
finish_active_slot. This means we have released the slot,
and we should no longer look at it.
As it happens, this does not cause any bugs in the current
code, since we know that we are not using curl_multi in this
code path, and therefore nobody will have taken over our
slot in the meantime. However, it is good form to actually
call get_active_slot again. It also future proofs us against
changes in the http code.
We can do this by jumping back to a retry label at the top
of our function. We just need to reorder a few setup lines
that should not be repeated; everything else within the loop
is either idempotent, needs to be repeated, or in a path we
do not follow (e.g., we do not even try when large_request
is set, because we don't know how much data we might have
streamed from our helper program).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-10-12 09:35:33 +02:00
|
|
|
headers = curl_slist_append(headers, rpc->hdr_content_type);
|
|
|
|
headers = curl_slist_append(headers, rpc->hdr_accept);
|
2013-10-31 07:36:51 +01:00
|
|
|
headers = curl_slist_append(headers, needs_100_continue ?
|
|
|
|
"Expect: 100-continue" : "Expect:");
|
remote-curl: do not call run_slot repeatedly
Commit b81401c (http: prompt for credentials on failed POST)
taught post_rpc to call run_slot in a loop in order to retry
a request after asking the user for credentials. However,
after a call to run_slot we will have called
finish_active_slot. This means we have released the slot,
and we should no longer look at it.
As it happens, this does not cause any bugs in the current
code, since we know that we are not using curl_multi in this
code path, and therefore nobody will have taken over our
slot in the meantime. However, it is good form to actually
call get_active_slot again. It also future proofs us against
changes in the http code.
We can do this by jumping back to a retry label at the top
of our function. We just need to reorder a few setup lines
that should not be repeated; everything else within the loop
is either idempotent, needs to be repeated, or in a path we
do not follow (e.g., we do not even try when large_request
is set, because we don't know how much data we might have
streamed from our helper program).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-10-12 09:35:33 +02:00
|
|
|
|
2022-07-11 07:58:54 +02:00
|
|
|
/* Add Accept-Language header */
|
|
|
|
if (rpc->hdr_accept_language)
|
|
|
|
headers = curl_slist_append(headers, rpc->hdr_accept_language);
|
|
|
|
|
2018-03-15 18:31:39 +01:00
|
|
|
/* Add the extra Git-Protocol header */
|
|
|
|
if (rpc->protocol_header)
|
|
|
|
headers = curl_slist_append(headers, rpc->protocol_header);
|
|
|
|
|
remote-curl: do not call run_slot repeatedly
Commit b81401c (http: prompt for credentials on failed POST)
taught post_rpc to call run_slot in a loop in order to retry
a request after asking the user for credentials. However,
after a call to run_slot we will have called
finish_active_slot. This means we have released the slot,
and we should no longer look at it.
As it happens, this does not cause any bugs in the current
code, since we know that we are not using curl_multi in this
code path, and therefore nobody will have taken over our
slot in the meantime. However, it is good form to actually
call get_active_slot again. It also future proofs us against
changes in the http code.
We can do this by jumping back to a retry label at the top
of our function. We just need to reorder a few setup lines
that should not be repeated; everything else within the loop
is either idempotent, needs to be repeated, or in a path we
do not follow (e.g., we do not even try when large_request
is set, because we don't know how much data we might have
streamed from our helper program).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-10-12 09:35:33 +02:00
|
|
|
retry:
|
2009-10-31 01:47:41 +01:00
|
|
|
slot = get_active_slot();
|
|
|
|
|
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 0);
|
2009-11-23 04:03:28 +01:00
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_POST, 1);
|
2009-10-31 01:47:41 +01:00
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_URL, rpc->service_url);
|
2018-05-22 20:42:03 +02:00
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_ENCODING, "");
|
2009-10-31 01:47:41 +01:00
|
|
|
|
|
|
|
if (large_request) {
|
|
|
|
/* The request body is large and the size cannot be predicted.
|
|
|
|
* We must use chunked encoding to send it.
|
|
|
|
*/
|
|
|
|
headers = curl_slist_append(headers, "Transfer-Encoding: chunked");
|
2009-12-01 11:33:39 +01:00
|
|
|
rpc->initial_buffer = 1;
|
2009-10-31 01:47:41 +01:00
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, rpc_out);
|
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_INFILE, rpc);
|
2009-12-01 11:33:39 +01:00
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_IOCTLFUNCTION, rpc_ioctl);
|
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_IOCTLDATA, rpc);
|
2009-10-31 01:47:41 +01:00
|
|
|
if (options.verbosity > 1) {
|
|
|
|
fprintf(stderr, "POST %s (chunked)\n", rpc->service_name);
|
|
|
|
fflush(stderr);
|
|
|
|
}
|
|
|
|
|
remote-curl: retry failed requests for auth even with gzip
Commit b81401c taught the post_rpc function to retry the
http request after prompting for credentials. However, it
did not handle two cases:
1. If we have a large request, we do not retry. That's OK,
since we would have sent a probe (with retry) already.
2. If we are gzipping the request, we do not retry. That
was considered OK, because the intended use was for
push (e.g., listing refs is OK, but actually pushing
objects is not), and we never gzip on push.
This patch teaches post_rpc to retry even a gzipped request.
This has two advantages:
1. It is possible to configure a "half-auth" state for
fetching, where the set of refs and their sha1s are
advertised, but one cannot actually fetch objects.
This is not a recommended configuration, as it leaks
some information about what is in the repository (e.g.,
an attacker can try brute-forcing possible content in
your repository and checking whether it matches your
branch sha1). However, it can be slightly more
convenient, since a no-op fetch will not require a
password at all.
2. It future-proofs us should we decide to ever gzip more
requests.
Signed-off-by: Jeff King <peff@peff.net>
2012-10-31 12:29:16 +01:00
|
|
|
} else if (gzip_body) {
|
|
|
|
/*
|
|
|
|
* If we are looping to retry authentication, then the previous
|
|
|
|
* run will have set up the headers and gzip buffer already,
|
|
|
|
* and we just need to send it.
|
|
|
|
*/
|
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, gzip_body);
|
2017-04-11 20:13:57 +02:00
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE_LARGE, xcurl_off_t(gzip_size));
|
remote-curl: retry failed requests for auth even with gzip
Commit b81401c taught the post_rpc function to retry the
http request after prompting for credentials. However, it
did not handle two cases:
1. If we have a large request, we do not retry. That's OK,
since we would have sent a probe (with retry) already.
2. If we are gzipping the request, we do not retry. That
was considered OK, because the intended use was for
push (e.g., listing refs is OK, but actually pushing
objects is not), and we never gzip on push.
This patch teaches post_rpc to retry even a gzipped request.
This has two advantages:
1. It is possible to configure a "half-auth" state for
fetching, where the set of refs and their sha1s are
advertised, but one cannot actually fetch objects.
This is not a recommended configuration, as it leaks
some information about what is in the repository (e.g.,
an attacker can try brute-forcing possible content in
your repository and checking whether it matches your
branch sha1). However, it can be slightly more
convenient, since a no-op fetch will not require a
password at all.
2. It future-proofs us should we decide to ever gzip more
requests.
Signed-off-by: Jeff King <peff@peff.net>
2012-10-31 12:29:16 +01:00
|
|
|
|
2009-10-31 01:47:43 +01:00
|
|
|
} else if (use_gzip && 1024 < rpc->len) {
|
|
|
|
/* The client backend isn't giving us compressed data so
|
2018-08-08 13:50:00 +02:00
|
|
|
* we can try to deflate it ourselves, this may save on
|
2009-10-31 01:47:43 +01:00
|
|
|
* the transfer time.
|
|
|
|
*/
|
2011-06-10 20:52:15 +02:00
|
|
|
git_zstream stream;
|
2009-10-31 01:47:43 +01:00
|
|
|
int ret;
|
|
|
|
|
2011-06-10 19:55:10 +02:00
|
|
|
git_deflate_init_gzip(&stream, Z_BEST_COMPRESSION);
|
2012-10-31 12:20:15 +01:00
|
|
|
gzip_size = git_deflate_bound(&stream, rpc->len);
|
|
|
|
gzip_body = xmalloc(gzip_size);
|
2009-10-31 01:47:43 +01:00
|
|
|
|
|
|
|
stream.next_in = (unsigned char *)rpc->buf;
|
|
|
|
stream.avail_in = rpc->len;
|
|
|
|
stream.next_out = (unsigned char *)gzip_body;
|
2012-10-31 12:20:15 +01:00
|
|
|
stream.avail_out = gzip_size;
|
2009-10-31 01:47:43 +01:00
|
|
|
|
2011-06-10 19:55:10 +02:00
|
|
|
ret = git_deflate(&stream, Z_FINISH);
|
2009-10-31 01:47:43 +01:00
|
|
|
if (ret != Z_STREAM_END)
|
2019-03-06 00:20:40 +01:00
|
|
|
die(_("cannot deflate request; zlib deflate error %d"), ret);
|
2009-10-31 01:47:43 +01:00
|
|
|
|
2011-06-10 19:55:10 +02:00
|
|
|
ret = git_deflate_end_gently(&stream);
|
2009-10-31 01:47:43 +01:00
|
|
|
if (ret != Z_OK)
|
2019-03-06 00:20:40 +01:00
|
|
|
die(_("cannot deflate request; zlib end error %d"), ret);
|
2009-10-31 01:47:43 +01:00
|
|
|
|
2012-10-31 12:20:15 +01:00
|
|
|
gzip_size = stream.total_out;
|
2009-10-31 01:47:43 +01:00
|
|
|
|
|
|
|
headers = curl_slist_append(headers, "Content-Encoding: gzip");
|
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, gzip_body);
|
2017-04-11 20:13:57 +02:00
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE_LARGE, xcurl_off_t(gzip_size));
|
2009-10-31 01:47:43 +01:00
|
|
|
|
|
|
|
if (options.verbosity > 1) {
|
|
|
|
fprintf(stderr, "POST %s (gzip %lu to %lu bytes)\n",
|
|
|
|
rpc->service_name,
|
2012-10-31 12:20:15 +01:00
|
|
|
(unsigned long)rpc->len, (unsigned long)gzip_size);
|
2009-10-31 01:47:43 +01:00
|
|
|
fflush(stderr);
|
|
|
|
}
|
2009-10-31 01:47:41 +01:00
|
|
|
} else {
|
|
|
|
/* We know the complete request size in advance, use the
|
|
|
|
* more normal Content-Length approach.
|
|
|
|
*/
|
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, rpc->buf);
|
2017-04-11 20:13:57 +02:00
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE_LARGE, xcurl_off_t(rpc->len));
|
2009-10-31 01:47:41 +01:00
|
|
|
if (options.verbosity > 1) {
|
|
|
|
fprintf(stderr, "POST %s (%lu bytes)\n",
|
|
|
|
rpc->service_name, (unsigned long)rpc->len);
|
|
|
|
fflush(stderr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, headers);
|
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, rpc_in);
|
2019-01-10 20:33:48 +01:00
|
|
|
rpc_in_data.rpc = rpc;
|
2019-01-10 20:33:49 +01:00
|
|
|
rpc_in_data.slot = slot;
|
2020-05-19 12:53:58 +02:00
|
|
|
rpc_in_data.check_pktline = stateless_connect;
|
|
|
|
memset(&rpc_in_data.pktline_state, 0, sizeof(rpc_in_data.pktline_state));
|
2021-07-30 19:59:46 +02:00
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &rpc_in_data);
|
2019-01-10 20:33:49 +01:00
|
|
|
curl_easy_setopt(slot->curl, CURLOPT_FAILONERROR, 0);
|
2009-10-31 01:47:41 +01:00
|
|
|
|
remote-curl: don't hang when a server dies before any output
In the event that a HTTP server closes the connection after giving a
200 but before giving any packets, we don't want to hang forever
waiting for a response that will never come. Instead, we should die
immediately.
One case where this happens is when attempting to fetch a dangling
object by its object name. In this case, the server dies before
sending any data. Prior to this patch, fetch-pack would wait for
data from the server, and remote-curl would wait for fetch-pack,
causing a deadlock.
Despite this patch, there is other possible malformed input that could
cause the same deadlock (e.g. a half-finished pktline, or a pktline but
no trailing flush). There are a few possible solutions to this:
1. Allowing remote-curl to tell fetch-pack about the EOF (so that
fetch-pack could know that no more data is coming until it says
something else). This is tricky because an out-of-band signal would
be required, or the http response would have to be re-framed inside
another layer of pkt-line or something.
2. Make remote-curl understand some of the protocol. It turns out
that in addition to understanding pkt-line, it would need to watch for
ack/nak. This is somewhat fragile, as information about the protocol
would end up in two places. Also, pkt-lines which are already at the
length limit would need special handling.
Both of these solutions would require a fair amount of work, whereas
this hack is easy and solves at least some of the problem.
Still to do: it would be good to give a better error message
than "fatal: The remote end hung up unexpectedly".
Signed-off-by: David Turner <dturner@twosigma.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-11-18 21:30:49 +01:00
|
|
|
|
|
|
|
rpc->any_written = 0;
|
2013-10-31 07:36:26 +01:00
|
|
|
err = run_slot(slot, NULL);
|
http: hoist credential request out of handle_curl_result
When we are handling a curl response code in http_request or
in the remote-curl RPC code, we use the handle_curl_result
helper to translate curl's response into an easy-to-use
code. When we see an HTTP 401, we do one of two things:
1. If we already had a filled-in credential, we mark it as
rejected, and then return HTTP_NOAUTH to indicate to
the caller that we failed.
2. If we didn't, then we ask for a new credential and tell
the caller HTTP_REAUTH to indicate that they may want
to try again.
Rejecting in the first case makes sense; it is the natural
result of the request we just made. However, prompting for
more credentials in the second step does not always make
sense. We do not know for sure that the caller is going to
make a second request, and nor are we sure that it will be
to the same URL. Logically, the prompt belongs not to the
request we just finished, but to the request we are (maybe)
about to make.
In practice, it is very hard to trigger any bad behavior.
Currently, if we make a second request, it will always be to
the same URL (even in the face of redirects, because curl
handles the redirects internally). And we almost always
retry on HTTP_REAUTH these days. The one exception is if we
are streaming a large RPC request to the server (e.g., a
pushed packfile), in which case we cannot restart. It's
extremely unlikely to see a 401 response at this stage,
though, as we would typically have seen it when we sent a
probe request, before streaming the data.
This patch drops the automatic prompt out of case 2, and
instead requires the caller to do it. This is a few extra
lines of code, and the bug it fixes is unlikely to come up
in practice. But it is conceptually cleaner, and paves the
way for better handling of credentials across redirects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
2013-09-28 10:31:45 +02:00
|
|
|
if (err == HTTP_REAUTH && !large_request) {
|
|
|
|
credential_fill(&http_auth);
|
remote-curl: do not call run_slot repeatedly
Commit b81401c (http: prompt for credentials on failed POST)
taught post_rpc to call run_slot in a loop in order to retry
a request after asking the user for credentials. However,
after a call to run_slot we will have called
finish_active_slot. This means we have released the slot,
and we should no longer look at it.
As it happens, this does not cause any bugs in the current
code, since we know that we are not using curl_multi in this
code path, and therefore nobody will have taken over our
slot in the meantime. However, it is good form to actually
call get_active_slot again. It also future proofs us against
changes in the http code.
We can do this by jumping back to a retry label at the top
of our function. We just need to reorder a few setup lines
that should not be repeated; everything else within the loop
is either idempotent, needs to be repeated, or in a path we
do not follow (e.g., we do not even try when large_request
is set, because we don't know how much data we might have
streamed from our helper program).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-10-12 09:35:33 +02:00
|
|
|
goto retry;
|
http: hoist credential request out of handle_curl_result
When we are handling a curl response code in http_request or
in the remote-curl RPC code, we use the handle_curl_result
helper to translate curl's response into an easy-to-use
code. When we see an HTTP 401, we do one of two things:
1. If we already had a filled-in credential, we mark it as
rejected, and then return HTTP_NOAUTH to indicate to
the caller that we failed.
2. If we didn't, then we ask for a new credential and tell
the caller HTTP_REAUTH to indicate that they may want
to try again.
Rejecting in the first case makes sense; it is the natural
result of the request we just made. However, prompting for
more credentials in the second step does not always make
sense. We do not know for sure that the caller is going to
make a second request, and nor are we sure that it will be
to the same URL. Logically, the prompt belongs not to the
request we just finished, but to the request we are (maybe)
about to make.
In practice, it is very hard to trigger any bad behavior.
Currently, if we make a second request, it will always be to
the same URL (even in the face of redirects, because curl
handles the redirects internally). And we almost always
retry on HTTP_REAUTH these days. The one exception is if we
are streaming a large RPC request to the server (e.g., a
pushed packfile), in which case we cannot restart. It's
extremely unlikely to see a 401 response at this stage,
though, as we would typically have seen it when we sent a
probe request, before streaming the data.
This patch drops the automatic prompt out of case 2, and
instead requires the caller to do it. This is a few extra
lines of code, and the bug it fixes is unlikely to come up
in practice. But it is conceptually cleaner, and paves the
way for better handling of credentials across redirects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
2013-09-28 10:31:45 +02:00
|
|
|
}
|
http: prompt for credentials on failed POST
All of the smart-http GET requests go through the http_get_*
functions, which will prompt for credentials and retry if we
see an HTTP 401.
POST requests, however, do not go through any central point.
Moreover, it is difficult to retry in the general case; we
cannot assume the request body fits in memory or is even
seekable, and we don't know how much of it was consumed
during the attempt.
Most of the time, this is not a big deal; for both fetching
and pushing, we make a GET request before doing any POSTs,
so typically we figure out the credentials during the first
request, then reuse them during the POST. However, some
servers may allow a client to get the list of refs from
receive-pack without authentication, and then require
authentication when the client actually tries to POST the
pack.
This is not ideal, as the client may do a non-trivial amount
of work to generate the pack (e.g., delta-compressing
objects). However, for a long time it has been the
recommended example configuration in git-http-backend(1) for
setting up a repository with anonymous fetch and
authenticated push. This setup has always been broken
without putting a username into the URL. Prior to commit
986bbc0, it did work with a username in the URL, because git
would prompt for credentials before making any requests at
all. However, post-986bbc0, it is totally broken. Since it
has been advertised in the manpage for some time, we should
make sure it works.
Unfortunately, it is not as easy as simply calling post_rpc
again when it fails, due to the input issue mentioned above.
However, we can still make this specific case work by
retrying in two specific instances:
1. If the request is large (bigger than LARGE_PACKET_MAX),
we will first send a probe request with a single flush
packet. Since this request is static, we can freely
retry it.
2. If the request is small and we are not using gzip, then
we have the whole thing in-core, and we can freely
retry.
That means we will not retry in some instances, including:
1. If we are using gzip. However, we only do so when
calling git-upload-pack, so it does not apply to
pushes.
2. If we have a large request, the probe succeeds, but
then the real POST wants authentication. This is an
extremely unlikely configuration and not worth worrying
about.
While it might be nice to cover those instances, doing so
would be significantly more complex for very little
real-world gain. In the long run, we will be much better off
when curl learns to internally handle authentication as a
callback, and we can cleanly handle all cases that way.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-08-27 15:27:15 +02:00
|
|
|
if (err != HTTP_OK)
|
|
|
|
err = -1;
|
2009-10-31 01:47:41 +01:00
|
|
|
|
remote-curl: don't hang when a server dies before any output
In the event that a HTTP server closes the connection after giving a
200 but before giving any packets, we don't want to hang forever
waiting for a response that will never come. Instead, we should die
immediately.
One case where this happens is when attempting to fetch a dangling
object by its object name. In this case, the server dies before
sending any data. Prior to this patch, fetch-pack would wait for
data from the server, and remote-curl would wait for fetch-pack,
causing a deadlock.
Despite this patch, there is other possible malformed input that could
cause the same deadlock (e.g. a half-finished pktline, or a pktline but
no trailing flush). There are a few possible solutions to this:
1. Allowing remote-curl to tell fetch-pack about the EOF (so that
fetch-pack could know that no more data is coming until it says
something else). This is tricky because an out-of-band signal would
be required, or the http response would have to be re-framed inside
another layer of pkt-line or something.
2. Make remote-curl understand some of the protocol. It turns out
that in addition to understanding pkt-line, it would need to watch for
ack/nak. This is somewhat fragile, as information about the protocol
would end up in two places. Also, pkt-lines which are already at the
length limit would need special handling.
Both of these solutions would require a fair amount of work, whereas
this hack is easy and solves at least some of the problem.
Still to do: it would be good to give a better error message
than "fatal: The remote end hung up unexpectedly".
Signed-off-by: David Turner <dturner@twosigma.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-11-18 21:30:49 +01:00
|
|
|
if (!rpc->any_written)
|
|
|
|
err = -1;
|
|
|
|
|
2020-05-19 12:53:58 +02:00
|
|
|
if (rpc_in_data.pktline_state.len_filled)
|
|
|
|
err = error(_("%d bytes of length header were received"), rpc_in_data.pktline_state.len_filled);
|
|
|
|
if (rpc_in_data.pktline_state.remaining)
|
|
|
|
err = error(_("%d bytes of body are still expected"), rpc_in_data.pktline_state.remaining);
|
|
|
|
|
2020-05-19 12:54:00 +02:00
|
|
|
if (stateless_connect)
|
|
|
|
packet_response_end(rpc->in);
|
|
|
|
|
2009-10-31 01:47:41 +01:00
|
|
|
curl_slist_free_all(headers);
|
2009-10-31 01:47:43 +01:00
|
|
|
free(gzip_body);
|
2009-10-31 01:47:41 +01:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2019-02-14 20:06:35 +01:00
|
|
|
static int rpc_service(struct rpc_state *rpc, struct discovery *heads,
|
2019-02-14 20:06:37 +01:00
|
|
|
const char **client_argv, const struct strbuf *preamble,
|
|
|
|
struct strbuf *rpc_result)
|
2009-10-31 01:47:41 +01:00
|
|
|
{
|
|
|
|
const char *svc = rpc->service_name;
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
2014-08-19 21:09:35 +02:00
|
|
|
struct child_process client = CHILD_PROCESS_INIT;
|
2009-10-31 01:47:41 +01:00
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
client.in = -1;
|
|
|
|
client.out = -1;
|
|
|
|
client.git_cmd = 1;
|
2021-11-25 23:52:18 +01:00
|
|
|
strvec_pushv(&client.args, client_argv);
|
2009-10-31 01:47:41 +01:00
|
|
|
if (start_command(&client))
|
|
|
|
exit(1);
|
2019-02-14 20:06:36 +01:00
|
|
|
write_or_die(client.in, preamble->buf, preamble->len);
|
2009-10-31 01:47:41 +01:00
|
|
|
if (heads)
|
|
|
|
write_or_die(client.in, heads->buf, heads->len);
|
|
|
|
|
|
|
|
rpc->alloc = http_post_buffer;
|
|
|
|
rpc->buf = xmalloc(rpc->alloc);
|
|
|
|
rpc->in = client.in;
|
|
|
|
rpc->out = client.out;
|
|
|
|
|
2013-09-28 10:35:25 +02:00
|
|
|
strbuf_addf(&buf, "%s%s", url.buf, svc);
|
2009-10-31 01:47:41 +01:00
|
|
|
rpc->service_url = strbuf_detach(&buf, NULL);
|
|
|
|
|
2022-07-11 07:58:54 +02:00
|
|
|
rpc->hdr_accept_language = xstrdup_or_null(http_get_accept_language_header());
|
|
|
|
|
2009-10-31 01:47:41 +01:00
|
|
|
strbuf_addf(&buf, "Content-Type: application/x-%s-request", svc);
|
|
|
|
rpc->hdr_content_type = strbuf_detach(&buf, NULL);
|
|
|
|
|
2010-01-12 18:54:04 +01:00
|
|
|
strbuf_addf(&buf, "Accept: application/x-%s-result", svc);
|
2009-10-31 01:47:41 +01:00
|
|
|
rpc->hdr_accept = strbuf_detach(&buf, NULL);
|
|
|
|
|
2018-03-15 18:31:39 +01:00
|
|
|
if (get_protocol_http_header(heads->version, &buf))
|
|
|
|
rpc->protocol_header = strbuf_detach(&buf, NULL);
|
|
|
|
else
|
|
|
|
rpc->protocol_header = NULL;
|
|
|
|
|
2009-10-31 01:47:41 +01:00
|
|
|
while (!err) {
|
2021-10-14 22:15:12 +02:00
|
|
|
int n = packet_read(rpc->out, rpc->buf, rpc->alloc, 0);
|
2009-10-31 01:47:41 +01:00
|
|
|
if (!n)
|
|
|
|
break;
|
|
|
|
rpc->pos = 0;
|
|
|
|
rpc->len = n;
|
2020-05-19 12:53:58 +02:00
|
|
|
err |= post_rpc(rpc, 0, 0);
|
2009-10-31 01:47:41 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
close(client.in);
|
|
|
|
client.in = -1;
|
2011-10-05 01:20:19 +02:00
|
|
|
if (!err) {
|
2019-02-14 20:06:37 +01:00
|
|
|
strbuf_read(rpc_result, client.out, 0);
|
2011-10-05 01:20:19 +02:00
|
|
|
} else {
|
|
|
|
char buf[4096];
|
|
|
|
for (;;)
|
|
|
|
if (xread(client.out, buf, sizeof(buf)) <= 0)
|
|
|
|
break;
|
|
|
|
}
|
2010-08-06 23:19:44 +02:00
|
|
|
|
|
|
|
close(client.out);
|
2009-10-31 01:47:41 +01:00
|
|
|
client.out = -1;
|
|
|
|
|
|
|
|
err |= finish_command(&client);
|
|
|
|
free(rpc->service_url);
|
|
|
|
free(rpc->hdr_content_type);
|
|
|
|
free(rpc->hdr_accept);
|
2022-07-11 07:58:54 +02:00
|
|
|
free(rpc->hdr_accept_language);
|
2018-03-15 18:31:39 +01:00
|
|
|
free(rpc->protocol_header);
|
2009-10-31 01:47:41 +01:00
|
|
|
free(rpc->buf);
|
|
|
|
strbuf_release(&buf);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2009-10-31 01:47:28 +01:00
|
|
|
static int fetch_dumb(int nr_heads, struct ref **to_fetch)
|
|
|
|
{
|
2010-03-02 11:49:31 +01:00
|
|
|
struct walker *walker;
|
2016-02-22 23:44:25 +01:00
|
|
|
char **targets;
|
2009-10-31 01:47:28 +01:00
|
|
|
int ret, i;
|
|
|
|
|
2016-02-22 23:44:25 +01:00
|
|
|
ALLOC_ARRAY(targets, nr_heads);
|
2016-06-12 12:53:59 +02:00
|
|
|
if (options.depth || options.deepen_since)
|
2019-03-06 00:20:40 +01:00
|
|
|
die(_("dumb http transport does not support shallow capabilities"));
|
2009-10-31 01:47:28 +01:00
|
|
|
for (i = 0; i < nr_heads; i++)
|
2015-11-10 03:22:20 +01:00
|
|
|
targets[i] = xstrdup(oid_to_hex(&to_fetch[i]->old_oid));
|
2009-10-31 01:47:28 +01:00
|
|
|
|
2013-09-28 10:35:25 +02:00
|
|
|
walker = get_http_walker(url.buf);
|
2009-10-31 01:47:29 +01:00
|
|
|
walker->get_verbosely = options.verbosity >= 3;
|
2020-03-03 21:55:34 +01:00
|
|
|
walker->get_progress = options.progress;
|
2009-10-31 01:47:28 +01:00
|
|
|
walker->get_recover = 0;
|
|
|
|
ret = walker_fetch(walker, nr_heads, targets, NULL, NULL);
|
2010-03-02 11:49:31 +01:00
|
|
|
walker_free(walker);
|
2009-10-31 01:47:28 +01:00
|
|
|
|
|
|
|
for (i = 0; i < nr_heads; i++)
|
|
|
|
free(targets[i]);
|
|
|
|
free(targets);
|
|
|
|
|
2019-03-06 00:20:40 +01:00
|
|
|
return ret ? error(_("fetch failed.")) : 0;
|
2009-10-31 01:47:28 +01:00
|
|
|
}
|
|
|
|
|
2009-10-31 01:47:42 +01:00
|
|
|
static int fetch_git(struct discovery *heads,
|
|
|
|
int nr_heads, struct ref **to_fetch)
|
|
|
|
{
|
2022-07-11 07:58:54 +02:00
|
|
|
struct rpc_state rpc = RPC_STATE_INIT;
|
2012-04-02 17:14:44 +02:00
|
|
|
struct strbuf preamble = STRBUF_INIT;
|
2016-06-12 12:53:43 +02:00
|
|
|
int i, err;
|
2020-07-28 22:25:12 +02:00
|
|
|
struct strvec args = STRVEC_INIT;
|
2019-02-14 20:06:37 +01:00
|
|
|
struct strbuf rpc_result = STRBUF_INIT;
|
2016-06-12 12:53:43 +02:00
|
|
|
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_pushl(&args, "fetch-pack", "--stateless-rpc",
|
strvec: fix indentation in renamed calls
Code which split an argv_array call across multiple lines, like:
argv_array_pushl(&args, "one argument",
"another argument", "and more",
NULL);
was recently mechanically renamed to use strvec, which results in
mis-matched indentation like:
strvec_pushl(&args, "one argument",
"another argument", "and more",
NULL);
Let's fix these up to align the arguments with the opening paren. I did
this manually by sifting through the results of:
git jump grep 'strvec_.*,$'
and liberally applying my editor's auto-format. Most of the changes are
of the form shown above, though I also normalized a few that had
originally used a single-tab indentation (rather than our usual style of
aligning with the open paren). I also rewrapped a couple of obvious
cases (e.g., where previously too-long lines became short enough to fit
on one), but I wasn't aggressive about it. In cases broken to three or
more lines, the grouping of arguments is sometimes meaningful, and it
wasn't worth my time or reviewer time to ponder each case individually.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-07-28 22:26:31 +02:00
|
|
|
"--stdin", "--lock-pack", NULL);
|
2009-10-31 01:47:42 +01:00
|
|
|
if (options.followtags)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--include-tag");
|
2009-10-31 01:47:42 +01:00
|
|
|
if (options.thin)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--thin");
|
2016-06-12 12:53:43 +02:00
|
|
|
if (options.verbosity >= 3)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_pushl(&args, "-v", "-v", NULL);
|
2013-07-21 10:18:05 +02:00
|
|
|
if (options.check_self_contained_and_connected)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--check-self-contained-and-connected");
|
2013-12-05 14:02:50 +01:00
|
|
|
if (options.cloning)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--cloning");
|
2013-12-05 14:02:50 +01:00
|
|
|
if (options.update_shallow)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--update-shallow");
|
2009-10-31 01:47:42 +01:00
|
|
|
if (!options.progress)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--no-progress");
|
2016-06-12 12:53:43 +02:00
|
|
|
if (options.depth)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_pushf(&args, "--depth=%lu", options.depth);
|
2016-06-12 12:53:59 +02:00
|
|
|
if (options.deepen_since)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_pushf(&args, "--shallow-since=%s", options.deepen_since);
|
2016-06-12 12:54:04 +02:00
|
|
|
for (i = 0; i < options.deepen_not.nr; i++)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_pushf(&args, "--shallow-exclude=%s",
|
strvec: fix indentation in renamed calls
Code which split an argv_array call across multiple lines, like:
argv_array_pushl(&args, "one argument",
"another argument", "and more",
NULL);
was recently mechanically renamed to use strvec, which results in
mis-matched indentation like:
strvec_pushl(&args, "one argument",
"another argument", "and more",
NULL);
Let's fix these up to align the arguments with the opening paren. I did
this manually by sifting through the results of:
git jump grep 'strvec_.*,$'
and liberally applying my editor's auto-format. Most of the changes are
of the form shown above, though I also normalized a few that had
originally used a single-tab indentation (rather than our usual style of
aligning with the open paren). I also rewrapped a couple of obvious
cases (e.g., where previously too-long lines became short enough to fit
on one), but I wasn't aggressive about it. In cases broken to three or
more lines, the grouping of arguments is sometimes meaningful, and it
wasn't worth my time or reviewer time to ponder each case individually.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-07-28 22:26:31 +02:00
|
|
|
options.deepen_not.items[i].string);
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server..
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 12:54:09 +02:00
|
|
|
if (options.deepen_relative && options.depth)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--deepen-relative");
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-12-05 17:58:49 +01:00
|
|
|
if (options.from_promisor)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--from-promisor");
|
2022-03-28 16:02:07 +02:00
|
|
|
if (options.refetch)
|
|
|
|
strvec_push(&args, "--refetch");
|
2017-12-08 16:58:44 +01:00
|
|
|
if (options.filter)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_pushf(&args, "--filter=%s", options.filter);
|
|
|
|
strvec_push(&args, url.buf);
|
2012-04-02 17:14:44 +02:00
|
|
|
|
2009-10-31 01:47:42 +01:00
|
|
|
for (i = 0; i < nr_heads; i++) {
|
|
|
|
struct ref *ref = to_fetch[i];
|
2015-01-28 18:58:50 +01:00
|
|
|
if (!*ref->name)
|
2019-03-06 00:20:40 +01:00
|
|
|
die(_("cannot fetch by sha1 over smart http"));
|
2013-12-05 14:02:49 +01:00
|
|
|
packet_buf_write(&preamble, "%s %s\n",
|
2015-11-10 03:22:20 +01:00
|
|
|
oid_to_hex(&ref->old_oid), ref->name);
|
2009-10-31 01:47:42 +01:00
|
|
|
}
|
2012-04-02 17:14:44 +02:00
|
|
|
packet_buf_flush(&preamble);
|
2009-10-31 01:47:42 +01:00
|
|
|
|
|
|
|
memset(&rpc, 0, sizeof(rpc));
|
|
|
|
rpc.service_name = "git-upload-pack",
|
2009-10-31 01:47:43 +01:00
|
|
|
rpc.gzip_request = 1;
|
2009-10-31 01:47:42 +01:00
|
|
|
|
2020-07-29 02:37:20 +02:00
|
|
|
err = rpc_service(&rpc, heads, args.v, &preamble, &rpc_result);
|
2019-02-14 20:06:37 +01:00
|
|
|
if (rpc_result.len)
|
|
|
|
write_or_die(1, rpc_result.buf, rpc_result.len);
|
|
|
|
strbuf_release(&rpc_result);
|
2012-04-02 17:14:44 +02:00
|
|
|
strbuf_release(&preamble);
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_clear(&args);
|
2009-10-31 01:47:42 +01:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int fetch(int nr_heads, struct ref **to_fetch)
|
|
|
|
{
|
remote-curl: always parse incoming refs
When remote-curl receives a list of refs from a server, it
keeps the whole buffer intact. When we get a "list" command,
we feed the result to get_remote_heads, and when we get a
"fetch" or "push" command, we feed it to fetch-pack or
send-pack, respectively.
If the HTTP response from the server is truncated for any
reason, we will get an incomplete ref advertisement. If we
then feed this incomplete list to fetch-pack, one of a few
things may happen:
1. If the truncation is in a packet header, fetch-pack
will notice the bogus line and complain.
2. If the truncation is inside a packet, fetch-pack will
keep waiting for us to send the rest of the packet,
which we never will.
3. If the truncation is at a packet boundary, fetch-pack
will keep waiting for us to send the next packet, which
we never will.
As a result, fetch-pack hangs, waiting for input. However,
remote-curl believes it has sent all of the advertisement,
and therefore waits for fetch-pack to speak. The two
processes end up in a deadlock.
We do notice the broken ref list if we feed it to
get_remote_heads. So if git asks the helper to do a "list"
followed by a "fetch", we are safe; we'll abort during the
list operation, which parses the refs.
This patch teaches remote-curl to always parse and save the
incoming ref list when we read the ref advertisement from a
server. That means that we will always verify and abort
before even running fetch-pack (or send-pack) when reading a
corrupted list, even if we do not run the "list" command
explicitly.
Since we save the result, in the common case of running
"list" then "fetch", we do not do any extra parsing at all.
In the case of just a "fetch", we do an extra round of
parsing, but only once.
Note also that the "fetch" case will now also initialize
server_capabilities from the remote (in remote-curl; we
already would do so inside fetch-pack). Doing "list+fetch"
already does this. It doesn't actually matter now, but the
new behavior is arguably more correct, should remote-curl
ever start caring about the server's capability list.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-20 21:07:19 +01:00
|
|
|
struct discovery *d = discover_refs("git-upload-pack", 0);
|
2009-10-31 01:47:42 +01:00
|
|
|
if (d->proto_git)
|
|
|
|
return fetch_git(d, nr_heads, to_fetch);
|
|
|
|
else
|
|
|
|
return fetch_dumb(nr_heads, to_fetch);
|
|
|
|
}
|
|
|
|
|
2009-10-31 01:47:28 +01:00
|
|
|
static void parse_fetch(struct strbuf *buf)
|
|
|
|
{
|
|
|
|
struct ref **to_fetch = NULL;
|
|
|
|
struct ref *list_head = NULL;
|
|
|
|
struct ref **list = &list_head;
|
|
|
|
int alloc_heads = 0, nr_heads = 0;
|
|
|
|
|
|
|
|
do {
|
2014-06-18 21:48:29 +02:00
|
|
|
const char *p;
|
|
|
|
if (skip_prefix(buf->buf, "fetch ", &p)) {
|
|
|
|
const char *name;
|
2009-10-31 01:47:28 +01:00
|
|
|
struct ref *ref;
|
2015-11-10 03:22:22 +01:00
|
|
|
struct object_id old_oid;
|
2019-02-19 01:05:16 +01:00
|
|
|
const char *q;
|
2009-10-31 01:47:28 +01:00
|
|
|
|
2019-02-19 01:05:16 +01:00
|
|
|
if (parse_oid_hex(p, &old_oid, &q))
|
2019-06-02 17:11:22 +02:00
|
|
|
die(_("protocol error: expected sha/ref, got '%s'"), p);
|
2019-02-19 01:05:16 +01:00
|
|
|
if (*q == ' ')
|
|
|
|
name = q + 1;
|
|
|
|
else if (!*q)
|
2009-10-31 01:47:28 +01:00
|
|
|
name = "";
|
|
|
|
else
|
2019-06-02 17:11:22 +02:00
|
|
|
die(_("protocol error: expected sha/ref, got '%s'"), p);
|
2009-10-31 01:47:28 +01:00
|
|
|
|
|
|
|
ref = alloc_ref(name);
|
2015-11-10 03:22:22 +01:00
|
|
|
oidcpy(&ref->old_oid, &old_oid);
|
2009-10-31 01:47:28 +01:00
|
|
|
|
|
|
|
*list = ref;
|
|
|
|
list = &ref->next;
|
|
|
|
|
|
|
|
ALLOC_GROW(to_fetch, nr_heads + 1, alloc_heads);
|
|
|
|
to_fetch[nr_heads++] = ref;
|
|
|
|
}
|
|
|
|
else
|
2019-03-06 00:20:40 +01:00
|
|
|
die(_("http transport does not support %s"), buf->buf);
|
2009-10-31 01:47:28 +01:00
|
|
|
|
|
|
|
strbuf_reset(buf);
|
2016-01-14 00:31:17 +01:00
|
|
|
if (strbuf_getline_lf(buf, stdin) == EOF)
|
2009-10-31 01:47:28 +01:00
|
|
|
return;
|
|
|
|
if (!*buf->buf)
|
|
|
|
break;
|
|
|
|
} while (1);
|
|
|
|
|
2009-10-31 01:47:42 +01:00
|
|
|
if (fetch(nr_heads, to_fetch))
|
2009-10-31 01:47:28 +01:00
|
|
|
exit(128); /* error already reported */
|
|
|
|
free_refs(list_head);
|
|
|
|
free(to_fetch);
|
|
|
|
|
|
|
|
printf("\n");
|
|
|
|
fflush(stdout);
|
|
|
|
strbuf_reset(buf);
|
|
|
|
}
|
|
|
|
|
2019-10-13 15:37:39 +02:00
|
|
|
static int push_dav(int nr_spec, const char **specs)
|
2009-10-31 01:47:30 +01:00
|
|
|
{
|
2016-02-22 23:44:21 +01:00
|
|
|
struct child_process child = CHILD_PROCESS_INIT;
|
|
|
|
size_t i;
|
2009-10-31 01:47:30 +01:00
|
|
|
|
2016-02-22 23:44:21 +01:00
|
|
|
child.git_cmd = 1;
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&child.args, "http-push");
|
|
|
|
strvec_push(&child.args, "--helper-status");
|
2009-10-31 01:47:30 +01:00
|
|
|
if (options.dry_run)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&child.args, "--dry-run");
|
2009-10-31 01:47:30 +01:00
|
|
|
if (options.verbosity > 1)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&child.args, "--verbose");
|
|
|
|
strvec_push(&child.args, url.buf);
|
2009-10-31 01:47:30 +01:00
|
|
|
for (i = 0; i < nr_spec; i++)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&child.args, specs[i]);
|
2009-10-31 01:47:30 +01:00
|
|
|
|
2016-02-22 23:44:21 +01:00
|
|
|
if (run_command(&child))
|
2019-03-06 00:20:40 +01:00
|
|
|
die(_("git-http-push failed"));
|
2009-10-31 01:47:30 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-10-13 15:37:39 +02:00
|
|
|
static int push_git(struct discovery *heads, int nr_spec, const char **specs)
|
2009-10-31 01:47:41 +01:00
|
|
|
{
|
2022-07-11 07:58:54 +02:00
|
|
|
struct rpc_state rpc = RPC_STATE_INIT;
|
2013-07-09 07:16:31 +02:00
|
|
|
int i, err;
|
2020-07-28 22:25:12 +02:00
|
|
|
struct strvec args;
|
2013-08-03 00:14:50 +02:00
|
|
|
struct string_list_item *cas_option;
|
2014-08-21 14:21:20 +02:00
|
|
|
struct strbuf preamble = STRBUF_INIT;
|
2019-02-14 20:06:37 +01:00
|
|
|
struct strbuf rpc_result = STRBUF_INIT;
|
2013-07-09 07:16:31 +02:00
|
|
|
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_init(&args);
|
|
|
|
strvec_pushl(&args, "send-pack", "--stateless-rpc", "--helper-status",
|
strvec: fix indentation in renamed calls
Code which split an argv_array call across multiple lines, like:
argv_array_pushl(&args, "one argument",
"another argument", "and more",
NULL);
was recently mechanically renamed to use strvec, which results in
mis-matched indentation like:
strvec_pushl(&args, "one argument",
"another argument", "and more",
NULL);
Let's fix these up to align the arguments with the opening paren. I did
this manually by sifting through the results of:
git jump grep 'strvec_.*,$'
and liberally applying my editor's auto-format. Most of the changes are
of the form shown above, though I also normalized a few that had
originally used a single-tab indentation (rather than our usual style of
aligning with the open paren). I also rewrapped a couple of obvious
cases (e.g., where previously too-long lines became short enough to fit
on one), but I wasn't aggressive about it. In cases broken to three or
more lines, the grouping of arguments is sometimes meaningful, and it
wasn't worth my time or reviewer time to ponder each case individually.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-07-28 22:26:31 +02:00
|
|
|
NULL);
|
2009-10-31 01:47:41 +01:00
|
|
|
|
|
|
|
if (options.thin)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--thin");
|
2009-10-31 01:47:41 +01:00
|
|
|
if (options.dry_run)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--dry-run");
|
2015-08-19 17:26:46 +02:00
|
|
|
if (options.push_cert == SEND_PACK_PUSH_CERT_ALWAYS)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--signed=yes");
|
2015-08-19 17:26:46 +02:00
|
|
|
else if (options.push_cert == SEND_PACK_PUSH_CERT_IF_ASKED)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--signed=if-asked");
|
remote-curl: pass on atomic capability to remote side
When pushing more than one reference with the --atomic option, the
server is supposed to perform a single atomic transaction to update the
references, leaving them either all to succeed or all to fail. This
works fine when pushing locally or over SSH, but when pushing over HTTP,
we fail to pass the atomic capability to the remote side. In fact, we
have not reported this capability to any remote helpers during the life
of the feature.
Now normally, things happen to work nevertheless, since we actually
check for most types of failures, such as non-fast-forward updates, on
the client side, and just abort the entire attempt. However, if the
server side reports a problem, such as the inability to lock a ref, the
transaction isn't atomic, because we haven't passed the appropriate
capability over and the remote side has no way of knowing that we wanted
atomic behavior.
Fix this by passing the option from the transport code through to remote
helpers, and from the HTTP remote helper down to send-pack. With this
change, we can detect if the server side rejects the push and report
back appropriately. Note the difference in the messages: the remote
side reports "atomic transaction failed", while our own checking rejects
pushes with the message "atomic push failed".
Document the atomic option in the remote helper documentation, so other
implementers can implement it if they like.
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-10-17 01:45:34 +02:00
|
|
|
if (options.atomic)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--atomic");
|
2012-01-08 22:06:20 +01:00
|
|
|
if (options.verbosity == 0)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--quiet");
|
2012-01-08 22:06:20 +01:00
|
|
|
else if (options.verbosity > 1)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--verbose");
|
2017-03-22 23:22:00 +01:00
|
|
|
for (i = 0; i < options.push_options.nr; i++)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_pushf(&args, "--push-option=%s",
|
strvec: fix indentation in renamed calls
Code which split an argv_array call across multiple lines, like:
argv_array_pushl(&args, "one argument",
"another argument", "and more",
NULL);
was recently mechanically renamed to use strvec, which results in
mis-matched indentation like:
strvec_pushl(&args, "one argument",
"another argument", "and more",
NULL);
Let's fix these up to align the arguments with the opening paren. I did
this manually by sifting through the results of:
git jump grep 'strvec_.*,$'
and liberally applying my editor's auto-format. Most of the changes are
of the form shown above, though I also normalized a few that had
originally used a single-tab indentation (rather than our usual style of
aligning with the open paren). I also rewrapped a couple of obvious
cases (e.g., where previously too-long lines became short enough to fit
on one), but I wasn't aggressive about it. In cases broken to three or
more lines, the grouping of arguments is sometimes meaningful, and it
wasn't worth my time or reviewer time to ponder each case individually.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-07-28 22:26:31 +02:00
|
|
|
options.push_options.items[i].string);
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, options.progress ? "--progress" : "--no-progress");
|
2013-08-03 00:14:50 +02:00
|
|
|
for_each_string_list_item(cas_option, &cas_options)
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, cas_option->string);
|
|
|
|
strvec_push(&args, url.buf);
|
2014-08-21 14:21:20 +02:00
|
|
|
|
2020-10-03 14:10:45 +02:00
|
|
|
if (options.force_if_includes)
|
|
|
|
strvec_push(&args, "--force-if-includes");
|
|
|
|
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&args, "--stdin");
|
2009-10-31 01:47:41 +01:00
|
|
|
for (i = 0; i < nr_spec; i++)
|
2014-08-21 14:21:20 +02:00
|
|
|
packet_buf_write(&preamble, "%s\n", specs[i]);
|
|
|
|
packet_buf_flush(&preamble);
|
2009-10-31 01:47:41 +01:00
|
|
|
|
|
|
|
memset(&rpc, 0, sizeof(rpc));
|
|
|
|
rpc.service_name = "git-receive-pack",
|
|
|
|
|
2020-07-29 02:37:20 +02:00
|
|
|
err = rpc_service(&rpc, heads, args.v, &preamble, &rpc_result);
|
2019-02-14 20:06:37 +01:00
|
|
|
if (rpc_result.len)
|
|
|
|
write_or_die(1, rpc_result.buf, rpc_result.len);
|
|
|
|
strbuf_release(&rpc_result);
|
2014-08-21 14:21:20 +02:00
|
|
|
strbuf_release(&preamble);
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_clear(&args);
|
2009-10-31 01:47:41 +01:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2019-10-13 15:37:39 +02:00
|
|
|
static int push(int nr_spec, const char **specs)
|
2009-10-31 01:47:41 +01:00
|
|
|
{
|
remote-curl: always parse incoming refs
When remote-curl receives a list of refs from a server, it
keeps the whole buffer intact. When we get a "list" command,
we feed the result to get_remote_heads, and when we get a
"fetch" or "push" command, we feed it to fetch-pack or
send-pack, respectively.
If the HTTP response from the server is truncated for any
reason, we will get an incomplete ref advertisement. If we
then feed this incomplete list to fetch-pack, one of a few
things may happen:
1. If the truncation is in a packet header, fetch-pack
will notice the bogus line and complain.
2. If the truncation is inside a packet, fetch-pack will
keep waiting for us to send the rest of the packet,
which we never will.
3. If the truncation is at a packet boundary, fetch-pack
will keep waiting for us to send the next packet, which
we never will.
As a result, fetch-pack hangs, waiting for input. However,
remote-curl believes it has sent all of the advertisement,
and therefore waits for fetch-pack to speak. The two
processes end up in a deadlock.
We do notice the broken ref list if we feed it to
get_remote_heads. So if git asks the helper to do a "list"
followed by a "fetch", we are safe; we'll abort during the
list operation, which parses the refs.
This patch teaches remote-curl to always parse and save the
incoming ref list when we read the ref advertisement from a
server. That means that we will always verify and abort
before even running fetch-pack (or send-pack) when reading a
corrupted list, even if we do not run the "list" command
explicitly.
Since we save the result, in the common case of running
"list" then "fetch", we do not do any extra parsing at all.
In the case of just a "fetch", we do an extra round of
parsing, but only once.
Note also that the "fetch" case will now also initialize
server_capabilities from the remote (in remote-curl; we
already would do so inside fetch-pack). Doing "list+fetch"
already does this. It doesn't actually matter now, but the
new behavior is arguably more correct, should remote-curl
ever start caring about the server's capability list.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-20 21:07:19 +01:00
|
|
|
struct discovery *heads = discover_refs("git-receive-pack", 1);
|
2009-10-31 01:47:41 +01:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (heads->proto_git)
|
|
|
|
ret = push_git(heads, nr_spec, specs);
|
|
|
|
else
|
|
|
|
ret = push_dav(nr_spec, specs);
|
|
|
|
free_discovery(heads);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2009-10-31 01:47:30 +01:00
|
|
|
static void parse_push(struct strbuf *buf)
|
|
|
|
{
|
2020-07-28 22:25:12 +02:00
|
|
|
struct strvec specs = STRVEC_INIT;
|
2019-10-13 15:37:39 +02:00
|
|
|
int ret;
|
2009-10-31 01:47:30 +01:00
|
|
|
|
|
|
|
do {
|
2020-01-30 20:35:46 +01:00
|
|
|
const char *arg;
|
|
|
|
if (skip_prefix(buf->buf, "push ", &arg))
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_push(&specs, arg);
|
2009-10-31 01:47:30 +01:00
|
|
|
else
|
2019-03-06 00:20:40 +01:00
|
|
|
die(_("http transport does not support %s"), buf->buf);
|
2009-10-31 01:47:30 +01:00
|
|
|
|
|
|
|
strbuf_reset(buf);
|
2016-01-14 00:31:17 +01:00
|
|
|
if (strbuf_getline_lf(buf, stdin) == EOF)
|
2011-06-20 09:40:06 +02:00
|
|
|
goto free_specs;
|
2009-10-31 01:47:30 +01:00
|
|
|
if (!*buf->buf)
|
|
|
|
break;
|
|
|
|
} while (1);
|
|
|
|
|
2020-07-29 02:37:20 +02:00
|
|
|
ret = push(specs.nr, specs.v);
|
2009-10-31 01:47:30 +01:00
|
|
|
printf("\n");
|
|
|
|
fflush(stdout);
|
2011-06-20 09:40:06 +02:00
|
|
|
|
remote-curl: Fix push status report when all branches fail
The protocol between transport-helper.c and remote-curl requires
remote-curl to always print a blank line after the push command
has run. If the blank line is ommitted, transport-helper kills its
container process (the git push the user started) with exit(128)
and no message indicating a problem, assuming the helper already
printed reasonable error text to the console.
However if the remote rejects all branches with "ng" commands in the
report-status reply, send-pack terminates with non-zero status, and
in turn remote-curl exited with non-zero status before outputting
the blank line after the helper status printed by send-pack. No
error messages reach the user.
This caused users to see the following from git push over HTTP
when the remote side's update hook rejected the branch:
$ git push http://... master
Counting objects: 4, done.
Delta compression using up to 6 threads.
Compressing objects: 100% (2/2), done.
Writing objects: 100% (3/3), 301 bytes, done.
Total 3 (delta 0), reused 0 (delta 0)
$
Always print a blank line after the send-pack process terminates,
ensuring the helper status report (if it was output) will be
correctly parsed by the calling transport-helper.c. This ensures
the helper doesn't abort before the status report can be shown to
the user.
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-20 04:12:09 +01:00
|
|
|
if (ret)
|
|
|
|
exit(128); /* error already reported */
|
|
|
|
|
2020-05-18 17:47:19 +02:00
|
|
|
free_specs:
|
2020-07-28 22:25:12 +02:00
|
|
|
strvec_clear(&specs);
|
2009-10-31 01:47:30 +01:00
|
|
|
}
|
|
|
|
|
2018-03-15 18:31:41 +01:00
|
|
|
static int stateless_connect(const char *service_name)
|
|
|
|
{
|
|
|
|
struct discovery *discover;
|
2022-07-11 07:58:54 +02:00
|
|
|
struct rpc_state rpc = RPC_STATE_INIT;
|
2019-02-21 21:24:41 +01:00
|
|
|
struct strbuf buf = STRBUF_INIT;
|
2022-07-11 07:58:54 +02:00
|
|
|
const char *accept_language;
|
2018-03-15 18:31:41 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Run the info/refs request and see if the server supports protocol
|
|
|
|
* v2. If and only if the server supports v2 can we successfully
|
|
|
|
* establish a stateless connection, otherwise we need to tell the
|
|
|
|
* client to fallback to using other transport helper functions to
|
|
|
|
* complete their request.
|
|
|
|
*/
|
|
|
|
discover = discover_refs(service_name, 0);
|
|
|
|
if (discover->version != protocol_v2) {
|
|
|
|
printf("fallback\n");
|
|
|
|
fflush(stdout);
|
|
|
|
return -1;
|
|
|
|
} else {
|
|
|
|
/* Stateless Connection established */
|
|
|
|
printf("\n");
|
|
|
|
fflush(stdout);
|
|
|
|
}
|
2022-07-11 07:58:54 +02:00
|
|
|
accept_language = http_get_accept_language_header();
|
|
|
|
if (accept_language)
|
|
|
|
rpc.hdr_accept_language = xstrfmt("%s", accept_language);
|
2018-03-15 18:31:41 +01:00
|
|
|
|
2019-02-21 21:24:41 +01:00
|
|
|
rpc.service_name = service_name;
|
|
|
|
rpc.service_url = xstrfmt("%s%s", url.buf, rpc.service_name);
|
|
|
|
rpc.hdr_content_type = xstrfmt("Content-Type: application/x-%s-request", rpc.service_name);
|
|
|
|
rpc.hdr_accept = xstrfmt("Accept: application/x-%s-result", rpc.service_name);
|
|
|
|
if (get_protocol_http_header(discover->version, &buf)) {
|
|
|
|
rpc.protocol_header = strbuf_detach(&buf, NULL);
|
|
|
|
} else {
|
|
|
|
rpc.protocol_header = NULL;
|
|
|
|
strbuf_release(&buf);
|
|
|
|
}
|
|
|
|
rpc.buf = xmalloc(http_post_buffer);
|
|
|
|
rpc.alloc = http_post_buffer;
|
|
|
|
rpc.len = 0;
|
|
|
|
rpc.pos = 0;
|
|
|
|
rpc.in = 1;
|
|
|
|
rpc.out = 0;
|
|
|
|
rpc.any_written = 0;
|
|
|
|
rpc.gzip_request = 1;
|
|
|
|
rpc.initial_buffer = 0;
|
|
|
|
rpc.write_line_lengths = 1;
|
|
|
|
rpc.flush_read_but_not_sent = 0;
|
2018-03-15 18:31:41 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Dump the capability listing that we got from the server earlier
|
|
|
|
* during the info/refs request.
|
|
|
|
*/
|
2019-02-21 21:24:41 +01:00
|
|
|
write_or_die(rpc.in, discover->buf, discover->len);
|
|
|
|
|
|
|
|
/* Until we see EOF keep sending POSTs */
|
|
|
|
while (1) {
|
|
|
|
size_t avail;
|
|
|
|
enum packet_read_status status;
|
2018-03-15 18:31:41 +01:00
|
|
|
|
2019-02-21 21:24:41 +01:00
|
|
|
if (!rpc_read_from_out(&rpc, PACKET_READ_GENTLE_ON_EOF, &avail,
|
|
|
|
&status))
|
|
|
|
BUG("The entire rpc->buf should be larger than LARGE_PACKET_MAX");
|
|
|
|
if (status == PACKET_READ_EOF)
|
|
|
|
break;
|
2020-05-19 12:53:58 +02:00
|
|
|
if (post_rpc(&rpc, 1, status == PACKET_READ_FLUSH))
|
2018-03-15 18:31:41 +01:00
|
|
|
/* We would have an err here */
|
|
|
|
break;
|
2019-02-21 21:24:41 +01:00
|
|
|
/* Reset the buffer for next request */
|
|
|
|
rpc.len = 0;
|
2018-03-15 18:31:41 +01:00
|
|
|
}
|
|
|
|
|
2019-02-21 21:24:41 +01:00
|
|
|
free(rpc.service_url);
|
|
|
|
free(rpc.hdr_content_type);
|
|
|
|
free(rpc.hdr_accept);
|
2022-07-11 07:58:54 +02:00
|
|
|
free(rpc.hdr_accept_language);
|
2019-02-21 21:24:41 +01:00
|
|
|
free(rpc.protocol_header);
|
|
|
|
free(rpc.buf);
|
|
|
|
strbuf_release(&buf);
|
|
|
|
|
2018-03-15 18:31:41 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
add an extra level of indirection to main()
There are certain startup tasks that we expect every git
process to do. In some cases this is just to improve the
quality of the program (e.g., setting up gettext()). In
others it is a requirement for using certain functions in
libgit.a (e.g., system_path() expects that you have called
git_extract_argv0_path()).
Most commands are builtins and are covered by the git.c
version of main(). However, there are still a few external
commands that use their own main(). Each of these has to
remember to include the correct startup sequence, and we are
not always consistent.
Rather than just fix the inconsistencies, let's make this
harder to get wrong by providing a common main() that can
run this standard startup.
We basically have two options to do this:
- the compat/mingw.h file already does something like this by
adding a #define that replaces the definition of main with a
wrapper that calls mingw_startup().
The upside is that the code in each program doesn't need
to be changed at all; it's rewritten on the fly by the
preprocessor.
The downside is that it may make debugging of the startup
sequence a bit more confusing, as the preprocessor is
quietly inserting new code.
- the builtin functions are all of the form cmd_foo(),
and git.c's main() calls them.
This is much more explicit, which may make things more
obvious to somebody reading the code. It's also more
flexible (because of course we have to figure out _which_
cmd_foo() to call).
The downside is that each of the builtins must define
cmd_foo(), instead of just main().
This patch chooses the latter option, preferring the more
explicit approach, even though it is more invasive. We
introduce a new file common-main.c, with the "real" main. It
expects to call cmd_main() from whatever other objects it is
linked against.
We link common-main.o against anything that links against
libgit.a, since we know that such programs will need to do
this setup. Note that common-main.o can't actually go inside
libgit.a, as the linker would not pick up its main()
function automatically (it has no callers).
The rest of the patch is just adjusting all of the various
external programs (mostly in t/helper) to use cmd_main().
I've provided a global declaration for cmd_main(), which
means that all of the programs also need to match its
signature. In particular, many functions need to switch to
"const char **" instead of "char **" for argv. This effect
ripples out to a few other variables and functions, as well.
This makes the patch even more invasive, but the end result
is much better. We should be treating argv strings as const
anyway, and now all programs conform to the same signature
(which also matches the way builtins are defined).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-01 07:58:58 +02:00
|
|
|
int cmd_main(int argc, const char **argv)
|
2009-08-05 07:01:56 +02:00
|
|
|
{
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
2009-11-04 03:52:35 +01:00
|
|
|
int nongit;
|
2022-03-04 19:32:08 +01:00
|
|
|
int ret = 1;
|
2009-08-05 07:01:56 +02:00
|
|
|
|
2009-11-04 03:52:35 +01:00
|
|
|
setup_git_directory_gently(&nongit);
|
2009-08-05 07:01:56 +02:00
|
|
|
if (argc < 2) {
|
2019-03-06 00:20:40 +01:00
|
|
|
error(_("remote-curl: usage: git remote-curl <remote> [<url>]"));
|
2022-03-04 19:32:08 +01:00
|
|
|
goto cleanup;
|
2009-08-05 07:01:56 +02:00
|
|
|
}
|
|
|
|
|
2009-10-31 01:47:29 +01:00
|
|
|
options.verbosity = 1;
|
|
|
|
options.progress = !!isatty(2);
|
2009-10-31 01:47:41 +01:00
|
|
|
options.thin = 1;
|
string-list.[ch]: remove string_list_init() compatibility function
Remove this function left over to accommodate in-flight changes, see
770fedaf9fb (string-list.[ch]: add a string_list_init_{nodup,dup}(),
2021-07-01) for the recent change to add
"string_list_init_{nodup,dup}()" initializers.
There was only one user of the API left in remote-curl.c. I don't know
why I didn't include this change to remote-curl.c in
bc40dfb10a0 (string-list.h users: change to use *_{nodup,dup}(),
2021-07-01), perhaps I just missed it.
In any case, let's change that one user to use the new API, as of
writing this there are no in-flight changes that use, so this seems
like a good time to drop this before we get any new users of this
compatibility API.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-28 14:49:45 +02:00
|
|
|
string_list_init_dup(&options.deepen_not);
|
|
|
|
string_list_init_dup(&options.push_options);
|
2009-10-31 01:47:29 +01:00
|
|
|
|
2019-02-22 23:25:01 +01:00
|
|
|
/*
|
|
|
|
* Just report "remote-curl" here (folding all the various aliases
|
|
|
|
* ("git-remote-http", "git-remote-https", and etc.) here since they
|
|
|
|
* are all just copies of the same actual executable.
|
|
|
|
*/
|
|
|
|
trace2_cmd_name("remote-curl");
|
|
|
|
|
2009-08-05 07:01:56 +02:00
|
|
|
remote = remote_get(argv[1]);
|
|
|
|
|
|
|
|
if (argc > 2) {
|
2013-09-28 10:35:25 +02:00
|
|
|
end_url_with_slash(&url, argv[2]);
|
2009-08-05 07:01:56 +02:00
|
|
|
} else {
|
2013-09-28 10:35:25 +02:00
|
|
|
end_url_with_slash(&url, remote->url[0]);
|
2009-08-05 07:01:56 +02:00
|
|
|
}
|
|
|
|
|
2013-09-28 10:35:25 +02:00
|
|
|
http_init(remote, url.buf, 0);
|
2010-03-02 11:49:29 +01:00
|
|
|
|
2009-08-05 07:01:56 +02:00
|
|
|
do {
|
2014-06-18 21:48:29 +02:00
|
|
|
const char *arg;
|
|
|
|
|
2016-01-14 00:31:17 +01:00
|
|
|
if (strbuf_getline_lf(&buf, stdin) == EOF) {
|
2011-07-16 15:03:29 +02:00
|
|
|
if (ferror(stdin))
|
2019-03-06 00:20:40 +01:00
|
|
|
error(_("remote-curl: error reading command stream from git"));
|
2022-03-04 19:32:08 +01:00
|
|
|
goto cleanup;
|
2011-07-16 15:03:29 +02:00
|
|
|
}
|
|
|
|
if (buf.len == 0)
|
2009-08-05 07:01:56 +02:00
|
|
|
break;
|
2013-11-30 21:55:40 +01:00
|
|
|
if (starts_with(buf.buf, "fetch ")) {
|
2009-11-04 03:52:35 +01:00
|
|
|
if (nongit)
|
2019-03-06 00:20:40 +01:00
|
|
|
die(_("remote-curl: fetch attempted without a local repo"));
|
2009-10-31 01:47:28 +01:00
|
|
|
parse_fetch(&buf);
|
|
|
|
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (!strcmp(buf.buf, "list") || starts_with(buf.buf, "list ")) {
|
2009-10-31 01:47:40 +01:00
|
|
|
int for_push = !!strstr(buf.buf + 4, "for-push");
|
|
|
|
output_refs(get_refs(for_push));
|
2009-10-31 01:47:30 +01:00
|
|
|
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(buf.buf, "push ")) {
|
2009-10-31 01:47:30 +01:00
|
|
|
parse_push(&buf);
|
|
|
|
|
2014-06-18 21:48:29 +02:00
|
|
|
} else if (skip_prefix(buf.buf, "option ", &arg)) {
|
|
|
|
char *value = strchr(arg, ' ');
|
2009-10-31 01:47:29 +01:00
|
|
|
int result;
|
|
|
|
|
|
|
|
if (value)
|
|
|
|
*value++ = '\0';
|
|
|
|
else
|
|
|
|
value = "true";
|
|
|
|
|
2014-06-18 21:48:29 +02:00
|
|
|
result = set_option(arg, value);
|
2009-10-31 01:47:29 +01:00
|
|
|
if (!result)
|
|
|
|
printf("ok\n");
|
|
|
|
else if (result < 0)
|
|
|
|
printf("error invalid value\n");
|
|
|
|
else
|
|
|
|
printf("unsupported\n");
|
2009-08-05 07:01:56 +02:00
|
|
|
fflush(stdout);
|
2009-10-31 01:47:29 +01:00
|
|
|
|
2009-08-05 07:01:56 +02:00
|
|
|
} else if (!strcmp(buf.buf, "capabilities")) {
|
2018-03-15 18:31:41 +01:00
|
|
|
printf("stateless-connect\n");
|
2009-08-05 07:01:56 +02:00
|
|
|
printf("fetch\n");
|
2009-10-31 01:47:29 +01:00
|
|
|
printf("option\n");
|
2009-10-31 01:47:30 +01:00
|
|
|
printf("push\n");
|
2013-07-21 10:18:05 +02:00
|
|
|
printf("check-connectivity\n");
|
2020-05-25 21:59:04 +02:00
|
|
|
printf("object-format\n");
|
2009-08-05 07:01:56 +02:00
|
|
|
printf("\n");
|
|
|
|
fflush(stdout);
|
2018-03-15 18:31:41 +01:00
|
|
|
} else if (skip_prefix(buf.buf, "stateless-connect ", &arg)) {
|
|
|
|
if (!stateless_connect(arg))
|
|
|
|
break;
|
2009-08-05 07:01:56 +02:00
|
|
|
} else {
|
2019-03-06 00:20:40 +01:00
|
|
|
error(_("remote-curl: unknown command '%s' from git"), buf.buf);
|
2022-03-04 19:32:08 +01:00
|
|
|
goto cleanup;
|
2009-08-05 07:01:56 +02:00
|
|
|
}
|
|
|
|
strbuf_reset(&buf);
|
|
|
|
} while (1);
|
2010-03-02 11:49:29 +01:00
|
|
|
|
|
|
|
http_cleanup();
|
2022-03-04 19:32:08 +01:00
|
|
|
ret = 0;
|
|
|
|
cleanup:
|
|
|
|
strbuf_release(&buf);
|
2010-03-02 11:49:29 +01:00
|
|
|
|
2022-03-04 19:32:08 +01:00
|
|
|
return ret;
|
2009-08-05 07:01:56 +02:00
|
|
|
}
|