git-commit-vandalism/credential.c

515 lines
12 KiB
C
Raw Normal View History

#include "cache.h"
#include "config.h"
#include "credential.h"
#include "string-list.h"
#include "run-command.h"
#include "url.h"
#include "prompt.h"
#include "sigchain.h"
#include "urlmatch.h"
void credential_init(struct credential *c)
{
struct credential blank = CREDENTIAL_INIT;
memcpy(c, &blank, sizeof(*c));
}
/*
 * Release every string owned by `c` together with its helper list, then
 * reset the struct via credential_init() so that it can be reused.
 */
void credential_clear(struct credential *c)
{
	char *owned[] = {
		c->protocol,
		c->host,
		c->path,
		c->username,
		c->password,
	};
	size_t idx;

	for (idx = 0; idx < sizeof(owned) / sizeof(owned[0]); idx++)
		free(owned[idx]);

	string_list_clear(&c->helpers, 0);
	credential_init(c);
}
/*
 * One field of a pattern matches when the pattern leaves it unset, or when
 * the candidate has the field set to exactly the same string.
 */
static int credential_field_matches(const char *wanted, const char *actual)
{
	if (!wanted)
		return 1;
	return actual && !strcmp(wanted, actual);
}

/*
 * Return non-zero when `have` satisfies the pattern `want`. Only protocol,
 * host, path and username take part in the comparison; fields that are
 * NULL in `want` act as wildcards.
 */
int credential_match(const struct credential *want,
		     const struct credential *have)
{
	return credential_field_matches(want->protocol, have->protocol) &&
	       credential_field_matches(want->host, have->host) &&
	       credential_field_matches(want->path, have->path) &&
	       credential_field_matches(want->username, have->username);
}
/*
 * Forward declaration: match_partial_url() below calls this before its
 * definition appears (the definition is expected further down in this file).
 */
static int credential_from_potentially_partial_url(struct credential *c,
const char *url);
/*
 * Config callback that applies a single "credential.<key>" setting to the
 * credential passed in through `data`.
 *
 * Recognized keys:
 *   helper      - a non-empty value is appended to the helper list, an
 *                 empty value resets the list
 *   username    - replaces c->username, unless username_from_proto is set
 *                 (credential_read() sets that flag when it reads a
 *                 "username" line)
 *   usehttppath - parsed as a boolean into c->use_http_path
 *
 * Variables outside the "credential." namespace and unknown keys are
 * silently ignored. Returns 0, or the result of config_error_nonbool()
 * when the variable has no value.
 */
static int credential_config_callback(const char *var, const char *value,
				      void *data)
{
	struct credential *c = data;
	const char *key;

	if (!skip_prefix(var, "credential.", &key))
		return 0;

	if (!value)
		return config_error_nonbool(var);

	if (!strcmp(key, "helper")) {
		if (*value)
			string_list_append(&c->helpers, value);
		else
			string_list_clear(&c->helpers, 0);
	} else if (!strcmp(key, "username")) {
		if (!c->username_from_proto) {
			free(c->username);
			c->username = xstrdup(value);
		}
	} else if (!strcmp(key, "usehttppath")) {
		c->use_http_path = git_config_bool(var, value);
	}

	return 0;
}
credential: make relevance of http path configurable When parsing a URL into a credential struct, we carefully record each part of the URL, including the path on the remote host, and use the result as part of the credential context. This had two practical implications: 1. Credential helpers which store a credential for later access are likely to use the "path" portion as part of the storage key. That means that a request to https://example.com/foo.git would not use the same credential that was stored in an earlier request for: https://example.com/bar.git 2. The prompt shown to the user includes all relevant context, including the path. In most cases, however, users will have a single password per host. The behavior in (1) will be inconvenient, and the prompt in (2) will be overly long. This patch introduces a config option to toggle the relevance of http paths. When turned on, we use the path as before. When turned off, we drop the path component from the context: helpers don't see it, and it does not appear in the prompt. This is nothing you couldn't do with a clever credential helper at the start of your stack, like: [credential "http://"] helper = "!f() { grep -v ^path= ; }; f" helper = your_real_helper But doing this: [credential] useHttpPath = false is way easier and more readable. Furthermore, since most users will want the "off" behavior, that is the new default. Users who want it "on" can set the variable (either for all credentials, or just for a subset using credential.*.useHttpPath). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-12-10 11:31:34 +01:00
/*
 * Return 1 when `s` is exactly "http" or "https" (case-sensitive), and 0
 * otherwise, including when `s` is NULL.
 */
static int proto_is_http(const char *s)
{
	return s && (!strcmp(s, "http") || !strcmp(s, "https"));
}
/*
 * Forward declarations; both functions are defined further down in this
 * file. credential_format() is called by credential_apply_config() before
 * its definition appears.
 */
static void credential_describe(struct credential *c, struct strbuf *out);
static void credential_format(struct credential *c, struct strbuf *out);
/*
 * Installed as config.select_fn by credential_apply_config(). Both items
 * are ignored and the result is always 0.
 *
 * NOTE(review): presumably this makes the urlmatch machinery keep every
 * matching entry rather than only the most specific one -- confirm against
 * urlmatch.c.
 */
static int select_all(const struct urlmatch_item *a,
		      const struct urlmatch_item *b)
{
	(void)a;
	(void)b;
	return 0;
}
static int match_partial_url(const char *url, void *cb)
{
struct credential *c = cb;
struct credential want = CREDENTIAL_INIT;
int matches = 0;
if (credential_from_potentially_partial_url(&want, url) < 0)
warning(_("skipping credential lookup for key: credential.%s"),
url);
else
matches = credential_match(&want, c);
credential_clear(&want);
return matches;
}
static void credential_apply_config(struct credential *c)
{
char *normalized_url;
struct urlmatch_config config = URLMATCH_CONFIG_INIT;
struct strbuf url = STRBUF_INIT;
credential: refuse to operate when missing host or protocol The credential helper protocol was designed to be very flexible: the fields it takes as input are treated as a pattern, and any missing fields are taken as wildcards. This allows unusual things like: echo protocol=https | git credential reject to delete all stored https credentials (assuming the helpers themselves treat the input that way). But when helpers are invoked automatically by Git, this flexibility works against us. If for whatever reason we don't have a "host" field, then we'd match _any_ host. When you're filling a credential to send to a remote server, this is almost certainly not what you want. Prevent this at the layer that writes to the credential helper. Add a check to the credential API that the host and protocol are always passed in, and add an assertion to the credential_write function that speaks credential helper protocol to be doubly sure. There are a few ways this can be triggered in practice: - the "git credential" command passes along arbitrary credential parameters it reads from stdin. - until the previous patch, when the host field of a URL is empty, we would leave it unset (rather than setting it to the empty string) - a URL like "example.com/foo.git" is treated by curl as if "http://" was present, but our parser sees it as a non-URL and leaves all fields unset - the recent fix for URLs with embedded newlines blanks the URL but otherwise continues. Rather than having the desired effect of looking up no credential at all, many helpers will return _any_ credential Our earlier test for an embedded newline didn't catch this because it only checked that the credential was cleared, but didn't configure an actual helper. Configuring the "verbatim" helper in the test would show that it is invoked (it's obviously a silly helper which doesn't look at its input, but the point is that it shouldn't be run at all). 
Since we're switching this case to die(), we don't need to bother with a helper. We can see the new behavior just by checking that the operation fails. We'll add new tests covering partial input as well (these can be triggered through various means with url-parsing, but it's simpler to just check them directly, as we know we are covered even if the url parser changes behavior in the future). [jn: changed to die() instead of logging and showing a manual username/password prompt] Reported-by: Carlo Arenas <carenas@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
2020-04-19 05:50:48 +02:00
if (!c->host)
die(_("refusing to work with credential missing host field"));
if (!c->protocol)
die(_("refusing to work with credential missing protocol field"));
if (c->configured)
return;
config.section = "credential";
config.key = NULL;
config.collect_fn = credential_config_callback;
config.cascade_fn = NULL;
config.select_fn = select_all;
config.fallback_match_fn = match_partial_url;
config.cb = c;
credential_format(c, &url);
normalized_url = url_normalize(url.buf, &config.url);
git_config(urlmatch_config_entry, &config);
string_list_clear(&config.vars, 1);
free(normalized_url);
urlmatch_config_release(&config);
strbuf_release(&url);
c->configured = 1;
credential: make relevance of http path configurable When parsing a URL into a credential struct, we carefully record each part of the URL, including the path on the remote host, and use the result as part of the credential context. This had two practical implications: 1. Credential helpers which store a credential for later access are likely to use the "path" portion as part of the storage key. That means that a request to https://example.com/foo.git would not use the same credential that was stored in an earlier request for: https://example.com/bar.git 2. The prompt shown to the user includes all relevant context, including the path. In most cases, however, users will have a single password per host. The behavior in (1) will be inconvenient, and the prompt in (2) will be overly long. This patch introduces a config option to toggle the relevance of http paths. When turned on, we use the path as before. When turned off, we drop the path component from the context: helpers don't see it, and it does not appear in the prompt. This is nothing you couldn't do with a clever credential helper at the start of your stack, like: [credential "http://"] helper = "!f() { grep -v ^path= ; }; f" helper = your_real_helper But doing this: [credential] useHttpPath = false is way easier and more readable. Furthermore, since most users will want the "off" behavior, that is the new default. Users who want it "on" can set the variable (either for all credentials, or just for a subset using credential.*.useHttpPath). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-12-10 11:31:34 +01:00
if (!c->use_http_path && proto_is_http(c->protocol)) {
FREE_AND_NULL(c->path);
credential: make relevance of http path configurable When parsing a URL into a credential struct, we carefully record each part of the URL, including the path on the remote host, and use the result as part of the credential context. This had two practical implications: 1. Credential helpers which store a credential for later access are likely to use the "path" portion as part of the storage key. That means that a request to https://example.com/foo.git would not use the same credential that was stored in an earlier request for: https://example.com/bar.git 2. The prompt shown to the user includes all relevant context, including the path. In most cases, however, users will have a single password per host. The behavior in (1) will be inconvenient, and the prompt in (2) will be overly long. This patch introduces a config option to toggle the relevance of http paths. When turned on, we use the path as before. When turned off, we drop the path component from the context: helpers don't see it, and it does not appear in the prompt. This is nothing you couldn't do with a clever credential helper at the start of your stack, like: [credential "http://"] helper = "!f() { grep -v ^path= ; }; f" helper = your_real_helper But doing this: [credential] useHttpPath = false is way easier and more readable. Furthermore, since most users will want the "off" behavior, that is the new default. Users who want it "on" can set the variable (either for all credentials, or just for a subset using credential.*.useHttpPath). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-12-10 11:31:34 +01:00
}
}
/*
 * Append a human-readable "proto://[user@]host[/path]" rendering of `c` to
 * `out`, without any escaping. Nothing is appended when the protocol is
 * unset. An empty username is omitted.
 */
static void credential_describe(struct credential *c, struct strbuf *out)
{
	const char *user = c->username;

	if (c->protocol) {
		strbuf_addf(out, "%s://", c->protocol);
		if (user && user[0] != '\0')
			strbuf_addf(out, "%s@", user);
		if (c->host)
			strbuf_addstr(out, c->host);
		if (c->path)
			strbuf_addf(out, "/%s", c->path);
	}
}
/*
 * Append a URL form of `c` to `out`: "proto://[user@]host[/path]".
 *
 * Unlike credential_describe(), the username and path are percent-encoded
 * (the username with STRBUF_ENCODE_SLASH, the path with no extra flags).
 * Nothing is appended when the protocol is unset, and an empty username is
 * omitted.
 */
static void credential_format(struct credential *c, struct strbuf *out)
{
	const char *user = c->username;

	if (c->protocol) {
		strbuf_addf(out, "%s://", c->protocol);

		if (user && user[0] != '\0') {
			strbuf_add_percentencode(out, user, STRBUF_ENCODE_SLASH);
			strbuf_addch(out, '@');
		}

		if (c->host)
			strbuf_addstr(out, c->host);

		if (c->path) {
			strbuf_addch(out, '/');
			strbuf_add_percentencode(out, c->path, 0);
		}
	}
}
static char *credential_ask_one(const char *what, struct credential *c,
int flags)
{
struct strbuf desc = STRBUF_INIT;
struct strbuf prompt = STRBUF_INIT;
char *r;
credential_describe(c, &desc);
if (desc.len)
strbuf_addf(&prompt, "%s for '%s': ", what, desc.buf);
else
strbuf_addf(&prompt, "%s: ", what);
r = git_prompt(prompt.buf, flags);
strbuf_release(&desc);
strbuf_release(&prompt);
return xstrdup(r);
}
/*
 * Interactively ask for whichever of username and password is still
 * missing from `c`. The username prompt additionally sets PROMPT_ECHO.
 */
static void credential_getpass(struct credential *c)
{
	if (!c->username) {
		const int username_flags = PROMPT_ASKPASS|PROMPT_ECHO;

		c->username = credential_ask_one("Username", c, username_flags);
	}

	if (!c->password) {
		c->password = credential_ask_one("Password", c, PROMPT_ASKPASS);
	}
}
int credential_read(struct credential *c, FILE *fp)
{
struct strbuf line = STRBUF_INIT;
credential: treat CR/LF as line endings in the credential protocol This fix makes using Git credentials more friendly to Windows users: it allows a credential helper to communicate using CR/LF line endings ("DOS line endings" commonly found on Windows) instead of LF-only line endings ("Unix line endings"). Note that this changes the behavior a bit: if a credential helper produces, say, a password with a trailing Carriage Return character, that will now be culled even when the rest of the lines end only in Line Feed characters, indicating that the Carriage Return was not meant to be part of the line ending. In practice, it seems _very_ unlikely that something like this happens. Passwords usually need to consist of non-control characters, URLs need to have special characters URL-encoded, and user names, well, are names. However, it _does_ help on Windows, where CR/LF line endings are common: as unrecognized commands are simply ignored by the credential machinery, even a command like `quit\r` (which is clearly intended to abort) would simply be ignored (silently) by Git. So let's change the credential machinery to accept both CR/LF and LF line endings. While we do this for the credential helper protocol, we do _not_ adjust `git credential-cache--daemon` (which won't work on Windows, anyway, because it requires Unix sockets) nor `git credential-store` (which writes the file `~/.git-credentials` which we consider an implementation detail that should be opaque to the user, read: we do expect users _not_ to edit this file manually). Signed-off-by: Nikita Leonov <nykyta.leonov@gmail.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-03 15:29:12 +02:00
while (strbuf_getline(&line, fp) != EOF) {
char *key = line.buf;
char *value = strchr(key, '=');
if (!line.len)
break;
if (!value) {
warning("invalid credential line: %s", key);
strbuf_release(&line);
return -1;
}
*value++ = '\0';
if (!strcmp(key, "username")) {
free(c->username);
c->username = xstrdup(value);
c->username_from_proto = 1;
} else if (!strcmp(key, "password")) {
free(c->password);
c->password = xstrdup(value);
} else if (!strcmp(key, "protocol")) {
free(c->protocol);
c->protocol = xstrdup(value);
} else if (!strcmp(key, "host")) {
free(c->host);
c->host = xstrdup(value);
} else if (!strcmp(key, "path")) {
free(c->path);
c->path = xstrdup(value);
} else if (!strcmp(key, "url")) {
credential_from_url(c, value);
} else if (!strcmp(key, "quit")) {
c->quit = !!git_config_bool("quit", value);
}
/*
* Ignore other lines; we don't know what they mean, but
* this future-proofs us when later versions of git do
* learn new lines, and the helpers are updated to match.
*/
}
strbuf_release(&line);
return 0;
}
credential: refuse to operate when missing host or protocol The credential helper protocol was designed to be very flexible: the fields it takes as input are treated as a pattern, and any missing fields are taken as wildcards. This allows unusual things like: echo protocol=https | git credential reject to delete all stored https credentials (assuming the helpers themselves treat the input that way). But when helpers are invoked automatically by Git, this flexibility works against us. If for whatever reason we don't have a "host" field, then we'd match _any_ host. When you're filling a credential to send to a remote server, this is almost certainly not what you want. Prevent this at the layer that writes to the credential helper. Add a check to the credential API that the host and protocol are always passed in, and add an assertion to the credential_write function that speaks credential helper protocol to be doubly sure. There are a few ways this can be triggered in practice: - the "git credential" command passes along arbitrary credential parameters it reads from stdin. - until the previous patch, when the host field of a URL is empty, we would leave it unset (rather than setting it to the empty string) - a URL like "example.com/foo.git" is treated by curl as if "http://" was present, but our parser sees it as a non-URL and leaves all fields unset - the recent fix for URLs with embedded newlines blanks the URL but otherwise continues. Rather than having the desired effect of looking up no credential at all, many helpers will return _any_ credential Our earlier test for an embedded newline didn't catch this because it only checked that the credential was cleared, but didn't configure an actual helper. Configuring the "verbatim" helper in the test would show that it is invoked (it's obviously a silly helper which doesn't look at its input, but the point is that it shouldn't be run at all). 
Since we're switching this case to die(), we don't need to bother with a helper. We can see the new behavior just by checking that the operation fails. We'll add new tests covering partial input as well (these can be triggered through various means with url-parsing, but it's simpler to just check them directly, as we know we are covered even if the url parser changes behavior in the future). [jn: changed to die() instead of logging and showing a manual username/password prompt] Reported-by: Carlo Arenas <carenas@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
2020-04-19 05:50:48 +02:00
/*
 * Write one "key=value" line of the credential protocol to `fp`.
 *
 * A NULL `value` is skipped silently, unless `required` is set, in which
 * case it is reported through BUG(). A value containing a newline cannot
 * be represented in the line-based format, so it makes us die() rather
 * than emit something that would be read back as additional lines.
 */
static void credential_write_item(FILE *fp, const char *key, const char *value,
				  int required)
{
	if (!value && required)
		BUG("credential value for %s is missing", key);
	if (!value)
		return;
	if (strchr(value, '\n'))
		die("credential value for %s contains newline", key);
	fprintf(fp, "%s=%s\n", key, value);
}
void credential_write(const struct credential *c, FILE *fp)
{
credential: refuse to operate when missing host or protocol The credential helper protocol was designed to be very flexible: the fields it takes as input are treated as a pattern, and any missing fields are taken as wildcards. This allows unusual things like: echo protocol=https | git credential reject to delete all stored https credentials (assuming the helpers themselves treat the input that way). But when helpers are invoked automatically by Git, this flexibility works against us. If for whatever reason we don't have a "host" field, then we'd match _any_ host. When you're filling a credential to send to a remote server, this is almost certainly not what you want. Prevent this at the layer that writes to the credential helper. Add a check to the credential API that the host and protocol are always passed in, and add an assertion to the credential_write function that speaks credential helper protocol to be doubly sure. There are a few ways this can be triggered in practice: - the "git credential" command passes along arbitrary credential parameters it reads from stdin. - until the previous patch, when the host field of a URL is empty, we would leave it unset (rather than setting it to the empty string) - a URL like "example.com/foo.git" is treated by curl as if "http://" was present, but our parser sees it as a non-URL and leaves all fields unset - the recent fix for URLs with embedded newlines blanks the URL but otherwise continues. Rather than having the desired effect of looking up no credential at all, many helpers will return _any_ credential Our earlier test for an embedded newline didn't catch this because it only checked that the credential was cleared, but didn't configure an actual helper. Configuring the "verbatim" helper in the test would show that it is invoked (it's obviously a silly helper which doesn't look at its input, but the point is that it shouldn't be run at all). 
Since we're switching this case to die(), we don't need to bother with a helper. We can see the new behavior just by checking that the operation fails. We'll add new tests covering partial input as well (these can be triggered through various means with url-parsing, but it's simpler to just check them directly, as we know we are covered even if the url parser changes behavior in the future). [jn: changed to die() instead of logging and showing a manual username/password prompt] Reported-by: Carlo Arenas <carenas@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
2020-04-19 05:50:48 +02:00
credential_write_item(fp, "protocol", c->protocol, 1);
credential_write_item(fp, "host", c->host, 1);
credential_write_item(fp, "path", c->path, 0);
credential_write_item(fp, "username", c->username, 0);
credential_write_item(fp, "password", c->password, 0);
}
/*
 * Run the helper command line `cmd` through the shell, feed it `c` on its
 * standard input and, when `want_output` is set, parse its standard output
 * back into `c` with credential_read().
 *
 * Returns 0 on success and -1 when the helper cannot be started, its
 * output cannot be parsed, or finish_command() reports failure.
 */
static int run_credential_helper(struct credential *c,
				 const char *cmd,
				 int want_output)
{
	struct child_process helper = CHILD_PROCESS_INIT;
	FILE *to_helper;
	int read_failed = 0;

	strvec_push(&helper.args, cmd);
	helper.use_shell = 1;
	helper.in = -1;
	if (!want_output)
		helper.no_stdout = 1;
	else
		helper.out = -1;

	if (start_command(&helper) < 0)
		return -1;

	/* SIGPIPE is ignored while the credential is written to the helper. */
	to_helper = xfdopen(helper.in, "w");
	sigchain_push(SIGPIPE, SIG_IGN);
	credential_write(c, to_helper);
	fclose(to_helper);
	sigchain_pop(SIGPIPE);

	if (want_output) {
		FILE *from_helper = xfdopen(helper.out, "r");

		read_failed = credential_read(c, from_helper) < 0;
		fclose(from_helper);
	}

	/* The child is always reaped, even when its output was unusable. */
	if (finish_command(&helper) || read_failed)
		return -1;
	return 0;
}
/*
 * Build the command line for `helper` and run it with `operation`
 * appended as its argument.
 *
 * The helper string is interpreted as follows:
 *   "!cmd"         - the text after '!' is used as the command
 *   absolute path  - used as-is
 *   anything else  - expanded to "git credential-<helper>"
 *
 * Helper output is read back only for the "get" operation. Returns the
 * result of run_credential_helper().
 */
static int credential_do(struct credential *c, const char *helper,
			 const char *operation)
{
	struct strbuf cmd = STRBUF_INIT;
	int status;

	if (helper[0] != '!' && !is_absolute_path(helper))
		strbuf_addf(&cmd, "git credential-%s", helper);
	else
		strbuf_addstr(&cmd, helper[0] == '!' ? helper + 1 : helper);
	strbuf_addf(&cmd, " %s", operation);

	status = run_credential_helper(c, cmd.buf, !strcmp(operation, "get"));

	strbuf_release(&cmd);
	return status;
}
/*
 * Make sure `c` has a username and a password.
 *
 * Nothing happens when both are already set. Otherwise the configuration
 * is applied and each configured helper is asked with "get", in order,
 * until both fields are filled. A helper that sets c->quit without
 * filling both fields makes us die. If no helper succeeds, the user is
 * prompted.
 */
void credential_fill(struct credential *c)
{
	int i;

	if (c->username && c->password)
		return;

	credential_apply_config(c);

	for (i = 0; i < c->helpers.nr; i++) {
		credential_do(c, c->helpers.items[i].string, "get");

		if (!c->username || !c->password) {
			if (c->quit)
				die("credential helper '%s' told us to quit",
				    c->helpers.items[i].string);
			continue;
		}
		return;
	}

	credential_getpass(c);
	if (!c->username && !c->password)
		die("unable to get password from user");
}
/*
 * Tell every configured helper to "store" the credential.
 *
 * This is a no-op when `c` was already approved or when it lacks a
 * username or a password. On completion c->approved is set so that
 * repeated calls do not contact the helpers again.
 */
void credential_approve(struct credential *c)
{
	int i;

	if (c->approved || !c->username || !c->password)
		return;

	credential_apply_config(c);

	i = 0;
	while (i < c->helpers.nr) {
		credential_do(c, c->helpers.items[i].string, "store");
		i++;
	}

	c->approved = 1;
}
/*
 * Tell every configured helper to "erase" the credential, then forget the
 * username and password held in `c` and clear its approved flag.
 */
void credential_reject(struct credential *c)
{
	int i;

	credential_apply_config(c);

	i = 0;
	while (i < c->helpers.nr) {
		credential_do(c, c->helpers.items[i].string, "erase");
		i++;
	}

	FREE_AND_NULL(c->username);
	FREE_AND_NULL(c->password);
	c->approved = 0;
}
credential: detect unrepresentable values when parsing urls The credential protocol can't represent newlines in values, but URLs can embed percent-encoded newlines in various components. A previous commit taught the low-level writing routines to die() when encountering this, but we can be a little friendlier to the user by detecting them earlier and handling them gracefully. This patch teaches credential_from_url() to notice such components, issue a warning, and blank the credential (which will generally result in prompting the user for a username and password). We blank the whole credential in this case. Another option would be to blank only the invalid component. However, we're probably better off not feeding a partially-parsed URL result to a credential helper. We don't know how a given helper would handle it, so we're better off to err on the side of matching nothing rather than something unexpected. The die() call in credential_write() is _probably_ impossible to reach after this patch. Values should end up in credential structs only by URL parsing (which is covered here), or by reading credential protocol input (which by definition cannot read a newline into a value). But we should definitely keep the low-level check, as it's our final and most accurate line of defense against protocol injection attacks. Arguably it could become a BUG(), but it probably doesn't matter much either way. Note that the public interface of credential_from_url() grows a little more than we need here. We'll use the extra flexibility in a future patch to help fsck catch these cases.
2020-03-12 06:31:11 +01:00
/*
 * Check that one parsed URL component can be represented in the
 * line-based credential protocol, i.e. that it contains no newline.
 *
 * `name` is the component's name and `url` the full URL; both are used
 * only in the warning, which is suppressed when `quiet` is set. A NULL
 * `value` is accepted. Returns 0 when the component is fine and -1 when
 * it contains a newline.
 */
static int check_url_component(const char *url, int quiet,
			       const char *name, const char *value)
{
	if (value && strchr(value, '\n')) {
		if (!quiet)
			warning(_("url contains a newline in its %s component: %s"),
				name, url);
		return -1;
	}
	return 0;
}
credential: optionally allow partial URLs in credential_from_url_gently() Prior to the fixes for CVE-2020-11008, we were _very_ lenient in what we required from a URL in order to parse it into a `struct credential`. That led to serious vulnerabilities. There was one call site, though, that really needed that leniency: when parsing config settings a la `credential.dev.azure.com.useHTTPPath`. Settings like this might be desired when users want to use, say, a given user name on a given host, regardless of the protocol to be used. In preparation for fixing that bug, let's refactor the code to optionally allow for partial URLs. For the moment, this functionality is only exposed via the now-renamed function `credential_from_url_1()`, but it is not used. The intention is to make it easier to verify that this commit does not change the existing behavior unless explicitly allowing for partial URLs. Please note that this patch does more than just reinstating a way to imitate the behavior before those CVE-2020-11008 fixes: Before that, we would simply ignore URLs without a protocol. In other words, misleadingly, the following setting would be applied to _all_ URLs: [credential "example.com"] username = that-me The obvious intention is to match the host name only. With this patch, we allow precisely that: when parsing the URL with non-zero `allow_partial_url`, we do not simply return success if there was no protocol, but we simply leave the protocol unset and continue parsing the URL. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Reviewed-by: Carlo Marcelo Arenas Belón <carenas@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-24 13:49:51 +02:00
/*
* Potentially-partial URLs can, but do not have to, contain
*
* - a protocol (or scheme) of the form "<protocol>://"
*
* - a host name (the part after the protocol and before the first slash after
* that, if any)
*
* - a user name and potentially a password (as "<user>[:<password>]@" part of
* the host name)
*
* - a path (the part after the host name, if any, starting with the slash)
*
* Missing parts will be left unset in `struct credential`. Thus, `https://`
* will have only the `protocol` set, `example.com` only the host name, and
* `/git` only the path.
*
* Note that an empty host name in an otherwise fully-qualified URL (e.g.
* `cert:///path/to/cert.pem`) will be treated as unset if we expect the URL to
* be potentially partial, and only then (otherwise, the empty string is used).
*
* The credential_from_url() function does not allow partial URLs.
*/
static int credential_from_url_1(struct credential *c, const char *url,
int allow_partial_url, int quiet)
{
const char *at, *colon, *cp, *slash, *host, *proto_end;
credential_clear(c);
/*
* Match one of:
* (1) proto://<host>/...
* (2) proto://<user>@<host>/...
* (3) proto://<user>:<pass>@<host>/...
*/
proto_end = strstr(url, "://");
credential: optionally allow partial URLs in credential_from_url_gently() Prior to the fixes for CVE-2020-11008, we were _very_ lenient in what we required from a URL in order to parse it into a `struct credential`. That led to serious vulnerabilities. There was one call site, though, that really needed that leniency: when parsing config settings a la `credential.dev.azure.com.useHTTPPath`. Settings like this might be desired when users want to use, say, a given user name on a given host, regardless of the protocol to be used. In preparation for fixing that bug, let's refactor the code to optionally allow for partial URLs. For the moment, this functionality is only exposed via the now-renamed function `credential_from_url_1()`, but it is not used. The intention is to make it easier to verify that this commit does not change the existing behavior unless explicitly allowing for partial URLs. Please note that this patch does more than just reinstating a way to imitate the behavior before those CVE-2020-11008 fixes: Before that, we would simply ignore URLs without a protocol. In other words, misleadingly, the following setting would be applied to _all_ URLs: [credential "example.com"] username = that-me The obvious intention is to match the host name only. With this patch, we allow precisely that: when parsing the URL with non-zero `allow_partial_url`, we do not simply return success if there was no protocol, but we simply leave the protocol unset and continue parsing the URL. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Reviewed-by: Carlo Marcelo Arenas Belón <carenas@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-24 13:49:51 +02:00
if (!allow_partial_url && (!proto_end || proto_end == url)) {
credential: treat URL without scheme as invalid libcurl permits making requests without a URL scheme specified. In this case, it guesses the URL from the hostname, so I can run git ls-remote http::ftp.example.com/path/to/repo and it would make an FTP request. Any user intentionally using such a URL is likely to have made a typo. Unfortunately, credential_from_url is not able to determine the host and protocol in order to determine appropriate credentials to send, and until "credential: refuse to operate when missing host or protocol", this resulted in another host's credentials being leaked to the named host. Teach credential_from_url_gently to consider such a URL to be invalid so that fsck can detect and block gitmodules files with such URLs, allowing server operators to avoid serving them to downstream users running older versions of Git. This also means that when such URLs are passed on the command line, Git will print a clearer error so affected users can switch to the simpler URL that explicitly specifies the host and protocol they intend. One subtlety: .gitmodules files can contain relative URLs, representing a URL relative to the URL they were cloned from. The relative URL resolver used for .gitmodules can follow ".." components out of the path part and past the host part of a URL, meaning that such a relative URL can be used to traverse from a https://foo.example.com/innocent superproject to a https::attacker.example.com/exploit submodule. Fortunately a leading ':' in the first path component after a series of leading './' and '../' components is unlikely to show up in other contexts, so we can catch this by detecting that pattern. Reported-by: Jeff King <peff@peff.net> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Reviewed-by: Jeff King <peff@peff.net>
2020-04-19 05:54:13 +02:00
if (!quiet)
warning(_("url has no scheme: %s"), url);
return -1;
}
credential: optionally allow partial URLs in credential_from_url_gently() Prior to the fixes for CVE-2020-11008, we were _very_ lenient in what we required from a URL in order to parse it into a `struct credential`. That led to serious vulnerabilities. There was one call site, though, that really needed that leniency: when parsing config settings a la `credential.dev.azure.com.useHTTPPath`. Settings like this might be desired when users want to use, say, a given user name on a given host, regardless of the protocol to be used. In preparation for fixing that bug, let's refactor the code to optionally allow for partial URLs. For the moment, this functionality is only exposed via the now-renamed function `credential_from_url_1()`, but it is not used. The intention is to make it easier to verify that this commit does not change the existing behavior unless explicitly allowing for partial URLs. Please note that this patch does more than just reinstating a way to imitate the behavior before those CVE-2020-11008 fixes: Before that, we would simply ignore URLs without a protocol. In other words, misleadingly, the following setting would be applied to _all_ URLs: [credential "example.com"] username = that-me The obvious intention is to match the host name only. With this patch, we allow precisely that: when parsing the URL with non-zero `allow_partial_url`, we do not simply return success if there was no protocol, but we simply leave the protocol unset and continue parsing the URL. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Reviewed-by: Carlo Marcelo Arenas Belón <carenas@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-24 13:49:51 +02:00
cp = proto_end ? proto_end + 3 : url;
at = strchr(cp, '@');
colon = strchr(cp, ':');
/*
* A query or fragment marker before the slash ends the host portion.
* We'll just continue to call this "slash" for simplicity. Notably our
* "trim leading slashes" part won't skip over this part of the path,
* but that's what we'd want.
*/
slash = cp + strcspn(cp, "/?#");
if (!at || slash <= at) {
/* Case (1) */
host = cp;
}
else if (!colon || at <= colon) {
/* Case (2) */
c->username = url_decode_mem(cp, at - cp);
if (c->username && *c->username)
c->username_from_proto = 1;
host = at + 1;
} else {
/* Case (3) */
c->username = url_decode_mem(cp, colon - cp);
if (c->username && *c->username)
c->username_from_proto = 1;
c->password = url_decode_mem(colon + 1, at - (colon + 1));
host = at + 1;
}
credential: optionally allow partial URLs in credential_from_url_gently() Prior to the fixes for CVE-2020-11008, we were _very_ lenient in what we required from a URL in order to parse it into a `struct credential`. That led to serious vulnerabilities. There was one call site, though, that really needed that leniency: when parsing config settings a la `credential.dev.azure.com.useHTTPPath`. Settings like this might be desired when users want to use, say, a given user name on a given host, regardless of the protocol to be used. In preparation for fixing that bug, let's refactor the code to optionally allow for partial URLs. For the moment, this functionality is only exposed via the now-renamed function `credential_from_url_1()`, but it is not used. The intention is to make it easier to verify that this commit does not change the existing behavior unless explicitly allowing for partial URLs. Please note that this patch does more than just reinstating a way to imitate the behavior before those CVE-2020-11008 fixes: Before that, we would simply ignore URLs without a protocol. In other words, misleadingly, the following setting would be applied to _all_ URLs: [credential "example.com"] username = that-me The obvious intention is to match the host name only. With this patch, we allow precisely that: when parsing the URL with non-zero `allow_partial_url`, we do not simply return success if there was no protocol, but we simply leave the protocol unset and continue parsing the URL. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Reviewed-by: Carlo Marcelo Arenas Belón <carenas@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-24 13:49:51 +02:00
if (proto_end && proto_end - url > 0)
c->protocol = xmemdupz(url, proto_end - url);
if (!allow_partial_url || slash - host > 0)
c->host = url_decode_mem(host, slash - host);
/* Trim leading and trailing slashes from path */
while (*slash == '/')
slash++;
if (*slash) {
char *p;
c->path = url_decode(slash);
p = c->path + strlen(c->path) - 1;
while (p > c->path && *p == '/')
*p-- = '\0';
}
credential: detect unrepresentable values when parsing urls The credential protocol can't represent newlines in values, but URLs can embed percent-encoded newlines in various components. A previous commit taught the low-level writing routines to die() when encountering this, but we can be a little friendlier to the user by detecting them earlier and handling them gracefully. This patch teaches credential_from_url() to notice such components, issue a warning, and blank the credential (which will generally result in prompting the user for a username and password). We blank the whole credential in this case. Another option would be to blank only the invalid component. However, we're probably better off not feeding a partially-parsed URL result to a credential helper. We don't know how a given helper would handle it, so we're better off to err on the side of matching nothing rather than something unexpected. The die() call in credential_write() is _probably_ impossible to reach after this patch. Values should end up in credential structs only by URL parsing (which is covered here), or by reading credential protocol input (which by definition cannot read a newline into a value). But we should definitely keep the low-level check, as it's our final and most accurate line of defense against protocol injection attacks. Arguably it could become a BUG(), but it probably doesn't matter much either way. Note that the public interface of credential_from_url() grows a little more than we need here. We'll use the extra flexibility in a future patch to help fsck catch these cases.
2020-03-12 06:31:11 +01:00
if (check_url_component(url, quiet, "username", c->username) < 0 ||
check_url_component(url, quiet, "password", c->password) < 0 ||
check_url_component(url, quiet, "protocol", c->protocol) < 0 ||
check_url_component(url, quiet, "host", c->host) < 0 ||
check_url_component(url, quiet, "path", c->path) < 0)
return -1;
return 0;
}
/*
 * Parse `url` into `c`, accepting URLs in which the protocol, host and/or
 * path are missing. Warnings are not suppressed. Returns whatever
 * credential_from_url_1() returns.
 */
static int credential_from_potentially_partial_url(struct credential *c,
						   const char *url)
{
	const int allow_partial_url = 1;
	const int quiet = 0;

	return credential_from_url_1(c, url, allow_partial_url, quiet);
}
credential: optionally allow partial URLs in credential_from_url_gently() Prior to the fixes for CVE-2020-11008, we were _very_ lenient in what we required from a URL in order to parse it into a `struct credential`. That led to serious vulnerabilities. There was one call site, though, that really needed that leniency: when parsing config settings a la `credential.dev.azure.com.useHTTPPath`. Settings like this might be desired when users want to use, say, a given user name on a given host, regardless of the protocol to be used. In preparation for fixing that bug, let's refactor the code to optionally allow for partial URLs. For the moment, this functionality is only exposed via the now-renamed function `credential_from_url_1()`, but it is not used. The intention is to make it easier to verify that this commit does not change the existing behavior unless explicitly allowing for partial URLs. Please note that this patch does more than just reinstating a way to imitate the behavior before those CVE-2020-11008 fixes: Before that, we would simply ignore URLs without a protocol. In other words, misleadingly, the following setting would be applied to _all_ URLs: [credential "example.com"] username = that-me The obvious intention is to match the host name only. With this patch, we allow precisely that: when parsing the URL with non-zero `allow_partial_url`, we do not simply return success if there was no protocol, but we simply leave the protocol unset and continue parsing the URL. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Reviewed-by: Carlo Marcelo Arenas Belón <carenas@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-24 13:49:51 +02:00
/*
 * Parse a complete URL into `c`; partial URLs are not accepted. `quiet` is
 * forwarded unchanged to credential_from_url_1(), whose return value is
 * handed back to the caller as-is.
 */
int credential_from_url_gently(struct credential *c, const char *url, int quiet)
{
	const int allow_partial_url = 0;
	int ret = credential_from_url_1(c, url, allow_partial_url, quiet);

	return ret;
}
credential: detect unrepresentable values when parsing urls The credential protocol can't represent newlines in values, but URLs can embed percent-encoded newlines in various components. A previous commit taught the low-level writing routines to die() when encountering this, but we can be a little friendlier to the user by detecting them earlier and handling them gracefully. This patch teaches credential_from_url() to notice such components, issue a warning, and blank the credential (which will generally result in prompting the user for a username and password). We blank the whole credential in this case. Another option would be to blank only the invalid component. However, we're probably better off not feeding a partially-parsed URL result to a credential helper. We don't know how a given helper would handle it, so we're better off to err on the side of matching nothing rather than something unexpected. The die() call in credential_write() is _probably_ impossible to reach after this patch. Values should end up in credential structs only by URL parsing (which is covered here), or by reading credential protocol input (which by definition cannot read a newline into a value). But we should definitely keep the low-level check, as it's our final and most accurate line of defense against protocol injection attacks. Arguably it could become a BUG(), but it probably doesn't matter much either way. Note that the public interface of credential_from_url() grows a little more than we need here. We'll use the extra flexibility in a future patch to help fsck catch these cases.
2020-03-12 06:31:11 +01:00
/*
 * Parse a complete URL into `c`, with warnings enabled (quiet = 0).
 *
 * If credential_from_url_gently() reports failure (a negative return), this
 * calls die() with a message naming the offending URL instead of returning
 * an error to the caller.
 */
void credential_from_url(struct credential *c, const char *url)
{
	if (credential_from_url_gently(c, url, 0) < 0)
		die(_("credential url cannot be parsed: %s"), url);
}