From 33439956b7f6f91434673d49e8c8af5c90b4a0fa Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 31 Jan 2017 10:01:43 +0100 Subject: [PATCH 1/5] mailmap: add Patrick Steinhardt's work address Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 9c87a3840b..ea59205b92 100644 --- a/.mailmap +++ b/.mailmap @@ -177,6 +177,7 @@ Paolo Bonzini Pascal Obry Pascal Obry Pat Notz +Patrick Steinhardt Paul Mackerras Paul Mackerras Peter Baumann From 3e6a0e64a47497d1addaf063e13865c67cbeb009 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 31 Jan 2017 10:01:44 +0100 Subject: [PATCH 2/5] urlmatch: enable normalization of URLs with globs The `url_normalize` function is used to validate and normalize URLs. As such, it does not allow for some special characters to be part of the URLs that are to be normalized. As we want to allow using globs in some configuration keys making use of URLs, namely `http.<url>.<key>`, but still normalize them, we need to somehow enable some additional allowed characters. To do this without having to change all callers of `url_normalize`, where most do not actually want globbing at all, we split off another function `url_normalize_1`. This function accepts an additional parameter `allow_globs`, and `url_normalize` now simply calls it with `allow_globs=0`. As of now, this function is not used with globbing enabled. A caller will be added in the following commit.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- urlmatch.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/urlmatch.c b/urlmatch.c index 132d342bc1..d350478c09 100644 --- a/urlmatch.c +++ b/urlmatch.c @@ -63,7 +63,7 @@ static int append_normalized_escapes(struct strbuf *buf, return 1; } -char *url_normalize(const char *url, struct url_info *out_info) +static char *url_normalize_1(const char *url, struct url_info *out_info, char allow_globs) { /* * Normalize NUL-terminated url using the following rules: @@ -191,7 +191,12 @@ char *url_normalize(const char *url, struct url_info *out_info) strbuf_release(&norm); return NULL; } - spanned = strspn(url, URL_HOST_CHARS); + + if (allow_globs) + spanned = strspn(url, URL_HOST_CHARS "*"); + else + spanned = strspn(url, URL_HOST_CHARS); + if (spanned < colon_ptr - url) { /* Host name has invalid characters */ if (out_info) { @@ -380,6 +385,11 @@ char *url_normalize(const char *url, struct url_info *out_info) return result; } +char *url_normalize(const char *url, struct url_info *out_info) +{ + return url_normalize_1(url, out_info, 0); +} + static size_t url_match_prefix(const char *url, const char *url_prefix, size_t url_prefix_len) From 3ec6e6e8a0870a32357689e2179d845700539623 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 31 Jan 2017 10:01:45 +0100 Subject: [PATCH 3/5] urlmatch: split host and port fields in `struct url_info` The `url_info` structure contains information about a normalized URL with the URL's components being represented by different fields. The host and port part though are to be accessed by the same `host` field, so that getting the host and/or port separately becomes more involved than really necessary. To make the port more readily accessible, split up the host and port fields. Namely, the `host_len` will not include the port length anymore and a new `port_off` field has been added which includes the offset to the port, if available. 
The only user of these fields is `url_normalize_1`. This change makes it easier later on to treat host and port differently when introducing globs for domains. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- urlmatch.c | 16 ++++++++++++---- urlmatch.h | 9 +++++---- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/urlmatch.c b/urlmatch.c index d350478c09..e328905eb3 100644 --- a/urlmatch.c +++ b/urlmatch.c @@ -104,7 +104,7 @@ static char *url_normalize_1(const char *url, struct url_info *out_info, char al struct strbuf norm; size_t spanned; size_t scheme_len, user_off=0, user_len=0, passwd_off=0, passwd_len=0; - size_t host_off=0, host_len=0, port_len=0, path_off, path_len, result_len; + size_t host_off=0, host_len=0, port_off=0, port_len=0, path_off, path_len, result_len; const char *slash_ptr, *at_ptr, *colon_ptr, *path_start; char *result; @@ -263,6 +263,7 @@ static char *url_normalize_1(const char *url, struct url_info *out_info, char al return NULL; } strbuf_addch(&norm, ':'); + port_off = norm.len; strbuf_add(&norm, url, slash_ptr - url); port_len = slash_ptr - url; } @@ -270,7 +271,7 @@ static char *url_normalize_1(const char *url, struct url_info *out_info, char al url = slash_ptr; } if (host_off) - host_len = norm.len - host_off; + host_len = norm.len - host_off - (port_len ? 
port_len + 1 : 0); /* @@ -378,6 +379,7 @@ static char *url_normalize_1(const char *url, struct url_info *out_info, char al out_info->passwd_len = passwd_len; out_info->host_off = host_off; out_info->host_len = host_len; + out_info->port_off = port_off; out_info->port_len = port_len; out_info->path_off = path_off; out_info->path_len = path_len; @@ -464,11 +466,17 @@ static int match_urls(const struct url_info *url, usermatched = 1; } - /* check the host and port */ + /* check the host */ if (url_prefix->host_len != url->host_len || strncmp(url->url + url->host_off, url_prefix->url + url_prefix->host_off, url->host_len)) - return 0; /* host names and/or ports do not match */ + return 0; /* host names do not match */ + + /* check the port */ + if (url_prefix->port_len != url->port_len || + strncmp(url->url + url->port_off, + url_prefix->url + url_prefix->port_off, url->port_len)) + return 0; /* ports do not match */ /* check the path */ pathmatchlen = url_match_prefix( diff --git a/urlmatch.h b/urlmatch.h index 528862adc5..0ea812b03a 100644 --- a/urlmatch.h +++ b/urlmatch.h @@ -18,11 +18,12 @@ struct url_info { size_t passwd_len; /* length of passwd; if passwd_off != 0 but passwd_len == 0, an empty passwd was given */ size_t host_off; /* offset into url to start of host name (0 => none) */ - size_t host_len; /* length of host name; this INCLUDES any ':portnum'; + size_t host_len; /* length of host name; * file urls may have host_len == 0 */ - size_t port_len; /* if a portnum is present (port_len != 0), it has - * this length (excluding the leading ':') at the - * end of the host name (always 0 for file urls) */ + size_t port_off; /* offset into url to start of port number (0 => none) */ + size_t port_len; /* if a portnum is present (port_off != 0), it has + * this length (excluding the leading ':') starting + * from port_off (always 0 for file urls) */ size_t path_off; /* offset into url to the start of the url path; * this will always point to a '/' character * after 
the url has been normalized */ From af99049ca92f5a5d16d8cce9727b859ac5c9ee00 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 31 Jan 2017 10:01:46 +0100 Subject: [PATCH 4/5] urlmatch: include host in urlmatch ranking In order to be able to rank positive matches by `urlmatch`, we inspect the path length and user part to decide whether a match is better than another match. As all other parts are matched exactly between both URLs, this is the correct thing to do right now. In the future, though, we want to introduce wild cards for the domain part. When doing this, it does not make sense anymore to only compare the path lengths. Instead, we also want to compare the domain lengths to determine which of both URLs matches the host part more closely. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t1300-repo-config.sh | 33 ++++++++++++++++++++++++ urlmatch.c | 58 +++++++++++++++++++++++++----------------- urlmatch.h | 3 ++- 3 files changed, 69 insertions(+), 25 deletions(-) diff --git a/t/t1300-repo-config.sh b/t/t1300-repo-config.sh index 923bfc5a26..6c844d5191 100755 --- a/t/t1300-repo-config.sh +++ b/t/t1300-repo-config.sh @@ -1177,6 +1177,39 @@ test_expect_success 'urlmatch' ' test_cmp expect actual ' +test_expect_success 'urlmatch favors more specific URLs' ' + cat >.git/config <<-\EOF && + [http "https://example.com/"] + cookieFile = /tmp/root.txt + [http "https://example.com/subdirectory"] + cookieFile = /tmp/subdirectory.txt + [http "https://user@example.com/"] + cookieFile = /tmp/user.txt + [http "https://averylonguser@example.com/"] + cookieFile = /tmp/averylonguser.txt + EOF + + echo http.cookiefile /tmp/root.txt >expect && + git config --get-urlmatch HTTP https://example.com >actual && + test_cmp expect actual && + + echo http.cookiefile /tmp/subdirectory.txt >expect && + git config --get-urlmatch HTTP https://example.com/subdirectory >actual && + test_cmp expect actual && + + echo http.cookiefile /tmp/subdirectory.txt >expect 
&& + git config --get-urlmatch HTTP https://example.com/subdirectory/nested >actual && + test_cmp expect actual && + + echo http.cookiefile /tmp/user.txt >expect && + git config --get-urlmatch HTTP https://user@example.com/ >actual && + test_cmp expect actual && + + echo http.cookiefile /tmp/subdirectory.txt >expect && + git config --get-urlmatch HTTP https://averylonguser@example.com/subdirectory >actual && + test_cmp expect actual +' + # good section hygiene test_expect_failure 'unsetting the last key in a section removes header' ' cat >.git/config <<-\EOF && diff --git a/urlmatch.c b/urlmatch.c index e328905eb3..990a9de5c0 100644 --- a/urlmatch.c +++ b/urlmatch.c @@ -426,7 +426,7 @@ static size_t url_match_prefix(const char *url, static int match_urls(const struct url_info *url, const struct url_info *url_prefix, - int *exactusermatch) + struct urlmatch_item *match) { /* * url_prefix matches url if the scheme, host and port of url_prefix @@ -445,8 +445,8 @@ static int match_urls(const struct url_info *url, * contained a user name or false if url_prefix did not have a * user name. If there is no match *exactusermatch is left untouched. */ - int usermatched = 0; - int pathmatchlen; + char usermatched = 0; + size_t pathmatchlen; if (!url || !url_prefix || !url->url || !url_prefix->url) return 0; @@ -483,22 +483,38 @@ static int match_urls(const struct url_info *url, url->url + url->path_off, url_prefix->url + url_prefix->path_off, url_prefix->url_len - url_prefix->path_off); + if (!pathmatchlen) + return 0; /* paths do not match */ - if (pathmatchlen && exactusermatch) - *exactusermatch = usermatched; - return pathmatchlen; + if (match) { + match->hostmatch_len = url_prefix->host_len; + match->pathmatch_len = pathmatchlen; + match->user_matched = usermatched; + } + + return 1; +} + +static int cmp_matches(const struct urlmatch_item *a, + const struct urlmatch_item *b) +{ + if (a->hostmatch_len != b->hostmatch_len) + return a->hostmatch_len < b->hostmatch_len ? 
-1 : 1; + if (a->pathmatch_len != b->pathmatch_len) + return a->pathmatch_len < b->pathmatch_len ? -1 : 1; + if (a->user_matched != b->user_matched) + return b->user_matched ? -1 : 1; + return 0; } int urlmatch_config_entry(const char *var, const char *value, void *cb) { struct string_list_item *item; struct urlmatch_config *collect = cb; - struct urlmatch_item *matched; + struct urlmatch_item matched = {0}; struct url_info *url = &collect->url; const char *key, *dot; struct strbuf synthkey = STRBUF_INIT; - size_t matched_len = 0; - int user_matched = 0; int retval; if (!skip_prefix(var, collect->section, &key) || *(key++) != '.') { @@ -516,9 +532,9 @@ int urlmatch_config_entry(const char *var, const char *value, void *cb) free(config_url); if (!norm_url) return 0; - matched_len = match_urls(url, &norm_info, &user_matched); + retval = match_urls(url, &norm_info, &matched); free(norm_url); - if (!matched_len) + if (!retval) return 0; key = dot + 1; } @@ -528,24 +544,18 @@ int urlmatch_config_entry(const char *var, const char *value, void *cb) item = string_list_insert(&collect->vars, key); if (!item->util) { - matched = xcalloc(1, sizeof(*matched)); - item->util = matched; + item->util = xcalloc(1, sizeof(matched)); } else { - matched = item->util; - /* - * Is our match shorter? Is our match the same - * length, and without user while the current - * candidate is with user? Then we cannot use it. - */ - if (matched_len < matched->matched_len || - ((matched_len == matched->matched_len) && - (!user_matched && matched->user_matched))) + if (cmp_matches(&matched, item->util) < 0) + /* + * Our match is worse than the old one, + * we cannot use it. + */ return 0; /* Otherwise, replace it with this one. 
*/ } - matched->matched_len = matched_len; - matched->user_matched = user_matched; + memcpy(item->util, &matched, sizeof(matched)); strbuf_addstr(&synthkey, collect->section); strbuf_addch(&synthkey, '.'); strbuf_addstr(&synthkey, key); diff --git a/urlmatch.h b/urlmatch.h index 0ea812b03a..37ee5da85e 100644 --- a/urlmatch.h +++ b/urlmatch.h @@ -34,7 +34,8 @@ struct url_info { extern char *url_normalize(const char *, struct url_info *); struct urlmatch_item { - size_t matched_len; + size_t hostmatch_len; + size_t pathmatch_len; char user_matched; }; From a272b9e70a48a355b6dd7ff0179c11f8da7ef0f3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 31 Jan 2017 10:01:47 +0100 Subject: [PATCH 5/5] urlmatch: allow globbing for the URL host part The URL matching function computes for two URLs whether they match or not. The match is performed by splitting up the URL into different parts and then doing an exact comparison with the to-be-matched URL. The main user of `urlmatch` is the configuration subsystem. It allows setting certain configuration values based on the URL which is being connected to via keys like `http.<url>.*`. A common use case for this is to set proxies for only some remotes which match the given URL. Unfortunately, having exact matches for all parts of the URL can become quite tedious in some setups. Imagine for example a corporate network where there are dozens or even hundreds of subdomains, which would have to be configured individually. Allow users to write an asterisk '*' in place of any 'host' or 'subdomain' label as part of the host name. For example, "http.https://*.example.com.proxy" sets "http.proxy" for all direct subdomains of "https://example.com", e.g. "https://foo.example.com", but not "https://foo.bar.example.com".
Signed-off-by: Patrick Steinhardt Helped-by: Junio C Hamano Signed-off-by: Junio C Hamano --- Documentation/config.txt | 5 ++- t/t1300-repo-config.sh | 72 ++++++++++++++++++++++++++++++++++++++++ urlmatch.c | 49 ++++++++++++++++++++++++--- 3 files changed, 121 insertions(+), 5 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index af2ae4cc02..ee155d8a6b 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1914,7 +1914,10 @@ http..*:: must match exactly between the config key and the URL. . Host/domain name (e.g., `example.com` in `https://example.com/`). - This field must match exactly between the config key and the URL. + This field must match between the config key and the URL. It is + possible to specify a `*` as part of the host name to match all subdomains + at this level. `https://*.example.com/` for example would match + `https://foo.example.com/`, but not `https://foo.bar.example.com/`. . Port number (e.g., `8080` in `http://example.com:8080/`). This field must match exactly between the config key and the URL. 
diff --git a/t/t1300-repo-config.sh b/t/t1300-repo-config.sh index 6c844d5191..052f120216 100755 --- a/t/t1300-repo-config.sh +++ b/t/t1300-repo-config.sh @@ -1187,6 +1187,18 @@ test_expect_success 'urlmatch favors more specific URLs' ' cookieFile = /tmp/user.txt [http "https://averylonguser@example.com/"] cookieFile = /tmp/averylonguser.txt + [http "https://preceding.example.com"] + cookieFile = /tmp/preceding.txt + [http "https://*.example.com"] + cookieFile = /tmp/wildcard.txt + [http "https://*.example.com/wildcardwithsubdomain"] + cookieFile = /tmp/wildcardwithsubdomain.txt + [http "https://trailing.example.com"] + cookieFile = /tmp/trailing.txt + [http "https://user@*.example.com/"] + cookieFile = /tmp/wildcardwithuser.txt + [http "https://sub.example.com/"] + cookieFile = /tmp/sub.txt EOF echo http.cookiefile /tmp/root.txt >expect && @@ -1207,6 +1219,66 @@ test_expect_success 'urlmatch favors more specific URLs' ' echo http.cookiefile /tmp/subdirectory.txt >expect && git config --get-urlmatch HTTP https://averylonguser@example.com/subdirectory >actual && + test_cmp expect actual && + + echo http.cookiefile /tmp/preceding.txt >expect && + git config --get-urlmatch HTTP https://preceding.example.com >actual && + test_cmp expect actual && + + echo http.cookiefile /tmp/wildcard.txt >expect && + git config --get-urlmatch HTTP https://wildcard.example.com >actual && + test_cmp expect actual && + + echo http.cookiefile /tmp/sub.txt >expect && + git config --get-urlmatch HTTP https://sub.example.com/wildcardwithsubdomain >actual && + test_cmp expect actual && + + echo http.cookiefile /tmp/trailing.txt >expect && + git config --get-urlmatch HTTP https://trailing.example.com >actual && + test_cmp expect actual && + + echo http.cookiefile /tmp/sub.txt >expect && + git config --get-urlmatch HTTP https://user@sub.example.com >actual && + test_cmp expect actual +' + +test_expect_success 'urlmatch with wildcard' ' + cat >.git/config <<-\EOF && + [http] + sslVerify + [http 
"https://*.example.com"] + sslVerify = false + cookieFile = /tmp/cookie.txt + EOF + + test_expect_code 1 git config --bool --get-urlmatch doesnt.exist https://good.example.com >actual && + test_must_be_empty actual && + + echo true >expect && + git config --bool --get-urlmatch http.SSLverify https://example.com >actual && + test_cmp expect actual && + + echo true >expect && + git config --bool --get-urlmatch http.SSLverify https://good-example.com >actual && + test_cmp expect actual && + + echo true >expect && + git config --bool --get-urlmatch http.sslverify https://deep.nested.example.com >actual && + test_cmp expect actual && + + echo false >expect && + git config --bool --get-urlmatch http.sslverify https://good.example.com >actual && + test_cmp expect actual && + + { + echo http.cookiefile /tmp/cookie.txt && + echo http.sslverify false + } >expect && + git config --get-urlmatch HTTP https://good.example.com >actual && + test_cmp expect actual && + + echo http.sslverify >expect && + git config --get-urlmatch HTTP https://more.example.com.au >actual && test_cmp expect actual ' diff --git a/urlmatch.c b/urlmatch.c index 990a9de5c0..4bbde924e8 100644 --- a/urlmatch.c +++ b/urlmatch.c @@ -63,6 +63,49 @@ static int append_normalized_escapes(struct strbuf *buf, return 1; } +static const char *end_of_token(const char *s, int c, size_t n) +{ + const char *next = memchr(s, c, n); + if (!next) + next = s + n; + return next; +} + +static int match_host(const struct url_info *url_info, + const struct url_info *pattern_info) +{ + const char *url = url_info->url + url_info->host_off; + const char *pat = pattern_info->url + pattern_info->host_off; + int url_len = url_info->host_len; + int pat_len = pattern_info->host_len; + + while (url_len && pat_len) { + const char *url_next = end_of_token(url, '.', url_len); + const char *pat_next = end_of_token(pat, '.', pat_len); + + if (pat_next == pat + 1 && pat[0] == '*') + /* wildcard matches anything */ + ; + else if ((pat_next - 
pat) == (url_next - url) && + !memcmp(url, pat, url_next - url)) + /* the components are the same */ + ; + else + return 0; /* found an unmatch */ + + if (url_next < url + url_len) + url_next++; + url_len -= url_next - url; + url = url_next; + if (pat_next < pat + pat_len) + pat_next++; + pat_len -= pat_next - pat; + pat = pat_next; + } + + return (!url_len && !pat_len); +} + static char *url_normalize_1(const char *url, struct url_info *out_info, char allow_globs) { /* @@ -467,9 +510,7 @@ static int match_urls(const struct url_info *url, } /* check the host */ - if (url_prefix->host_len != url->host_len || - strncmp(url->url + url->host_off, - url_prefix->url + url_prefix->host_off, url->host_len)) + if (!match_host(url, url_prefix)) return 0; /* host names do not match */ /* check the port */ @@ -528,7 +569,7 @@ int urlmatch_config_entry(const char *var, const char *value, void *cb) struct url_info norm_info; config_url = xmemdupz(key, dot - key); - norm_url = url_normalize(config_url, &norm_info); + norm_url = url_normalize_1(config_url, &norm_info, 1); free(config_url); if (!norm_url) return 0;