diff --git a/.gitignore b/.gitignore index 6669bf0c6c..b8524bfe48 100644 --- a/.gitignore +++ b/.gitignore @@ -198,6 +198,7 @@ /test-string-list /test-subprocess /test-svn-fe +/test-urlmatch-normalization /test-wildmatch /common-cmds.h *.tar.gz diff --git a/Documentation/config.txt b/Documentation/config.txt index 6e53fc5074..a81f3ab74e 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1513,6 +1513,51 @@ http.useragent:: of common USER_AGENT strings (but not including those like git/1.7.1). Can be overridden by the 'GIT_HTTP_USER_AGENT' environment variable. +http.<url>.*:: + Any of the http.* options above can be applied selectively to some URLs. + For a config key to match a URL, each element of the config key is + compared to that of the URL, in the following order: ++ +-- +. Scheme (e.g., `https` in `https://example.com/`). This field + must match exactly between the config key and the URL. + +. Host/domain name (e.g., `example.com` in `https://example.com/`). + This field must match exactly between the config key and the URL. + +. Port number (e.g., `8080` in `http://example.com:8080/`). + This field must match exactly between the config key and the URL. + Omitted port numbers are automatically converted to the correct + default for the scheme before matching. + +. Path (e.g., `repo.git` in `https://example.com/repo.git`). The + path field of the config key must match the path field of the URL + either exactly or as a prefix of slash-delimited path elements. This means + a config key with path `foo/` matches URL path `foo/bar`. A prefix can only + match on a slash (`/`) boundary. Longer matches take precedence (so a config + key with path `foo/bar` is a better match to URL path `foo/bar` than a config + key with just path `foo/`). + +. User name (e.g., `user` in `https://user@example.com/repo.git`). If + the config key has a user name it must match the user name in the + URL exactly. 
If the config key does not have a user name, that + config key will match a URL with any user name (including none), + but at a lower precedence than a config key with a user name. +-- ++ +The list above is ordered by decreasing precedence; a URL that matches +a config key's path is preferred to one that matches its user name. For example, +if the URL is `https://user@example.com/foo/bar` a config key match of +`https://example.com/foo` will be preferred over a config key match of +`https://user@example.com`. ++ +All URLs are normalized before attempting any matching (the password part, +if embedded in the URL, is always ignored for matching purposes) so that +equivalent URLs that are simply spelled differently will match properly. +Environment variable settings always override any matches. The URLs that are +matched against are those given directly to Git commands. This means any URLs +visited as a result of a redirection do not participate in matching. + i18n.commitEncoding:: Character encoding the commit messages are stored in; Git itself does not care per se, but this information is necessary e.g. 
when diff --git a/Makefile b/Makefile index 0f931a2030..2df742c24d 100644 --- a/Makefile +++ b/Makefile @@ -567,6 +567,7 @@ TEST_PROGRAMS_NEED_X += test-sigchain TEST_PROGRAMS_NEED_X += test-string-list TEST_PROGRAMS_NEED_X += test-subprocess TEST_PROGRAMS_NEED_X += test-svn-fe +TEST_PROGRAMS_NEED_X += test-urlmatch-normalization TEST_PROGRAMS_NEED_X += test-wildmatch TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X)) @@ -721,6 +722,7 @@ LIB_H += tree-walk.h LIB_H += tree.h LIB_H += unpack-trees.h LIB_H += url.h +LIB_H += urlmatch.h LIB_H += userdiff.h LIB_H += utf8.h LIB_H += varint.h @@ -868,6 +870,7 @@ LIB_OBJS += tree.o LIB_OBJS += tree-walk.o LIB_OBJS += unpack-trees.o LIB_OBJS += url.o +LIB_OBJS += urlmatch.o LIB_OBJS += usage.o LIB_OBJS += userdiff.o LIB_OBJS += utf8.o diff --git a/http.c b/http.c index 37986f8252..5eda356fd9 100644 --- a/http.c +++ b/http.c @@ -3,6 +3,7 @@ #include "sideband.h" #include "run-command.h" #include "url.h" +#include "urlmatch.h" #include "credential.h" #include "version.h" #include "pkt-line.h" @@ -334,10 +335,20 @@ void http_init(struct remote *remote, const char *url, int proactive_auth) { char *low_speed_limit; char *low_speed_time; + char *normalized_url; + struct urlmatch_config config = { STRING_LIST_INIT_DUP }; + + config.section = "http"; + config.key = NULL; + config.collect_fn = http_options; + config.cascade_fn = git_default_config; + config.cb = NULL; http_is_verbose = 0; + normalized_url = url_normalize(url, &config.url); - git_config(http_options, NULL); + git_config(urlmatch_config_entry, &config); + free(normalized_url); curl_global_init(CURL_GLOBAL_ALL); diff --git a/t/.gitattributes b/t/.gitattributes index 1b97c5465b..2d44088f56 100644 --- a/t/.gitattributes +++ b/t/.gitattributes @@ -1 +1,2 @@ t[0-9][0-9][0-9][0-9]/* -whitespace +t0110/url-* binary diff --git a/t/t0110-urlmatch-normalization.sh b/t/t0110-urlmatch-normalization.sh new file mode 100755 index 0000000000..8d6096d4d1 --- /dev/null +++ 
b/t/t0110-urlmatch-normalization.sh @@ -0,0 +1,177 @@ +#!/bin/sh + +test_description='urlmatch URL normalization' +. ./test-lib.sh + +# The base name of the test url files +tu="$TEST_DIRECTORY/t0110/url" + +# Note that only file: URLs should be allowed without a host + +test_expect_success 'url scheme' ' + ! test-urlmatch-normalization "" && + ! test-urlmatch-normalization "_" && + ! test-urlmatch-normalization "scheme" && + ! test-urlmatch-normalization "scheme:" && + ! test-urlmatch-normalization "scheme:/" && + ! test-urlmatch-normalization "scheme://" && + ! test-urlmatch-normalization "file" && + ! test-urlmatch-normalization "file:" && + ! test-urlmatch-normalization "file:/" && + test-urlmatch-normalization "file://" && + ! test-urlmatch-normalization "://acme.co" && + ! test-urlmatch-normalization "x_test://acme.co" && + ! test-urlmatch-normalization "-test://acme.co" && + ! test-urlmatch-normalization "0test://acme.co" && + ! test-urlmatch-normalization "+test://acme.co" && + ! test-urlmatch-normalization ".test://acme.co" && + ! test-urlmatch-normalization "schem%6e://" && + test-urlmatch-normalization "x-Test+v1.0://acme.co" && + test "$(test-urlmatch-normalization -p "AbCdeF://x.Y")" = "abcdef://x.y/" +' + +test_expect_success 'url authority' ' + ! test-urlmatch-normalization "scheme://user:pass@" && + ! test-urlmatch-normalization "scheme://?" && + ! test-urlmatch-normalization "scheme://#" && + ! test-urlmatch-normalization "scheme:///" && + ! test-urlmatch-normalization "scheme://:" && + ! test-urlmatch-normalization "scheme://:555" && + test-urlmatch-normalization "file://user:pass@" && + test-urlmatch-normalization "file://?" && + test-urlmatch-normalization "file://#" && + test-urlmatch-normalization "file:///" && + test-urlmatch-normalization "file://:" && + ! 
test-urlmatch-normalization "file://:555" && + test-urlmatch-normalization "scheme://user:pass@host" && + test-urlmatch-normalization "scheme://@host" && + test-urlmatch-normalization "scheme://%00@host" && + ! test-urlmatch-normalization "scheme://%%@host" && + ! test-urlmatch-normalization "scheme://host_" && + test-urlmatch-normalization "scheme://user:pass@host/" && + test-urlmatch-normalization "scheme://@host/" && + test-urlmatch-normalization "scheme://host/" && + test-urlmatch-normalization "scheme://host?x" && + test-urlmatch-normalization "scheme://host#x" && + test-urlmatch-normalization "scheme://host/@" && + test-urlmatch-normalization "scheme://host?@x" && + test-urlmatch-normalization "scheme://host#@x" && + test-urlmatch-normalization "scheme://[::1]" && + test-urlmatch-normalization "scheme://[::1]/" && + ! test-urlmatch-normalization "scheme://hos%41/" && + test-urlmatch-normalization "scheme://[invalid....:/" && + test-urlmatch-normalization "scheme://invalid....:]/" && + ! test-urlmatch-normalization "scheme://invalid....:[/" && + ! test-urlmatch-normalization "scheme://invalid....:[" +' + +test_expect_success 'url port checks' ' + test-urlmatch-normalization "xyz://q@some.host:" && + test-urlmatch-normalization "xyz://q@some.host:456/" && + ! test-urlmatch-normalization "xyz://q@some.host:0" && + ! test-urlmatch-normalization "xyz://q@some.host:0000000" && + test-urlmatch-normalization "xyz://q@some.host:0000001?" && + test-urlmatch-normalization "xyz://q@some.host:065535#" && + test-urlmatch-normalization "xyz://q@some.host:65535" && + ! test-urlmatch-normalization "xyz://q@some.host:65536" && + ! test-urlmatch-normalization "xyz://q@some.host:99999" && + ! test-urlmatch-normalization "xyz://q@some.host:100000" && + ! 
test-urlmatch-normalization "xyz://q@some.host:100001" && + test-urlmatch-normalization "http://q@some.host:80" && + test-urlmatch-normalization "https://q@some.host:443" && + test-urlmatch-normalization "http://q@some.host:80/" && + test-urlmatch-normalization "https://q@some.host:443?" && + ! test-urlmatch-normalization "http://q@:8008" && + ! test-urlmatch-normalization "http://:8080" && + ! test-urlmatch-normalization "http://:" && + test-urlmatch-normalization "xyz://q@some.host:456/" && + test-urlmatch-normalization "xyz://[::1]:456/" && + test-urlmatch-normalization "xyz://[::1]:/" && + ! test-urlmatch-normalization "xyz://[::1]:000/" && + ! test-urlmatch-normalization "xyz://[::1]:0%300/" && + ! test-urlmatch-normalization "xyz://[::1]:0x80/" && + ! test-urlmatch-normalization "xyz://[::1]:4294967297/" && + ! test-urlmatch-normalization "xyz://[::1]:030f/" +' + +test_expect_success 'url port normalization' ' + test "$(test-urlmatch-normalization -p "http://x:800")" = "http://x:800/" && + test "$(test-urlmatch-normalization -p "http://x:0800")" = "http://x:800/" && + test "$(test-urlmatch-normalization -p "http://x:00000800")" = "http://x:800/" && + test "$(test-urlmatch-normalization -p "http://x:065535")" = "http://x:65535/" && + test "$(test-urlmatch-normalization -p "http://x:1")" = "http://x:1/" && + test "$(test-urlmatch-normalization -p "http://x:80")" = "http://x/" && + test "$(test-urlmatch-normalization -p "http://x:080")" = "http://x/" && + test "$(test-urlmatch-normalization -p "http://x:000000080")" = "http://x/" && + test "$(test-urlmatch-normalization -p "https://x:443")" = "https://x/" && + test "$(test-urlmatch-normalization -p "https://x:0443")" = "https://x/" && + test "$(test-urlmatch-normalization -p "https://x:000000443")" = "https://x/" +' + +test_expect_success 'url general escapes' ' + ! 
test-urlmatch-normalization "http://x.y?%fg" && + test "$(test-urlmatch-normalization -p "X://W/%7e%41^%3a")" = "x://w/~A%5E%3A" && + test "$(test-urlmatch-normalization -p "X://W/:/?#[]@")" = "x://w/:/?#[]@" && + test "$(test-urlmatch-normalization -p "X://W/$&()*+,;=")" = "x://w/$&()*+,;=" && + test "$(test-urlmatch-normalization -p "X://W/'\''")" = "x://w/'\''" && + test "$(test-urlmatch-normalization -p "X://W?'\!'")" = "x://w/?'\!'" +' + +test_expect_success 'url high-bit escapes' ' + test "$(test-urlmatch-normalization -p "$(cat "$tu-1")")" = "x://q/%01%02%03%04%05%06%07%08%0E%0F%10%11%12" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-2")")" = "x://q/%13%14%15%16%17%18%19%1B%1C%1D%1E%1F%7F" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-3")")" = "x://q/%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-4")")" = "x://q/%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-5")")" = "x://q/%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-6")")" = "x://q/%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-7")")" = "x://q/%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-8")")" = "x://q/%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-9")")" = "x://q/%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-10")")" = "x://q/%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-11")")" = "x://q/%C2%80%DF%BF%E0%A0%80%EF%BF%BD%F0%90%80%80%F0%AF%BF%BD" +' + +test_expect_success 'url username/password escapes' ' + test "$(test-urlmatch-normalization -p "x://%41%62(^):%70+d@foo")" = "x://Ab(%5E):p+d@foo/" +' + 
+test_expect_success 'url normalized lengths' ' + test "$(test-urlmatch-normalization -l "Http://%4d%65:%4d^%70@The.Host")" = 25 && + test "$(test-urlmatch-normalization -l "http://%41:%42@x.y/%61/")" = 17 && + test "$(test-urlmatch-normalization -l "http://@x.y/^")" = 15 +' + +test_expect_success 'url . and .. segments' ' + test "$(test-urlmatch-normalization -p "x://y/.")" = "x://y/" && + test "$(test-urlmatch-normalization -p "x://y/./")" = "x://y/" && + test "$(test-urlmatch-normalization -p "x://y/a/.")" = "x://y/a" && + test "$(test-urlmatch-normalization -p "x://y/a/./")" = "x://y/a/" && + test "$(test-urlmatch-normalization -p "x://y/.?")" = "x://y/?" && + test "$(test-urlmatch-normalization -p "x://y/./?")" = "x://y/?" && + test "$(test-urlmatch-normalization -p "x://y/a/.?")" = "x://y/a?" && + test "$(test-urlmatch-normalization -p "x://y/a/./?")" = "x://y/a/?" && + test "$(test-urlmatch-normalization -p "x://y/a/./b/.././../c")" = "x://y/c" && + test "$(test-urlmatch-normalization -p "x://y/a/./b/../.././c/")" = "x://y/c/" && + test "$(test-urlmatch-normalization -p "x://y/a/./b/.././../c/././.././.")" = "x://y/" && + ! test-urlmatch-normalization "x://y/a/./b/.././../c/././.././.." && + test "$(test-urlmatch-normalization -p "x://y/a/./?/././..")" = "x://y/a/?/././.." 
&& + test "$(test-urlmatch-normalization -p "x://y/%2e/")" = "x://y/" && + test "$(test-urlmatch-normalization -p "x://y/%2E/")" = "x://y/" && + test "$(test-urlmatch-normalization -p "x://y/a/%2e./")" = "x://y/" && + test "$(test-urlmatch-normalization -p "x://y/b/.%2E/")" = "x://y/" && + test "$(test-urlmatch-normalization -p "x://y/c/%2e%2E/")" = "x://y/" +' + +# http://@foo specifies an empty user name but does not specify a password +# http://foo specifies neither a user name nor a password +# So they should not be equivalent +test_expect_success 'url equivalents' ' + test-urlmatch-normalization "httP://x" "Http://X/" && + test-urlmatch-normalization "Http://%4d%65:%4d^%70@The.Host" "hTTP://Me:%4D^p@the.HOST:80/" && + ! test-urlmatch-normalization "https://@x.y/^" "httpS://x.y:443/^" && + test-urlmatch-normalization "https://@x.y/^" "httpS://@x.y:0443/^" && + test-urlmatch-normalization "https://@x.y/^/../abc" "httpS://@x.y:0443/abc" && + test-urlmatch-normalization "https://@x.y/^/.." "httpS://@x.y:0443/" +' + +test_done diff --git a/t/t0110/README b/t/t0110/README new file mode 100644 index 0000000000..ad4a50ecd8 --- /dev/null +++ b/t/t0110/README @@ -0,0 +1,9 @@ +The url data files in this directory contain URLs with characters +in the range 0x01-0x1f and 0x7f-0xff to test the proper normalization +of unprintable characters. + +A select few characters in the 0x01-0x1f range are skipped to help +avoid problems running the test itself. + +The urls are in test files in this directory rather than being +embedded in the test script for portability. 
diff --git a/t/t0110/url-1 b/t/t0110/url-1 new file mode 100644 index 0000000000..519019c5ce --- /dev/null +++ b/t/t0110/url-1 @@ -0,0 +1 @@ +x://q/ diff --git a/t/t0110/url-10 b/t/t0110/url-10 new file mode 100644 index 0000000000..b9965de6a5 --- /dev/null +++ b/t/t0110/url-10 @@ -0,0 +1 @@ +x://q/ diff --git a/t/t0110/url-11 b/t/t0110/url-11 new file mode 100644 index 0000000000..f0a50f1009 --- /dev/null +++ b/t/t0110/url-11 @@ -0,0 +1 @@ +x://q/€߿ࠀ�𐀀𯿽 diff --git a/t/t0110/url-2 b/t/t0110/url-2 new file mode 100644 index 0000000000..43334b05b2 --- /dev/null +++ b/t/t0110/url-2 @@ -0,0 +1 @@ +x://q/ diff --git a/t/t0110/url-3 b/t/t0110/url-3 new file mode 100644 index 0000000000..7378c7bec2 --- /dev/null +++ b/t/t0110/url-3 @@ -0,0 +1 @@ +x://q/ diff --git a/t/t0110/url-4 b/t/t0110/url-4 new file mode 100644 index 0000000000..220b198c97 --- /dev/null +++ b/t/t0110/url-4 @@ -0,0 +1 @@ +x://q/ diff --git a/t/t0110/url-5 b/t/t0110/url-5 new file mode 100644 index 0000000000..1ccd927779 --- /dev/null +++ b/t/t0110/url-5 @@ -0,0 +1 @@ +x://q/ diff --git a/t/t0110/url-6 b/t/t0110/url-6 new file mode 100644 index 0000000000..e8283aac6d --- /dev/null +++ b/t/t0110/url-6 @@ -0,0 +1 @@ +x://q/ diff --git a/t/t0110/url-7 b/t/t0110/url-7 new file mode 100644 index 0000000000..fa7c10b615 --- /dev/null +++ b/t/t0110/url-7 @@ -0,0 +1 @@ +x://q/ diff --git a/t/t0110/url-8 b/t/t0110/url-8 new file mode 100644 index 0000000000..79a0ba836f --- /dev/null +++ b/t/t0110/url-8 @@ -0,0 +1 @@ +x://q/ diff --git a/t/t0110/url-9 b/t/t0110/url-9 new file mode 100644 index 0000000000..8b44bec48b --- /dev/null +++ b/t/t0110/url-9 @@ -0,0 +1 @@ +x://q/ diff --git a/test-urlmatch-normalization.c b/test-urlmatch-normalization.c new file mode 100644 index 0000000000..2603899bfe --- /dev/null +++ b/test-urlmatch-normalization.c @@ -0,0 +1,50 @@ +#include "git-compat-util.h" +#include "urlmatch.h" + +int main(int argc, char **argv) +{ + const char *usage = "test-urlmatch-normalization [-p | -l] | "; 
+ char *url1, *url2; + int opt_p = 0, opt_l = 0; + + /* + * For one url, succeed if url_normalize succeeds on it, fail otherwise. + * For two urls, succeed only if url_normalize succeeds on both and + * the results compare equal with strcmp. If -p is given (one url only) + * and url_normalize succeeds, print the result followed by "\n". If + * -l is given (one url only) and url_normalize succeeds, print the + * returned length in decimal followed by "\n". + */ + + if (argc > 1 && !strcmp(argv[1], "-p")) { + opt_p = 1; + argc--; + argv++; + } else if (argc > 1 && !strcmp(argv[1], "-l")) { + opt_l = 1; + argc--; + argv++; + } + + if (argc < 2 || argc > 3) + die(usage); + + if (argc == 2) { + struct url_info info; + url1 = url_normalize(argv[1], &info); + if (!url1) + return 1; + if (opt_p) + printf("%s\n", url1); + if (opt_l) + printf("%u\n", (unsigned)info.url_len); + return 0; + } + + if (opt_p || opt_l) + die(usage); + + url1 = url_normalize(argv[1], NULL); + url2 = url_normalize(argv[2], NULL); + return (url1 && url2 && !strcmp(url1, url2)) ? 0 : 1; +}