From 2f36eed936f70105e80681aafac645ff34acc667 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 12 Jan 2016 08:57:22 +0100 Subject: [PATCH 1/6] Refactor skipping DOS drive prefixes Junio noticed that there is an implicit assumption in pretty much all the code calling has_dos_drive_prefix(): it forces all of its callsites to hardcode the knowledge that the DOS drive prefix is always two bytes long. While this assumption is pretty safe, we can still make the code more readable and less error-prone by introducing a function that skips the DOS drive prefix safely. While at it, we change the has_dos_drive_prefix() return value: it now returns the number of bytes to be skipped if there is a DOS drive prefix. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- compat/basename.c | 4 +--- compat/mingw.c | 14 +++++--------- compat/mingw.h | 10 +++++++++- git-compat-util.h | 8 ++++++++ path.c | 14 +++++--------- 5 files changed, 28 insertions(+), 22 deletions(-) diff --git a/compat/basename.c b/compat/basename.c index d8f8a3c6dc..9f00421a26 100644 --- a/compat/basename.c +++ b/compat/basename.c @@ -4,9 +4,7 @@ char *gitbasename (char *path) { const char *base; - /* Skip over the disk name in MSDOS pathnames. 
*/ - if (has_dos_drive_prefix(path)) - path += 2; + skip_dos_drive_prefix(&path); for (base = path; *path; path++) { if (is_dir_sep(*path)) base = path + 1; diff --git a/compat/mingw.c b/compat/mingw.c index f74da235f5..10a51c058b 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1917,26 +1917,22 @@ pid_t waitpid(pid_t pid, int *status, int options) int mingw_offset_1st_component(const char *path) { - int offset = 0; - if (has_dos_drive_prefix(path)) - offset = 2; + char *pos = (char *)path; /* unc paths */ - else if (is_dir_sep(path[0]) && is_dir_sep(path[1])) { - + if (!skip_dos_drive_prefix(&pos) && + is_dir_sep(pos[0]) && is_dir_sep(pos[1])) { /* skip server name */ - char *pos = strpbrk(path + 2, "\\/"); + pos = strpbrk(pos + 2, "\\/"); if (!pos) return 0; /* Error: malformed unc path */ do { pos++; } while (*pos && !is_dir_sep(*pos)); - - offset = pos - path; } - return offset + is_dir_sep(path[offset]); + return pos + is_dir_sep(*pos) - path; } int xutftowcsn(wchar_t *wcs, const char *utfs, size_t wcslen, int utflen) diff --git a/compat/mingw.h b/compat/mingw.h index 738865c6c0..9b5db4ecc1 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -358,7 +358,15 @@ HANDLE winansi_get_osfhandle(int fd); * git specific compatibility */ -#define has_dos_drive_prefix(path) (isalpha(*(path)) && (path)[1] == ':') +#define has_dos_drive_prefix(path) \ + (isalpha(*(path)) && (path)[1] == ':' ? 
2 : 0) +static inline int mingw_skip_dos_drive_prefix(char **path) +{ + int ret = has_dos_drive_prefix(*path); + *path += ret; + return ret; +} +#define skip_dos_drive_prefix mingw_skip_dos_drive_prefix #define is_dir_sep(c) ((c) == '/' || (c) == '\\') static inline char *mingw_find_last_dir_sep(const char *path) { diff --git a/git-compat-util.h b/git-compat-util.h index 0feeae2983..38397d7afb 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -335,6 +335,14 @@ static inline int git_has_dos_drive_prefix(const char *path) #define has_dos_drive_prefix git_has_dos_drive_prefix #endif +#ifndef skip_dos_drive_prefix +static inline int git_skip_dos_drive_prefix(char **path) +{ + return 0; +} +#define skip_dos_drive_prefix git_skip_dos_drive_prefix +#endif + #ifndef is_dir_sep static inline int git_is_dir_sep(int c) { diff --git a/path.c b/path.c index 38f2ebd6bf..747d6da2c8 100644 --- a/path.c +++ b/path.c @@ -544,13 +544,10 @@ const char *relative_path(const char *in, const char *prefix, else if (!prefix_len) return in; - if (have_same_root(in, prefix)) { + if (have_same_root(in, prefix)) /* bypass dos_drive, for "c:" is identical to "C:" */ - if (has_dos_drive_prefix(in)) { - i = 2; - j = 2; - } - } else { + i = j = has_dos_drive_prefix(in); + else { return in; } @@ -703,11 +700,10 @@ const char *remove_leading_path(const char *in, const char *prefix) int normalize_path_copy_len(char *dst, const char *src, int *prefix_len) { char *dst0; + int i; - if (has_dos_drive_prefix(src)) { + for (i = has_dos_drive_prefix(src); i > 0; i--) *dst++ = *src++; - *dst++ = *src++; - } dst0 = dst; if (is_dir_sep(*src)) { From 61725be349b44f15b0239182c859553d5c547ba0 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 12 Jan 2016 08:57:30 +0100 Subject: [PATCH 2/6] compat/basename: make basename() conform to POSIX According to POSIX, basename("/path/") should return "path", not "path/". Likewise, basename(NULL) and basename("") should both return "." to conform. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- compat/basename.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/compat/basename.c b/compat/basename.c index 9f00421a26..0f1b0b0930 100644 --- a/compat/basename.c +++ b/compat/basename.c @@ -4,10 +4,24 @@ char *gitbasename (char *path) { const char *base; - skip_dos_drive_prefix(&path); + + if (path) + skip_dos_drive_prefix(&path); + + if (!path || !*path) + return "."; + for (base = path; *path; path++) { - if (is_dir_sep(*path)) - base = path + 1; + if (!is_dir_sep(*path)) + continue; + do { + path++; + } while (is_dir_sep(*path)); + if (*path) + base = path; + else + while (--path != base && is_dir_sep(*path)) + *path = '\0'; } return (char *)base; } From 824682ab51e3510817f7a7303decc9f9df38ee9a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 12 Jan 2016 08:57:36 +0100 Subject: [PATCH 3/6] compat/basename.c: provide a dirname() compatibility function When there is no `libgen.h` at our disposal, we miss the `dirname()` function. Earlier we added a basename() compatibility function for the same reason at e1c06886 (compat: add a basename() compatibility function, 2009-05-31). So far, we only had one user of that function: credential-cache--daemon (which was only compiled when Unix sockets are available, anyway). But now we also have `builtin/am.c` as a user, so we need it. Since `dirname()` is a sibling of `basename()`, we simply put our very own `gitdirname()` implementation next to `gitbasename()` and use it if `NO_LIBGEN_H` has been set. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- compat/basename.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ git-compat-util.h | 2 ++ 2 files changed, 46 insertions(+) diff --git a/compat/basename.c b/compat/basename.c index 0f1b0b0930..96bd9533b4 100644 --- a/compat/basename.c +++ b/compat/basename.c @@ -1,4 +1,5 @@ #include "../git-compat-util.h" +#include "../strbuf.h" /* Adapted from libiberty's basename.c. */ char *gitbasename (char *path) @@ -25,3 +26,46 @@ char *gitbasename (char *path) } return (char *)base; } + +char *gitdirname(char *path) +{ + static struct strbuf buf = STRBUF_INIT; + char *p = path, *slash = NULL, c; + int dos_drive_prefix; + + if (!p) + return "."; + + if ((dos_drive_prefix = skip_dos_drive_prefix(&p)) && !*p) + goto dot; + + /* + * POSIX.1-2001 says dirname("/") should return "/", and dirname("//") + * should return "//", but dirname("///") should return "/" again. + */ + if (is_dir_sep(*p)) { + if (!p[1] || (is_dir_sep(p[1]) && !p[2])) + return path; + slash = ++p; + } + while ((c = *(p++))) + if (is_dir_sep(c)) { + char *tentative = p - 1; + + /* POSIX.1-2001 says to ignore trailing slashes */ + while (is_dir_sep(*p)) + p++; + if (*p) + slash = tentative; + } + + if (slash) { + *slash = '\0'; + return path; + } + +dot: + strbuf_reset(&buf); + strbuf_addf(&buf, "%.*s.", dos_drive_prefix, path); + return buf.buf; +} diff --git a/git-compat-util.h b/git-compat-util.h index 38397d7afb..1cc6de194d 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -253,6 +253,8 @@ struct itimerval { #else #define basename gitbasename extern char *gitbasename(char *); +#define dirname gitdirname +extern char *gitdirname(char *); #endif #ifndef NO_ICONV From 7d1aaa684d42964b8b287b8c9450184dfd5bce85 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 12 Jan 2016 08:57:57 +0100 Subject: [PATCH 4/6] t0060: verify that basename() and dirname() work as expected Unfortunately, some libgen implementations yield 
outcomes different from what Git expects. For example, mingw-w64-crt provides a basename() function, that shortens `path0/` to `path`! So let's verify that the basename() and dirname() functions we use conform to what Git expects. Derived-from-code-by: Ramsay Jones Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t0060-path-utils.sh | 3 + test-path-utils.c | 166 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+) diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index 93605f42f2..584a0decfc 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -59,6 +59,9 @@ case $(uname -s) in ;; esac +test_expect_success basename 'test-path-utils basename' +test_expect_success dirname 'test-path-utils dirname' + norm_path "" "" norm_path . "" norm_path ./ "" diff --git a/test-path-utils.c b/test-path-utils.c index c67bf65b34..4ab68aca56 100644 --- a/test-path-utils.c +++ b/test-path-utils.c @@ -39,6 +39,166 @@ static void normalize_argv_string(const char **var, const char *input) die("Bad value: %s\n", input); } +struct test_data { + const char *from; /* input: transform from this ... */ + const char *to; /* output: ... to this. */ +}; + +static int test_function(struct test_data *data, char *(*func)(char *input), + const char *funcname) +{ + int failed = 0, i; + char buffer[1024]; + char *to; + + for (i = 0; data[i].to; i++) { + if (!data[i].from) + to = func(NULL); + else { + strcpy(buffer, data[i].from); + to = func(buffer); + } + if (strcmp(to, data[i].to)) { + error("FAIL: %s(%s) => '%s' != '%s'\n", + funcname, data[i].from, to, data[i].to); + failed = 1; + } + } + return failed; +} + +static struct test_data basename_data[] = { + /* --- POSIX type paths --- */ + { NULL, "." }, + { "", "." }, + { ".", "." }, + { "..", ".." 
}, + { "/", "/" }, +#if defined(__CYGWIN__) && !defined(NO_LIBGEN_H) + { "//", "//" }, + { "///", "//" }, + { "////", "//" }, +#else + { "//", "/" }, + { "///", "/" }, + { "////", "/" }, +#endif + { "usr", "usr" }, + { "/usr", "usr" }, + { "/usr/", "usr" }, + { "/usr//", "usr" }, + { "/usr/lib", "lib" }, + { "usr/lib", "lib" }, + { "usr/lib///", "lib" }, + +#if defined(__MINGW32__) || defined(_MSC_VER) + + /* --- win32 type paths --- */ + { "\\usr", "usr" }, + { "\\usr\\", "usr" }, + { "\\usr\\\\", "usr" }, + { "\\usr\\lib", "lib" }, + { "usr\\lib", "lib" }, + { "usr\\lib\\\\\\", "lib" }, + { "C:/usr", "usr" }, + { "C:/usr", "usr" }, + { "C:/usr/", "usr" }, + { "C:/usr//", "usr" }, + { "C:/usr/lib", "lib" }, + { "C:usr/lib", "lib" }, + { "C:usr/lib///", "lib" }, + { "C:", "." }, + { "C:a", "a" }, + { "C:/", "/" }, + { "C:///", "/" }, +#if defined(NO_LIBGEN_H) + { "\\", "\\" }, + { "\\\\", "\\" }, + { "\\\\\\", "\\" }, +#else + + /* win32 platform variations: */ +#if defined(__MINGW32__) + { "\\", "/" }, + { "\\\\", "/" }, + { "\\\\\\", "/" }, +#endif + +#if defined(_MSC_VER) + { "\\", "\\" }, + { "\\\\", "\\" }, + { "\\\\\\", "\\" }, +#endif + +#endif +#endif + { NULL, NULL } +}; + +static struct test_data dirname_data[] = { + /* --- POSIX type paths --- */ + { NULL, "." }, + { "", "." }, + { ".", "." }, + { "..", "." }, + { "/", "/" }, + { "//", "//" }, +#if defined(__CYGWIN__) && !defined(NO_LIBGEN_H) + { "///", "//" }, + { "////", "//" }, +#else + { "///", "/" }, + { "////", "/" }, +#endif + { "usr", "." }, + { "/usr", "/" }, + { "/usr/", "/" }, + { "/usr//", "/" }, + { "/usr/lib", "/usr" }, + { "usr/lib", "usr" }, + { "usr/lib///", "usr" }, + +#if defined(__MINGW32__) || defined(_MSC_VER) + + /* --- win32 type paths --- */ + { "\\", "\\" }, + { "\\\\", "\\\\" }, + { "\\usr", "\\" }, + { "\\usr\\", "\\" }, + { "\\usr\\\\", "\\" }, + { "\\usr\\lib", "\\usr" }, + { "usr\\lib", "usr" }, + { "usr\\lib\\\\\\", "usr" }, + { "C:a", "C:." 
}, + { "C:/", "C:/" }, + { "C:///", "C:/" }, + { "C:/usr", "C:/" }, + { "C:/usr/", "C:/" }, + { "C:/usr//", "C:/" }, + { "C:/usr/lib", "C:/usr" }, + { "C:usr/lib", "C:usr" }, + { "C:usr/lib///", "C:usr" }, + { "\\\\\\", "\\" }, + { "\\\\\\\\", "\\" }, +#if defined(NO_LIBGEN_H) + { "C:", "C:." }, +#else + + /* win32 platform variations: */ +#if defined(__MINGW32__) + /* the following is clearly wrong ... */ + { "C:", "." }, +#endif + +#if defined(_MSC_VER) + { "C:", "C:." }, +#endif + +#endif +#endif + { NULL, NULL } +}; + int main(int argc, char **argv) { if (argc == 3 && !strcmp(argv[1], "normalize_path_copy")) { @@ -133,6 +293,12 @@ int main(int argc, char **argv) return 0; } + if (argc == 2 && !strcmp(argv[1], "basename")) + return test_function(basename_data, basename, argv[1]); + + if (argc == 2 && !strcmp(argv[1], "dirname")) + return test_function(dirname_data, dirname, argv[1]); + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], argv[1] ? argv[1] : "(there was none)"); return 1; From 371471cea38cb4b5834c9e5715e1fe633829004f Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 14 Jan 2016 07:48:27 +0100 Subject: [PATCH 5/6] t0060: loosen overly strict expectations The dirname() tests were developed and tested on only the five platforms available to the developer at the time, namely: Linux (both 32 and 64-bit), Windows XP 32-bit (MSVC), MinGW 32-bit and Cygwin 32-bit. http://pubs.opengroup.org/onlinepubs/9699919799/functions/basename.html (i.e. the POSIX spec) says, in part: If the string pointed to by path consists entirely of the '/' character, basename() shall return a pointer to the string "/". If the string pointed to by path is exactly "//", it is implementation-defined whether "/" or "//" is returned. The thinking behind testing precise, OS-dependent output values was to document that different setups produce different values. 
However, as the test failures on MacOSX illustrated eloquently: hardcoding pretty much each and every setup's expectations is pretty fragile. This is not limited to the "//" vs "/" case, of course, other inputs are also allowed to produce multiple outputs by the POSIX specs. So let's just test for all allowed values and be done with it. This still documents that Git cannot rely on one particular output value in those cases, so the intention of the original tests is still met. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- test-path-utils.c | 78 +++++++++++++---------------------------------- 1 file changed, 21 insertions(+), 57 deletions(-) diff --git a/test-path-utils.c b/test-path-utils.c index 4ab68aca56..c3adcd87b8 100644 --- a/test-path-utils.c +++ b/test-path-utils.c @@ -42,6 +42,7 @@ static void normalize_argv_string(const char **var, const char *input) struct test_data { const char *from; /* input: transform from this ... */ const char *to; /* output: ... to this. */ + const char *alternative; /* output: ... or this. */ }; static int test_function(struct test_data *data, char *(*func)(char *input), @@ -58,11 +59,18 @@ static int test_function(struct test_data *data, char *(*func)(char *input), strcpy(buffer, data[i].from); to = func(buffer); } - if (strcmp(to, data[i].to)) { + if (!strcmp(to, data[i].to)) + continue; + if (!data[i].alternative) error("FAIL: %s(%s) => '%s' != '%s'\n", funcname, data[i].from, to, data[i].to); - failed = 1; - } + else if (!strcmp(to, data[i].alternative)) + continue; + else + error("FAIL: %s(%s) => '%s' != '%s', '%s'\n", + funcname, data[i].from, to, data[i].to, + data[i].alternative); + failed = 1; } return failed; } @@ -74,15 +82,9 @@ static struct test_data basename_data[] = { { ".", "." }, { "..", ".." 
}, { "/", "/" }, -#if defined(__CYGWIN__) && !defined(NO_LIBGEN_H) - { "//", "//" }, - { "///", "//" }, - { "////", "//" }, -#else - { "//", "/" }, - { "///", "/" }, - { "////", "/" }, -#endif + { "//", "/", "//" }, + { "///", "/", "//" }, + { "////", "/", "//" }, { "usr", "usr" }, { "/usr", "usr" }, { "/usr/", "usr" }, @@ -92,7 +94,6 @@ static struct test_data basename_data[] = { { "usr/lib///", "lib" }, #if defined(__MINGW32__) || defined(_MSC_VER) - /* --- win32 type paths --- */ { "\\usr", "usr" }, { "\\usr\\", "usr" }, @@ -111,26 +112,9 @@ static struct test_data basename_data[] = { { "C:a", "a" }, { "C:/", "/" }, { "C:///", "/" }, -#if defined(NO_LIBGEN_H) - { "\\", "\\" }, - { "\\\\", "\\" }, - { "\\\\\\", "\\" }, -#else - - /* win32 platform variations: */ -#if defined(__MINGW32__) - { "\\", "/" }, - { "\\\\", "/" }, - { "\\\\\\", "/" }, -#endif - -#if defined(_MSC_VER) - { "\\", "\\" }, - { "\\\\", "\\" }, - { "\\\\\\", "\\" }, -#endif - -#endif + { "\\", "\\", "/" }, + { "\\\\", "\\", "/" }, + { "\\\\\\", "\\", "/" }, #endif { NULL, NULL } }; @@ -142,14 +126,9 @@ static struct test_data dirname_data[] = { { ".", "." }, { "..", "." }, { "/", "/" }, - { "//", "//" }, -#if defined(__CYGWIN__) && !defined(NO_LIBGEN_H) - { "///", "//" }, - { "////", "//" }, -#else - { "///", "/" }, - { "////", "/" }, -#endif + { "//", "/", "//" }, + { "///", "/", "//" }, + { "////", "/", "//" }, { "usr", "." }, { "/usr", "/" }, { "/usr/", "/" }, @@ -159,7 +138,6 @@ static struct test_data dirname_data[] = { { "usr/lib///", "usr" }, #if defined(__MINGW32__) || defined(_MSC_VER) - /* --- win32 type paths --- */ { "\\", "\\" }, { "\\\\", "\\\\" }, @@ -180,21 +158,7 @@ static struct test_data dirname_data[] = { { "C:usr/lib///", "C:usr" }, { "\\\\\\", "\\" }, { "\\\\\\\\", "\\" }, -#if defined(NO_LIBGEN_H) - { "C:", "C:." }, -#else - - /* win32 platform variations: */ -#if defined(__MINGW32__) - /* the following is clearly wrong ... */ - { "C:", "." 
}, -#endif - -#if defined(_MSC_VER) - { "C:", "C:." }, -#endif - -#endif + { "C:", "C:.", "." }, #endif { NULL, NULL } }; From e7d5ce816579723150c341116737fb51d8e33eb3 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Mon, 25 Jan 2016 22:47:56 +0100 Subject: [PATCH 6/6] mingw: avoid linking to the C library's isalpha() The implementation of mingw_skip_dos_drive_prefix() calls isalpha() via has_dos_drive_prefix(). Since the definition occurs long before isalpha() is defined in git-compat-util.h, my build environment reports: CC alloc.o In file included from git-compat-util.h:186, from cache.h:4, from alloc.c:12: compat/mingw.h: In function 'mingw_skip_dos_drive_prefix': compat/mingw.h:365: warning: implicit declaration of function 'isalpha' Dscho does not see a similar warning in his build and suspects that ctype.h is included somehow behind the scenes. This implies that his build links to the C library's isalpha() and does not use git's isalpha(). To fix both the warning in my build and the inconsistency in Dscho's build, move the function definition to mingw.c. Then it picks up git's isalpha() because git-compat-util.h is included at the top of the file. 
Signed-off-by: Johannes Sixt Signed-off-by: Junio C Hamano --- compat/mingw.c | 7 +++++++ compat/mingw.h | 7 +------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 10a51c058b..0cebb61aab 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1915,6 +1915,13 @@ pid_t waitpid(pid_t pid, int *status, int options) return -1; } +int mingw_skip_dos_drive_prefix(char **path) +{ + int ret = has_dos_drive_prefix(*path); + *path += ret; + return ret; +} + int mingw_offset_1st_component(const char *path) { char *pos = (char *)path; diff --git a/compat/mingw.h b/compat/mingw.h index 9b5db4ecc1..2099b79bcf 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -360,12 +360,7 @@ HANDLE winansi_get_osfhandle(int fd); #define has_dos_drive_prefix(path) \ (isalpha(*(path)) && (path)[1] == ':' ? 2 : 0) -static inline int mingw_skip_dos_drive_prefix(char **path) -{ - int ret = has_dos_drive_prefix(*path); - *path += ret; - return ret; -} +int mingw_skip_dos_drive_prefix(char **path); #define skip_dos_drive_prefix mingw_skip_dos_drive_prefix #define is_dir_sep(c) ((c) == '/' || (c) == '\\') static inline char *mingw_find_last_dir_sep(const char *path)