git-commit-vandalism/t/helper/test-path-utils.c
Johannes Schindelin d2c84dad1c mingw: refuse to access paths with trailing spaces or periods
When creating a directory on Windows whose path ends in a space or a
period (or chains thereof), the Win32 API "helpfully" trims those. For
example, `mkdir("abc ");` will return success, but actually create a
directory called `abc` instead.

This stems back to the DOS days, when all file names had exactly 8
characters plus exactly 3 characters for the file extension, and the
only way to have shorter names was by padding with spaces.

Sadly, this "helpful" behavior is a bit inconsistent: after a successful
`mkdir("abc ");`, a `mkdir("abc /def")` will actually _fail_ (because
the directory `abc ` does not actually exist).

Even if it would work, we now have a serious problem because a Git
repository could contain directories `abc` and `abc `, and on Windows,
they would be "merged" unintentionally.

As these paths are illegal on Windows, anyway, let's disallow any
accesses to such paths on that Operating System.

For practical reasons, this behavior is still guarded by the
config setting `core.protectNTFS`: it is possible (and at least two
regression tests make use of it) to create commits without involving the
worktree. In such a scenario, it is of course possible -- even on
Windows -- to create such file names.

Among other consequences, this patch disallows submodules' paths to end
in spaces on Windows (which would formerly have confused Git enough to
try to write into incorrect paths, anyway).

While this patch does not fix a vulnerability on its own, it prevents an
attack vector that was exploited in demonstrations of a number of
recently-fixed security bugs.

The regression test added to `t/t7417-submodule-path-url.sh` reflects
that attack vector.

Note that we have to adjust the test case "prevent git~1 squatting on
Windows" in `t/t7415-submodule-names.sh` because of a very subtle issue.
It tries to clone two submodules whose names differ only in a trailing
period character, and as a consequence their git directories differ in
the same way. Previously, when Git tried to clone the second submodule,
it thought that the git directory already existed (because on Windows,
when you create a directory with the name `b.` it actually creates `b`),
but with this patch, the first submodule's clone will fail because of
the illegal name of the git directory. Therefore, when cloning the
second submodule, Git will take a different code path: a fresh clone
(without an existing git directory). Both code paths fail to clone the
second submodule, both because the the corresponding worktree directory
exists and is not empty, but the error messages are worded differently.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
2019-12-05 15:37:06 +01:00

410 lines
11 KiB
C

#include "cache.h"
#include "string-list.h"
#include "utf8.h"
/*
* A "string_list_each_func_t" function that normalizes an entry from
* GIT_CEILING_DIRECTORIES. If the path is unusable for some reason,
* die with an explanation.
*/
static int normalize_ceiling_entry(struct string_list_item *item, void *unused)
{
char *ceil = item->string;
if (!*ceil)
die("Empty path is not supported");
if (!is_absolute_path(ceil))
die("Path \"%s\" is not absolute", ceil);
if (normalize_path_copy(ceil, ceil) < 0)
die("Path \"%s\" could not be normalized", ceil);
return 1;
}
static void normalize_argv_string(const char **var, const char *input)
{
if (!strcmp(input, "<null>"))
*var = NULL;
else if (!strcmp(input, "<empty>"))
*var = "";
else
*var = input;
if (*var && (**var == '<' || **var == '('))
die("Bad value: %s\n", input);
}
struct test_data {
const char *from; /* input: transform from this ... */
const char *to; /* output: ... to this. */
const char *alternative; /* output: ... or this. */
};
/*
* Compatibility wrappers for OpenBSD, whose basename(3) and dirname(3)
* have const parameters.
*/
static char *posix_basename(char *path)
{
return basename(path);
}
static char *posix_dirname(char *path)
{
return dirname(path);
}
static int test_function(struct test_data *data, char *(*func)(char *input),
const char *funcname)
{
int failed = 0, i;
char buffer[1024];
char *to;
for (i = 0; data[i].to; i++) {
if (!data[i].from)
to = func(NULL);
else {
xsnprintf(buffer, sizeof(buffer), "%s", data[i].from);
to = func(buffer);
}
if (!strcmp(to, data[i].to))
continue;
if (!data[i].alternative)
error("FAIL: %s(%s) => '%s' != '%s'\n",
funcname, data[i].from, to, data[i].to);
else if (!strcmp(to, data[i].alternative))
continue;
else
error("FAIL: %s(%s) => '%s' != '%s', '%s'\n",
funcname, data[i].from, to, data[i].to,
data[i].alternative);
failed = 1;
}
return failed;
}
static struct test_data basename_data[] = {
/* --- POSIX type paths --- */
{ NULL, "." },
{ "", "." },
{ ".", "." },
{ "..", ".." },
{ "/", "/" },
{ "//", "/", "//" },
{ "///", "/", "//" },
{ "////", "/", "//" },
{ "usr", "usr" },
{ "/usr", "usr" },
{ "/usr/", "usr" },
{ "/usr//", "usr" },
{ "/usr/lib", "lib" },
{ "usr/lib", "lib" },
{ "usr/lib///", "lib" },
#if defined(__MINGW32__) || defined(_MSC_VER)
/* --- win32 type paths --- */
{ "\\usr", "usr" },
{ "\\usr\\", "usr" },
{ "\\usr\\\\", "usr" },
{ "\\usr\\lib", "lib" },
{ "usr\\lib", "lib" },
{ "usr\\lib\\\\\\", "lib" },
{ "C:/usr", "usr" },
{ "C:/usr", "usr" },
{ "C:/usr/", "usr" },
{ "C:/usr//", "usr" },
{ "C:/usr/lib", "lib" },
{ "C:usr/lib", "lib" },
{ "C:usr/lib///", "lib" },
{ "C:", "." },
{ "C:a", "a" },
{ "C:/", "/" },
{ "C:///", "/" },
{ "\\", "\\", "/" },
{ "\\\\", "\\", "/" },
{ "\\\\\\", "\\", "/" },
#endif
{ NULL, NULL }
};
static struct test_data dirname_data[] = {
/* --- POSIX type paths --- */
{ NULL, "." },
{ "", "." },
{ ".", "." },
{ "..", "." },
{ "/", "/" },
{ "//", "/", "//" },
{ "///", "/", "//" },
{ "////", "/", "//" },
{ "usr", "." },
{ "/usr", "/" },
{ "/usr/", "/" },
{ "/usr//", "/" },
{ "/usr/lib", "/usr" },
{ "usr/lib", "usr" },
{ "usr/lib///", "usr" },
#if defined(__MINGW32__) || defined(_MSC_VER)
/* --- win32 type paths --- */
{ "\\", "\\" },
{ "\\\\", "\\\\" },
{ "\\usr", "\\" },
{ "\\usr\\", "\\" },
{ "\\usr\\\\", "\\" },
{ "\\usr\\lib", "\\usr" },
{ "usr\\lib", "usr" },
{ "usr\\lib\\\\\\", "usr" },
{ "C:a", "C:." },
{ "C:/", "C:/" },
{ "C:///", "C:/" },
{ "C:/usr", "C:/" },
{ "C:/usr/", "C:/" },
{ "C:/usr//", "C:/" },
{ "C:/usr/lib", "C:/usr" },
{ "C:usr/lib", "C:usr" },
{ "C:usr/lib///", "C:usr" },
{ "\\\\\\", "\\" },
{ "\\\\\\\\", "\\" },
{ "C:", "C:.", "." },
#endif
{ NULL, NULL }
};
static int is_dotgitmodules(const char *path)
{
return is_hfs_dotgitmodules(path) || is_ntfs_dotgitmodules(path);
}
/*
* A very simple, reproducible pseudo-random generator. Copied from
* `test-genrandom.c`.
*/
static uint64_t my_random_value = 1234;
static uint64_t my_random(void)
{
my_random_value = my_random_value * 1103515245 + 12345;
return my_random_value;
}
/*
* A fast approximation of the square root, without requiring math.h.
*
* It uses Newton's method to approximate the solution of 0 = x^2 - value.
*/
static double my_sqrt(double value)
{
const double epsilon = 1e-6;
double x = value;
if (value == 0)
return 0;
for (;;) {
double delta = (value / x - x) / 2;
if (delta < epsilon && delta > -epsilon)
return x + delta;
x += delta;
}
}
static int protect_ntfs_hfs_benchmark(int argc, const char **argv)
{
size_t i, j, nr, min_len = 3, max_len = 20;
char **names;
int repetitions = 15, file_mode = 0100644;
uint64_t begin, end;
double m[3][2], v[3][2];
uint64_t cumul;
double cumul2;
if (argc > 1 && !strcmp(argv[1], "--with-symlink-mode")) {
file_mode = 0120000;
argc--;
argv++;
}
nr = argc > 1 ? strtoul(argv[1], NULL, 0) : 1000000;
ALLOC_ARRAY(names, nr);
if (argc > 2) {
min_len = strtoul(argv[2], NULL, 0);
if (argc > 3)
max_len = strtoul(argv[3], NULL, 0);
if (min_len > max_len)
die("min_len > max_len");
}
for (i = 0; i < nr; i++) {
size_t len = min_len + (my_random() % (max_len + 1 - min_len));
names[i] = xmallocz(len);
while (len > 0)
names[i][--len] = (char)(' ' + (my_random() % ('\x7f' - ' ')));
}
for (protect_ntfs = 0; protect_ntfs < 2; protect_ntfs++)
for (protect_hfs = 0; protect_hfs < 2; protect_hfs++) {
cumul = 0;
cumul2 = 0;
for (i = 0; i < repetitions; i++) {
begin = getnanotime();
for (j = 0; j < nr; j++)
verify_path(names[j], file_mode);
end = getnanotime();
printf("protect_ntfs = %d, protect_hfs = %d: %lfms\n", protect_ntfs, protect_hfs, (end-begin) / (double)1e6);
cumul += end - begin;
cumul2 += (end - begin) * (end - begin);
}
m[protect_ntfs][protect_hfs] = cumul / (double)repetitions;
v[protect_ntfs][protect_hfs] = my_sqrt(cumul2 / (double)repetitions - m[protect_ntfs][protect_hfs] * m[protect_ntfs][protect_hfs]);
printf("mean: %lfms, stddev: %lfms\n", m[protect_ntfs][protect_hfs] / (double)1e6, v[protect_ntfs][protect_hfs] / (double)1e6);
}
for (protect_ntfs = 0; protect_ntfs < 2; protect_ntfs++)
for (protect_hfs = 0; protect_hfs < 2; protect_hfs++)
printf("ntfs=%d/hfs=%d: %lf%% slower\n", protect_ntfs, protect_hfs, (m[protect_ntfs][protect_hfs] - m[0][0]) * 100 / m[0][0]);
return 0;
}
int cmd_main(int argc, const char **argv)
{
if (argc == 3 && !strcmp(argv[1], "normalize_path_copy")) {
char *buf = xmallocz(strlen(argv[2]));
int rv = normalize_path_copy(buf, argv[2]);
if (rv)
buf = "++failed++";
puts(buf);
return 0;
}
if (argc >= 2 && !strcmp(argv[1], "real_path")) {
while (argc > 2) {
puts(real_path(argv[2]));
argc--;
argv++;
}
return 0;
}
if (argc >= 2 && !strcmp(argv[1], "absolute_path")) {
while (argc > 2) {
puts(absolute_path(argv[2]));
argc--;
argv++;
}
return 0;
}
if (argc == 4 && !strcmp(argv[1], "longest_ancestor_length")) {
int len;
struct string_list ceiling_dirs = STRING_LIST_INIT_DUP;
char *path = xstrdup(argv[2]);
/*
* We have to normalize the arguments because under
* Windows, bash mangles arguments that look like
* absolute POSIX paths or colon-separate lists of
* absolute POSIX paths into DOS paths (e.g.,
* "/foo:/foo/bar" might be converted to
* "D:\Src\msysgit\foo;D:\Src\msysgit\foo\bar"),
* whereas longest_ancestor_length() requires paths
* that use forward slashes.
*/
if (normalize_path_copy(path, path))
die("Path \"%s\" could not be normalized", argv[2]);
string_list_split(&ceiling_dirs, argv[3], PATH_SEP, -1);
filter_string_list(&ceiling_dirs, 0,
normalize_ceiling_entry, NULL);
len = longest_ancestor_length(path, &ceiling_dirs);
string_list_clear(&ceiling_dirs, 0);
free(path);
printf("%d\n", len);
return 0;
}
if (argc >= 4 && !strcmp(argv[1], "prefix_path")) {
const char *prefix = argv[2];
int prefix_len = strlen(prefix);
int nongit_ok;
setup_git_directory_gently(&nongit_ok);
while (argc > 3) {
puts(prefix_path(prefix, prefix_len, argv[3]));
argc--;
argv++;
}
return 0;
}
if (argc == 4 && !strcmp(argv[1], "strip_path_suffix")) {
char *prefix = strip_path_suffix(argv[2], argv[3]);
printf("%s\n", prefix ? prefix : "(null)");
return 0;
}
if (argc == 3 && !strcmp(argv[1], "print_path")) {
puts(argv[2]);
return 0;
}
if (argc == 4 && !strcmp(argv[1], "relative_path")) {
struct strbuf sb = STRBUF_INIT;
const char *in, *prefix, *rel;
normalize_argv_string(&in, argv[2]);
normalize_argv_string(&prefix, argv[3]);
rel = relative_path(in, prefix, &sb);
if (!rel)
puts("(null)");
else
puts(strlen(rel) > 0 ? rel : "(empty)");
strbuf_release(&sb);
return 0;
}
if (argc == 2 && !strcmp(argv[1], "basename"))
return test_function(basename_data, posix_basename, argv[1]);
if (argc == 2 && !strcmp(argv[1], "dirname"))
return test_function(dirname_data, posix_dirname, argv[1]);
if (argc > 2 && !strcmp(argv[1], "is_dotgitmodules")) {
int res = 0, expect = 1, i;
for (i = 2; i < argc; i++)
if (!strcmp("--not", argv[i]))
expect = !expect;
else if (expect != is_dotgitmodules(argv[i]))
res = error("'%s' is %s.gitmodules", argv[i],
expect ? "not " : "");
else
fprintf(stderr, "ok: '%s' is %s.gitmodules\n",
argv[i], expect ? "" : "not ");
return !!res;
}
if (argc > 1 && !strcmp(argv[1], "protect_ntfs_hfs"))
return !!protect_ntfs_hfs_benchmark(argc - 1, argv + 1);
if (argc > 1 && !strcmp(argv[1], "is_valid_path")) {
int res = 0, expect = 1, i;
for (i = 2; i < argc; i++)
if (!strcmp("--not", argv[i]))
expect = 0;
else if (expect != is_valid_path(argv[i]))
res = error("'%s' is%s a valid path",
argv[i], expect ? " not" : "");
else
fprintf(stderr,
"'%s' is%s a valid path\n",
argv[i], expect ? "" : " not");
return !!res;
}
fprintf(stderr, "%s: unknown function name: %s\n", argv[0],
argv[1] ? argv[1] : "(there was none)");
return 1;
}