2005-07-09 01:20:59 +02:00
|
|
|
/*
|
2013-03-30 10:53:32 +01:00
|
|
|
* Utilities for paths and pathnames
|
2005-07-09 01:20:59 +02:00
|
|
|
*/
|
|
|
|
#include "cache.h"
|
2017-06-22 20:43:33 +02:00
|
|
|
#include "repository.h"
|
2009-11-17 18:24:25 +01:00
|
|
|
#include "strbuf.h"
|
2012-10-28 17:16:23 +01:00
|
|
|
#include "string-list.h"
|
2014-11-30 09:24:54 +01:00
|
|
|
#include "dir.h"
|
2016-04-22 15:01:29 +02:00
|
|
|
#include "worktree.h"
|
2016-09-01 01:27:22 +02:00
|
|
|
#include "submodule-config.h"
|
2017-06-22 20:43:35 +02:00
|
|
|
#include "path.h"
|
2017-08-19 00:20:26 +02:00
|
|
|
#include "packfile.h"
|
2018-03-23 18:20:55 +01:00
|
|
|
#include "object-store.h"
|
2019-10-28 13:57:18 +01:00
|
|
|
#include "lockfile.h"
|
2005-07-09 01:20:59 +02:00
|
|
|
|
cygwin: Remove the Win32 l/stat() implementation
Commit adbc0b6b ("cygwin: Use native Win32 API for stat", 30-09-2008)
added a Win32 specific implementation of the stat functions. In order
to handle absolute paths, cygwin mount points and symbolic links, this
implementation may fall back on the standard cygwin l/stat() functions.
Also, the choice of cygwin or Win32 functions is made lazily (by the
first call(s) to l/stat) based on the state of some config variables.
Unfortunately, this "schizophrenic stat" implementation has been the
source of many problems ever since. For example, see commits 7faee6b8,
79748439, 452993c2, 085479e7, b8a97333, 924aaf3e, 05bab3ea and 0117c2f0.
In order to avoid further problems, such as the issue raised by the new
reference handling API, remove the Win32 l/stat() implementation.
Signed-off-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-22 21:42:47 +02:00
|
|
|
/*
 * Look up the mode bits of "path" with lstat(), i.e. without following
 * a symbolic link in the final path component.
 *
 * Returns 0 and stores st_mode in *mode on success.  Returns -1 and
 * leaves *mode untouched when lstat() fails.
 */
static int get_st_mode_bits(const char *path, int *mode)
{
	struct stat info;
	int failed = lstat(path, &info) < 0;

	if (!failed)
		*mode = info.st_mode;
	return failed ? -1 : 0;
}
|
|
|
|
|
2005-07-09 01:20:59 +02:00
|
|
|
/*
 * Placeholder path that mksnpath() copies into the caller's buffer when
 * the formatted path does not fit.
 */
static char bad_path[] = "/bad-path/";
|
|
|
|
|
2014-11-30 09:24:26 +01:00
|
|
|
static struct strbuf *get_pathname(void)
|
2006-09-11 21:03:15 +02:00
|
|
|
{
|
2014-11-30 09:24:26 +01:00
|
|
|
static struct strbuf pathname_array[4] = {
|
|
|
|
STRBUF_INIT, STRBUF_INIT, STRBUF_INIT, STRBUF_INIT
|
|
|
|
};
|
2006-09-11 21:03:15 +02:00
|
|
|
static int index;
|
2016-10-23 19:57:30 +02:00
|
|
|
struct strbuf *sb = &pathname_array[index];
|
|
|
|
index = (index + 1) % ARRAY_SIZE(pathname_array);
|
2014-11-30 09:24:26 +01:00
|
|
|
strbuf_reset(sb);
|
|
|
|
return sb;
|
2006-09-11 21:03:15 +02:00
|
|
|
}
|
|
|
|
|
2017-10-04 01:30:40 +02:00
|
|
|
/*
 * Strip a leading "./" from path, along with any slashes that directly
 * follow it.  A path that does not start with "./" is returned as is.
 * The result points into the caller's string; nothing is copied.
 */
static const char *cleanup_path(const char *path)
{
	const char *rest;

	if (!skip_prefix(path, "./", &rest))
		return path;

	for (; *rest == '/'; rest++)
		; /* swallow the extra slashes */
	return rest;
}
|
|
|
|
|
2014-11-30 09:24:26 +01:00
|
|
|
static void strbuf_cleanup_path(struct strbuf *sb)
|
|
|
|
{
|
2017-10-04 01:30:40 +02:00
|
|
|
const char *path = cleanup_path(sb->buf);
|
2014-11-30 09:24:26 +01:00
|
|
|
if (path > sb->buf)
|
|
|
|
strbuf_remove(sb, 0, path - sb->buf);
|
|
|
|
}
|
|
|
|
|
2008-10-26 22:59:13 +01:00
|
|
|
char *mksnpath(char *buf, size_t n, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
unsigned len;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
len = vsnprintf(buf, n, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
if (len >= n) {
|
2008-11-10 22:07:52 +01:00
|
|
|
strlcpy(buf, bad_path, n);
|
2008-10-26 22:59:13 +01:00
|
|
|
return buf;
|
|
|
|
}
|
2017-10-04 01:30:40 +02:00
|
|
|
return (char *)cleanup_path(buf);
|
2008-10-26 22:59:13 +01:00
|
|
|
}
|
|
|
|
|
2014-11-30 09:24:31 +01:00
|
|
|
/*
 * Return 1 if buf is exactly "dir" or starts with "dir" followed by a
 * directory separator, and 0 otherwise.
 */
static int dir_prefix(const char *buf, const char *dir)
{
	int dir_len = strlen(dir);

	if (strncmp(buf, dir, dir_len))
		return 0;
	return buf[dir_len] == '\0' || is_dir_sep(buf[dir_len]);
}
|
2008-10-27 10:22:21 +01:00
|
|
|
|
2014-11-30 09:24:31 +01:00
|
|
|
/*
 * $buf =~ m|$dir/+$file| but without regex: buf must consist of dir,
 * then one or more directory separators, then exactly file.
 */
static int is_dir_file(const char *buf, const char *dir, const char *file)
{
	int dir_len = strlen(dir);
	const char *rest;

	if (strncmp(buf, dir, dir_len))
		return 0;

	rest = buf + dir_len;
	if (!is_dir_sep(*rest))
		return 0;

	/* Step over the whole run of separators. */
	do {
		rest++;
	} while (is_dir_sep(*rest));

	return strcmp(rest, file) == 0;
}
|
|
|
|
|
|
|
|
static void replace_dir(struct strbuf *buf, int len, const char *newdir)
|
|
|
|
{
|
|
|
|
int newlen = strlen(newdir);
|
|
|
|
int need_sep = (buf->buf[len] && !is_dir_sep(buf->buf[len])) &&
|
|
|
|
!is_dir_sep(newdir[newlen - 1]);
|
|
|
|
if (need_sep)
|
|
|
|
len--; /* keep one char, to be replaced with '/' */
|
|
|
|
strbuf_splice(buf, 0, len, newdir, newlen);
|
|
|
|
if (need_sep)
|
|
|
|
buf->buf[newlen] = '/';
|
|
|
|
}
|
|
|
|
|
2015-09-01 04:13:09 +02:00
|
|
|
/*
 * One entry of common_list: a path relative to the git directory,
 * together with flags describing how that path is treated.
 *
 * The member order matters to positional initializers; do not reorder.
 */
struct common_dir {
	/* Not considered garbage for report_linked_checkout_garbage */
	unsigned int ignore_garbage : 1;
	/* The entry names a directory, so paths below it match as well */
	unsigned int is_dir : 1;
	/* Belongs to the common dir, though it may contain paths that don't */
	unsigned int is_common : 1;
	const char *path;
};
|
|
|
|
|
|
|
|
/*
 * Table of known paths inside the git directory.  Flags that are not
 * mentioned in an entry are zero.  The list ends with an entry whose
 * path is NULL.
 *
 * Entries are inserted into the lookup trie in this order, and a path
 * is always listed after any listed parent directory; keep it that way
 * when adding entries.
 */
static struct common_dir common_list[] = {
	{ .path = "branches", .is_dir = 1, .is_common = 1 },
	{ .path = "common", .is_dir = 1, .is_common = 1 },
	{ .path = "hooks", .is_dir = 1, .is_common = 1 },
	{ .path = "info", .is_dir = 1, .is_common = 1 },
	{ .path = "info/sparse-checkout" },
	{ .path = "logs", .ignore_garbage = 1, .is_dir = 1, .is_common = 1 },
	{ .path = "logs/HEAD", .ignore_garbage = 1 },
	{ .path = "logs/refs/bisect", .is_dir = 1 },
	{ .path = "logs/refs/rewritten", .is_dir = 1 },
	{ .path = "logs/refs/worktree", .is_dir = 1 },
	{ .path = "lost-found", .is_dir = 1, .is_common = 1 },
	{ .path = "objects", .is_dir = 1, .is_common = 1 },
	{ .path = "refs", .is_dir = 1, .is_common = 1 },
	{ .path = "refs/bisect", .is_dir = 1 },
	{ .path = "refs/rewritten", .is_dir = 1 },
	{ .path = "refs/worktree", .is_dir = 1 },
	{ .path = "remotes", .is_dir = 1, .is_common = 1 },
	{ .path = "worktrees", .is_dir = 1, .is_common = 1 },
	{ .path = "rr-cache", .is_dir = 1, .is_common = 1 },
	{ .path = "svn", .is_dir = 1, .is_common = 1 },
	{ .path = "config", .is_common = 1 },
	{ .path = "gc.pid", .ignore_garbage = 1, .is_common = 1 },
	{ .path = "packed-refs", .is_common = 1 },
	{ .path = "shallow", .is_common = 1 },
	{ .path = NULL }
};
|
|
|
|
|
2015-09-01 04:13:10 +02:00
|
|
|
/*
 * A compressed trie.  Each node stores a (possibly empty) run of
 * characters shared by every key below it, optionally followed by
 * children.  A node whose value is non-NULL is a terminal node, i.e.
 * some key ends exactly there.
 *
 * As an illustration, take this set of keys:
 *	abc
 *	def
 *	definite
 *	definition
 *
 * The resulting trie is:
 * root: len = 0, children a and d non-NULL, value = NULL.
 *    a: len = 2, contents = bc, value = (data for "abc")
 *    d: len = 2, contents = ef, children i non-NULL, value = (data for "def")
 *       i: len = 3, contents = nit, children e and i non-NULL, value = NULL
 *           e: len = 0, children all NULL, value = (data for "definite")
 *           i: len = 2, contents = on, children all NULL,
 *              value = (data for "definition")
 */
struct trie {
	/* Child nodes, indexed by the next key byte (as unsigned char) */
	struct trie *children[256];
	/* Number of bytes in contents */
	int len;
	/* The compressed run of key bytes; only the first len bytes count */
	char *contents;
	/* Data attached to the key ending here, or NULL if none does */
	void *value;
};
|
|
|
|
|
|
|
|
static struct trie *make_trie_node(const char *key, void *value)
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-30 09:24:36 +01:00
|
|
|
{
|
2015-09-01 04:13:10 +02:00
|
|
|
struct trie *new_node = xcalloc(1, sizeof(*new_node));
|
|
|
|
new_node->len = strlen(key);
|
|
|
|
if (new_node->len) {
|
|
|
|
new_node->contents = xmalloc(new_node->len);
|
|
|
|
memcpy(new_node->contents, key, new_node->len);
|
|
|
|
}
|
|
|
|
new_node->value = value;
|
|
|
|
return new_node;
|
|
|
|
}
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-30 09:24:36 +01:00
|
|
|
|
2015-09-01 04:13:10 +02:00
|
|
|
/*
|
|
|
|
* Add a key/value pair to a trie. The key is assumed to be \0-terminated.
|
|
|
|
* If there was an existing value for this key, return it.
|
|
|
|
*/
|
|
|
|
static void *add_to_trie(struct trie *root, const char *key, void *value)
|
|
|
|
{
|
|
|
|
struct trie *child;
|
|
|
|
void *old;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!*key) {
|
|
|
|
/* we have reached the end of the key */
|
|
|
|
old = root->value;
|
|
|
|
root->value = value;
|
|
|
|
return old;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < root->len; i++) {
|
|
|
|
if (root->contents[i] == key[i])
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Split this node: child will contain this node's
|
|
|
|
* existing children.
|
|
|
|
*/
|
2017-10-24 17:15:05 +02:00
|
|
|
child = xmalloc(sizeof(*child));
|
2015-09-01 04:13:10 +02:00
|
|
|
memcpy(child->children, root->children, sizeof(root->children));
|
|
|
|
|
|
|
|
child->len = root->len - i - 1;
|
|
|
|
if (child->len) {
|
|
|
|
child->contents = xstrndup(root->contents + i + 1,
|
|
|
|
child->len);
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-30 09:24:36 +01:00
|
|
|
}
|
2015-09-01 04:13:10 +02:00
|
|
|
child->value = root->value;
|
|
|
|
root->value = NULL;
|
|
|
|
root->len = i;
|
|
|
|
|
|
|
|
memset(root->children, 0, sizeof(root->children));
|
|
|
|
root->children[(unsigned char)root->contents[i]] = child;
|
|
|
|
|
|
|
|
/* This is the newly-added child. */
|
|
|
|
root->children[(unsigned char)key[i]] =
|
|
|
|
make_trie_node(key + i + 1, value);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We have matched the entire compressed section */
|
|
|
|
if (key[i]) {
|
|
|
|
child = root->children[(unsigned char)key[root->len]];
|
|
|
|
if (child) {
|
|
|
|
return add_to_trie(child, key + root->len + 1, value);
|
|
|
|
} else {
|
|
|
|
child = make_trie_node(key + root->len + 1, value);
|
|
|
|
root->children[(unsigned char)key[root->len]] = child;
|
|
|
|
return NULL;
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-30 09:24:36 +01:00
|
|
|
}
|
|
|
|
}
|
2015-09-01 04:13:10 +02:00
|
|
|
|
|
|
|
old = root->value;
|
|
|
|
root->value = value;
|
|
|
|
return old;
|
|
|
|
}
|
|
|
|
|
2019-10-21 18:00:40 +02:00
|
|
|
/*
 * Callback invoked by trie_find() for a matching trie entry:
 * "unmatched" is the part of the key after the matched prefix, "value"
 * is the value stored in the trie for that prefix, and "baton" is the
 * opaque pointer that was passed to trie_find().
 */
typedef int (*match_fn)(const char *unmatched, void *value, void *baton);
|
2015-09-01 04:13:10 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Search a trie for some key. Find the longest /-or-\0-terminated
|
2019-10-21 18:00:40 +02:00
|
|
|
* prefix of the key for which the trie contains a value. If there is
|
|
|
|
* no such prefix, return -1. Otherwise call fn with the unmatched
|
|
|
|
* portion of the key and the found value. If fn returns 0 or
|
|
|
|
* positive, then return its return value. If fn returns negative,
|
|
|
|
* then call fn with the next-longest /-terminated prefix of the key
|
|
|
|
* (i.e. a parent directory) for which the trie contains a value, and
|
|
|
|
* handle its return value the same way. If there is no shorter
|
|
|
|
* /-terminated prefix with a value left, then return the negative
|
|
|
|
* return value of the most recent fn invocation.
|
2015-09-01 04:13:10 +02:00
|
|
|
*
|
|
|
|
* The key is partially normalized: consecutive slashes are skipped.
|
|
|
|
*
|
2019-10-21 18:00:40 +02:00
|
|
|
* For example, consider the trie containing only [logs,
|
|
|
|
* logs/refs/bisect], both with values, but not logs/refs.
|
2015-09-01 04:13:10 +02:00
|
|
|
*
|
2019-10-21 18:00:40 +02:00
|
|
|
* | key | unmatched | prefix to node | return value |
|
|
|
|
* |--------------------|----------------|------------------|--------------|
|
|
|
|
* | a | not called | n/a | -1 |
|
|
|
|
* | logstore | not called | n/a | -1 |
|
|
|
|
* | logs | \0 | logs | as per fn |
|
|
|
|
* | logs/ | / | logs | as per fn |
|
|
|
|
* | logs/refs | /refs | logs | as per fn |
|
|
|
|
* | logs/refs/ | /refs/ | logs | as per fn |
|
|
|
|
* | logs/refs/b | /refs/b | logs | as per fn |
|
|
|
|
* | logs/refs/bisected | /refs/bisected | logs | as per fn |
|
|
|
|
* | logs/refs/bisect | \0 | logs/refs/bisect | as per fn |
|
|
|
|
* | logs/refs/bisect/ | / | logs/refs/bisect | as per fn |
|
|
|
|
* | logs/refs/bisect/a | /a | logs/refs/bisect | as per fn |
|
|
|
|
* | (If fn in the previous line returns -1, then fn is called once more:) |
|
|
|
|
* | logs/refs/bisect/a | /refs/bisect/a | logs | as per fn |
|
|
|
|
* |--------------------|----------------|------------------|--------------|
|
2015-09-01 04:13:10 +02:00
|
|
|
*/
|
|
|
|
static int trie_find(struct trie *root, const char *key, match_fn fn,
|
|
|
|
void *baton)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int result;
|
|
|
|
struct trie *child;
|
|
|
|
|
|
|
|
if (!*key) {
|
|
|
|
/* we have reached the end of the key */
|
|
|
|
if (root->value && !root->len)
|
|
|
|
return fn(key, root->value, baton);
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < root->len; i++) {
|
|
|
|
/* Partial path normalization: skip consecutive slashes. */
|
|
|
|
if (key[i] == '/' && key[i+1] == '/') {
|
|
|
|
key++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (root->contents[i] != key[i])
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Matched the entire compressed section */
|
|
|
|
key += i;
|
path.c: don't call the match function without value in trie_find()
'logs/refs' is not a working tree-specific path, but since commit
b9317d55a3 (Make sure refs/rewritten/ is per-worktree, 2019-03-07)
'git rev-parse --git-path' has been returning a bogus path if a
trailing '/' is present:
$ git -C WT/ rev-parse --git-path logs/refs --git-path logs/refs/
/home/szeder/src/git/.git/logs/refs
/home/szeder/src/git/.git/worktrees/WT/logs/refs/
We use a trie data structure to efficiently decide whether a path
belongs to the common dir or is working tree-specific. As it happens
b9317d55a3 triggered a bug that is as old as the trie implementation
itself, added in 4e09cf2acf (path: optimize common dir checking,
2015-08-31).
- According to the comment describing trie_find(), it should only
call the given match function 'fn' for a "/-or-\0-terminated
prefix of the key for which the trie contains a value". This is
not true: there are three places where trie_find() calls the match
function, but one of them is missing the check for value's
existence.
- b9317d55a3 added two new keys to the trie: 'logs/refs/rewritten'
and 'logs/refs/worktree', next to the already existing
'logs/refs/bisect'. This resulted in a trie node with the path
'logs/refs/', which didn't exist before, and which doesn't have a
value attached. A query for 'logs/refs/' finds this node and then
hits that one callsite of the match function which doesn't check
for the value's existence, and thus invokes the match function
with NULL as value.
- When the match function check_common() is invoked with a NULL
value, it returns 0, which indicates that the queried path doesn't
belong to the common directory, ultimately resulting the bogus
path shown above.
Add the missing condition to trie_find() so it will never invoke the
match function with a non-existing value. check_common() will then no
longer have to check that it got a non-NULL value, so remove that
condition.
I believe that there are no other paths that could cause similar bogus
output. AFAICT the only other key resulting in the match function
being called with a NULL value is 'co' (because of the keys 'common'
and 'config'). However, as they are not in a directory that belongs
to the common directory the resulting working tree-specific path is
expected.
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-10-21 18:00:43 +02:00
|
|
|
if (!*key) {
|
2015-09-01 04:13:10 +02:00
|
|
|
/* End of key */
|
path.c: don't call the match function without value in trie_find()
'logs/refs' is not a working tree-specific path, but since commit
b9317d55a3 (Make sure refs/rewritten/ is per-worktree, 2019-03-07)
'git rev-parse --git-path' has been returning a bogus path if a
trailing '/' is present:
$ git -C WT/ rev-parse --git-path logs/refs --git-path logs/refs/
/home/szeder/src/git/.git/logs/refs
/home/szeder/src/git/.git/worktrees/WT/logs/refs/
We use a trie data structure to efficiently decide whether a path
belongs to the common dir or is working tree-specific. As it happens
b9317d55a3 triggered a bug that is as old as the trie implementation
itself, added in 4e09cf2acf (path: optimize common dir checking,
2015-08-31).
- According to the comment describing trie_find(), it should only
call the given match function 'fn' for a "/-or-\0-terminated
prefix of the key for which the trie contains a value". This is
not true: there are three places where trie_find() calls the match
function, but one of them is missing the check for value's
existence.
- b9317d55a3 added two new keys to the trie: 'logs/refs/rewritten'
and 'logs/refs/worktree', next to the already existing
'logs/refs/bisect'. This resulted in a trie node with the path
'logs/refs/', which didn't exist before, and which doesn't have a
value attached. A query for 'logs/refs/' finds this node and then
hits that one callsite of the match function which doesn't check
for the value's existence, and thus invokes the match function
with NULL as value.
- When the match function check_common() is invoked with a NULL
value, it returns 0, which indicates that the queried path doesn't
belong to the common directory, ultimately resulting the bogus
path shown above.
Add the missing condition to trie_find() so it will never invoke the
match function with a non-existing value. check_common() will then no
longer have to check that it got a non-NULL value, so remove that
condition.
I believe that there are no other paths that could cause similar bogus
output. AFAICT the only other key resulting in the match function
being called with a NULL value is 'co' (because of the keys 'common'
and 'config'). However, as they are not in a directory that belongs
to the common directory the resulting working tree-specific path is
expected.
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-10-21 18:00:43 +02:00
|
|
|
if (root->value)
|
|
|
|
return fn(key, root->value, baton);
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
}
|
2015-09-01 04:13:10 +02:00
|
|
|
|
|
|
|
/* Partial path normalization: skip consecutive slashes */
|
|
|
|
while (key[0] == '/' && key[1] == '/')
|
|
|
|
key++;
|
|
|
|
|
|
|
|
child = root->children[(unsigned char)*key];
|
|
|
|
if (child)
|
|
|
|
result = trie_find(child, key + 1, fn, baton);
|
|
|
|
else
|
|
|
|
result = -1;
|
|
|
|
|
|
|
|
if (result >= 0 || (*key != '/' && *key != 0))
|
|
|
|
return result;
|
|
|
|
if (root->value)
|
|
|
|
return fn(key, root->value, baton);
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Lookup trie over the paths in common_list; filled by init_common_trie(). */
static struct trie common_trie;
|
|
|
|
/* Set to 1 once init_common_trie() has populated common_trie. */
static int common_trie_done_setup;
|
|
|
|
|
|
|
|
static void init_common_trie(void)
|
|
|
|
{
|
|
|
|
struct common_dir *p;
|
|
|
|
|
|
|
|
if (common_trie_done_setup)
|
|
|
|
return;
|
|
|
|
|
2019-10-21 18:00:42 +02:00
|
|
|
for (p = common_list; p->path; p++)
|
|
|
|
add_to_trie(&common_trie, p->path, p);
|
2015-09-01 04:13:10 +02:00
|
|
|
|
|
|
|
common_trie_done_setup = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Helper function for update_common_dir: returns 1 if the dir
|
|
|
|
* prefix is common.
|
|
|
|
*/
|
|
|
|
static int check_common(const char *unmatched, void *value, void *baton)
|
|
|
|
{
|
|
|
|
struct common_dir *dir = value;
|
|
|
|
|
|
|
|
if (dir->is_dir && (unmatched[0] == 0 || unmatched[0] == '/'))
|
2019-10-21 18:00:42 +02:00
|
|
|
return dir->is_common;
|
2015-09-01 04:13:10 +02:00
|
|
|
|
|
|
|
if (!dir->is_dir && unmatched[0] == 0)
|
2019-10-21 18:00:42 +02:00
|
|
|
return dir->is_common;
|
2015-09-01 04:13:10 +02:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-10-16 00:43:31 +02:00
|
|
|
static void update_common_dir(struct strbuf *buf, int git_dir_len,
|
|
|
|
const char *common_dir)
|
2015-09-01 04:13:10 +02:00
|
|
|
{
|
|
|
|
char *base = buf->buf + git_dir_len;
|
2019-10-28 13:57:18 +01:00
|
|
|
int has_lock_suffix = strbuf_strip_suffix(buf, LOCK_SUFFIX);
|
|
|
|
|
2015-09-01 04:13:10 +02:00
|
|
|
init_common_trie();
|
|
|
|
if (trie_find(&common_trie, base, check_common, NULL) > 0)
|
2015-10-16 00:43:31 +02:00
|
|
|
replace_dir(buf, git_dir_len, common_dir);
|
2019-10-28 13:57:18 +01:00
|
|
|
|
|
|
|
if (has_lock_suffix)
|
|
|
|
strbuf_addstr(buf, LOCK_SUFFIX);
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-30 09:24:36 +01:00
|
|
|
}
|
|
|
|
|
2014-11-30 09:24:54 +01:00
|
|
|
void report_linked_checkout_garbage(void)
|
|
|
|
{
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
2015-09-01 04:13:09 +02:00
|
|
|
const struct common_dir *p;
|
2014-11-30 09:24:54 +01:00
|
|
|
int len;
|
|
|
|
|
2017-06-22 20:43:33 +02:00
|
|
|
if (!the_repository->different_commondir)
|
2014-11-30 09:24:54 +01:00
|
|
|
return;
|
|
|
|
strbuf_addf(&sb, "%s/", get_git_dir());
|
|
|
|
len = sb.len;
|
2019-10-21 18:00:42 +02:00
|
|
|
for (p = common_list; p->path; p++) {
|
|
|
|
const char *path = p->path;
|
2015-09-01 04:13:09 +02:00
|
|
|
if (p->ignore_garbage)
|
2014-11-30 09:24:54 +01:00
|
|
|
continue;
|
|
|
|
strbuf_setlen(&sb, len);
|
|
|
|
strbuf_addstr(&sb, path);
|
|
|
|
if (file_exists(sb.buf))
|
2015-08-13 20:02:52 +02:00
|
|
|
report_garbage(PACKDIR_FILE_GARBAGE, sb.buf);
|
2014-11-30 09:24:54 +01:00
|
|
|
}
|
|
|
|
strbuf_release(&sb);
|
2008-10-27 10:22:21 +01:00
|
|
|
}
|
|
|
|
|
2017-06-22 20:43:38 +02:00
|
|
|
static void adjust_git_path(const struct repository *repo,
|
|
|
|
struct strbuf *buf, int git_dir_len)
|
2014-11-30 09:24:31 +01:00
|
|
|
{
|
|
|
|
const char *base = buf->buf + git_dir_len;
|
2017-06-22 20:43:33 +02:00
|
|
|
if (is_dir_file(base, "info", "grafts"))
|
2014-11-30 09:24:31 +01:00
|
|
|
strbuf_splice(buf, 0, buf->len,
|
2017-06-22 20:43:38 +02:00
|
|
|
repo->graft_file, strlen(repo->graft_file));
|
2017-06-22 20:43:33 +02:00
|
|
|
else if (!strcmp(base, "index"))
|
2014-11-30 09:24:31 +01:00
|
|
|
strbuf_splice(buf, 0, buf->len,
|
2017-06-22 20:43:38 +02:00
|
|
|
repo->index_file, strlen(repo->index_file));
|
2017-06-22 20:43:33 +02:00
|
|
|
else if (dir_prefix(base, "objects"))
|
sha1-file: use an object_directory for the main object dir
Our handling of alternate object directories is needlessly different
from the main object directory. As a result, many places in the code
basically look like this:
do_something(r->objects->objdir);
for (odb = r->objects->alt_odb_list; odb; odb = odb->next)
do_something(odb->path);
That gets annoying when do_something() is non-trivial, and we've
resorted to gross hacks like creating fake alternates (see
find_short_object_filename()).
Instead, let's give each raw_object_store a unified list of
object_directory structs. The first will be the main store, and
everything after is an alternate. Very few callers even care about the
distinction, and can just loop over the whole list (and those who care
can just treat the first element differently).
A few observations:
- we don't need r->objects->objectdir anymore, and can just
mechanically convert that to r->objects->odb->path
- object_directory's path field needs to become a real pointer rather
than a FLEX_ARRAY, in order to fill it with expand_base_dir()
- we'll call prepare_alt_odb() earlier in many functions (i.e.,
outside of the loop). This may result in us calling it even when our
function would be satisfied looking only at the main odb.
But this doesn't matter in practice. It's not a very expensive
operation in the first place, and in the majority of cases it will
be a noop. We call it already (and cache its results) in
prepare_packed_git(), and we'll generally check packs before loose
objects. So essentially every program is going to call it
immediately once per program.
Arguably we should just prepare_alt_odb() immediately upon setting
up the repository's object directory, which would save us sprinkling
calls throughout the code base (and forgetting to do so has been a
source of subtle bugs in the past). But I've stopped short of that
here, since there are already a lot of other moving parts in this
patch.
- Most call sites just get shorter. The check_and_freshen() functions
are an exception, because they have entry points to handle local and
nonlocal directories separately.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-12 15:50:39 +01:00
|
|
|
replace_dir(buf, git_dir_len + 7, repo->objects->odb->path);
|
2016-08-16 15:14:27 +02:00
|
|
|
else if (git_hooks_path && dir_prefix(base, "hooks"))
|
|
|
|
replace_dir(buf, git_dir_len + 5, git_hooks_path);
|
2017-06-22 20:43:38 +02:00
|
|
|
else if (repo->different_commondir)
|
|
|
|
update_common_dir(buf, git_dir_len, repo->commondir);
|
2014-11-30 09:24:31 +01:00
|
|
|
}
|
|
|
|
|
2017-06-22 20:43:38 +02:00
|
|
|
static void strbuf_worktree_gitdir(struct strbuf *buf,
|
|
|
|
const struct repository *repo,
|
|
|
|
const struct worktree *wt)
|
|
|
|
{
|
|
|
|
if (!wt)
|
|
|
|
strbuf_addstr(buf, repo->gitdir);
|
|
|
|
else if (!wt->id)
|
|
|
|
strbuf_addstr(buf, repo->commondir);
|
|
|
|
else
|
|
|
|
strbuf_git_common_path(buf, repo, "worktrees/%s", wt->id);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void do_git_path(const struct repository *repo,
|
|
|
|
const struct worktree *wt, struct strbuf *buf,
|
2016-04-22 15:01:29 +02:00
|
|
|
const char *fmt, va_list args)
|
2008-10-27 10:22:21 +01:00
|
|
|
{
|
2014-11-30 09:24:31 +01:00
|
|
|
int gitdir_len;
|
2017-06-22 20:43:38 +02:00
|
|
|
strbuf_worktree_gitdir(buf, repo, wt);
|
2014-11-30 09:24:26 +01:00
|
|
|
if (buf->len && !is_dir_sep(buf->buf[buf->len - 1]))
|
|
|
|
strbuf_addch(buf, '/');
|
2014-11-30 09:24:31 +01:00
|
|
|
gitdir_len = buf->len;
|
2014-11-30 09:24:26 +01:00
|
|
|
strbuf_vaddf(buf, fmt, args);
|
path: worktree_git_path() should not use file relocation
git_path is a convenience function that usually produces a string
$GIT_DIR/<path>. Since v2.5.0-rc0~143^2~35 (git_path(): be aware of
file relocation in $GIT_DIR, 2014-11-30), as a side benefit callers
get support for path relocation variables like $GIT_OBJECT_DIRECTORY:
- git_path("index") is $GIT_INDEX_FILE when set
- git_path("info/grafts") is $GIT_GRAFTS_FILE when set
- git_path("objects/<foo>") is $GIT_OBJECT_DIRECTORY/<foo> when set
- git_path("hooks/<foo>") is <foo> under core.hookspath when set
- git_path("refs/<foo>") etc (see path.c::common_list) is relative
to $GIT_COMMON_DIR instead of $GIT_DIR
worktree_git_path, by comparison, is designed to resolve files in a
specific worktree's git dir. Unfortunately, it shares code with
git_path and performs the same relocation. The result is that paths
that are meant to be relative to the specified worktree's git dir end
up replaced by paths from environment variables within the current git
dir.
Luckily, no current callers pass such arguments. The relocation was
noticed when testing the result of merging two patches under review,
one of which introduces a caller:
* The first patch made git prune check the index file in each
worktree's git dir (using worktree_git_path(wt, "index")) for
objects not to prune. This would trigger the unwanted relocation
when GIT_INDEX_FILE is set, causing objects reachable from the
index to be pruned.
* The second patch simplified the relocation logic for index,
info/grafts, objects, and hooks to happen unconditionally instead of
based on whether environment or configuration variables are set.
This caused the relocation to trigger even when GIT_INDEX_FILE is
not set.
[jn: rewrote commit message; skipping all relocation instead of just
GIT_INDEX_FILE]
Signed-off-by: Brandon Williams <bmwill@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-22 20:43:39 +02:00
|
|
|
if (!wt)
|
|
|
|
adjust_git_path(repo, buf, gitdir_len);
|
2014-11-30 09:24:26 +01:00
|
|
|
strbuf_cleanup_path(buf);
|
2008-10-27 10:22:21 +01:00
|
|
|
}
|
|
|
|
|
2017-06-22 20:43:40 +02:00
|
|
|
char *repo_git_path(const struct repository *repo,
|
|
|
|
const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct strbuf path = STRBUF_INIT;
|
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
|
|
|
do_git_path(repo, NULL, &path, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
return strbuf_detach(&path, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
void strbuf_repo_git_path(struct strbuf *sb,
|
|
|
|
const struct repository *repo,
|
|
|
|
const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
|
|
|
do_git_path(repo, NULL, sb, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
add git_path_buf helper function
If you have a function that uses git_path a lot, but would
prefer to avoid the static buffers, it's useful to keep a
single scratch buffer locally and reuse it for each call.
You used to be able to do this with git_snpath:
char buf[PATH_MAX];
foo(git_snpath(buf, sizeof(buf), "foo"));
bar(git_snpath(buf, sizeof(buf), "bar"));
but since 1a83c24, git_snpath has been replaced with
strbuf_git_path. This is good, because it removes the
arbitrary PATH_MAX limit. But using strbuf_git_path is more
awkward for two reasons:
1. It adds to the buffer, rather than replacing it. This
is consistent with other strbuf functions, but makes
reuse of a single buffer more tedious.
2. It doesn't return the buffer, so you can't format
as part of a function's arguments.
The new git_path_buf solves both of these, so you can use it
like:
struct strbuf buf = STRBUF_INIT;
foo(git_path_buf(&buf, "foo"));
bar(git_path_buf(&buf, "bar"));
strbuf_release(&buf);
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-24 23:05:40 +02:00
|
|
|
char *git_path_buf(struct strbuf *buf, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
strbuf_reset(buf);
|
|
|
|
va_start(args, fmt);
|
2017-06-22 20:43:38 +02:00
|
|
|
do_git_path(the_repository, NULL, buf, fmt, args);
|
add git_path_buf helper function
If you have a function that uses git_path a lot, but would
prefer to avoid the static buffers, it's useful to keep a
single scratch buffer locally and reuse it for each call.
You used to be able to do this with git_snpath:
char buf[PATH_MAX];
foo(git_snpath(buf, sizeof(buf), "foo"));
bar(git_snpath(buf, sizeof(buf), "bar"));
but since 1a83c24, git_snpath has been replaced with
strbuf_git_path. This is good, because it removes the
arbitrary PATH_MAX limit. But using strbuf_git_path is more
awkward for two reasons:
1. It adds to the buffer, rather than replacing it. This
is consistent with other strbuf functions, but makes
reuse of a single buffer more tedious.
2. It doesn't return the buffer, so you can't format
as part of a function's arguments.
The new git_path_buf solves both of these, so you can use it
like:
struct strbuf buf = STRBUF_INIT;
foo(git_path_buf(&buf, "foo"));
bar(git_path_buf(&buf, "bar"));
strbuf_release(&buf);
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-24 23:05:40 +02:00
|
|
|
va_end(args);
|
|
|
|
return buf->buf;
|
|
|
|
}
|
|
|
|
|
2014-11-30 09:24:28 +01:00
|
|
|
void strbuf_git_path(struct strbuf *sb, const char *fmt, ...)
|
2008-10-27 11:17:51 +01:00
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
2017-06-22 20:43:38 +02:00
|
|
|
do_git_path(the_repository, NULL, sb, fmt, args);
|
2008-10-27 11:17:51 +01:00
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
2014-11-30 09:24:30 +01:00
|
|
|
const char *git_path(const char *fmt, ...)
|
2008-10-27 11:17:51 +01:00
|
|
|
{
|
2014-11-30 09:24:30 +01:00
|
|
|
struct strbuf *pathname = get_pathname();
|
2008-10-27 11:17:51 +01:00
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
2017-06-22 20:43:38 +02:00
|
|
|
do_git_path(the_repository, NULL, pathname, fmt, args);
|
2008-10-27 11:17:51 +01:00
|
|
|
va_end(args);
|
2014-11-30 09:24:30 +01:00
|
|
|
return pathname->buf;
|
2008-10-27 11:17:51 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
char *git_pathdup(const char *fmt, ...)
|
2012-06-22 11:03:23 +02:00
|
|
|
{
|
2014-11-30 09:24:26 +01:00
|
|
|
struct strbuf path = STRBUF_INIT;
|
2012-06-22 11:03:23 +02:00
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
2017-06-22 20:43:38 +02:00
|
|
|
do_git_path(the_repository, NULL, &path, fmt, args);
|
2012-06-22 11:03:23 +02:00
|
|
|
va_end(args);
|
2014-11-30 09:24:26 +01:00
|
|
|
return strbuf_detach(&path, NULL);
|
2012-06-22 11:03:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
char *mkpathdup(const char *fmt, ...)
|
2005-07-09 01:20:59 +02:00
|
|
|
{
|
2012-06-22 11:03:23 +02:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
2005-07-09 01:20:59 +02:00
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
2012-06-22 11:03:23 +02:00
|
|
|
strbuf_vaddf(&sb, fmt, args);
|
2005-07-09 01:20:59 +02:00
|
|
|
va_end(args);
|
2014-11-30 09:24:26 +01:00
|
|
|
strbuf_cleanup_path(&sb);
|
|
|
|
return strbuf_detach(&sb, NULL);
|
2005-07-09 01:20:59 +02:00
|
|
|
}
|
|
|
|
|
2014-11-30 09:24:27 +01:00
|
|
|
const char *mkpath(const char *fmt, ...)
|
2005-07-09 01:20:59 +02:00
|
|
|
{
|
|
|
|
va_list args;
|
2014-11-30 09:24:26 +01:00
|
|
|
struct strbuf *pathname = get_pathname();
|
2005-07-09 01:20:59 +02:00
|
|
|
va_start(args, fmt);
|
2014-11-30 09:24:26 +01:00
|
|
|
strbuf_vaddf(pathname, fmt, args);
|
2005-07-09 01:20:59 +02:00
|
|
|
va_end(args);
|
2014-11-30 09:24:26 +01:00
|
|
|
return cleanup_path(pathname->buf);
|
2005-07-09 01:20:59 +02:00
|
|
|
}
|
2005-08-04 22:43:03 +02:00
|
|
|
|
2016-04-22 15:01:29 +02:00
|
|
|
const char *worktree_git_path(const struct worktree *wt, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct strbuf *pathname = get_pathname();
|
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
2017-06-22 20:43:38 +02:00
|
|
|
do_git_path(the_repository, wt, pathname, fmt, args);
|
2016-04-22 15:01:29 +02:00
|
|
|
va_end(args);
|
|
|
|
return pathname->buf;
|
|
|
|
}
|
|
|
|
|
2017-06-22 20:43:41 +02:00
|
|
|
static void do_worktree_path(const struct repository *repo,
|
|
|
|
struct strbuf *buf,
|
|
|
|
const char *fmt, va_list args)
|
|
|
|
{
|
|
|
|
strbuf_addstr(buf, repo->worktree);
|
|
|
|
if(buf->len && !is_dir_sep(buf->buf[buf->len - 1]))
|
|
|
|
strbuf_addch(buf, '/');
|
|
|
|
|
|
|
|
strbuf_vaddf(buf, fmt, args);
|
|
|
|
strbuf_cleanup_path(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
char *repo_worktree_path(const struct repository *repo, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct strbuf path = STRBUF_INIT;
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
if (!repo->worktree)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
do_worktree_path(repo, &path, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
|
|
|
|
return strbuf_detach(&path, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
void strbuf_repo_worktree_path(struct strbuf *sb,
|
|
|
|
const struct repository *repo,
|
|
|
|
const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
if (!repo->worktree)
|
|
|
|
return;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
do_worktree_path(repo, sb, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
2016-09-01 01:27:22 +02:00
|
|
|
/* Returns 0 on success, negative on failure. */
|
|
|
|
static int do_submodule_path(struct strbuf *buf, const char *path,
|
|
|
|
const char *fmt, va_list args)
|
2010-07-07 15:39:11 +02:00
|
|
|
{
|
2015-09-14 00:17:42 +02:00
|
|
|
struct strbuf git_submodule_common_dir = STRBUF_INIT;
|
|
|
|
struct strbuf git_submodule_dir = STRBUF_INIT;
|
2017-03-26 04:42:30 +02:00
|
|
|
int ret;
|
2010-07-07 15:39:11 +02:00
|
|
|
|
2017-03-26 04:42:30 +02:00
|
|
|
ret = submodule_to_gitdir(&git_submodule_dir, path);
|
|
|
|
if (ret)
|
|
|
|
goto cleanup;
|
2010-07-07 15:39:11 +02:00
|
|
|
|
2017-03-26 04:42:30 +02:00
|
|
|
strbuf_complete(&git_submodule_dir, '/');
|
|
|
|
strbuf_addbuf(buf, &git_submodule_dir);
|
2014-11-30 09:24:26 +01:00
|
|
|
strbuf_vaddf(buf, fmt, args);
|
2015-09-14 00:17:42 +02:00
|
|
|
|
|
|
|
if (get_common_dir_noenv(&git_submodule_common_dir, git_submodule_dir.buf))
|
|
|
|
update_common_dir(buf, git_submodule_dir.len, git_submodule_common_dir.buf);
|
|
|
|
|
2014-11-30 09:24:26 +01:00
|
|
|
strbuf_cleanup_path(buf);
|
2015-09-14 00:17:42 +02:00
|
|
|
|
2016-09-01 01:27:22 +02:00
|
|
|
cleanup:
|
2015-09-14 00:17:42 +02:00
|
|
|
strbuf_release(&git_submodule_dir);
|
|
|
|
strbuf_release(&git_submodule_common_dir);
|
2017-03-26 04:42:30 +02:00
|
|
|
return ret;
|
2015-08-10 11:32:22 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
char *git_pathdup_submodule(const char *path, const char *fmt, ...)
|
|
|
|
{
|
2016-09-01 01:27:22 +02:00
|
|
|
int err;
|
2015-08-10 11:32:22 +02:00
|
|
|
va_list args;
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
va_start(args, fmt);
|
2016-09-01 01:27:22 +02:00
|
|
|
err = do_submodule_path(&buf, path, fmt, args);
|
2015-08-10 11:32:22 +02:00
|
|
|
va_end(args);
|
2016-09-01 01:27:22 +02:00
|
|
|
if (err) {
|
|
|
|
strbuf_release(&buf);
|
|
|
|
return NULL;
|
|
|
|
}
|
2015-08-10 11:32:22 +02:00
|
|
|
return strbuf_detach(&buf, NULL);
|
|
|
|
}
|
|
|
|
|
2016-09-01 01:27:22 +02:00
|
|
|
/*
 * Append a path inside the git directory of the submodule at path to buf.
 * Returns whatever do_submodule_path() returns (0 on success).
 */
int strbuf_git_path_submodule(struct strbuf *buf, const char *path,
			      const char *fmt, ...)
{
	va_list ap;
	int status;

	va_start(ap, fmt);
	status = do_submodule_path(buf, path, fmt, ap);
	va_end(ap);

	return status;
}
|
|
|
|
|
2017-06-22 20:43:37 +02:00
|
|
|
static void do_git_common_path(const struct repository *repo,
|
|
|
|
struct strbuf *buf,
|
2016-04-22 15:01:25 +02:00
|
|
|
const char *fmt,
|
|
|
|
va_list args)
|
|
|
|
{
|
2017-06-22 20:43:37 +02:00
|
|
|
strbuf_addstr(buf, repo->commondir);
|
2016-04-22 15:01:25 +02:00
|
|
|
if (buf->len && !is_dir_sep(buf->buf[buf->len - 1]))
|
|
|
|
strbuf_addch(buf, '/');
|
|
|
|
strbuf_vaddf(buf, fmt, args);
|
|
|
|
strbuf_cleanup_path(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
const char *git_common_path(const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct strbuf *pathname = get_pathname();
|
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
2017-06-22 20:43:37 +02:00
|
|
|
do_git_common_path(the_repository, pathname, fmt, args);
|
2016-04-22 15:01:25 +02:00
|
|
|
va_end(args);
|
|
|
|
return pathname->buf;
|
|
|
|
}
|
|
|
|
|
2017-06-22 20:43:37 +02:00
|
|
|
/*
 * Append a path under repo's common dir, built from a printf-style format,
 * to sb.
 */
void strbuf_git_common_path(struct strbuf *sb,
			    const struct repository *repo,
			    const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	do_git_common_path(repo, sb, fmt, ap);
	va_end(ap);
}
|
|
|
|
|
2007-01-02 08:31:08 +01:00
|
|
|
int validate_headref(const char *path)
|
2005-11-18 23:59:34 +01:00
|
|
|
{
|
|
|
|
struct stat st;
|
2017-09-27 08:17:26 +02:00
|
|
|
char buffer[256];
|
|
|
|
const char *refname;
|
validate_headref: use get_oid_hex for detached HEADs
If a candidate HEAD isn't a symref, we check that it
contains a viable sha1. But in a post-sha1 world, we should
be checking whether it has any plausible object-id.
We can do that by switching to get_oid_hex().
Note that both before and after this patch, we only check
for a plausible object id at the start of the file, and then
call that good enough. We ignore any content _after_ the
hex, so a string like:
0123456789012345678901234567890123456789 foo
is accepted. Though we do put extra bytes like this into
some pseudorefs (e.g., FETCH_HEAD), we don't typically do so
with HEAD. We could tighten this up by using parse_oid_hex(),
like:
if (!parse_oid_hex(buffer, &oid, &end) &&
*end++ == '\n' && *end == '\0')
return 0;
But we're probably better to remain on the loose side. We're
just checking here for a plausible-looking repository
directory, so heuristics are acceptable (if we really want
to be meticulous, we should use the actual ref code to parse
HEAD).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-27 08:17:36 +02:00
|
|
|
struct object_id oid;
|
2008-04-27 20:21:58 +02:00
|
|
|
int fd;
|
|
|
|
ssize_t len;
|
2005-11-18 23:59:34 +01:00
|
|
|
|
|
|
|
if (lstat(path, &st) < 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
/* Make sure it is a "refs/.." symlink */
|
|
|
|
if (S_ISLNK(st.st_mode)) {
|
|
|
|
len = readlink(path, buffer, sizeof(buffer)-1);
|
2009-02-12 22:02:09 +01:00
|
|
|
if (len >= 5 && !memcmp("refs/", buffer, 5))
|
2005-11-18 23:59:34 +01:00
|
|
|
return 0;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Anything else, just open it and try to see if it is a symbolic ref.
|
|
|
|
*/
|
|
|
|
fd = open(path, O_RDONLY);
|
|
|
|
if (fd < 0)
|
|
|
|
return -1;
|
2007-01-08 16:58:08 +01:00
|
|
|
len = read_in_full(fd, buffer, sizeof(buffer)-1);
|
2005-11-18 23:59:34 +01:00
|
|
|
close(fd);
|
|
|
|
|
validate_headref: NUL-terminate HEAD buffer
When we are checking to see if we have a git repo, we peek
into the HEAD file and see if it's a plausible symlink,
symref, or detached HEAD.
For the latter two, we read the contents with read_in_full(),
which means they aren't NUL-terminated. The symref check is
careful to respect the length we got, but the sha1 check
will happily parse up to 40 bytes, even if we read fewer.
E.g.,:
echo 1234 >.git/HEAD
git rev-parse
will parse 36 uninitialized bytes from our stack buffer.
This isn't a big deal in practice. Our buffer is 256 bytes,
so we know we'll never read outside of it. The worst case is
that the uninitialized bytes look like valid hex, and we
claim a bogus HEAD file is valid. The chances of this
happening randomly are quite slim, but let's be careful.
One option would be to check that "len == 41" before feeding
the buffer to get_sha1_hex(). But we'd like to eventually
prepare for a world with variable-length hashes. Let's
NUL-terminate as soon as we've read the buffer (we already
even leave a spare byte to do so!). That fixes this problem
without depending on the size of an object id.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-27 08:17:23 +02:00
|
|
|
if (len < 0)
|
|
|
|
return -1;
|
|
|
|
buffer[len] = '\0';
|
|
|
|
|
2005-11-18 23:59:34 +01:00
|
|
|
/*
|
|
|
|
* Is it a symbolic ref?
|
|
|
|
*/
|
2017-09-27 08:17:26 +02:00
|
|
|
if (skip_prefix(buffer, "ref:", &refname)) {
|
|
|
|
while (isspace(*refname))
|
|
|
|
refname++;
|
|
|
|
if (starts_with(refname, "refs/"))
|
2007-01-02 08:31:08 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Is this a detached HEAD?
|
|
|
|
*/
|
validate_headref: use get_oid_hex for detached HEADs
If a candidate HEAD isn't a symref, we check that it
contains a viable sha1. But in a post-sha1 world, we should
be checking whether it has any plausible object-id.
We can do that by switching to get_oid_hex().
Note that both before and after this patch, we only check
for a plausible object id at the start of the file, and then
call that good enough. We ignore any content _after_ the
hex, so a string like:
0123456789012345678901234567890123456789 foo
is accepted. Though we do put extra bytes like this into
some pseudorefs (e.g., FETCH_HEAD), we don't typically do so
with HEAD. We could tighten this up by using parse_oid_hex(),
like:
if (!parse_oid_hex(buffer, &oid, &end) &&
*end++ == '\n' && *end == '\0')
return 0;
But we're probably better to remain on the loose side. We're
just checking here for a plausible-looking repository
directory, so heuristics are acceptable (if we really want
to be meticulous, we should use the actual ref code to parse
HEAD).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-27 08:17:36 +02:00
|
|
|
if (!get_oid_hex(buffer, &oid))
|
2005-11-18 23:59:34 +01:00
|
|
|
return 0;
|
2007-01-02 08:31:08 +01:00
|
|
|
|
2005-11-18 23:59:34 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2009-11-17 18:24:25 +01:00
|
|
|
/*
 * Look up the passwd entry for a user name given as (pointer, length)
 * rather than as a NUL-terminated string. Returns whatever getpwnam()
 * returns for that name.
 */
static struct passwd *getpw_str(const char *username, size_t len)
{
	/* getpwnam() needs a NUL-terminated name, so make a temporary copy. */
	char *name = xmemdupz(username, len);
	struct passwd *entry = getpwnam(name);

	free(name);
	return entry;
}
|
2005-11-17 20:37:14 +01:00
|
|
|
|
2009-11-17 18:24:25 +01:00
|
|
|
/*
|
|
|
|
* Return a string with ~ and ~user expanded via getpw*. If buf != NULL,
|
|
|
|
* then it is a newly allocated string. Returns NULL on getpw failure or
|
|
|
|
* if path is NULL.
|
2017-04-05 12:24:38 +02:00
|
|
|
*
|
2020-03-10 14:11:22 +01:00
|
|
|
* If real_home is true, strbuf_realpath($HOME) is used in the expansion.
|
2009-11-17 18:24:25 +01:00
|
|
|
*/
|
2017-04-05 12:24:38 +02:00
|
|
|
char *expand_user_path(const char *path, int real_home)
|
2009-11-17 18:24:25 +01:00
|
|
|
{
|
|
|
|
struct strbuf user_path = STRBUF_INIT;
|
|
|
|
const char *to_copy = path;
|
|
|
|
|
|
|
|
if (path == NULL)
|
|
|
|
goto return_null;
|
|
|
|
if (path[0] == '~') {
|
2014-01-28 02:36:12 +01:00
|
|
|
const char *first_slash = strchrnul(path, '/');
|
2009-11-17 18:24:25 +01:00
|
|
|
const char *username = path + 1;
|
|
|
|
size_t username_len = first_slash - username;
|
2009-11-19 16:21:15 +01:00
|
|
|
if (username_len == 0) {
|
|
|
|
const char *home = getenv("HOME");
|
2010-07-26 17:06:51 +02:00
|
|
|
if (!home)
|
|
|
|
goto return_null;
|
2017-04-05 12:24:38 +02:00
|
|
|
if (real_home)
|
2017-10-01 16:44:06 +02:00
|
|
|
strbuf_add_real_path(&user_path, home);
|
2017-04-05 12:24:38 +02:00
|
|
|
else
|
|
|
|
strbuf_addstr(&user_path, home);
|
2016-03-23 11:55:00 +01:00
|
|
|
#ifdef GIT_WINDOWS_NATIVE
|
|
|
|
convert_slashes(user_path.buf);
|
|
|
|
#endif
|
2009-11-19 16:21:15 +01:00
|
|
|
} else {
|
|
|
|
struct passwd *pw = getpw_str(username, username_len);
|
|
|
|
if (!pw)
|
|
|
|
goto return_null;
|
2014-07-17 01:38:18 +02:00
|
|
|
strbuf_addstr(&user_path, pw->pw_dir);
|
2005-11-17 20:37:14 +01:00
|
|
|
}
|
2009-11-17 18:24:25 +01:00
|
|
|
to_copy = first_slash;
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
}
|
2014-07-17 01:38:18 +02:00
|
|
|
strbuf_addstr(&user_path, to_copy);
|
2009-11-17 18:24:25 +01:00
|
|
|
return strbuf_detach(&user_path, NULL);
|
|
|
|
return_null:
|
|
|
|
strbuf_release(&user_path);
|
|
|
|
return NULL;
|
2005-11-17 20:37:14 +01:00
|
|
|
}
|
|
|
|
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
/*
|
|
|
|
* First, one directory to try is determined by the following algorithm.
|
|
|
|
*
|
|
|
|
* (0) If "strict" is given, the path is used as given and no DWIM is
|
|
|
|
* done. Otherwise:
|
|
|
|
* (1) "~/path" to mean path under the running user's home directory;
|
|
|
|
* (2) "~user/path" to mean path under named user's home directory;
|
|
|
|
* (3) "relative/path" to mean cwd relative directory; or
|
|
|
|
* (4) "/absolute/path" to mean absolute directory.
|
|
|
|
*
|
2015-03-31 15:39:27 +02:00
|
|
|
* Unless "strict" is given, we check "%s/.git", "%s", "%s.git/.git", "%s.git"
|
|
|
|
* in this order. We select the first one that is a valid git repository, and
|
|
|
|
* chdir() to it. If none match, or we fail to chdir, we return NULL.
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
*
|
|
|
|
* If all goes well, we return the directory we used to chdir() (but
|
|
|
|
* before ~user is expanded), avoiding getcwd() resolving symbolic
|
|
|
|
* links. User relative paths are also returned as they are given,
|
|
|
|
* except DWIM suffixing.
|
|
|
|
*/
|
2011-10-04 22:02:00 +02:00
|
|
|
const char *enter_repo(const char *path, int strict)
|
2005-11-17 20:37:14 +01:00
|
|
|
{
|
2015-09-24 23:07:45 +02:00
|
|
|
static struct strbuf validated_path = STRBUF_INIT;
|
|
|
|
static struct strbuf used_path = STRBUF_INIT;
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
|
|
|
|
if (!path)
|
2005-11-17 20:37:14 +01:00
|
|
|
return NULL;
|
|
|
|
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
if (!strict) {
|
|
|
|
static const char *suffix[] = {
|
standardize and improve lookup rules for external local repos
When you specify a local repository on the command line of
clone, ls-remote, upload-pack, receive-pack, or upload-archive,
or in a request to git-daemon, we perform a little bit of
lookup magic, doing things like looking in working trees for
.git directories and appending ".git" for bare repos.
For clone, this magic happens in get_repo_path. For
everything else, it happens in enter_repo. In both cases,
there are some ambiguous or confusing cases that aren't
handled well, and there is one case that is not handled the
same by both methods.
This patch tries to provide (and test!) standard, sensible
lookup rules for both code paths. The intended changes are:
1. When looking up "foo", we have always preferred
a working tree "foo" (containing "foo/.git" over the
bare "foo.git". But we did not prefer a bare "foo" over
"foo.git". With this patch, we do so.
2. We would select directories that existed but didn't
actually look like git repositories. With this patch,
we make sure a selected directory looks like a git
repo. Not only is this more sensible in general, but it
will help anybody who is negatively affected by change
(1) negatively (e.g., if they had "foo.git" next to its
separate work tree "foo", and expect to keep finding
"foo.git" when they reference "foo").
3. The enter_repo code path would, given "foo", look for
"foo.git/.git" (i.e., do the ".git" append magic even
for a repo with working tree). The clone code path did
not; with this patch, they now behave the same.
In the unlikely case of a working tree overlaying a bare
repo (i.e., a ".git" directory _inside_ a bare repo), we
continue to treat it as a working tree (prefering the
"inner" .git over the bare repo). This is mainly because the
combination seems nonsensical, and I'd rather stick with
existing behavior on the off chance that somebody is relying
on it.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-02-02 22:59:13 +01:00
|
|
|
"/.git", "", ".git/.git", ".git", NULL,
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
};
|
2011-10-04 22:05:17 +02:00
|
|
|
const char *gitfile;
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
int len = strlen(path);
|
|
|
|
int i;
|
2011-10-04 22:02:00 +02:00
|
|
|
while ((1 < len) && (path[len-1] == '/'))
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
len--;
|
2011-10-04 22:02:00 +02:00
|
|
|
|
2015-09-24 23:07:45 +02:00
|
|
|
/*
|
|
|
|
* We can handle arbitrary-sized buffers, but this remains as a
|
|
|
|
* sanity check on untrusted input.
|
|
|
|
*/
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
if (PATH_MAX <= len)
|
2005-11-17 20:37:14 +01:00
|
|
|
return NULL;
|
2011-10-04 22:02:00 +02:00
|
|
|
|
2015-09-24 23:07:45 +02:00
|
|
|
strbuf_reset(&used_path);
|
|
|
|
strbuf_reset(&validated_path);
|
|
|
|
strbuf_add(&used_path, path, len);
|
|
|
|
strbuf_add(&validated_path, path, len);
|
|
|
|
|
|
|
|
if (used_path.buf[0] == '~') {
|
2017-04-05 12:24:38 +02:00
|
|
|
char *newpath = expand_user_path(used_path.buf, 0);
|
2015-09-24 23:07:45 +02:00
|
|
|
if (!newpath)
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
return NULL;
|
2015-09-24 23:07:45 +02:00
|
|
|
strbuf_attach(&used_path, newpath, strlen(newpath),
|
|
|
|
strlen(newpath));
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
}
|
|
|
|
for (i = 0; suffix[i]; i++) {
|
standardize and improve lookup rules for external local repos
When you specify a local repository on the command line of
clone, ls-remote, upload-pack, receive-pack, or upload-archive,
or in a request to git-daemon, we perform a little bit of
lookup magic, doing things like looking in working trees for
.git directories and appending ".git" for bare repos.
For clone, this magic happens in get_repo_path. For
everything else, it happens in enter_repo. In both cases,
there are some ambiguous or confusing cases that aren't
handled well, and there is one case that is not handled the
same by both methods.
This patch tries to provide (and test!) standard, sensible
lookup rules for both code paths. The intended changes are:
1. When looking up "foo", we have always preferred
a working tree "foo" (containing "foo/.git" over the
bare "foo.git". But we did not prefer a bare "foo" over
"foo.git". With this patch, we do so.
2. We would select directories that existed but didn't
actually look like git repositories. With this patch,
we make sure a selected directory looks like a git
repo. Not only is this more sensible in general, but it
will help anybody who is negatively affected by change
(1) negatively (e.g., if they had "foo.git" next to its
separate work tree "foo", and expect to keep finding
"foo.git" when they reference "foo").
3. The enter_repo code path would, given "foo", look for
"foo.git/.git" (i.e., do the ".git" append magic even
for a repo with working tree). The clone code path did
not; with this patch, they now behave the same.
In the unlikely case of a working tree overlaying a bare
repo (i.e., a ".git" directory _inside_ a bare repo), we
continue to treat it as a working tree (prefering the
"inner" .git over the bare repo). This is mainly because the
combination seems nonsensical, and I'd rather stick with
existing behavior on the off chance that somebody is relying
on it.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-02-02 22:59:13 +01:00
|
|
|
struct stat st;
|
2015-09-24 23:07:45 +02:00
|
|
|
size_t baselen = used_path.len;
|
|
|
|
strbuf_addstr(&used_path, suffix[i]);
|
|
|
|
if (!stat(used_path.buf, &st) &&
|
standardize and improve lookup rules for external local repos
When you specify a local repository on the command line of
clone, ls-remote, upload-pack, receive-pack, or upload-archive,
or in a request to git-daemon, we perform a little bit of
lookup magic, doing things like looking in working trees for
.git directories and appending ".git" for bare repos.
For clone, this magic happens in get_repo_path. For
everything else, it happens in enter_repo. In both cases,
there are some ambiguous or confusing cases that aren't
handled well, and there is one case that is not handled the
same by both methods.
This patch tries to provide (and test!) standard, sensible
lookup rules for both code paths. The intended changes are:
1. When looking up "foo", we have always preferred
a working tree "foo" (containing "foo/.git" over the
bare "foo.git". But we did not prefer a bare "foo" over
"foo.git". With this patch, we do so.
2. We would select directories that existed but didn't
actually look like git repositories. With this patch,
we make sure a selected directory looks like a git
repo. Not only is this more sensible in general, but it
will help anybody who is negatively affected by change
(1) negatively (e.g., if they had "foo.git" next to its
separate work tree "foo", and expect to keep finding
"foo.git" when they reference "foo").
3. The enter_repo code path would, given "foo", look for
"foo.git/.git" (i.e., do the ".git" append magic even
for a repo with working tree). The clone code path did
not; with this patch, they now behave the same.
In the unlikely case of a working tree overlaying a bare
repo (i.e., a ".git" directory _inside_ a bare repo), we
continue to treat it as a working tree (prefering the
"inner" .git over the bare repo). This is mainly because the
combination seems nonsensical, and I'd rather stick with
existing behavior on the off chance that somebody is relying
on it.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-02-02 22:59:13 +01:00
|
|
|
(S_ISREG(st.st_mode) ||
|
2015-09-24 23:07:45 +02:00
|
|
|
(S_ISDIR(st.st_mode) && is_git_directory(used_path.buf)))) {
|
|
|
|
strbuf_addstr(&validated_path, suffix[i]);
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
break;
|
|
|
|
}
|
2015-09-24 23:07:45 +02:00
|
|
|
strbuf_setlen(&used_path, baselen);
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
}
|
2011-10-04 22:05:17 +02:00
|
|
|
if (!suffix[i])
|
|
|
|
return NULL;
|
2015-10-21 00:24:00 +02:00
|
|
|
gitfile = read_gitfile(used_path.buf);
|
2015-09-24 23:07:45 +02:00
|
|
|
if (gitfile) {
|
|
|
|
strbuf_reset(&used_path);
|
|
|
|
strbuf_addstr(&used_path, gitfile);
|
|
|
|
}
|
|
|
|
if (chdir(used_path.buf))
|
2005-11-18 23:59:34 +01:00
|
|
|
return NULL;
|
2015-09-24 23:07:45 +02:00
|
|
|
path = validated_path.buf;
|
2005-11-18 23:59:34 +01:00
|
|
|
}
|
2015-09-28 15:06:14 +02:00
|
|
|
else {
|
|
|
|
const char *gitfile = read_gitfile(path);
|
|
|
|
if (gitfile)
|
|
|
|
path = gitfile;
|
|
|
|
if (chdir(path))
|
|
|
|
return NULL;
|
|
|
|
}
|
2005-11-17 20:37:14 +01:00
|
|
|
|
2015-09-28 15:06:13 +02:00
|
|
|
if (is_git_directory(".")) {
|
2020-03-06 20:03:13 +01:00
|
|
|
set_git_dir(".", 0);
|
2020-02-22 21:17:37 +01:00
|
|
|
check_repository_format(NULL);
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
return path;
|
2005-11-17 20:37:14 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
2006-06-10 07:07:23 +02:00
|
|
|
|
2013-03-30 10:53:47 +01:00
|
|
|
/*
 * Compute the permission bits a path with the given "mode" should have
 * under the current shared-repository setting.
 *
 * The value returned by get_shared_repository() is used as a bit mask.
 * When it is negative, its absolute value replaces the low nine
 * permission bits of "mode"; otherwise it is OR-ed into "mode".
 * Write bits are dropped from the mask when the owner cannot write,
 * and read bits are mirrored into execute bits when the owner can
 * execute.
 */
static int calc_shared_perm(int mode)
{
	const int shared = get_shared_repository();
	const int replace_bits = shared < 0;
	int mask = replace_bits ? -shared : shared;

	if (!(mode & S_IWUSR))
		mask &= ~0222;
	if (mode & S_IXUSR)
		mask |= (mask & 0444) >> 2;	/* copy read bits to execute bits */

	return replace_bits ? ((mode & ~0777) | mask) : (mode | mask);
}
|
|
|
|
|
|
|
|
|
|
|
|
int adjust_shared_perm(const char *path)
|
|
|
|
{
|
|
|
|
int old_mode, new_mode;
|
|
|
|
|
2016-03-11 23:36:49 +01:00
|
|
|
if (!get_shared_repository())
|
2013-03-30 10:53:47 +01:00
|
|
|
return 0;
|
|
|
|
if (get_st_mode_bits(path, &old_mode) < 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
new_mode = calc_shared_perm(old_mode);
|
|
|
|
if (S_ISDIR(old_mode)) {
|
2008-04-16 10:34:24 +02:00
|
|
|
/* Copy read bits to execute bits */
|
2013-03-30 10:53:47 +01:00
|
|
|
new_mode |= (new_mode & 0444) >> 2;
|
|
|
|
new_mode |= FORCE_DIR_SET_GID;
|
2008-04-16 10:34:24 +02:00
|
|
|
}
|
|
|
|
|
2013-03-30 10:53:47 +01:00
|
|
|
if (((old_mode ^ new_mode) & ~S_IFMT) &&
|
|
|
|
chmod(path, (new_mode & ~S_IFMT)) < 0)
|
2006-06-10 07:07:23 +02:00
|
|
|
return -2;
|
|
|
|
return 0;
|
|
|
|
}
|
2007-08-01 02:28:59 +02:00
|
|
|
|
2015-11-10 12:42:38 +01:00
|
|
|
/*
 * Create directory "dir" with mode 0777.
 *
 * A freshly created directory gets its permissions adjusted through
 * adjust_shared_perm() when "share" is non-zero; failure to do so is
 * fatal.  An already existing directory (EEXIST) is left untouched.
 * Any other mkdir() error is reported with perror() and the process
 * exits with status 1.
 */
void safe_create_dir(const char *dir, int share)
{
	if (mkdir(dir, 0777) >= 0) {
		if (share && adjust_shared_perm(dir))
			die(_("Could not make %s writable by group"), dir);
		return;
	}

	if (errno == EEXIST)
		return;

	perror(dir);
	exit(1);
}
|
|
|
|
|
2013-10-14 04:29:39 +02:00
|
|
|
/*
 * Tell whether two paths can be expressed relative to each other:
 * either both are relative, or both are absolute and their first
 * characters agree case-insensitively.
 */
static int have_same_root(const char *path1, const char *path2)
{
	const int abs1 = is_absolute_path(path1);
	const int abs2 = is_absolute_path(path2);

	/* one absolute, the other relative: no common root */
	if (!abs1 != !abs2)
		return 0;
	if (!abs1)
		return 1;

	return tolower(path1[0]) == tolower(path2[0]);
}
|
|
|
|
|
2013-06-25 17:53:43 +02:00
|
|
|
/*
|
|
|
|
* Give path as relative to prefix.
|
|
|
|
*
|
|
|
|
* The strbuf may or may not be used, so do not assume it contains the
|
|
|
|
* returned path.
|
|
|
|
*/
|
|
|
|
const char *relative_path(const char *in, const char *prefix,
|
|
|
|
struct strbuf *sb)
|
Make git_dir a path relative to work_tree in setup_work_tree()
Once we find the absolute paths for git_dir and work_tree, we can make
git_dir a relative path since we know pwd will be work_tree. This should
save the kernel some time traversing the path to work_tree all the time
if git_dir is inside work_tree.
Daniel's patch didn't apply for me as-is, so I recreated it with some
differences, and here are the numbers from ten runs each.
There is some IO for me - probably due to more-or-less random flushing of
the journal - so the variation is bigger than I'd like, but whatever:
Before:
real 0m8.135s
real 0m7.933s
real 0m8.080s
real 0m7.954s
real 0m7.949s
real 0m8.112s
real 0m7.934s
real 0m8.059s
real 0m7.979s
real 0m8.038s
After:
real 0m7.685s
real 0m7.968s
real 0m7.703s
real 0m7.850s
real 0m7.995s
real 0m7.817s
real 0m7.963s
real 0m7.955s
real 0m7.848s
real 0m7.969s
Now, going by "best of ten" (on the assumption that the longer numbers
are all due to IO), I'm saying a 7.933s -> 7.685s reduction, and it does
seem to be outside of the noise (ie the "after" case never broke 8s, while
the "before" case did so half the time).
So looks like about 3% to me.
Doing it for a slightly smaller test-case (just the "arch" subdirectory)
gets more stable numbers probably due to not filling the journal with
metadata updates, so we have:
Before:
real 0m1.633s
real 0m1.633s
real 0m1.633s
real 0m1.632s
real 0m1.632s
real 0m1.630s
real 0m1.634s
real 0m1.631s
real 0m1.632s
real 0m1.632s
After:
real 0m1.610s
real 0m1.609s
real 0m1.610s
real 0m1.608s
real 0m1.607s
real 0m1.610s
real 0m1.609s
real 0m1.611s
real 0m1.608s
real 0m1.611s
where I'ld just take the averages and say 1.632 vs 1.610, which is just
over 1% peformance improvement.
So it's not in the noise, but it's not as big as I initially thought and
measured.
(That said, it obviously depends on how deep the working directory path is
too, and whether it is behind NFS or something else that might need to
cause more work to look up).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-19 21:34:06 +02:00
|
|
|
{
|
2013-06-25 17:53:43 +02:00
|
|
|
int in_len = in ? strlen(in) : 0;
|
|
|
|
int prefix_len = prefix ? strlen(prefix) : 0;
|
|
|
|
int in_off = 0;
|
|
|
|
int prefix_off = 0;
|
2010-01-22 04:05:19 +01:00
|
|
|
int i = 0, j = 0;
|
|
|
|
|
2013-06-25 17:53:43 +02:00
|
|
|
if (!in_len)
|
|
|
|
return "./";
|
|
|
|
else if (!prefix_len)
|
|
|
|
return in;
|
|
|
|
|
2016-01-12 08:57:22 +01:00
|
|
|
if (have_same_root(in, prefix))
|
2013-10-14 04:29:39 +02:00
|
|
|
/* bypass dos_drive, for "c:" is identical to "C:" */
|
2016-01-12 08:57:22 +01:00
|
|
|
i = j = has_dos_drive_prefix(in);
|
|
|
|
else {
|
2013-10-14 04:29:39 +02:00
|
|
|
return in;
|
|
|
|
}
|
|
|
|
|
2013-06-25 17:53:43 +02:00
|
|
|
while (i < prefix_len && j < in_len && prefix[i] == in[j]) {
|
|
|
|
if (is_dir_sep(prefix[i])) {
|
|
|
|
while (is_dir_sep(prefix[i]))
|
2010-01-22 04:05:19 +01:00
|
|
|
i++;
|
2013-06-25 17:53:43 +02:00
|
|
|
while (is_dir_sep(in[j]))
|
|
|
|
j++;
|
|
|
|
prefix_off = i;
|
|
|
|
in_off = j;
|
|
|
|
} else {
|
|
|
|
i++;
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (
|
|
|
|
/* "prefix" seems like prefix of "in" */
|
|
|
|
i >= prefix_len &&
|
|
|
|
/*
|
|
|
|
* but "/foo" is not a prefix of "/foobar"
|
|
|
|
* (i.e. prefix not end with '/')
|
|
|
|
*/
|
|
|
|
prefix_off < prefix_len) {
|
|
|
|
if (j >= in_len) {
|
|
|
|
/* in="/a/b", prefix="/a/b" */
|
|
|
|
in_off = in_len;
|
|
|
|
} else if (is_dir_sep(in[j])) {
|
|
|
|
/* in="/a/b/c", prefix="/a/b" */
|
|
|
|
while (is_dir_sep(in[j]))
|
2010-01-22 04:05:19 +01:00
|
|
|
j++;
|
2013-06-25 17:53:43 +02:00
|
|
|
in_off = j;
|
|
|
|
} else {
|
|
|
|
/* in="/a/bbb/c", prefix="/a/b" */
|
|
|
|
i = prefix_off;
|
|
|
|
}
|
|
|
|
} else if (
|
|
|
|
/* "in" is short than "prefix" */
|
|
|
|
j >= in_len &&
|
|
|
|
/* "in" not end with '/' */
|
|
|
|
in_off < in_len) {
|
|
|
|
if (is_dir_sep(prefix[i])) {
|
|
|
|
/* in="/a/b", prefix="/a/b/c/" */
|
|
|
|
while (is_dir_sep(prefix[i]))
|
|
|
|
i++;
|
|
|
|
in_off = in_len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
in += in_off;
|
|
|
|
in_len -= in_off;
|
|
|
|
|
|
|
|
if (i >= prefix_len) {
|
|
|
|
if (!in_len)
|
|
|
|
return "./";
|
|
|
|
else
|
|
|
|
return in;
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_reset(sb);
|
|
|
|
strbuf_grow(sb, in_len);
|
|
|
|
|
|
|
|
while (i < prefix_len) {
|
|
|
|
if (is_dir_sep(prefix[i])) {
|
|
|
|
strbuf_addstr(sb, "../");
|
|
|
|
while (is_dir_sep(prefix[i]))
|
|
|
|
i++;
|
2010-01-22 04:05:19 +01:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
i++;
|
|
|
|
}
|
2013-06-25 17:53:43 +02:00
|
|
|
if (!is_dir_sep(prefix[prefix_len - 1]))
|
|
|
|
strbuf_addstr(sb, "../");
|
|
|
|
|
|
|
|
strbuf_addstr(sb, in);
|
|
|
|
|
|
|
|
return sb->buf;
|
Make git_dir a path relative to work_tree in setup_work_tree()
Once we find the absolute paths for git_dir and work_tree, we can make
git_dir a relative path since we know pwd will be work_tree. This should
save the kernel some time traversing the path to work_tree all the time
if git_dir is inside work_tree.
Daniel's patch didn't apply for me as-is, so I recreated it with some
differences, and here are the numbers from ten runs each.
There is some IO for me - probably due to more-or-less random flushing of
the journal - so the variation is bigger than I'd like, but whatever:
Before:
real 0m8.135s
real 0m7.933s
real 0m8.080s
real 0m7.954s
real 0m7.949s
real 0m8.112s
real 0m7.934s
real 0m8.059s
real 0m7.979s
real 0m8.038s
After:
real 0m7.685s
real 0m7.968s
real 0m7.703s
real 0m7.850s
real 0m7.995s
real 0m7.817s
real 0m7.963s
real 0m7.955s
real 0m7.848s
real 0m7.969s
Now, going by "best of ten" (on the assumption that the longer numbers
are all due to IO), I'm saying a 7.933s -> 7.685s reduction, and it does
seem to be outside of the noise (ie the "after" case never broke 8s, while
the "before" case did so half the time).
So looks like about 3% to me.
Doing it for a slightly smaller test-case (just the "arch" subdirectory)
gets more stable numbers probably due to not filling the journal with
metadata updates, so we have:
Before:
real 0m1.633s
real 0m1.633s
real 0m1.633s
real 0m1.632s
real 0m1.632s
real 0m1.630s
real 0m1.634s
real 0m1.631s
real 0m1.632s
real 0m1.632s
After:
real 0m1.610s
real 0m1.609s
real 0m1.610s
real 0m1.608s
real 0m1.607s
real 0m1.610s
real 0m1.609s
real 0m1.611s
real 0m1.608s
real 0m1.611s
where I'ld just take the averages and say 1.632 vs 1.610, which is just
over 1% peformance improvement.
So it's not in the noise, but it's not as big as I initially thought and
measured.
(That said, it obviously depends on how deep the working directory path is
too, and whether it is behind NFS or something else that might need to
cause more work to look up).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-19 21:34:06 +02:00
|
|
|
}
|
2008-05-20 08:48:54 +02:00
|
|
|
|
2013-10-14 04:29:40 +02:00
|
|
|
/*
|
|
|
|
* A simpler implementation of relative_path
|
|
|
|
*
|
|
|
|
* Get relative path by removing "prefix" from "in". This function
|
|
|
|
* first appears in v1.5.6-1-g044bbbc, and makes git_dir shorter
|
|
|
|
* to increase performance when traversing the path to work_tree.
|
|
|
|
*/
|
|
|
|
const char *remove_leading_path(const char *in, const char *prefix)
|
|
|
|
{
|
2015-09-24 23:07:47 +02:00
|
|
|
static struct strbuf buf = STRBUF_INIT;
|
2013-10-14 04:29:40 +02:00
|
|
|
int i = 0, j = 0;
|
|
|
|
|
|
|
|
if (!prefix || !prefix[0])
|
|
|
|
return in;
|
|
|
|
while (prefix[i]) {
|
|
|
|
if (is_dir_sep(prefix[i])) {
|
|
|
|
if (!is_dir_sep(in[j]))
|
|
|
|
return in;
|
|
|
|
while (is_dir_sep(prefix[i]))
|
|
|
|
i++;
|
|
|
|
while (is_dir_sep(in[j]))
|
|
|
|
j++;
|
|
|
|
continue;
|
|
|
|
} else if (in[j] != prefix[i]) {
|
|
|
|
return in;
|
|
|
|
}
|
|
|
|
i++;
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
if (
|
|
|
|
/* "/foo" is a prefix of "/foo" */
|
|
|
|
in[j] &&
|
|
|
|
/* "/foo" is not a prefix of "/foobar" */
|
|
|
|
!is_dir_sep(prefix[i-1]) && !is_dir_sep(in[j])
|
|
|
|
)
|
|
|
|
return in;
|
|
|
|
while (is_dir_sep(in[j]))
|
|
|
|
j++;
|
2015-09-24 23:07:47 +02:00
|
|
|
|
|
|
|
strbuf_reset(&buf);
|
2013-10-14 04:29:40 +02:00
|
|
|
if (!in[j])
|
2015-09-24 23:07:47 +02:00
|
|
|
strbuf_addstr(&buf, ".");
|
2013-10-14 04:29:40 +02:00
|
|
|
else
|
2015-09-24 23:07:47 +02:00
|
|
|
strbuf_addstr(&buf, in + j);
|
|
|
|
return buf.buf;
|
2013-10-14 04:29:40 +02:00
|
|
|
}
|
|
|
|
|
2008-05-20 08:48:54 +02:00
|
|
|
/*
 * It is okay if dst == src, but they should not overlap otherwise.
 * The "dst" buffer must be at least as long as "src"; normalizing may shrink
 * the size of the path, but will never grow it.
 *
 * Performs the following normalizations on src, storing the result in dst:
 * - Ensures that components are separated by '/' (Windows only)
 * - Squashes sequences of '/' except "//server/share" on Windows
 * - Removes "." components.
 * - Removes ".." components, and the components that precede them.
 * Returns failure (non-zero) if a ".." component appears as first path
 * component anytime during the normalization. Otherwise, returns success (0).
 *
 * Note that this function is purely textual.  It does not follow symlinks,
 * verify the existence of the path, or make any system calls.
 *
 * prefix_len != NULL is for a specific case of prefix_pathspec():
 * assume that src == dst and src[0..prefix_len-1] is already
 * normalized, any time "../" eats up to the prefix_len part,
 * prefix_len is reduced. In the end prefix_len is the remaining
 * prefix that has not been overridden by user pathspec.
 *
 * NEEDSWORK: This function doesn't perform normalization w.r.t. trailing '/'.
 * For everything but the root folder itself, the normalized path should not
 * end with a '/', then the callers need to be fixed up accordingly.
 */
int normalize_path_copy_len(char *dst, const char *src, int *prefix_len)
{
	/* end of the initial part of an absolute path: "/", "C:/", "//server/share/" */
	const char *root_end = src + offset_1st_component(src);
	char *base;	/* first byte of dst after the copied root */

	for (; src < root_end; src++)
		*dst++ = is_dir_sep(*src) ? '/' : *src;
	base = dst;

	while (is_dir_sep(*src))
		src++;

	for (;;) {
		int strip_one = 0;
		char c;

		/*
		 * A path component that begins with . could be
		 * special:
		 * (1) "." and ends   -- ignore and terminate.
		 * (2) "./"           -- ignore them, eat slash and continue.
		 * (3) ".." and ends  -- strip one and terminate.
		 * (4) "../"          -- strip one, eat slash and continue.
		 */
		if (*src == '.') {
			if (!src[1]) {
				/* (1) */
				src++;
			} else if (is_dir_sep(src[1])) {
				/* (2) */
				src += 2;
				while (is_dir_sep(*src))
					src++;
				continue;
			} else if (src[1] == '.') {
				if (!src[2]) {
					/* (3) */
					src += 2;
					strip_one = 1;
				} else if (is_dir_sep(src[2])) {
					/* (4) */
					src += 3;
					while (is_dir_sep(*src))
						src++;
					strip_one = 1;
				}
			}
		}

		if (!strip_one) {
			/* copy up to the next '/', and eat all '/' */
			while ((c = *src) != '\0' && !is_dir_sep(c)) {
				*dst++ = c;
				src++;
			}
			if (!c)
				break;
			src++;
			while (is_dir_sep(*src))
				src++;
			*dst++ = '/';
			continue;
		}

		/*
		 * base..dst is prefix portion, and dst[-1] is '/';
		 * go up one level.  Fail when there is no component
		 * left to strip.
		 */
		if (dst - base <= 1)
			return -1;
		dst--;	/* go to trailing '/' */
		/* Windows: dst[-1] cannot be backslash anymore */
		while (base < dst && dst[-1] != '/')
			dst--;
		if (prefix_len && *prefix_len > dst - base)
			*prefix_len = dst - base;
	}

	*dst = '\0';
	return 0;
}
|
2008-05-20 08:49:26 +02:00
|
|
|
|
2013-07-14 10:36:03 +02:00
|
|
|
/*
 * Normalize "src" into "dst" via normalize_path_copy_len(), without
 * tracking a prefix length.  Returns whatever that function returns.
 */
int normalize_path_copy(char *dst, const char *src)
{
	int *no_prefix_len = NULL;

	return normalize_path_copy_len(dst, src, no_prefix_len);
}
|
|
|
|
|
2008-05-20 08:49:26 +02:00
|
|
|
/*
|
|
|
|
* path = Canonical absolute path
|
2012-10-28 17:16:25 +01:00
|
|
|
* prefixes = string_list containing normalized, absolute paths without
|
|
|
|
* trailing slashes (except for the root directory, which is denoted by "/").
|
2008-05-20 08:49:26 +02:00
|
|
|
*
|
2012-10-28 17:16:25 +01:00
|
|
|
* Determines, for each path in prefixes, whether the "prefix"
|
2008-05-20 08:49:26 +02:00
|
|
|
* is an ancestor directory of path. Returns the length of the longest
|
|
|
|
* ancestor directory, excluding any trailing slashes, or -1 if no prefix
|
2012-10-28 17:16:24 +01:00
|
|
|
* is an ancestor. (Note that this means 0 is returned if prefixes is
|
|
|
|
* ["/"].) "/foo" is not considered an ancestor of "/foobar". Directories
|
2008-05-20 08:49:26 +02:00
|
|
|
* are not considered to be their own ancestors. path must be in a
|
|
|
|
* canonical form: empty components, or "." or ".." components are not
|
2012-10-28 17:16:25 +01:00
|
|
|
* allowed.
|
2008-05-20 08:49:26 +02:00
|
|
|
*/
|
2012-10-28 17:16:24 +01:00
|
|
|
int longest_ancestor_length(const char *path, struct string_list *prefixes)
|
2008-05-20 08:49:26 +02:00
|
|
|
{
|
2012-10-28 17:16:23 +01:00
|
|
|
int i, max_len = -1;
|
2008-05-20 08:49:26 +02:00
|
|
|
|
2012-10-28 17:16:24 +01:00
|
|
|
if (!strcmp(path, "/"))
|
2008-05-20 08:49:26 +02:00
|
|
|
return -1;
|
|
|
|
|
2012-10-28 17:16:24 +01:00
|
|
|
for (i = 0; i < prefixes->nr; i++) {
|
|
|
|
const char *ceil = prefixes->items[i].string;
|
2012-10-28 17:16:23 +01:00
|
|
|
int len = strlen(ceil);
|
|
|
|
|
2012-10-28 17:16:25 +01:00
|
|
|
if (len == 1 && ceil[0] == '/')
|
|
|
|
len = 0; /* root matches anything, with length 0 */
|
|
|
|
else if (!strncmp(path, ceil, len) && path[len] == '/')
|
|
|
|
; /* match of length len */
|
|
|
|
else
|
|
|
|
continue; /* no match */
|
2008-05-20 08:49:26 +02:00
|
|
|
|
2012-10-28 17:16:25 +01:00
|
|
|
if (len > max_len)
|
2008-05-20 08:49:26 +02:00
|
|
|
max_len = len;
|
|
|
|
}
|
|
|
|
|
|
|
|
return max_len;
|
|
|
|
}
|
2009-02-19 20:10:49 +01:00
|
|
|
|
|
|
|
/*
 * Return "len" reduced by the number of directory separators found at
 * the end of path[0..len-1].
 */
static inline int chomp_trailing_dir_sep(const char *path, int len)
{
	for (; len; len--) {
		if (!is_dir_sep(path[len - 1]))
			break;
	}
	return len;
}
|
|
|
|
|
|
|
|
/*
 * If path ends with suffix (complete path components), returns the offset of
 * the last character in the path before the suffix (sans trailing directory
 * separators), and -1 otherwise.
 *
 * Both strings are compared from their ends; a run of directory
 * separators in one must be met by a run (of any length) in the other.
 */
static ssize_t stripped_path_suffix_offset(const char *path, const char *suffix)
{
	int p = strlen(path);
	int s = strlen(suffix);

	while (s) {
		if (!p)
			return -1;

		if (!is_dir_sep(path[p - 1])) {
			/* ordinary character: must match exactly */
			p--;
			s--;
			if (path[p] != suffix[s])
				return -1;
			continue;
		}

		if (!is_dir_sep(suffix[s - 1]))
			return -1;
		p = chomp_trailing_dir_sep(path, p);
		s = chomp_trailing_dir_sep(suffix, s);
	}

	/* the suffix must start at a component boundary of path */
	if (p && !is_dir_sep(path[p - 1]))
		return -1;
	return chomp_trailing_dir_sep(path, p);
}
|
|
|
|
|
|
|
|
/*
 * Returns 1 if path ends with "components", considering only complete
 * path components, and 0 otherwise.
 */
int ends_with_path_components(const char *path, const char *components)
{
	ssize_t offset = stripped_path_suffix_offset(path, components);

	return offset == -1 ? 0 : 1;
}
|
|
|
|
|
|
|
|
/*
 * If path ends with suffix (complete path components), returns a copy,
 * made with xstrndup(), of the part before suffix (sans trailing
 * directory separators).  Otherwise returns NULL.
 */
char *strip_path_suffix(const char *path, const char *suffix)
{
	ssize_t keep = stripped_path_suffix_offset(path, suffix);

	if (keep == -1)
		return NULL;
	return xstrndup(path, keep);
}
|
2009-11-09 20:26:43 +01:00
|
|
|
|
|
|
|
/*
 * Textual sanity check on a path handed to the daemon.
 *
 * This resurrects the belts and suspenders paranoia check by HPA
 * done in <435560F7.4080006@zytor.com> thread, now enter_repo()
 * does not do getcwd() based path canonicalization.
 *
 * Returns -1 when "p" is NULL, does not start with '/' or '~', or
 * contains an empty, "." or ".." component ("//", "/./", "/../", or a
 * trailing "/." or "/.."); returns 0 otherwise.  The leading character
 * is treated like a '/', so a '/' immediately after it is rejected too.
 */
int daemon_avoid_alias(const char *p)
{
	/*
	 * after_slash is true immediately after seeing '/' and stays
	 * true as long as only dots follow; dots counts those dots.
	 */
	int after_slash = 1;
	int dots = 0;
	const char *cur;

	if (!p || (*p != '/' && *p != '~'))
		return -1;

	for (cur = p + 1; ; cur++) {
		const char ch = *cur;

		if (!after_slash) {
			if (ch == '\0')
				return 0;
			if (ch == '/') {
				after_slash = 1;
				dots = 0;
			}
			continue;
		}

		switch (ch) {
		case '.':
			dots++;
			break;
		case '/':
			/* reject //, /./ and /../ */
			if (dots < 3)
				return -1;
			dots = 0;
			break;
		case '\0':
			/* reject /.$ and /..$ */
			return (0 < dots && dots < 3) ? -1 : 0;
		default:
			after_slash = dots = 0;
			break;
		}
	}
}
|
2010-02-16 06:22:08 +01:00
|
|
|
|
2019-09-16 20:44:31 +02:00
|
|
|
/*
|
|
|
|
* On NTFS, we need to be careful to disallow certain synonyms of the `.git/`
|
|
|
|
* directory:
|
|
|
|
*
|
|
|
|
* - For historical reasons, file names that end in spaces or periods are
|
|
|
|
* automatically trimmed. Therefore, `.git . . ./` is a valid way to refer
|
|
|
|
* to `.git/`.
|
|
|
|
*
|
|
|
|
* - For other historical reasons, file names that do not conform to the 8.3
|
|
|
|
* format (up to eight characters for the basename, three for the file
|
|
|
|
* extension, certain characters not allowed such as `+`, etc) are associated
|
|
|
|
* with a so-called "short name", at least on the `C:` drive by default.
|
|
|
|
* Which means that `git~1/` is a valid way to refer to `.git/`.
|
|
|
|
*
|
|
|
|
* Note: Technically, `.git/` could receive the short name `git~2` if the
|
|
|
|
* short name `git~1` were already used. In Git, however, we guarantee that
|
|
|
|
* `.git` is the first item in a directory, therefore it will be associated
|
|
|
|
* with the short name `git~1` (unless short names are disabled).
|
|
|
|
*
|
path: safeguard `.git` against NTFS Alternate Streams Accesses
Probably inspired by HFS' resource streams, NTFS supports "Alternate
Data Streams": by appending `:<stream-name>` to the file name,
information in addition to the file contents can be written and read,
information that is copied together with the file (unless copied to a
non-NTFS location).
These Alternate Data Streams are typically used for things like marking
an executable as having just been downloaded from the internet (and
hence not necessarily being trustworthy).
In addition to a stream name, a stream type can be appended, like so:
`:<stream-name>:<stream-type>`. Unless specified, the default stream
type is `$DATA` for files and `$INDEX_ALLOCATION` for directories. In
other words, `.git::$INDEX_ALLOCATION` is a valid way to reference the
`.git` directory!
In our work in Git v2.2.1 to protect Git on NTFS drives under
`core.protectNTFS`, we focused exclusively on NTFS short names, unaware
of the fact that NTFS Alternate Data Streams offer a similar attack
vector.
Let's fix this.
Seeing as it is better to be safe than sorry, we simply disallow paths
referring to *any* NTFS Alternate Data Stream of `.git`, not just
`::$INDEX_ALLOCATION`. This also simplifies the implementation.
This closes CVE-2019-1352.
Further reading about NTFS Alternate Data Streams:
https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/c54dec26-1551-4d3a-a0ea-4fa40f848eb3
Reported-by: Nicolas Joly <Nicolas.Joly@microsoft.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
2019-08-28 12:22:17 +02:00
|
|
|
* - For yet other historical reasons, NTFS supports so-called "Alternate Data
|
|
|
|
* Streams", i.e. metadata associated with a given file, referred to via
|
|
|
|
* `<filename>:<stream-name>:<stream-type>`. There exists a default stream
|
|
|
|
* type for directories, allowing `.git/` to be accessed via
|
|
|
|
* `.git::$INDEX_ALLOCATION/`.
|
|
|
|
*
|
2019-09-16 20:44:31 +02:00
|
|
|
* When this function returns 1, it indicates that the specified file/directory
|
|
|
|
* name refers to a `.git` file or directory, or to any of these synonyms, and
|
|
|
|
* Git should therefore not track it.
|
|
|
|
*
|
path: safeguard `.git` against NTFS Alternate Streams Accesses
Probably inspired by HFS' resource streams, NTFS supports "Alternate
Data Streams": by appending `:<stream-name>` to the file name,
information in addition to the file contents can be written and read,
information that is copied together with the file (unless copied to a
non-NTFS location).
These Alternate Data Streams are typically used for things like marking
an executable as having just been downloaded from the internet (and
hence not necessarily being trustworthy).
In addition to a stream name, a stream type can be appended, like so:
`:<stream-name>:<stream-type>`. Unless specified, the default stream
type is `$DATA` for files and `$INDEX_ALLOCATION` for directories. In
other words, `.git::$INDEX_ALLOCATION` is a valid way to reference the
`.git` directory!
In our work in Git v2.2.1 to protect Git on NTFS drives under
`core.protectNTFS`, we focused exclusively on NTFS short names, unaware
of the fact that NTFS Alternate Data Streams offer a similar attack
vector.
Let's fix this.
Seeing as it is better to be safe than sorry, we simply disallow paths
referring to *any* NTFS Alternate Data Stream of `.git`, not just
`::$INDEX_ALLOCATION`. This also simplifies the implementation.
This closes CVE-2019-1352.
Further reading about NTFS Alternate Data Streams:
https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/c54dec26-1551-4d3a-a0ea-4fa40f848eb3
Reported-by: Nicolas Joly <Nicolas.Joly@microsoft.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
2019-08-28 12:22:17 +02:00
|
|
|
* For performance reasons, _all_ Alternate Data Streams of `.git/` are
|
|
|
|
* forbidden, not just `::$INDEX_ALLOCATION`.
|
|
|
|
*
|
2019-09-16 20:44:31 +02:00
|
|
|
* This function is intended to be used by `git fsck` even on platforms where
|
|
|
|
* the backslash is a regular filename character, therefore it needs to handle
|
|
|
|
* backslash characters in the provided `name` specially: they are interpreted
|
|
|
|
* as directory separators.
|
|
|
|
*/
|
path: add is_ntfs_dotgit() helper
We do not allow paths with a ".git" component to be added to
the index, as that would mean repository contents could
overwrite our repository files. However, asking "is this
path the same as .git" is not as simple as strcmp() on some
filesystems.
On NTFS (and FAT32), there exist so-called "short names" for
backwards-compatibility: 8.3 compliant names that refer to the same files
as their long names. As ".git" is not an 8.3 compliant name, a short name
is generated automatically, typically "git~1".
Depending on the Windows version, any combination of trailing spaces and
periods are ignored, too, so that both "git~1." and ".git." still refer
to the Git directory. The reason is that 8.3 stores file names shorter
than 8 characters with trailing spaces. So literally, it does not matter
for the short name whether it is padded with spaces or whether it is
shorter than 8 characters, it is considered to be the exact same.
The period is the separator between file name and file extension, and
again, an empty extension consists just of spaces in 8.3 format. So
technically, we would need only take care of the equivalent of this
regex:
(\.git {0,4}|git~1 {0,3})\. {0,3}
However, there are indications that at least some Windows versions might
be more lenient and accept arbitrary combinations of trailing spaces and
periods and strip them out. So we're playing it real safe here. Besides,
there can be little doubt about the intention behind using file names
matching even the more lenient pattern specified above, therefore we
should be fine with disallowing such patterns.
Extra care is taken to catch names such as '.\\.git\\booh' because the
backslash is marked as a directory separator only on Windows, and we want
to use this new helper function also in fsck on other platforms.
A big thank you goes to Ed Thomson and an unnamed Microsoft engineer for
the detailed analysis performed to come up with the corresponding fixes
for libgit2.
This commit adds a function to detect whether a given file name can refer
to the Git directory by mistake.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-16 23:31:03 +01:00
|
|
|
/*
 * Report whether `name` could refer to the Git directory on NTFS.
 *
 * A match is either the long name ".git" or the short name "git~1"
 * (letters compared case-insensitively), followed by nothing but
 * spaces and periods up to the end of the string or up to the first
 * backslash, forward slash or colon.
 *
 * Returns 1 on a match and 0 otherwise.
 */
int is_ntfs_dotgit(const char *name)
{
	const char *rest;

	/*
	 * The comparisons below short-circuit, so a mismatch (including
	 * an early NUL) stops the scan before any later byte is read.
	 */
	if (name[0] == '.') {
		/* long name: .git */
		if ((name[1] != 'g' && name[1] != 'G') ||
		    (name[2] != 'i' && name[2] != 'I') ||
		    (name[3] != 't' && name[3] != 'T'))
			return 0;
		rest = name + 4;
	} else if (name[0] == 'g' || name[0] == 'G') {
		/* short name: git~1 */
		if ((name[1] != 'i' && name[1] != 'I') ||
		    (name[2] != 't' && name[2] != 'T') ||
		    name[3] != '~' ||
		    name[4] != '1')
			return 0;
		rest = name + 5;
	} else {
		return 0;
	}

	/* Only spaces and periods may precede the terminator. */
	for (;; rest++) {
		switch (*rest) {
		case '\0':
		case '\\':
		case '/':
		case ':':
			return 1;
		case '.':
		case ' ':
			continue;
		default:
			return 0;
		}
	}
}
|
2015-04-21 06:06:27 +02:00
|
|
|
|
is_ntfs_dotgit: match other .git files
When we started to catch NTFS short names that clash with .git, we only
looked for GIT~1. This is sufficient because we only ever clone into an
empty directory, so .git is guaranteed to be the first subdirectory or
file in that directory.
However, even with a fresh clone, .gitmodules is *not* necessarily the
first file to be written that would want the NTFS short name GITMOD~1: a
malicious repository can add .gitmodul0000 and friends, which sorts
before `.gitmodules` and is therefore checked out *first*. For that
reason, we have to test not only for ~1 short names, but for others,
too.
It's hard to just adapt the existing checks in is_ntfs_dotgit(): since
Windows 2000 (i.e., in all Windows versions still supported by Git),
NTFS short names are only generated in the <prefix>~<number> form up to
number 4. After that, a *different* prefix is used, calculated from the
long file name using an undocumented, but stable algorithm.
For example, the short name of .gitmodules would be GITMOD~1, but if it
is taken, and all of ~2, ~3 and ~4 are taken, too, the short name
GI7EBA~1 will be used. From there, collisions are handled by
incrementing the number, shortening the prefix as needed (until ~9999999
is reached, in which case NTFS will not allow the file to be created).
We'd also want to handle .gitignore and .gitattributes, which suffer
from a similar problem, using the fall-back short names GI250A~1 and
GI7D29~1, respectively.
To accommodate for that, we could reimplement the hashing algorithm, but
it is just safer and simpler to provide the known prefixes. This
algorithm has been reverse-engineered and described at
https://usn.pw/blog/gen/2015/06/09/filenames/, which is defunct but
still available via https://web.archive.org/.
These can be recomputed by running the following Perl script:
-- snip --
use warnings;
use strict;
sub compute_short_name_hash ($) {
my $checksum = 0;
foreach (split('', $_[0])) {
$checksum = ($checksum * 0x25 + ord($_)) & 0xffff;
}
$checksum = ($checksum * 314159269) & 0xffffffff;
$checksum = 1 + (~$checksum & 0x7fffffff) if ($checksum & 0x80000000);
$checksum -= (($checksum * 1152921497) >> 60) * 1000000007;
return scalar reverse sprintf("%x", $checksum & 0xffff);
}
print compute_short_name_hash($ARGV[0]);
-- snap --
E.g., running that with the argument ".gitignore" will
result in "250a" (which then becomes "gi250a" in the code).
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff King <peff@peff.net>
2018-05-11 16:03:54 +02:00
|
|
|
/*
 * Helper for is_ntfs_dot_generic(): returns 1 if `tail` consists solely
 * of spaces and periods up to the end of the string or the first colon,
 * and 0 as soon as any other character is seen.
 */
static int ntfs_only_spaces_and_periods(const char *tail)
{
	for (;; tail++) {
		if (*tail == '\0' || *tail == ':')
			return 1;
		if (*tail != ' ' && *tail != '.')
			return 0;
	}
}

/*
 * Report whether `name` could refer, on NTFS, to the dot-file whose
 * name without the leading period is `dotgit_name`.
 *
 * name:        the file name to test
 * dotgit_name: the long name without its leading '.', e.g. "gitmodules"
 * len:         number of bytes of `dotgit_name` to compare
 * dotgit_ntfs_shortname_prefix:
 *              lower-case prefix of the fall-back NTFS short name; at
 *              most its first six bytes are consulted
 *
 * Three spellings are accepted, each optionally followed by spaces and
 * periods and terminated by the end of the string or a colon:
 *   - '.' followed by `dotgit_name` (case-insensitive);
 *   - the first six characters of `dotgit_name` followed by ~1 .. ~4;
 *   - a fall-back short name built from `dotgit_ntfs_shortname_prefix`,
 *     a tilde and digits, eight (or nine) characters in total.
 *
 * Returns 1 on a match and 0 otherwise.
 */
static int is_ntfs_dot_generic(const char *name,
			       const char *dotgit_name,
			       size_t len,
			       const char *dotgit_ntfs_shortname_prefix)
{
	int saw_tilde = 0;
	size_t i;

	/* The long name itself. */
	if (name[0] == '.' && !strncasecmp(name + 1, dotgit_name, len))
		return ntfs_only_spaces_and_periods(name + len + 1);

	/*
	 * Is it a regular NTFS short name, i.e. shortened to 6 characters,
	 * followed by ~1, ... ~4?
	 */
	if (!strncasecmp(name, dotgit_name, 6) && name[6] == '~' &&
	    name[7] >= '1' && name[7] <= '4')
		return ntfs_only_spaces_and_periods(name + 8);

	/*
	 * Is it a fall-back NTFS short name (for details, see
	 * https://en.wikipedia.org/wiki/8.3_filename)?
	 */
	for (i = 0; i < 8; i++) {
		char ch = name[i];

		if (ch == '\0')
			return 0;

		if (saw_tilde) {
			/* After the tilde only decimal digits may follow. */
			if (ch < '0' || ch > '9')
				return 0;
			continue;
		}

		if (ch == '~') {
			/* The first digit after the tilde must be 1..9. */
			i++;
			if (name[i] < '1' || name[i] > '9')
				return 0;
			saw_tilde = 1;
			continue;
		}

		/* The prefix is at most six characters long. */
		if (i >= 6)
			return 0;

		/*
		 * We know our needles contain only ASCII, so we clamp
		 * here to make the results of tolower() sane.
		 */
		if (ch & 0x80)
			return 0;

		if (tolower(ch) != dotgit_ntfs_shortname_prefix[i])
			return 0;
	}

	return ntfs_only_spaces_and_periods(name + i);
}
|
|
|
|
|
|
|
|
/*
 * Convenience wrapper around is_ntfs_dot_generic() that derives the
 * length from `dotgit_name` itself. It is an inline helper so that the
 * compiler can resolve strlen() on string literals at compile time.
 */
static inline int is_ntfs_dot_str(const char *name, const char *dotgit_name,
				  const char *dotgit_ntfs_shortname_prefix)
{
	const size_t dotgit_name_len = strlen(dotgit_name);

	return is_ntfs_dot_generic(name, dotgit_name, dotgit_name_len,
				   dotgit_ntfs_shortname_prefix);
}
|
|
|
|
|
|
|
|
/*
 * Returns the result of is_ntfs_dot_str() for the long name
 * "gitmodules" (i.e. ".gitmodules") and the fall-back NTFS short-name
 * prefix "gi7eba".
 */
int is_ntfs_dotgitmodules(const char *name)
{
	int matches = is_ntfs_dot_str(name, "gitmodules", "gi7eba");

	return matches;
}
|
|
|
|
|
|
|
|
/*
 * Returns the result of is_ntfs_dot_str() for the long name
 * "gitignore" (i.e. ".gitignore") and the fall-back NTFS short-name
 * prefix "gi250a".
 */
int is_ntfs_dotgitignore(const char *name)
{
	int matches = is_ntfs_dot_str(name, "gitignore", "gi250a");

	return matches;
}
|
|
|
|
|
|
|
|
/*
 * Returns the result of is_ntfs_dot_str() for the long name
 * "gitattributes" (i.e. ".gitattributes") and the fall-back NTFS
 * short-name prefix "gi7d29".
 */
int is_ntfs_dotgitattributes(const char *name)
{
	int matches = is_ntfs_dot_str(name, "gitattributes", "gi7d29");

	return matches;
}
|
|
|
|
|
2017-07-28 21:25:45 +02:00
|
|
|
/*
 * Returns 1 if `str` is non-NULL and its first character is '-',
 * and 0 otherwise (including for NULL and for the empty string).
 */
int looks_like_command_line_option(const char *str)
{
	if (!str)
		return 0;
	return *str == '-';
}
|
|
|
|
|
2015-04-21 06:06:27 +02:00
|
|
|
/*
 * Build the path of `filename` inside Git's XDG configuration directory.
 *
 * If $XDG_CONFIG_HOME is set and non-empty, the result is formatted as
 * "$XDG_CONFIG_HOME/git/<filename>"; otherwise, if $HOME is set, as
 * "$HOME/.config/git/<filename>". Returns NULL when neither variable
 * is usable.
 *
 * `filename` must not be NULL (checked with assert()).
 *
 * NOTE(review): the string comes from mkpathdup(), which is defined
 * outside this block; presumably it is heap-allocated and owned by the
 * caller -- confirm against mkpathdup().
 */
char *xdg_config_home(const char *filename)
{
	const char *base;

	assert(filename);

	base = getenv("XDG_CONFIG_HOME");
	if (base && *base)
		return mkpathdup("%s/git/%s", base, filename);

	base = getenv("HOME");
	if (!base)
		return NULL;

	return mkpathdup("%s/.config/git/%s", base, filename);
}
|
memoize common git-path "constant" files
One of the most common uses of git_path() is to pass a
constant, like git_path("MERGE_MSG"). This has two
drawbacks:
1. The return value is a static buffer, and the lifetime
is dependent on other calls to git_path, etc.
2. There's no compile-time checking of the pathname. This
is OK for a one-off (after all, we have to spell it
correctly at least once), but many of these constant
strings appear throughout the code.
This patch introduces a series of functions to "memoize"
these strings, which are essentially globals for the
lifetime of the program. We compute the value once, take
ownership of the buffer, and return the cached value for
subsequent calls. cache.h provides a helper macro for
defining these functions as one-liners, and defines a few
common ones for global use.
Using a macro is a little bit gross, but it does nicely
document the purpose of the functions. If we need to touch
them all later (e.g., because we learned how to change the
git_dir variable at runtime, and need to invalidate all of
the stored values), it will be much easier to have the
complete list.
Note that the shared-global functions have separate, manual
declarations. We could do something clever with the macros
(e.g., expand it to a declaration in some places, and a
declaration _and_ a definition in path.c). But there aren't
that many, and it's probably better to stay away from
too-magical macros.
Likewise, if we abandon the C preprocessor in favor of
generating these with a script, we could get much fancier.
E.g., normalizing "FOO/BAR-BAZ" into "git_path_foo_bar_baz".
But the small amount of saved typing is probably not worth
the resulting confusion to readers who want to grep for the
function's definition.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-08-10 11:38:57 +02:00
|
|
|
|
2017-03-13 21:43:54 +01:00
|
|
|
/*
 * Build the path of `filename` inside Git's XDG cache directory.
 *
 * If $XDG_CACHE_HOME is set and non-empty, the result is formatted as
 * "$XDG_CACHE_HOME/git/<filename>"; otherwise, if $HOME is set, as
 * "$HOME/.cache/git/<filename>". Returns NULL when neither variable
 * is usable.
 *
 * `filename` must not be NULL (checked with assert()).
 *
 * NOTE(review): the string comes from mkpathdup(), which is defined
 * outside this block; presumably it is heap-allocated and owned by the
 * caller -- confirm against mkpathdup().
 */
char *xdg_cache_home(const char *filename)
{
	const char *base;

	assert(filename);

	base = getenv("XDG_CACHE_HOME");
	if (base && *base)
		return mkpathdup("%s/git/%s", base, filename);

	base = getenv("HOME");
	if (!base)
		return NULL;

	return mkpathdup("%s/.cache/git/%s", base, filename);
}
|
|
|
|
|
2018-05-18 00:51:51 +02:00
|
|
|
/*
 * One REPO_GIT_PATH_FUNC() invocation per well-known file name; the
 * first argument is an identifier, the second the file name it stands
 * for.
 *
 * NOTE(review): REPO_GIT_PATH_FUNC is defined outside this file.
 * Presumably each invocation expands to the definition of an accessor
 * function that returns the path of the named file for a given
 * repository -- confirm against the macro definition.
 */
REPO_GIT_PATH_FUNC(cherry_pick_head, "CHERRY_PICK_HEAD")
REPO_GIT_PATH_FUNC(revert_head, "REVERT_HEAD")
REPO_GIT_PATH_FUNC(squash_msg, "SQUASH_MSG")
REPO_GIT_PATH_FUNC(merge_msg, "MERGE_MSG")
REPO_GIT_PATH_FUNC(merge_rr, "MERGE_RR")
REPO_GIT_PATH_FUNC(merge_mode, "MERGE_MODE")
REPO_GIT_PATH_FUNC(merge_head, "MERGE_HEAD")
REPO_GIT_PATH_FUNC(merge_autostash, "MERGE_AUTOSTASH")
REPO_GIT_PATH_FUNC(fetch_head, "FETCH_HEAD")
REPO_GIT_PATH_FUNC(shallow, "shallow")
|