2005-07-09 01:20:59 +02:00
|
|
|
/*
|
|
|
|
* I'm tired of doing "vsnprintf()" etc just to open a
|
|
|
|
* file, so here's a "return static buffer with printf"
|
|
|
|
* interface for paths.
|
|
|
|
*
|
|
|
|
* It's obviously not thread-safe. Sue me. But it's quite
|
|
|
|
* useful for doing things like
|
|
|
|
*
|
|
|
|
* f = open(mkpath("%s/%s.git", base, name), O_RDONLY);
|
|
|
|
*
|
|
|
|
* which is what it's designed for.
|
|
|
|
*/
|
|
|
|
#include "cache.h"
|
2009-11-17 18:24:25 +01:00
|
|
|
#include "strbuf.h"
|
2005-07-09 01:20:59 +02:00
|
|
|
|
|
|
|
/* Placeholder path handed back when a formatted path does not fit its buffer. */
static char bad_path[] = "/bad-path/";
|
|
|
|
|
2006-09-11 21:03:15 +02:00
|
|
|
/*
 * Hand out one of four static PATH_MAX-sized buffers in rotation.
 *
 * Each call advances to the next buffer, so the fifth call returns the
 * same storage as the first.  The buffers are static, hence this is not
 * thread-safe.
 */
static char *get_pathname(void)
{
	static char pathname_array[4][PATH_MAX];
	/* Unsigned: wrap-around after very many calls stays well defined. */
	static unsigned int next_slot;

	return pathname_array[++next_slot & 3];
}
|
|
|
|
|
2005-07-09 01:20:59 +02:00
|
|
|
/*
 * Skip a leading "./" and any slashes that immediately follow it.
 *
 * Returns a pointer into the same string; nothing is copied or modified.
 */
static char *cleanup_path(char *path)
{
	/*
	 * strncmp() stops at the terminating NUL, so an empty or
	 * one-character string is never read past its end.
	 */
	if (!strncmp(path, "./", 2)) {
		path += 2;
		while (*path == '/')
			path++;
	}
	return path;
}
|
|
|
|
|
2008-10-26 22:59:13 +01:00
|
|
|
char *mksnpath(char *buf, size_t n, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
unsigned len;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
len = vsnprintf(buf, n, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
if (len >= n) {
|
2008-11-10 22:07:52 +01:00
|
|
|
strlcpy(buf, bad_path, n);
|
2008-10-26 22:59:13 +01:00
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
return cleanup_path(buf);
|
|
|
|
}
|
|
|
|
|
2008-10-27 11:17:51 +01:00
|
|
|
static char *git_vsnpath(char *buf, size_t n, const char *fmt, va_list args)
|
2008-10-27 10:22:21 +01:00
|
|
|
{
|
|
|
|
const char *git_dir = get_git_dir();
|
|
|
|
size_t len;
|
|
|
|
|
|
|
|
len = strlen(git_dir);
|
|
|
|
if (n < len + 1)
|
|
|
|
goto bad;
|
|
|
|
memcpy(buf, git_dir, len);
|
|
|
|
if (len && !is_dir_sep(git_dir[len-1]))
|
|
|
|
buf[len++] = '/';
|
|
|
|
len += vsnprintf(buf + len, n - len, fmt, args);
|
|
|
|
if (len >= n)
|
|
|
|
goto bad;
|
|
|
|
return cleanup_path(buf);
|
|
|
|
bad:
|
2008-11-10 22:07:52 +01:00
|
|
|
strlcpy(buf, bad_path, n);
|
2008-10-27 10:22:21 +01:00
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
2008-10-27 11:17:51 +01:00
|
|
|
/*
 * Format a path below the git directory into the caller's buffer.
 *
 * buf/n describe the destination; fmt and the variadic arguments are
 * printf-style.  The return value is whatever git_vsnpath() yields: the
 * cleaned-up path, which can point past a leading "./" inside buf, or buf
 * holding the bad-path marker when the result did not fit.  Returning
 * that pointer (rather than buf itself) keeps the result consistent with
 * mkpath() and git_path(), which also return the cleaned-up path.
 */
char *git_snpath(char *buf, size_t n, const char *fmt, ...)
{
	char *ret;
	va_list args;

	va_start(args, fmt);
	ret = git_vsnpath(buf, n, fmt, args);
	va_end(args);
	return ret;
}
|
|
|
|
|
|
|
|
char *git_pathdup(const char *fmt, ...)
|
|
|
|
{
|
|
|
|
char path[PATH_MAX];
|
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
|
|
|
(void)git_vsnpath(path, sizeof(path), fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
return xstrdup(path);
|
|
|
|
}
|
|
|
|
|
2005-07-09 01:20:59 +02:00
|
|
|
char *mkpath(const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
unsigned len;
|
2006-09-11 21:03:15 +02:00
|
|
|
char *pathname = get_pathname();
|
2005-07-09 01:20:59 +02:00
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
len = vsnprintf(pathname, PATH_MAX, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
if (len >= PATH_MAX)
|
|
|
|
return bad_path;
|
|
|
|
return cleanup_path(pathname);
|
|
|
|
}
|
|
|
|
|
|
|
|
char *git_path(const char *fmt, ...)
|
|
|
|
{
|
2005-09-26 22:54:01 +02:00
|
|
|
const char *git_dir = get_git_dir();
|
2006-09-11 21:03:15 +02:00
|
|
|
char *pathname = get_pathname();
|
2005-07-09 01:20:59 +02:00
|
|
|
va_list args;
|
|
|
|
unsigned len;
|
|
|
|
|
|
|
|
len = strlen(git_dir);
|
|
|
|
if (len > PATH_MAX-100)
|
|
|
|
return bad_path;
|
|
|
|
memcpy(pathname, git_dir, len);
|
|
|
|
if (len && git_dir[len-1] != '/')
|
|
|
|
pathname[len++] = '/';
|
|
|
|
va_start(args, fmt);
|
|
|
|
len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
if (len >= PATH_MAX)
|
|
|
|
return bad_path;
|
|
|
|
return cleanup_path(pathname);
|
|
|
|
}
|
2005-08-04 22:43:03 +02:00
|
|
|
|
|
|
|
|
|
|
|
/*
 * git_mkstemp() - create a temporary file, honoring the TMPDIR variable.
 *
 * Writes "<TMPDIR or /tmp>/<template>" into path (len bytes) and passes
 * it to mkstemp().  Returns mkstemp()'s result, or -1 with errno set to
 * ENAMETOOLONG when the combined name does not fit.
 */
int git_mkstemp(char *path, size_t len, const char *template)
{
	const char *dir = getenv("TMPDIR");
	size_t needed;

	if (dir == NULL)
		dir = "/tmp";

	needed = snprintf(path, len, "%s/%s", dir, template);
	if (needed >= len) {
		errno = ENAMETOOLONG;
		return -1;
	}
	return mkstemp(path);
}
|
|
|
|
|
2009-05-31 10:35:52 +02:00
|
|
|
/*
 * git_mkstemps() - create a temporary file with a suffix, honoring the
 * TMPDIR variable.
 *
 * Writes "<TMPDIR or /tmp>/<template>" into path (len bytes) and passes
 * it, together with suffix_len, to mkstemps().  Returns mkstemps()'s
 * result, or -1 with errno set to ENAMETOOLONG when the combined name
 * does not fit.
 */
int git_mkstemps(char *path, size_t len, const char *template, int suffix_len)
{
	const char *dir = getenv("TMPDIR");
	size_t needed;

	if (dir == NULL)
		dir = "/tmp";

	needed = snprintf(path, len, "%s/%s", dir, template);
	if (needed >= len) {
		errno = ENAMETOOLONG;
		return -1;
	}
	return mkstemps(path, suffix_len);
}
|
2005-08-04 22:43:03 +02:00
|
|
|
|
2010-02-22 23:32:12 +01:00
|
|
|
/* Adapted from libiberty's mkstemp.c. */
|
|
|
|
|
|
|
|
#undef TMP_MAX
|
|
|
|
#define TMP_MAX 16384
|
|
|
|
|
2010-02-22 23:32:13 +01:00
|
|
|
/*
 * Create and open a uniquely named file.
 *
 * pattern must end in "XXXXXX" followed by exactly suffix_len further
 * characters; the six X's are overwritten in place with characters drawn
 * from [a-zA-Z0-9].  The file is opened with O_CREAT|O_EXCL|O_RDWR and
 * the given mode.
 *
 * Returns the open file descriptor on success.  On failure returns -1;
 * errno is EINVAL when pattern is malformed or suffix_len is negative,
 * otherwise it is left as set by the last open().  When no file could be
 * created, pattern is set to the empty string.
 */
int git_mkstemps_mode(char *pattern, int suffix_len, int mode)
{
	static const char letters[] =
		"abcdefghijklmnopqrstuvwxyz"
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"0123456789";
	static const int num_letters = 62;
	uint64_t value;
	struct timeval tv;
	char *template;
	size_t len;
	int fd, count;

	len = strlen(pattern);

	/* A negative suffix length would index outside the string below. */
	if (suffix_len < 0 || len < 6 + (size_t)suffix_len) {
		errno = EINVAL;
		return -1;
	}

	if (strncmp(&pattern[len - 6 - suffix_len], "XXXXXX", 6)) {
		errno = EINVAL;
		return -1;
	}

	/*
	 * Replace pattern's XXXXXX characters with randomness.
	 * Try TMP_MAX different filenames.
	 */
	gettimeofday(&tv, NULL);
	value = ((size_t)(tv.tv_usec << 16)) ^ tv.tv_sec ^ getpid();
	template = &pattern[len - 6 - suffix_len];
	for (count = 0; count < TMP_MAX; ++count) {
		uint64_t v = value;
		int i;

		/* Fill in the random bits. */
		for (i = 0; i < 6; i++) {
			template[i] = letters[v % num_letters];
			v /= num_letters;
		}

		fd = open(pattern, O_CREAT | O_EXCL | O_RDWR, mode);
		/* Descriptor 0 is valid too (e.g. when stdin is closed). */
		if (fd >= 0)
			return fd;
		/*
		 * Fatal error (EPERM, ENOSPC etc).
		 * It doesn't make sense to loop.
		 */
		if (errno != EEXIST)
			break;
		/*
		 * This is a random value.  It is only necessary that
		 * the next TMP_MAX values generated by adding 7777 to
		 * VALUE are different (modulo 2^32).
		 */
		value += 7777;
	}
	/* We return the null string if we can't find a unique file name. */
	pattern[0] = '\0';
	return -1;
}
|
|
|
|
|
2010-02-22 23:32:13 +01:00
|
|
|
/*
 * mkstemp-style variant of git_mkstemps_mode(): the pattern carries no
 * suffix after its "XXXXXX" part.
 */
int git_mkstemp_mode(char *pattern, int mode)
{
	const int no_suffix = 0;

	return git_mkstemps_mode(pattern, no_suffix, mode);
}
|
|
|
|
|
|
|
|
/*
 * mkstemps() replacement: forwards to git_mkstemps_mode() with file
 * mode 0600.
 */
int gitmkstemps(char *pattern, int suffix_len)
{
	const int owner_rw = 0600;

	return git_mkstemps_mode(pattern, suffix_len, owner_rw);
}
|
|
|
|
|
2007-01-02 08:31:08 +01:00
|
|
|
/*
 * Check whether the file at path looks like a valid HEAD.
 *
 * Accepted forms:
 *  - a symbolic link whose target starts with "refs/";
 *  - a file starting with "ref:", optional whitespace, then "refs/";
 *  - a file that get_sha1_hex() accepts (a detached HEAD).
 *
 * Returns 0 when one of these matches and -1 otherwise, including when
 * the file cannot be stat'ed, opened or read.
 */
int validate_headref(const char *path)
{
	struct stat st;
	char *buf, buffer[256];
	unsigned char sha1[20];
	int fd;
	ssize_t len;

	if (lstat(path, &st) < 0)
		return -1;

	/* Make sure it is a "refs/.." symlink */
	if (S_ISLNK(st.st_mode)) {
		len = readlink(path, buffer, sizeof(buffer)-1);
		if (len >= 5 && !memcmp("refs/", buffer, 5))
			return 0;
		return -1;
	}

	/*
	 * Anything else, just open it and try to see if it is a symbolic ref.
	 */
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;
	len = read_in_full(fd, buffer, sizeof(buffer)-1);
	close(fd);

	/* Also rejects a read error, which is reported as a negative length. */
	if (len < 4)
		return -1;
	/*
	 * Terminate what was read so the hex parser below is handed a
	 * proper C string and never looks at uninitialised bytes; at most
	 * sizeof(buffer)-1 bytes were read, so there is room for the NUL.
	 */
	buffer[len] = '\0';

	/*
	 * Is it a symbolic ref?
	 */
	if (!memcmp("ref:", buffer, 4)) {
		buf = buffer + 4;
		len -= 4;
		while (len && isspace(*buf))
			buf++, len--;
		if (len >= 5 && !memcmp("refs/", buf, 5))
			return 0;
	}

	/*
	 * Is this a detached HEAD?
	 */
	if (!get_sha1_hex(buffer, sha1))
		return 0;

	return -1;
}
|
|
|
|
|
2009-11-17 18:24:25 +01:00
|
|
|
/*
 * Look up a passwd entry for a user name given as pointer plus length
 * (the name need not be NUL-terminated).
 *
 * A temporary NUL-terminated copy is made for getpwnam() and released
 * before returning.  Returns whatever getpwnam() returned.
 */
static struct passwd *getpw_str(const char *username, size_t len)
{
	char *name = xmalloc(len + 1);
	struct passwd *entry;

	memcpy(name, username, len);
	name[len] = '\0';

	entry = getpwnam(name);
	free(name);
	return entry;
}
|
2005-11-17 20:37:14 +01:00
|
|
|
|
2009-11-17 18:24:25 +01:00
|
|
|
/*
|
|
|
|
* Return a string with ~ and ~user expanded via getpw*. If buf != NULL,
|
|
|
|
* then it is a newly allocated string. Returns NULL on getpw failure or
|
|
|
|
* if path is NULL.
|
|
|
|
*/
|
|
|
|
char *expand_user_path(const char *path)
|
|
|
|
{
|
|
|
|
struct strbuf user_path = STRBUF_INIT;
|
|
|
|
const char *first_slash = strchrnul(path, '/');
|
|
|
|
const char *to_copy = path;
|
|
|
|
|
|
|
|
if (path == NULL)
|
|
|
|
goto return_null;
|
|
|
|
if (path[0] == '~') {
|
|
|
|
const char *username = path + 1;
|
|
|
|
size_t username_len = first_slash - username;
|
2009-11-19 16:21:15 +01:00
|
|
|
if (username_len == 0) {
|
|
|
|
const char *home = getenv("HOME");
|
|
|
|
strbuf_add(&user_path, home, strlen(home));
|
|
|
|
} else {
|
|
|
|
struct passwd *pw = getpw_str(username, username_len);
|
|
|
|
if (!pw)
|
|
|
|
goto return_null;
|
|
|
|
strbuf_add(&user_path, pw->pw_dir, strlen(pw->pw_dir));
|
2005-11-17 20:37:14 +01:00
|
|
|
}
|
2009-11-17 18:24:25 +01:00
|
|
|
to_copy = first_slash;
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
}
|
2009-11-17 18:24:25 +01:00
|
|
|
strbuf_add(&user_path, to_copy, strlen(to_copy));
|
|
|
|
return strbuf_detach(&user_path, NULL);
|
|
|
|
return_null:
|
|
|
|
strbuf_release(&user_path);
|
|
|
|
return NULL;
|
2005-11-17 20:37:14 +01:00
|
|
|
}
|
|
|
|
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
/*
|
|
|
|
* First, one directory to try is determined by the following algorithm.
|
|
|
|
*
|
|
|
|
* (0) If "strict" is given, the path is used as given and no DWIM is
|
|
|
|
* done. Otherwise:
|
|
|
|
* (1) "~/path" to mean path under the running user's home directory;
|
|
|
|
* (2) "~user/path" to mean path under named user's home directory;
|
|
|
|
* (3) "relative/path" to mean cwd relative directory; or
|
|
|
|
* (4) "/absolute/path" to mean absolute directory.
|
|
|
|
*
|
|
|
|
* Unless "strict" is given, we try access() for existence of "%s.git/.git",
|
|
|
|
* "%s/.git", "%s.git", "%s" in this order. The first one that exists is
|
|
|
|
* what we try.
|
|
|
|
*
|
|
|
|
* Second, we try chdir() to that. Upon failure, we return NULL.
|
|
|
|
*
|
|
|
|
* Then, we try if the current directory is a valid git repository.
|
|
|
|
* Upon failure, we return NULL.
|
|
|
|
*
|
|
|
|
* If all goes well, we return the directory we used to chdir() (but
|
|
|
|
* before ~user is expanded), avoiding getcwd() resolving symbolic
|
|
|
|
* links. User relative paths are also returned as they are given,
|
|
|
|
* except DWIM suffixing.
|
|
|
|
*/
|
2005-11-17 20:37:14 +01:00
|
|
|
char *enter_repo(char *path, int strict)
|
|
|
|
{
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
static char used_path[PATH_MAX];
|
|
|
|
static char validated_path[PATH_MAX];
|
|
|
|
|
|
|
|
if (!path)
|
2005-11-17 20:37:14 +01:00
|
|
|
return NULL;
|
|
|
|
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
if (!strict) {
|
|
|
|
static const char *suffix[] = {
|
|
|
|
".git/.git", "/.git", ".git", "", NULL,
|
|
|
|
};
|
|
|
|
int len = strlen(path);
|
|
|
|
int i;
|
|
|
|
while ((1 < len) && (path[len-1] == '/')) {
|
|
|
|
path[len-1] = 0;
|
|
|
|
len--;
|
|
|
|
}
|
|
|
|
if (PATH_MAX <= len)
|
2005-11-17 20:37:14 +01:00
|
|
|
return NULL;
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
if (path[0] == '~') {
|
2009-11-17 18:24:25 +01:00
|
|
|
char *newpath = expand_user_path(path);
|
|
|
|
if (!newpath || (PATH_MAX - 10 < strlen(newpath))) {
|
|
|
|
free(newpath);
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
return NULL;
|
2009-11-17 18:24:25 +01:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Copy back into the static buffer. A pity
|
|
|
|
* since newpath was not bounded, but other
|
|
|
|
* branches of the if are limited by PATH_MAX
|
|
|
|
* anyway.
|
|
|
|
*/
|
|
|
|
strcpy(used_path, newpath); free(newpath);
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
strcpy(validated_path, path);
|
|
|
|
path = used_path;
|
|
|
|
}
|
|
|
|
else if (PATH_MAX - 10 < len)
|
|
|
|
return NULL;
|
|
|
|
else {
|
|
|
|
path = strcpy(used_path, path);
|
|
|
|
strcpy(validated_path, path);
|
|
|
|
}
|
|
|
|
len = strlen(path);
|
|
|
|
for (i = 0; suffix[i]; i++) {
|
|
|
|
strcpy(path + len, suffix[i]);
|
|
|
|
if (!access(path, F_OK)) {
|
|
|
|
strcat(validated_path, suffix[i]);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!suffix[i] || chdir(path))
|
2005-11-18 23:59:34 +01:00
|
|
|
return NULL;
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
path = validated_path;
|
2005-11-18 23:59:34 +01:00
|
|
|
}
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go as well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
else if (chdir(path))
|
|
|
|
return NULL;
|
2005-11-17 20:37:14 +01:00
|
|
|
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go as well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
if (access("objects", X_OK) == 0 && access("refs", X_OK) == 0 &&
|
2007-01-02 08:31:08 +01:00
|
|
|
validate_headref("HEAD") == 0) {
|
2007-06-28 16:15:25 +02:00
|
|
|
setenv(GIT_DIR_ENVIRONMENT, ".", 1);
|
2005-11-25 19:48:26 +01:00
|
|
|
check_repository_format();
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go as well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
return path;
|
2005-11-17 20:37:14 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
2006-06-10 07:07:23 +02:00
|
|
|
|
2009-03-28 07:21:00 +01:00
|
|
|
int set_shared_perm(const char *path, int mode)
|
2006-06-10 07:07:23 +02:00
|
|
|
{
|
|
|
|
struct stat st;
|
2009-03-28 07:21:00 +01:00
|
|
|
int tweak, shared, orig_mode;
|
2006-06-10 07:07:23 +02:00
|
|
|
|
2009-03-28 07:21:00 +01:00
|
|
|
if (!shared_repository) {
|
|
|
|
if (mode)
|
|
|
|
return chmod(path, mode & ~S_IFMT);
|
2006-06-10 07:07:23 +02:00
|
|
|
return 0;
|
2009-03-28 07:21:00 +01:00
|
|
|
}
|
|
|
|
if (!mode) {
|
|
|
|
if (lstat(path, &st) < 0)
|
|
|
|
return -1;
|
|
|
|
mode = st.st_mode;
|
|
|
|
orig_mode = mode;
|
|
|
|
} else
|
|
|
|
orig_mode = 0;
|
2009-03-26 00:19:36 +01:00
|
|
|
if (shared_repository < 0)
|
|
|
|
shared = -shared_repository;
|
|
|
|
else
|
|
|
|
shared = shared_repository;
|
|
|
|
tweak = shared;
|
|
|
|
|
|
|
|
if (!(mode & S_IWUSR))
|
|
|
|
tweak &= ~0222;
|
|
|
|
if (mode & S_IXUSR)
|
|
|
|
/* Copy read bits to execute bits */
|
|
|
|
tweak |= (tweak & 0444) >> 2;
|
|
|
|
if (shared_repository < 0)
|
|
|
|
mode = (mode & ~0777) | tweak;
|
|
|
|
else
|
2008-07-12 03:15:03 +02:00
|
|
|
mode |= tweak;
|
2008-04-16 10:34:24 +02:00
|
|
|
|
|
|
|
if (S_ISDIR(mode)) {
|
|
|
|
/* Copy read bits to execute bits */
|
2009-03-26 00:19:36 +01:00
|
|
|
mode |= (shared & 0444) >> 2;
|
|
|
|
mode |= FORCE_DIR_SET_GID;
|
2008-04-16 10:34:24 +02:00
|
|
|
}
|
|
|
|
|
2009-03-26 00:19:36 +01:00
|
|
|
if (((shared_repository < 0
|
2009-03-28 07:21:00 +01:00
|
|
|
? (orig_mode & (FORCE_DIR_SET_GID | 0777))
|
|
|
|
: (orig_mode & mode)) != mode) &&
|
|
|
|
chmod(path, (mode & ~S_IFMT)) < 0)
|
2006-06-10 07:07:23 +02:00
|
|
|
return -2;
|
|
|
|
return 0;
|
|
|
|
}
|
2007-08-01 02:28:59 +02:00
|
|
|
|
Make git_dir a path relative to work_tree in setup_work_tree()
Once we find the absolute paths for git_dir and work_tree, we can make
git_dir a relative path since we know pwd will be work_tree. This should
save the kernel some time traversing the path to work_tree all the time
if git_dir is inside work_tree.
Daniel's patch didn't apply for me as-is, so I recreated it with some
differences, and here are the numbers from ten runs each.
There is some IO for me - probably due to more-or-less random flushing of
the journal - so the variation is bigger than I'd like, but whatever:
Before:
real 0m8.135s
real 0m7.933s
real 0m8.080s
real 0m7.954s
real 0m7.949s
real 0m8.112s
real 0m7.934s
real 0m8.059s
real 0m7.979s
real 0m8.038s
After:
real 0m7.685s
real 0m7.968s
real 0m7.703s
real 0m7.850s
real 0m7.995s
real 0m7.817s
real 0m7.963s
real 0m7.955s
real 0m7.848s
real 0m7.969s
Now, going by "best of ten" (on the assumption that the longer numbers
are all due to IO), I'm saying a 7.933s -> 7.685s reduction, and it does
seem to be outside of the noise (ie the "after" case never broke 8s, while
the "before" case did so half the time).
So looks like about 3% to me.
Doing it for a slightly smaller test-case (just the "arch" subdirectory)
gets more stable numbers probably due to not filling the journal with
metadata updates, so we have:
Before:
real 0m1.633s
real 0m1.633s
real 0m1.633s
real 0m1.632s
real 0m1.632s
real 0m1.630s
real 0m1.634s
real 0m1.631s
real 0m1.632s
real 0m1.632s
After:
real 0m1.610s
real 0m1.609s
real 0m1.610s
real 0m1.608s
real 0m1.607s
real 0m1.610s
real 0m1.609s
real 0m1.611s
real 0m1.608s
real 0m1.611s
where I'd just take the averages and say 1.632 vs 1.610, which is just
over 1% performance improvement.
So it's not in the noise, but it's not as big as I initially thought and
measured.
(That said, it obviously depends on how deep the working directory path is
too, and whether it is behind NFS or something else that might need to
cause more work to look up).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-19 21:34:06 +02:00
|
|
|
const char *make_relative_path(const char *abs, const char *base)
|
|
|
|
{
|
|
|
|
static char buf[PATH_MAX + 1];
|
2010-01-22 04:05:19 +01:00
|
|
|
int i = 0, j = 0;
|
|
|
|
|
|
|
|
if (!base || !base[0])
|
Make git_dir a path relative to work_tree in setup_work_tree()
Once we find the absolute paths for git_dir and work_tree, we can make
git_dir a relative path since we know pwd will be work_tree. This should
save the kernel some time traversing the path to work_tree all the time
if git_dir is inside work_tree.
Daniel's patch didn't apply for me as-is, so I recreated it with some
differences, and here are the numbers from ten runs each.
There is some IO for me - probably due to more-or-less random flushing of
the journal - so the variation is bigger than I'd like, but whatever:
Before:
real 0m8.135s
real 0m7.933s
real 0m8.080s
real 0m7.954s
real 0m7.949s
real 0m8.112s
real 0m7.934s
real 0m8.059s
real 0m7.979s
real 0m8.038s
After:
real 0m7.685s
real 0m7.968s
real 0m7.703s
real 0m7.850s
real 0m7.995s
real 0m7.817s
real 0m7.963s
real 0m7.955s
real 0m7.848s
real 0m7.969s
Now, going by "best of ten" (on the assumption that the longer numbers
are all due to IO), I'm saying a 7.933s -> 7.685s reduction, and it does
seem to be outside of the noise (ie the "after" case never broke 8s, while
the "before" case did so half the time).
So looks like about 3% to me.
Doing it for a slightly smaller test-case (just the "arch" subdirectory)
gets more stable numbers probably due to not filling the journal with
metadata updates, so we have:
Before:
real 0m1.633s
real 0m1.633s
real 0m1.633s
real 0m1.632s
real 0m1.632s
real 0m1.630s
real 0m1.634s
real 0m1.631s
real 0m1.632s
real 0m1.632s
After:
real 0m1.610s
real 0m1.609s
real 0m1.610s
real 0m1.608s
real 0m1.607s
real 0m1.610s
real 0m1.609s
real 0m1.611s
real 0m1.608s
real 0m1.611s
where I'ld just take the averages and say 1.632 vs 1.610, which is just
over 1% peformance improvement.
So it's not in the noise, but it's not as big as I initially thought and
measured.
(That said, it obviously depends on how deep the working directory path is
too, and whether it is behind NFS or something else that might need to
cause more work to look up).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-19 21:34:06 +02:00
|
|
|
return abs;
|
2010-01-22 04:05:19 +01:00
|
|
|
while (base[i]) {
|
|
|
|
if (is_dir_sep(base[i])) {
|
|
|
|
if (!is_dir_sep(abs[j]))
|
|
|
|
return abs;
|
|
|
|
while (is_dir_sep(base[i]))
|
|
|
|
i++;
|
|
|
|
while (is_dir_sep(abs[j]))
|
|
|
|
j++;
|
|
|
|
continue;
|
|
|
|
} else if (abs[j] != base[i]) {
|
|
|
|
return abs;
|
|
|
|
}
|
|
|
|
i++;
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
if (
|
|
|
|
/* "/foo" is a prefix of "/foo" */
|
|
|
|
abs[j] &&
|
|
|
|
/* "/foo" is not a prefix of "/foobar" */
|
|
|
|
!is_dir_sep(base[i-1]) && !is_dir_sep(abs[j])
|
|
|
|
)
|
Make git_dir a path relative to work_tree in setup_work_tree()
Once we find the absolute paths for git_dir and work_tree, we can make
git_dir a relative path since we know pwd will be work_tree. This should
save the kernel some time traversing the path to work_tree all the time
if git_dir is inside work_tree.
Daniel's patch didn't apply for me as-is, so I recreated it with some
differences, and here are the numbers from ten runs each.
There is some IO for me - probably due to more-or-less random flushing of
the journal - so the variation is bigger than I'd like, but whatever:
Before:
real 0m8.135s
real 0m7.933s
real 0m8.080s
real 0m7.954s
real 0m7.949s
real 0m8.112s
real 0m7.934s
real 0m8.059s
real 0m7.979s
real 0m8.038s
After:
real 0m7.685s
real 0m7.968s
real 0m7.703s
real 0m7.850s
real 0m7.995s
real 0m7.817s
real 0m7.963s
real 0m7.955s
real 0m7.848s
real 0m7.969s
Now, going by "best of ten" (on the assumption that the longer numbers
are all due to IO), I'm saying a 7.933s -> 7.685s reduction, and it does
seem to be outside of the noise (ie the "after" case never broke 8s, while
the "before" case did so half the time).
So looks like about 3% to me.
Doing it for a slightly smaller test-case (just the "arch" subdirectory)
gets more stable numbers probably due to not filling the journal with
metadata updates, so we have:
Before:
real 0m1.633s
real 0m1.633s
real 0m1.633s
real 0m1.632s
real 0m1.632s
real 0m1.630s
real 0m1.634s
real 0m1.631s
real 0m1.632s
real 0m1.632s
After:
real 0m1.610s
real 0m1.609s
real 0m1.610s
real 0m1.608s
real 0m1.607s
real 0m1.610s
real 0m1.609s
real 0m1.611s
real 0m1.608s
real 0m1.611s
where I'ld just take the averages and say 1.632 vs 1.610, which is just
over 1% peformance improvement.
So it's not in the noise, but it's not as big as I initially thought and
measured.
(That said, it obviously depends on how deep the working directory path is
too, and whether it is behind NFS or something else that might need to
cause more work to look up).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-19 21:34:06 +02:00
|
|
|
return abs;
|
2010-01-22 04:05:19 +01:00
|
|
|
while (is_dir_sep(abs[j]))
|
|
|
|
j++;
|
|
|
|
if (!abs[j])
|
|
|
|
strcpy(buf, ".");
|
|
|
|
else
|
|
|
|
strcpy(buf, abs + j);
|
Make git_dir a path relative to work_tree in setup_work_tree()
Once we find the absolute paths for git_dir and work_tree, we can make
git_dir a relative path since we know pwd will be work_tree. This should
save the kernel some time traversing the path to work_tree all the time
if git_dir is inside work_tree.
Daniel's patch didn't apply for me as-is, so I recreated it with some
differences, and here are the numbers from ten runs each.
There is some IO for me - probably due to more-or-less random flushing of
the journal - so the variation is bigger than I'd like, but whatever:
Before:
real 0m8.135s
real 0m7.933s
real 0m8.080s
real 0m7.954s
real 0m7.949s
real 0m8.112s
real 0m7.934s
real 0m8.059s
real 0m7.979s
real 0m8.038s
After:
real 0m7.685s
real 0m7.968s
real 0m7.703s
real 0m7.850s
real 0m7.995s
real 0m7.817s
real 0m7.963s
real 0m7.955s
real 0m7.848s
real 0m7.969s
Now, going by "best of ten" (on the assumption that the longer numbers
are all due to IO), I'm saying a 7.933s -> 7.685s reduction, and it does
seem to be outside of the noise (ie the "after" case never broke 8s, while
the "before" case did so half the time).
So looks like about 3% to me.
Doing it for a slightly smaller test-case (just the "arch" subdirectory)
gets more stable numbers probably due to not filling the journal with
metadata updates, so we have:
Before:
real 0m1.633s
real 0m1.633s
real 0m1.633s
real 0m1.632s
real 0m1.632s
real 0m1.630s
real 0m1.634s
real 0m1.631s
real 0m1.632s
real 0m1.632s
After:
real 0m1.610s
real 0m1.609s
real 0m1.610s
real 0m1.608s
real 0m1.607s
real 0m1.610s
real 0m1.609s
real 0m1.611s
real 0m1.608s
real 0m1.611s
where I'ld just take the averages and say 1.632 vs 1.610, which is just
over 1% peformance improvement.
So it's not in the noise, but it's not as big as I initially thought and
measured.
(That said, it obviously depends on how deep the working directory path is
too, and whether it is behind NFS or something else that might need to
cause more work to look up).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-19 21:34:06 +02:00
|
|
|
return buf;
|
|
|
|
}
|
2008-05-20 08:48:54 +02:00
|
|
|
|
|
|
|
/*
 * It is okay if dst == src, but they should not overlap otherwise.
 *
 * Performs the following normalizations on src, storing the result in dst:
 * - Ensures that components are separated by '/' (Windows only)
 * - Squashes sequences of '/'.
 * - Removes "." components.
 * - Removes ".." components, and the components that precede them.
 * Returns failure (non-zero) if a ".." component appears as first path
 * component anytime during the normalization. Otherwise, returns success (0).
 * On failure no terminating NUL is written, so the contents of dst must
 * not be relied upon.
 *
 * Note that this function is purely textual. It does not follow symlinks,
 * verify the existence of the path, or make any system calls.
 */
int normalize_path_copy(char *dst, const char *src)
{
	char *dst0;

	/* a DOS drive prefix is copied verbatim and never stripped by ".." */
	if (has_dos_drive_prefix(src)) {
		*dst++ = *src++;
		*dst++ = *src++;
	}
	dst0 = dst;

	if (is_dir_sep(*src)) {
		*dst++ = '/';
		while (is_dir_sep(*src))
			src++;
	}

	for (;;) {
		char c = *src;

		/*
		 * A path component that begins with . could be
		 * special:
		 * (1) "." and ends   -- ignore and terminate.
		 * (2) "./"           -- ignore them, eat slash and continue.
		 * (3) ".." and ends  -- strip one and terminate.
		 * (4) "../"          -- strip one, eat slash and continue.
		 */
		if (c == '.') {
			if (!src[1]) {
				/* (1) */
				src++;
			} else if (is_dir_sep(src[1])) {
				/* (2) */
				src += 2;
				while (is_dir_sep(*src))
					src++;
				continue;
			} else if (src[1] == '.') {
				if (!src[2]) {
					/* (3) */
					src += 2;
					goto up_one;
				} else if (is_dir_sep(src[2])) {
					/* (4) */
					src += 3;
					while (is_dir_sep(*src))
						src++;
					goto up_one;
				}
			}
		}

		/* copy up to the next '/', and eat all '/' */
		while ((c = *src++) != '\0' && !is_dir_sep(c))
			*dst++ = c;
		if (is_dir_sep(c)) {
			*dst++ = '/';
			while (is_dir_sep(c))
				c = *src++;
			src--;
		} else if (!c)
			break;
		continue;

	up_one:
		/*
		 * dst0..dst is prefix portion, and dst[-1] is '/';
		 * go up one level.
		 *
		 * Check before stepping back so that we never form a
		 * pointer in front of dst0 when there is nothing to strip.
		 */
		if (dst - dst0 <= 1)
			return -1;
		dst--;	/* go to trailing '/' */
		/* Windows: dst[-1] cannot be backslash anymore */
		while (dst0 < dst && dst[-1] != '/')
			dst--;
	}
	*dst = '\0';
	return 0;
}
|
2008-05-20 08:49:26 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* path = Canonical absolute path
|
|
|
|
* prefix_list = Colon-separated list of absolute paths
|
|
|
|
*
|
2008-08-10 17:26:23 +02:00
|
|
|
* Determines, for each path in prefix_list, whether the "prefix" really
|
2008-05-20 08:49:26 +02:00
|
|
|
* is an ancestor directory of path. Returns the length of the longest
|
|
|
|
* ancestor directory, excluding any trailing slashes, or -1 if no prefix
|
|
|
|
* is an ancestor. (Note that this means 0 is returned if prefix_list is
|
|
|
|
* "/".) "/foo" is not considered an ancestor of "/foobar". Directories
|
|
|
|
* are not considered to be their own ancestors. path must be in a
|
|
|
|
* canonical form: empty components, or "." or ".." components are not
|
|
|
|
* allowed. prefix_list may be null, which is like "".
|
|
|
|
*/
|
|
|
|
int longest_ancestor_length(const char *path, const char *prefix_list)
|
|
|
|
{
|
|
|
|
char buf[PATH_MAX+1];
|
|
|
|
const char *ceil, *colon;
|
|
|
|
int len, max_len = -1;
|
|
|
|
|
|
|
|
if (prefix_list == NULL || !strcmp(path, "/"))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
for (colon = ceil = prefix_list; *colon; ceil = colon+1) {
|
2009-02-07 16:08:29 +01:00
|
|
|
for (colon = ceil; *colon && *colon != PATH_SEP; colon++);
|
2008-05-20 08:49:26 +02:00
|
|
|
len = colon - ceil;
|
|
|
|
if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil))
|
|
|
|
continue;
|
|
|
|
strlcpy(buf, ceil, len+1);
|
2009-02-07 16:08:29 +01:00
|
|
|
if (normalize_path_copy(buf, buf) < 0)
|
|
|
|
continue;
|
|
|
|
len = strlen(buf);
|
|
|
|
if (len > 0 && buf[len-1] == '/')
|
|
|
|
buf[--len] = '\0';
|
2008-05-20 08:49:26 +02:00
|
|
|
|
|
|
|
if (!strncmp(path, buf, len) &&
|
|
|
|
path[len] == '/' &&
|
|
|
|
len > max_len) {
|
|
|
|
max_len = len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return max_len;
|
|
|
|
}
|
2009-02-19 20:10:49 +01:00
|
|
|
|
|
|
|
/*
 * Return len reduced by however many directory separators sit at the
 * end of path[0..len); path itself is not modified.
 */
static inline int chomp_trailing_dir_sep(const char *path, int len)
{
	int end;

	for (end = len; end; end--) {
		if (!is_dir_sep(path[end - 1]))
			break;
	}
	return end;
}
|
|
|
|
|
|
|
|
/*
 * If path ends with suffix, matched on whole path components and with
 * any run of directory separators on one side matching any run on the
 * other, returns a newly allocated copy of the part of path before the
 * suffix, without trailing directory separators.
 * Otherwise returns NULL.
 */
char *strip_path_suffix(const char *path, const char *suffix)
{
	int p = strlen(path);
	int s = strlen(suffix);

	/* walk both strings backwards until the suffix is used up */
	while (s) {
		if (!p)
			return NULL;

		if (!is_dir_sep(path[p - 1])) {
			/* ordinary character: must match exactly */
			p--;
			s--;
			if (path[p] != suffix[s])
				return NULL;
			continue;
		}

		/* separator in path needs a separator in suffix too */
		if (!is_dir_sep(suffix[s - 1]))
			return NULL;
		p = chomp_trailing_dir_sep(path, p);
		s = chomp_trailing_dir_sep(suffix, s);
	}

	/* the match must begin at a component boundary of path */
	if (p && !is_dir_sep(path[p - 1]))
		return NULL;
	return xstrndup(path, chomp_trailing_dir_sep(path, p));
}
|
2009-11-09 20:26:43 +01:00
|
|
|
|
|
|
|
/*
 * Reject paths that could alias another location through "//", "/./",
 * "/../" or a trailing "/." or "/..".
 *
 * This resurrects the belts and suspenders paranoia check by HPA
 * done in <435560F7.4080006@zytor.com> thread, now enter_repo()
 * does not do getcwd() based path canonicalization.
 *
 * p must start with '/' or '~'.  Returns 0 if the path is acceptable
 * and -1 otherwise (including when p is NULL).  A component made of
 * three or more dots is accepted.
 */
int daemon_avoid_alias(const char *p)
{
	const char *cursor;
	/*
	 * after_slash is set right after a '/' (and after the leading
	 * character) and stays set while only dots have followed;
	 * dots counts those dots.
	 */
	int after_slash = 1;
	int dots = 0;

	if (!p)
		return -1;
	if (*p != '/' && *p != '~')
		return -1;

	for (cursor = p + 1; ; cursor++) {
		const char c = *cursor;

		if (!after_slash) {
			/* inside an ordinary component */
			if (c == '\0')
				return 0;
			if (c == '/') {
				after_slash = 1;
				dots = 0;
			}
			continue;
		}

		switch (c) {
		case '.':
			dots++;
			break;
		case '/':
			/* reject //, /./ and /../ */
			if (dots < 3)
				return -1;
			dots = 0;
			break;
		case '\0':
			/* reject /.$ and /..$ */
			return (dots == 1 || dots == 2) ? -1 : 0;
		default:
			after_slash = 0;
			dots = 0;
			break;
		}
	}
}
|