2005-07-09 01:20:59 +02:00
|
|
|
/*
|
|
|
|
* I'm tired of doing "vsnprintf()" etc just to open a
|
|
|
|
* file, so here's a "return static buffer with printf"
|
|
|
|
* interface for paths.
|
|
|
|
*
|
|
|
|
* It's obviously not thread-safe. Sue me. But it's quite
|
|
|
|
* useful for doing things like
|
|
|
|
*
|
|
|
|
* f = open(mkpath("%s/%s.git", base, name), O_RDONLY);
|
|
|
|
*
|
|
|
|
* which is what it's designed for.
|
|
|
|
*/
|
|
|
|
#include "cache.h"
|
|
|
|
|
|
|
|
/* Sentinel returned by mkpath()/git_path() when the formatted path does not fit in PATH_MAX. */
static char bad_path[] = "/bad-path/";
|
|
|
|
|
2006-09-11 21:03:15 +02:00
|
|
|
/*
 * Hand out one of four static, PATH_MAX-sized scratch buffers in
 * round-robin order.  A buffer is reused on every fourth call, so a
 * caller can keep at most four results alive at once.
 *
 * Not thread-safe: both the buffers and the rotation counter are
 * function-local statics.
 */
static char *get_pathname(void)
{
	static char pathname_array[4][PATH_MAX];
	/* unsigned, so that wrapping around after many calls is well defined */
	static unsigned int next_slot;

	return pathname_array[3 & ++next_slot];
}
|
|
|
|
|
2005-07-09 01:20:59 +02:00
|
|
|
/*
 * Skip a leading "./" (and any further slashes directly after it) and
 * return a pointer to the remainder of the same string.  A path that
 * does not start with "./" is returned unchanged.  Nothing is copied
 * or modified.
 */
static char *cleanup_path(char *path)
{
	/*
	 * Test the two bytes one at a time: the second byte is only
	 * looked at when the first one is '.', so an empty string is
	 * never read past its terminating NUL.
	 */
	if (path[0] == '.' && path[1] == '/') {
		path += 2;
		while (*path == '/')
			path++;
	}
	return path;
}
|
|
|
|
|
|
|
|
char *mkpath(const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
unsigned len;
|
2006-09-11 21:03:15 +02:00
|
|
|
char *pathname = get_pathname();
|
2005-07-09 01:20:59 +02:00
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
len = vsnprintf(pathname, PATH_MAX, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
if (len >= PATH_MAX)
|
|
|
|
return bad_path;
|
|
|
|
return cleanup_path(pathname);
|
|
|
|
}
|
|
|
|
|
|
|
|
char *git_path(const char *fmt, ...)
|
|
|
|
{
|
2005-09-26 22:54:01 +02:00
|
|
|
const char *git_dir = get_git_dir();
|
2006-09-11 21:03:15 +02:00
|
|
|
char *pathname = get_pathname();
|
2005-07-09 01:20:59 +02:00
|
|
|
va_list args;
|
|
|
|
unsigned len;
|
|
|
|
|
|
|
|
len = strlen(git_dir);
|
|
|
|
if (len > PATH_MAX-100)
|
|
|
|
return bad_path;
|
|
|
|
memcpy(pathname, git_dir, len);
|
|
|
|
if (len && git_dir[len-1] != '/')
|
|
|
|
pathname[len++] = '/';
|
|
|
|
va_start(args, fmt);
|
|
|
|
len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
if (len >= PATH_MAX)
|
|
|
|
return bad_path;
|
|
|
|
return cleanup_path(pathname);
|
|
|
|
}
|
2005-08-04 22:43:03 +02:00
|
|
|
|
|
|
|
|
|
|
|
/*
 * git_mkstemp() - create tmp file honoring TMPDIR variable
 *
 * Builds "<dir>/<template>" in the caller's buffer "path" of size
 * "len", where <dir> is $TMPDIR or "/tmp" when TMPDIR is not set, and
 * hands the result to mkstemp().
 *
 * Returns whatever mkstemp() returns, or -1 with errno set to
 * ENAMETOOLONG when the name does not fit in "len" bytes.
 */
int git_mkstemp(char *path, size_t len, const char *template)
{
	const char *dir = getenv("TMPDIR");
	size_t needed;

	needed = snprintf(path, len, "%s/%s", dir ? dir : "/tmp", template);
	if (needed < len)
		return mkstemp(path);

	errno = ENAMETOOLONG;
	return -1;
}
|
|
|
|
|
|
|
|
|
2007-01-02 08:31:08 +01:00
|
|
|
/*
 * Check whether the file at "path" looks like a usable HEAD.
 *
 * It is accepted when it is one of:
 *  - a symbolic link whose target starts with "refs/";
 *  - a file whose contents start with "ref:", optional whitespace,
 *    and then "refs/";
 *  - a file whose contents get_sha1_hex() accepts (a detached HEAD).
 *
 * Returns 0 when accepted, -1 otherwise (including when the file
 * cannot be stat'ed, opened or read).
 */
int validate_headref(const char *path)
{
	struct stat st;
	char *buf, buffer[256];
	unsigned char sha1[20];
	int fd;
	ssize_t len;

	if (lstat(path, &st) < 0)
		return -1;

	/* Make sure it is a "refs/.." symlink */
	if (S_ISLNK(st.st_mode)) {
		/* a failed readlink() returns -1 and falls through to "return -1" */
		len = readlink(path, buffer, sizeof(buffer)-1);
		if (len >= 5 && !memcmp("refs/", buffer, 5))
			return 0;
		return -1;
	}

	/*
	 * Anything else, just open it and try to see if it is a symbolic ref.
	 */
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;
	len = read_in_full(fd, buffer, sizeof(buffer)-1);
	close(fd);

	/*
	 * Is it a symbolic ref?
	 */
	if (len < 4)
		return -1;
	/*
	 * At most sizeof(buffer)-1 bytes were requested, so there is
	 * room for a terminator.  Without it, get_sha1_hex() below
	 * (presumably scanning hex digits from the start of the buffer)
	 * could run past the bytes actually read into uninitialized
	 * stack contents when the file is short.
	 */
	buffer[len] = '\0';
	if (!memcmp("ref:", buffer, 4)) {
		buf = buffer + 4;
		len -= 4;
		while (len && isspace((unsigned char)*buf))
			buf++, len--;
		if (len >= 5 && !memcmp("refs/", buf, 5))
			return 0;
	}

	/*
	 * Is this a detached HEAD?
	 */
	if (!get_sha1_hex(buffer, sha1))
		return 0;

	return -1;
}
|
|
|
|
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
/*
 * Expand a leading "~" or "~user" in "path" to that user's home
 * directory and store the resulting NUL-terminated string in "buf",
 * which has room for "sz" bytes.
 *
 *  - "~" and "~/..." use the password entry of the running user
 *    (getpwuid(getuid()));
 *  - "~name" and "~name/..." use getpwnam("name").
 *
 * Trailing slashes of the home directory are dropped (a lone "/" is
 * kept), and the part of "path" from its first '/' on is appended
 * when something follows that slash.
 *
 * "path" is modified temporarily while the user name is looked up and
 * is restored before returning, so it must point to writable memory.
 *
 * Returns "buf" on success.  Returns NULL when "path" is NULL or does
 * not start with '~', when no usable password entry is found, or when
 * the result does not fit in "sz" bytes.
 */
static char *user_path(char *buf, char *path, int sz)
{
	struct passwd *pw;
	char *slash;
	size_t homelen;
	int len, baselen;

	/* a non-positive size can never hold even the terminating NUL */
	if (!path || path[0] != '~' || sz <= 0)
		return NULL;
	path++;
	slash = strchr(path, '/');
	if (path[0] == '/' || !path[0]) {
		pw = getpwuid(getuid());
	}
	else if (slash) {
		/* cut the string at the slash just long enough to look up the name */
		*slash = 0;
		pw = getpwnam(path);
		*slash = '/';
	}
	else {
		pw = getpwnam(path);
	}
	if (!pw || !pw->pw_dir)
		return NULL;
	homelen = strlen(pw->pw_dir);
	if ((size_t)sz <= homelen)
		return NULL;
	baselen = homelen;
	memcpy(buf, pw->pw_dir, baselen);
	while ((1 < baselen) && (buf[baselen-1] == '/'))
		baselen--;
	/*
	 * memcpy() above did not copy the terminator; always add one so
	 * the result is a proper string even when nothing is appended
	 * below and "buf" held other data before the call.
	 */
	buf[baselen] = '\0';
	if (slash && slash[1]) {
		len = strlen(slash);
		if (sz <= baselen + len)
			return NULL;
		memcpy(buf + baselen, slash, len + 1);
	}
	return buf;
}
|
|
|
|
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
/*
|
|
|
|
* First, one directory to try is determined by the following algorithm.
|
|
|
|
*
|
|
|
|
* (0) If "strict" is given, the path is used as given and no DWIM is
|
|
|
|
* done. Otherwise:
|
|
|
|
* (1) "~/path" to mean path under the running user's home directory;
|
|
|
|
* (2) "~user/path" to mean path under named user's home directory;
|
|
|
|
* (3) "relative/path" to mean cwd relative directory; or
|
|
|
|
* (4) "/absolute/path" to mean absolute directory.
|
|
|
|
*
|
|
|
|
* Unless "strict" is given, we try access() for existence of "%s.git/.git",
|
|
|
|
* "%s/.git", "%s.git", "%s" in this order. The first one that exists is
|
|
|
|
* what we try.
|
|
|
|
*
|
|
|
|
* Second, we try chdir() to that. Upon failure, we return NULL.
|
|
|
|
*
|
|
|
|
* Then, we try if the current directory is a valid git repository.
|
|
|
|
* Upon failure, we return NULL.
|
|
|
|
*
|
|
|
|
* If all goes well, we return the directory we used to chdir() (but
|
|
|
|
* before ~user is expanded), avoiding getcwd() resolving symbolic
|
|
|
|
* links. User relative paths are also returned as they are given,
|
|
|
|
* except DWIM suffixing.
|
|
|
|
*/
|
2005-11-17 20:37:14 +01:00
|
|
|
char *enter_repo(char *path, int strict)
|
|
|
|
{
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
static char used_path[PATH_MAX];
|
|
|
|
static char validated_path[PATH_MAX];
|
|
|
|
|
|
|
|
if (!path)
|
2005-11-17 20:37:14 +01:00
|
|
|
return NULL;
|
|
|
|
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
if (!strict) {
|
|
|
|
static const char *suffix[] = {
|
|
|
|
".git/.git", "/.git", ".git", "", NULL,
|
|
|
|
};
|
|
|
|
int len = strlen(path);
|
|
|
|
int i;
|
|
|
|
while ((1 < len) && (path[len-1] == '/')) {
|
|
|
|
path[len-1] = 0;
|
|
|
|
len--;
|
|
|
|
}
|
|
|
|
if (PATH_MAX <= len)
|
2005-11-17 20:37:14 +01:00
|
|
|
return NULL;
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
if (path[0] == '~') {
|
|
|
|
if (!user_path(used_path, path, PATH_MAX))
|
|
|
|
return NULL;
|
|
|
|
strcpy(validated_path, path);
|
|
|
|
path = used_path;
|
|
|
|
}
|
|
|
|
else if (PATH_MAX - 10 < len)
|
|
|
|
return NULL;
|
|
|
|
else {
|
|
|
|
path = strcpy(used_path, path);
|
|
|
|
strcpy(validated_path, path);
|
|
|
|
}
|
|
|
|
len = strlen(path);
|
|
|
|
for (i = 0; suffix[i]; i++) {
|
|
|
|
strcpy(path + len, suffix[i]);
|
|
|
|
if (!access(path, F_OK)) {
|
|
|
|
strcat(validated_path, suffix[i]);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!suffix[i] || chdir(path))
|
2005-11-18 23:59:34 +01:00
|
|
|
return NULL;
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
path = validated_path;
|
2005-11-18 23:59:34 +01:00
|
|
|
}
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
else if (chdir(path))
|
|
|
|
return NULL;
|
2005-11-17 20:37:14 +01:00
|
|
|
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allcation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
if (access("objects", X_OK) == 0 && access("refs", X_OK) == 0 &&
|
2007-01-02 08:31:08 +01:00
|
|
|
validate_headref("HEAD") == 0) {
|
2007-06-28 16:15:25 +02:00
|
|
|
setenv(GIT_DIR_ENVIRONMENT, ".", 1);
|
2005-11-25 19:48:26 +01:00
|
|
|
check_repository_format();
|
[PATCH] daemon.c and path.enter_repo(): revamp path validation.
The whitelist of git-daemon is checked against return value from
enter_repo(), and enter_repo() used to return the value obtained
from getcwd() to avoid directory aliasing issues as discussed
earlier (mid October 2005).
Unfortunately, it did not go well as we hoped.
For example, /pub on a kernel.org public machine is a symlink to
its real mountpoint, and it is understandable that the
administrator does not want to adjust the whitelist every time
/pub needs to point at a different partition for storage
allocation or whatever reasons. Being able to keep using
/pub/scm as the whitelist is a desirable property.
So this version of enter_repo() reports what it used to chdir()
and validate, but does not use getcwd() to canonicalize the
directory name. When it sees a user relative path ~user/path,
it internally resolves it to try chdir() there, but it still
reports ~user/path (possibly after appending .git if allowed to
do so, in which case it would report ~user/path.git).
What this means is that if a whitelist wants to allow a user
relative path, it needs to say "~" (for all users) or list user
home directories like "~alice" "~bob". And no, you cannot say
/home if the advertised way to access user home directories are
~alice,~bob, etc. The whole point of this is to avoid
unnecessary aliasing issues.
Anyway, because of this, daemon needs to do a bit more work to
guard itself. Namely, it needs to make sure that the accessor
does not try to exploit its leading path match rule by inserting
/../ in the middle or hanging /.. at the end. I resurrected the
belts and suspender paranoia code HPA did for this purpose.
This check cannot be done in the enter_repo() unconditionally,
because there are valid callers of enter_repo() that want to
honor /../; authorized users coming over ssh to run send-pack
and fetch-pack should be allowed to do so.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 10:45:57 +01:00
|
|
|
return path;
|
2005-11-17 20:37:14 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
2006-06-10 07:07:23 +02:00
|
|
|
|
|
|
|
int adjust_shared_perm(const char *path)
|
|
|
|
{
|
|
|
|
struct stat st;
|
|
|
|
int mode;
|
|
|
|
|
|
|
|
if (!shared_repository)
|
|
|
|
return 0;
|
|
|
|
if (lstat(path, &st) < 0)
|
|
|
|
return -1;
|
|
|
|
mode = st.st_mode;
|
2008-04-16 10:34:24 +02:00
|
|
|
|
|
|
|
if (shared_repository) {
|
|
|
|
int tweak = shared_repository;
|
|
|
|
if (!(mode & S_IWUSR))
|
|
|
|
tweak &= ~0222;
|
2008-07-12 03:15:03 +02:00
|
|
|
mode |= tweak;
|
2008-04-16 10:34:24 +02:00
|
|
|
} else {
|
|
|
|
/* Preserve old PERM_UMASK behaviour */
|
|
|
|
if (mode & S_IWUSR)
|
|
|
|
mode |= S_IWGRP;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (S_ISDIR(mode)) {
|
2008-03-05 00:15:39 +01:00
|
|
|
mode |= FORCE_DIR_SET_GID;
|
2008-04-16 10:34:24 +02:00
|
|
|
|
|
|
|
/* Copy read bits to execute bits */
|
|
|
|
mode |= (shared_repository & 0444) >> 2;
|
|
|
|
}
|
|
|
|
|
2006-11-04 21:24:05 +01:00
|
|
|
if ((mode & st.st_mode) != mode && chmod(path, mode) < 0)
|
2006-06-10 07:07:23 +02:00
|
|
|
return -2;
|
|
|
|
return 0;
|
|
|
|
}
|
2007-08-01 02:28:59 +02:00
|
|
|
|
2008-06-06 05:15:19 +02:00
|
|
|
static const char *get_pwd_cwd(void)
|
|
|
|
{
|
|
|
|
static char cwd[PATH_MAX + 1];
|
|
|
|
char *pwd;
|
|
|
|
struct stat cwd_stat, pwd_stat;
|
|
|
|
if (getcwd(cwd, PATH_MAX) == NULL)
|
|
|
|
return NULL;
|
|
|
|
pwd = getenv("PWD");
|
|
|
|
if (pwd && strcmp(pwd, cwd)) {
|
|
|
|
stat(cwd, &cwd_stat);
|
|
|
|
if (!stat(pwd, &pwd_stat) &&
|
|
|
|
pwd_stat.st_dev == cwd_stat.st_dev &&
|
|
|
|
pwd_stat.st_ino == cwd_stat.st_ino) {
|
|
|
|
strlcpy(cwd, pwd, PATH_MAX);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return cwd;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char *make_nonrelative_path(const char *path)
|
|
|
|
{
|
|
|
|
static char buf[PATH_MAX + 1];
|
|
|
|
|
2008-06-08 16:34:40 +02:00
|
|
|
if (is_absolute_path(path)) {
|
2008-06-06 05:15:19 +02:00
|
|
|
if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
|
|
|
|
die ("Too long path: %.*s", 60, path);
|
|
|
|
} else {
|
|
|
|
const char *cwd = get_pwd_cwd();
|
|
|
|
if (!cwd)
|
|
|
|
die("Cannot determine the current working directory");
|
|
|
|
if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
|
|
|
|
die ("Too long path: %.*s", 60, path);
|
|
|
|
}
|
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
Make git_dir a path relative to work_tree in setup_work_tree()
Once we find the absolute paths for git_dir and work_tree, we can make
git_dir a relative path since we know pwd will be work_tree. This should
save the kernel some time traversing the path to work_tree all the time
if git_dir is inside work_tree.
Daniel's patch didn't apply for me as-is, so I recreated it with some
differences, and here are the numbers from ten runs each.
There is some IO for me - probably due to more-or-less random flushing of
the journal - so the variation is bigger than I'd like, but whatever:
Before:
real 0m8.135s
real 0m7.933s
real 0m8.080s
real 0m7.954s
real 0m7.949s
real 0m8.112s
real 0m7.934s
real 0m8.059s
real 0m7.979s
real 0m8.038s
After:
real 0m7.685s
real 0m7.968s
real 0m7.703s
real 0m7.850s
real 0m7.995s
real 0m7.817s
real 0m7.963s
real 0m7.955s
real 0m7.848s
real 0m7.969s
Now, going by "best of ten" (on the assumption that the longer numbers
are all due to IO), I'm saying a 7.933s -> 7.685s reduction, and it does
seem to be outside of the noise (ie the "after" case never broke 8s, while
the "before" case did so half the time).
So looks like about 3% to me.
Doing it for a slightly smaller test-case (just the "arch" subdirectory)
gets more stable numbers probably due to not filling the journal with
metadata updates, so we have:
Before:
real 0m1.633s
real 0m1.633s
real 0m1.633s
real 0m1.632s
real 0m1.632s
real 0m1.630s
real 0m1.634s
real 0m1.631s
real 0m1.632s
real 0m1.632s
After:
real 0m1.610s
real 0m1.609s
real 0m1.610s
real 0m1.608s
real 0m1.607s
real 0m1.610s
real 0m1.609s
real 0m1.611s
real 0m1.608s
real 0m1.611s
where I'd just take the averages and say 1.632 vs 1.610, which is just
over 1% performance improvement.
So it's not in the noise, but it's not as big as I initially thought and
measured.
(That said, it obviously depends on how deep the working directory path is
too, and whether it is behind NFS or something else that might need to
cause more work to look up).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-19 21:34:06 +02:00
|
|
|
/*
 * Express "abs" relative to the directory "base".
 *
 * When "abs" lies strictly below "base" (i.e. "base" is a prefix of
 * "abs" that ends on a '/' boundary), the remainder after that
 * boundary is copied into a static buffer and returned.  In every
 * other case "abs" itself is returned unchanged: "base" is NULL or
 * empty, "base" is not a leading directory of "abs", or the remainder
 * would not fit in the static buffer.
 */
const char *make_relative_path(const char *abs, const char *base)
{
	static char buf[PATH_MAX + 1];
	size_t baselen, restlen;

	/* An empty base has no last character to inspect below. */
	if (!base || !*base)
		return abs;

	baselen = strlen(base);
	if (strncmp(abs, base, baselen))
		return abs;

	if (abs[baselen] == '/')
		baselen++;
	else if (base[baselen - 1] != '/')
		return abs;

	/* Never write past the static buffer. */
	restlen = strlen(abs + baselen);
	if (restlen >= sizeof(buf))
		return abs;

	memcpy(buf, abs + baselen, restlen + 1);
	return buf;
}
|
2008-05-20 08:48:54 +02:00
|
|
|
|
|
|
|
/*
 * path = absolute path
 * buf = buffer of at least max(2, strlen(path)+1) bytes
 * It is okay if buf == path, but they should not overlap otherwise.
 *
 * Performs the following normalizations on path, storing the result in buf:
 * - Removes trailing slashes.
 * - Removes empty components.
 * - Removes "." components.
 * - Removes ".." components, and the components that precede them.
 * "" and paths that contain only slashes are normalized to "/".
 * Returns the length of the output.
 *
 * Note that this function is purely textual.  It does not follow symlinks,
 * verify the existence of the path, or make any system calls.
 */
int normalize_absolute_path(char *buf, const char *path)
{
	const char *comp_start = path, *comp_end = path;
	char *dst = buf;
	int comp_len;

	assert(buf);
	assert(path);

	/* Each iteration handles one "/component" (leading slash included). */
	while (*comp_start) {
		assert(*comp_start == '/');
		while (*++comp_end && *comp_end != '/')
			; /* nothing */
		comp_len = comp_end - comp_start;

		/*
		 * The literal is compared over comp_len bytes, so its
		 * terminating NUL takes part whenever the component is
		 * longer than the literal; these are exact matches.
		 */
		if (!strncmp("/", comp_start, comp_len) ||
		    !strncmp("/.", comp_start, comp_len))
			goto next;

		if (!strncmp("/..", comp_start, comp_len)) {
			/* Drop the previously emitted component, if any. */
			while (dst > buf && *--dst != '/')
				; /* nothing */
			goto next;
		}

		/*
		 * When buf == path the destination trails the source and
		 * the two ranges may overlap, so memcpy() is not allowed.
		 */
		memmove(dst, comp_start, comp_len);
		dst += comp_len;
	next:
		comp_start = comp_end;
	}

	if (dst == buf)
		*dst++ = '/';

	*dst = '\0';
	return dst - buf;
}
|
2008-05-20 08:49:26 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* path = Canonical absolute path
|
|
|
|
* prefix_list = Colon-separated list of absolute paths
|
|
|
|
*
|
|
|
|
* Determines, for each path in parent_list, whether the "prefix" really
|
|
|
|
* is an ancestor directory of path. Returns the length of the longest
|
|
|
|
* ancestor directory, excluding any trailing slashes, or -1 if no prefix
|
|
|
|
* is an ancestor. (Note that this means 0 is returned if prefix_list is
|
|
|
|
* "/".) "/foo" is not considered an ancestor of "/foobar". Directories
|
|
|
|
* are not considered to be their own ancestors. path must be in a
|
|
|
|
* canonical form: empty components, or "." or ".." components are not
|
|
|
|
* allowed. prefix_list may be null, which is like "".
|
|
|
|
*/
|
|
|
|
int longest_ancestor_length(const char *path, const char *prefix_list)
|
|
|
|
{
|
|
|
|
char buf[PATH_MAX+1];
|
|
|
|
const char *ceil, *colon;
|
|
|
|
int len, max_len = -1;
|
|
|
|
|
|
|
|
if (prefix_list == NULL || !strcmp(path, "/"))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
for (colon = ceil = prefix_list; *colon; ceil = colon+1) {
|
|
|
|
for (colon = ceil; *colon && *colon != ':'; colon++);
|
|
|
|
len = colon - ceil;
|
|
|
|
if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil))
|
|
|
|
continue;
|
|
|
|
strlcpy(buf, ceil, len+1);
|
|
|
|
len = normalize_absolute_path(buf, buf);
|
|
|
|
/* Strip "trailing slashes" from "/". */
|
|
|
|
if (len == 1)
|
|
|
|
len = 0;
|
|
|
|
|
|
|
|
if (!strncmp(path, buf, len) &&
|
|
|
|
path[len] == '/' &&
|
|
|
|
len > max_len) {
|
|
|
|
max_len = len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return max_len;
|
|
|
|
}
|