From 4cedb78cb5eeb26d6dd47f866b51f061737616f2 Mon Sep 17 00:00:00 2001 From: Dmitry Ivankov Date: Thu, 11 Aug 2011 16:21:06 +0600 Subject: [PATCH 1/5] fast-import: add input format tests Documentation/git-fast-import.txt says that git-fast-import is strict about its input format. But committer/author field parsing is a bit loose. Invalid values can go unnoticed and written out to the commit, either with format-conforming input or with non-format-conforming one. Add one passing and one failing test for empty/absent committer name with well-formed input. And a failed test with unnoticed ill-formed input. Reported-by: SASAKI Suguru Signed-off-by: Dmitry Ivankov Signed-off-by: Junio C Hamano --- t/t9300-fast-import.sh | 99 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 2a53640c5b..a659dd408f 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -324,6 +324,105 @@ test_expect_success \ test `git rev-parse master` = `git rev-parse TEMP_TAG^`' rm -f .git/TEMP_TAG +git gc 2>/dev/null >/dev/null +git prune 2>/dev/null >/dev/null + +cat >input < $GIT_COMMITTER_DATE +data </dev/null >/dev/null +git prune 2>/dev/null >/dev/null + +cat >input < $GIT_COMMITTER_DATE +data </dev/null >/dev/null +git prune 2>/dev/null >/dev/null + +cat >input < $GIT_COMMITTER_DATE +data <input < $GIT_COMMITTER_DATE +data <input <> $GIT_COMMITTER_DATE +data <input <input < $GIT_COMMITTER_DATE +data < Date: Thu, 11 Aug 2011 16:21:07 +0600 Subject: [PATCH 2/5] fast-import: don't fail on omitted committer name fast-import format declares 'committer_name SP' to be optional in 'committer_name SP LT email GT'. But for a (commit) object SP is obligatory while zero length committer_name is ok. git-fsck checks that SP is present, so fast-import must prepend it if the name SP part is omitted. It doesn't do so and thus for "LT email GT" ident it writes a bad object. 
Name cannot contain LT or GT, ident always comes after SP in fast-import. So if ident starts with LT reuse the SP as if a valid 'SP LT email GT' ident was passed. This fixes an ident parsing bug for a well-formed fast-import input. Though the parsing is still loose and can accept an ill-formed input. Signed-off-by: Dmitry Ivankov Signed-off-by: Junio C Hamano --- fast-import.c | 4 ++++ t/t9300-fast-import.sh | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fast-import.c b/fast-import.c index 78d978684d..c07f155723 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1971,6 +1971,10 @@ static char *parse_ident(const char *buf) size_t name_len; char *ident; + /* ensure there is a space delimiter even if there is no name */ + if (*buf == '<') + --buf; + gt = strrchr(buf, '>'); if (!gt) die("Missing > in ident string: %s", buf); diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index a659dd408f..09ef6ba1d9 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -352,7 +352,7 @@ data < Date: Thu, 11 Aug 2011 16:21:08 +0600 Subject: [PATCH 3/5] fast-import: check committer name more strictly The documentation declares the following identity format: (<name> SP)? LT <email> GT where name is any string without LF and LT characters. But fast-import just accepts any string up to first GT instead of checking the whole format, and moreover just writes it as is to the commit object. git-fsck checks for [^<\n]* <[^<>\n]*> format. Note that the space is mandatory. And the space quirk is already handled via extending the string to the left when needed. Modify fast-import input identity format to a slightly stricter one - deny LF, LT and GT in both <name> and <email>. And check for it. This is stricter than git-fsck as fsck accepts "Name> <email>" currently, but soon fsck check will be adjusted likewise. 
Signed-off-by: Dmitry Ivankov Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.txt | 4 ++-- fast-import.c | 29 +++++++++++++++++------------ t/t9300-fast-import.sh | 10 +++++----- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index 3f5b9126b1..0ca24a89f8 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -413,8 +413,8 @@ Here `` is the person's display name (for example (``cm@example.com''). `LT` and `GT` are the literal less-than (\x3c) and greater-than (\x3e) symbols. These are required to delimit the email address from the other fields in the line. Note that -`` is free-form and may contain any sequence of bytes, except -`LT` and `LF`. It is typically UTF-8 encoded. +`` and `` are free-form and may contain any sequence +of bytes, except `LT`, `GT` and `LF`. `` is typically UTF-8 encoded. The time of the change is specified by `` using the date format that was selected by the \--date-format= command line option. 
diff --git a/fast-import.c b/fast-import.c index c07f155723..967d70c700 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1967,7 +1967,7 @@ static int validate_raw_date(const char *src, char *result, int maxlen) static char *parse_ident(const char *buf) { - const char *gt; + const char *ltgt; size_t name_len; char *ident; @@ -1975,28 +1975,33 @@ static char *parse_ident(const char *buf) if (*buf == '<') --buf; - gt = strrchr(buf, '>'); - if (!gt) + ltgt = buf + strcspn(buf, "<>"); + if (*ltgt != '<') + die("Missing < in ident string: %s", buf); + if (ltgt != buf && ltgt[-1] != ' ') + die("Missing space before < in ident string: %s", buf); + ltgt = ltgt + 1 + strcspn(ltgt + 1, "<>"); + if (*ltgt != '>') die("Missing > in ident string: %s", buf); - gt++; - if (*gt != ' ') + ltgt++; + if (*ltgt != ' ') die("Missing space after > in ident string: %s", buf); - gt++; - name_len = gt - buf; + ltgt++; + name_len = ltgt - buf; ident = xmalloc(name_len + 24); strncpy(ident, buf, name_len); switch (whenspec) { case WHENSPEC_RAW: - if (validate_raw_date(gt, ident + name_len, 24) < 0) - die("Invalid raw date \"%s\" in ident: %s", gt, buf); + if (validate_raw_date(ltgt, ident + name_len, 24) < 0) + die("Invalid raw date \"%s\" in ident: %s", ltgt, buf); break; case WHENSPEC_RFC2822: - if (parse_date(gt, ident + name_len, 24) < 0) - die("Invalid rfc2822 date \"%s\" in ident: %s", gt, buf); + if (parse_date(ltgt, ident + name_len, 24) < 0) + die("Invalid rfc2822 date \"%s\" in ident: %s", ltgt, buf); break; case WHENSPEC_NOW: - if (strcmp("now", gt)) + if (strcmp("now", ltgt)) die("Date in ident must be 'now': %s", buf); datestamp(ident + name_len, 24); break; diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 09ef6ba1d9..18441f8fcb 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -370,7 +370,7 @@ data < Date: Thu, 11 Aug 2011 16:21:09 +0600 Subject: [PATCH 4/5] fsck: add a few committer name tests fsck reports "missing space before " for 
committer string equal to "name email>" or to "". It'd be nicer to say "missing email" for the second string and "name is bad" (has > in it) for the first one. Add a failing test for these messages. For "name> <email>" no error is reported. Looks like a bug, so add such a failing test. Signed-off-by: Dmitry Ivankov Signed-off-by: Junio C Hamano --- t/t1450-fsck.sh | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index bb01d5ab8f..01ccefdb19 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -110,6 +110,42 @@ test_expect_success 'email with embedded > is not okay' ' grep "error in commit $new" out ' +test_expect_failure 'missing < email delimiter is reported nicely' ' + git cat-file commit HEAD >basis && + sed "s/bad-email-2 && + new=$(git hash-object -t commit -w --stdin out && + cat out && + grep "error in commit $new.* - bad name" out +' + +test_expect_failure 'missing email is reported nicely' ' + git cat-file commit HEAD >basis && + sed "s/[a-z]* <[^>]*>//" basis >bad-email-3 && + new=$(git hash-object -t commit -w --stdin out && + cat out && + grep "error in commit $new.* - missing email" out +' + +test_expect_failure '> in name is reported' ' + git cat-file commit HEAD >basis && + sed "s/ bad-email-4 && + new=$(git hash-object -t commit -w --stdin out && + cat out && + grep "error in commit $new" out +' + test_expect_success 'tag pointing to nonexistent' ' cat >invalid-tag <<-\EOF && object ffffffffffffffffffffffffffffffffffffffff From 53f53cff24c5fe6683234bcd5386a447b8b17074 Mon Sep 17 00:00:00 2001 From: Dmitry Ivankov Date: Thu, 11 Aug 2011 16:21:10 +0600 Subject: [PATCH 5/5] fsck: improve committer/author check fsck allows a name with > character in it like "name> <email>". Also for "name email>" fsck says "missing space before email". More precisely, it seeks for a first '<', checks that ' ' precedes it. Then seeks to '<' or '>' and checks that it is the '>'. 
Missing space is reported if either '<' is not found or it's not preceded with ' '. Change it to the following. Seek to '<' or '>', check that it is '<' and is preceded with ' '. Seek to '<' or '>' and check that it is '>'. So now "name> <email>" is rejected as "bad name". More strict name check is the only change in what is accepted. Report 'missing space' only if '<' is found and is not preceded with a space. Signed-off-by: Dmitry Ivankov Signed-off-by: Junio C Hamano --- fsck.c | 10 ++++++---- t/t1450-fsck.sh | 6 +++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fsck.c b/fsck.c index 60bd4bbf6a..6c855f84f0 100644 --- a/fsck.c +++ b/fsck.c @@ -224,13 +224,15 @@ static int fsck_tree(struct tree *item, int strict, fsck_error error_func) static int fsck_ident(char **ident, struct object *obj, fsck_error error_func) { - if (**ident == '<' || **ident == '\n') - return error_func(obj, FSCK_ERROR, "invalid author/committer line - missing space before email"); - *ident += strcspn(*ident, "<\n"); - if ((*ident)[-1] != ' ') + if (**ident == '<') return error_func(obj, FSCK_ERROR, "invalid author/committer line - missing space before email"); + *ident += strcspn(*ident, "<>\n"); + if (**ident == '>') + return error_func(obj, FSCK_ERROR, "invalid author/committer line - bad name"); if (**ident != '<') return error_func(obj, FSCK_ERROR, "invalid author/committer line - missing email"); + if ((*ident)[-1] != ' ') + return error_func(obj, FSCK_ERROR, "invalid author/committer line - missing space before email"); (*ident)++; *ident += strcspn(*ident, "<>\n"); if (**ident != '>') diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 01ccefdb19..523ce9c45b 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -110,7 +110,7 @@ test_expect_success 'email with embedded > is not okay' ' grep "error in commit $new" out ' -test_expect_failure 'missing < email delimiter is reported nicely' ' +test_expect_success 'missing < email delimiter is reported nicely' ' git cat-file 
commit HEAD >basis && sed "s/bad-email-2 && new=$(git hash-object -t commit -w --stdin basis && sed "s/[a-z]* <[^>]*>//" basis >bad-email-3 && new=$(git hash-object -t commit -w --stdin in name is reported' ' +test_expect_success '> in name is reported' ' git cat-file commit HEAD >basis && sed "s/ bad-email-4 && new=$(git hash-object -t commit -w --stdin