git-commit-vandalism/quote.c

546 lines
12 KiB
C
Raw Normal View History

#include "cache.h"
#include "quote.h"
#include "strvec.h"
int quote_path_fully = 1;
static inline int need_bs_quote(char c)
{
return (c == '\'' || c == '!');
}
/* Help to copy the thing properly quoted for the shell safety.
* any single quote is replaced with '\'', any exclamation point
* is replaced with '\!', and the whole thing is enclosed in a
* single quote pair.
*
* E.g.
* original sq_quote result
* name ==> name ==> 'name'
* a b ==> a b ==> 'a b'
* a'b ==> a'\''b ==> 'a'\''b'
* a!b ==> a'\!'b ==> 'a'\!'b'
*/
void sq_quote_buf(struct strbuf *dst, const char *src)
{
char *to_free = NULL;
if (dst->buf == src)
to_free = strbuf_detach(dst, NULL);
strbuf_addch(dst, '\'');
while (*src) {
size_t len = strcspn(src, "'!");
strbuf_add(dst, src, len);
src += len;
while (need_bs_quote(*src)) {
strbuf_addstr(dst, "'\\");
strbuf_addch(dst, *src++);
strbuf_addch(dst, '\'');
}
}
strbuf_addch(dst, '\'');
free(to_free);
}
void sq_quote_buf_pretty(struct strbuf *dst, const char *src)
{
static const char ok_punct[] = "+,-./:=@_^";
const char *p;
/* Avoid losing a zero-length string by adding '' */
if (!*src) {
strbuf_addstr(dst, "''");
return;
}
for (p = src; *p; p++) {
if (!isalnum(*p) && !strchr(ok_punct, *p)) {
sq_quote_buf(dst, src);
return;
}
}
/* if we get here, we did not need quoting */
strbuf_addstr(dst, src);
}
void sq_quotef(struct strbuf *dst, const char *fmt, ...)
{
struct strbuf src = STRBUF_INIT;
va_list ap;
va_start(ap, fmt);
strbuf_vaddf(&src, fmt, ap);
va_end(ap);
sq_quote_buf(dst, src.buf);
strbuf_release(&src);
}
void sq_quote_argv(struct strbuf *dst, const char **argv)
{
int i;
/* Copy into destination buffer. */
strbuf_grow(dst, 255);
for (i = 0; argv[i]; ++i) {
strbuf_addch(dst, ' ');
sq_quote_buf(dst, argv[i]);
}
}
/*
* Legacy function to append each argv value, quoted as necessasry,
* with whitespace before each value. This results in a leading
* space in the result.
*/
void sq_quote_argv_pretty(struct strbuf *dst, const char **argv)
{
if (argv[0])
strbuf_addch(dst, ' ');
sq_append_quote_argv_pretty(dst, argv);
}
/*
* Append each argv value, quoted as necessary, with whitespace between them.
*/
void sq_append_quote_argv_pretty(struct strbuf *dst, const char **argv)
{
int i;
for (i = 0; argv[i]; i++) {
if (i > 0)
strbuf_addch(dst, ' ');
sq_quote_buf_pretty(dst, argv[i]);
}
}
static char *sq_dequote_step(char *arg, char **next)
{
char *dst = arg;
char *src = arg;
char c;
if (*src != '\'')
return NULL;
for (;;) {
c = *++src;
if (!c)
return NULL;
if (c != '\'') {
*dst++ = c;
continue;
}
/* We stepped out of sq */
switch (*++src) {
case '\0':
*dst = 0;
if (next)
*next = NULL;
return arg;
case '\\':
sq_dequote: fix extra consumption of source string This fixes a (probably harmless) parsing problem in sq_dequote_step(), in which we parse some bogus input incorrectly rather than complaining that it's bogus. Our shell-dequoting function is very strict: it can unquote everything generated by sq_quote(), but not arbitrary strings. In particular, it only allows characters outside of the single-quoted string if they are immediately backslashed and then the single-quoted string is resumed. So: 'foo'\''bar' is OK. But these are not: 'foo'\'bar 'foo'\' 'foo'\'\''bar' even though they are all valid shell. The parser has a funny corner case here. When we see a backslashed character, we keep incrementing the "src" pointer as we parse it. For a single sq_dequote() call, that's OK; our next step is to bail with an error, and we don't care where "src" points. But if we're parsing multiple strings with sq_dequote_to_argv(), then our next step is to see if the string is followed by whitespace. Because we erroneously incremented the "src" pointer, we don't barf on the bogus backslash that we skipped. Instead, we may find whitespace that immediately follows it, and continue as if all is well (skipping the backslashed character completely!). In practice, this shouldn't be a big deal. The input is bogus, and our sq_quote() would never generate this bogus input. In all but one callers, we are parsing input created by an earlier call to sq_quote(). That final case is "git shell", which parses shell-quoting generated by the client. And in that case we use the singular sq_quote(), which has always behaved correctly. One might also wonder if you could provoke a read past the end of the string. But the answer is no; we still parse character by character, and would never advance past a NUL. This patch implements the minimal fix, along with documenting the restriction (which confused at least me while reading the code). We should possibly consider being more liberal in accepting valid shell-quoted words. I suspect the code may actually be simpler, and it would be more friendly to anybody generating or editing input by hand. But I wanted to fix just the immediate bug in this patch. We don't have a direct way to unit-test the sq_dequote() functions, but we can do this by feeding input to GIT_CONFIG_PARAMETERS (which is not normally a user-facing interface, but serves here as it expects to see sq_quote() input from "git -c"). I've included both a bogus example, and a related "good" one to confirm that we still parse it correctly. Noticed-by: Michael Haggerty <mhagger@alum.mit.edu> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-02-14 00:41:49 +01:00
/*
* Allow backslashed characters outside of
* single-quotes only if they need escaping,
* and only if we resume the single-quoted part
* afterward.
*/
if (need_bs_quote(src[1]) && src[2] == '\'') {
*dst++ = src[1];
src += 2;
continue;
}
/* Fallthrough */
default:
if (!next || !isspace(*src))
return NULL;
do {
c = *++src;
} while (isspace(c));
*dst = 0;
*next = src;
return arg;
}
}
}
char *sq_dequote(char *arg)
{
return sq_dequote_step(arg, NULL);
}
static int sq_dequote_to_argv_internal(char *arg,
const char ***argv, int *nr, int *alloc,
struct strvec *array)
{
char *next = arg;
if (!*arg)
return 0;
do {
char *dequoted = sq_dequote_step(next, &next);
if (!dequoted)
return -1;
if (argv) {
ALLOC_GROW(*argv, *nr + 1, *alloc);
(*argv)[(*nr)++] = dequoted;
}
if (array)
strvec_push(array, dequoted);
} while (next);
return 0;
}
int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc)
{
return sq_dequote_to_argv_internal(arg, argv, nr, alloc, NULL);
}
int sq_dequote_to_strvec(char *arg, struct strvec *array)
{
return sq_dequote_to_argv_internal(arg, NULL, NULL, NULL, array);
}
/* 1 means: quote as octal
* 0 means: quote as octal if (quote_path_fully)
* -1 means: never quote
* c: quote as "\\c"
*/
#define X8(x) x, x, x, x, x, x, x, x
#define X16(x) X8(x), X8(x)
static signed char const sq_lookup[256] = {
/* 0 1 2 3 4 5 6 7 */
/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 'a',
/* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r', 1, 1,
/* 0x10 */ X16(1),
/* 0x20 */ -1, -1, '"', -1, -1, -1, -1, -1,
/* 0x28 */ X16(-1), X16(-1), X16(-1),
/* 0x58 */ -1, -1, -1, -1,'\\', -1, -1, -1,
/* 0x60 */ X16(-1), X8(-1),
/* 0x78 */ -1, -1, -1, -1, -1, -1, -1, 1,
/* 0x80 */ /* set to 0 */
};
static inline int sq_must_quote(char c)
{
return sq_lookup[(unsigned char)c] + quote_path_fully > 0;
}
/* returns the longest prefix not needing a quote up to maxlen if positive.
This stops at the first \0 because it's marked as a character needing an
escape */
static size_t next_quote_pos(const char *s, ssize_t maxlen)
{
size_t len;
if (maxlen < 0) {
for (len = 0; !sq_must_quote(s[len]); len++);
} else {
for (len = 0; len < maxlen && !sq_must_quote(s[len]); len++);
}
return len;
}
/*
* C-style name quoting.
*
* (1) if sb and fp are both NULL, inspect the input name and counts the
* number of bytes that are needed to hold c_style quoted version of name,
* counting the double quotes around it but not terminating NUL, and
* returns it.
* However, if name does not need c_style quoting, it returns 0.
*
* (2) if sb or fp are not NULL, it emits the c_style quoted version
* of name, enclosed with double quotes if asked and needed only.
* Return value is the same as in (1).
*/
static size_t quote_c_style_counted(const char *name, ssize_t maxlen,
struct strbuf *sb, FILE *fp, int no_dq)
{
#undef EMIT
#define EMIT(c) \
do { \
if (sb) strbuf_addch(sb, (c)); \
if (fp) fputc((c), fp); \
count++; \
} while (0)
#define EMITBUF(s, l) \
do { \
if (sb) strbuf_add(sb, (s), (l)); \
if (fp) fwrite((s), (l), 1, fp); \
count += (l); \
} while (0)
size_t len, count = 0;
const char *p = name;
for (;;) {
int ch;
len = next_quote_pos(p, maxlen);
if (len == maxlen || (maxlen < 0 && !p[len]))
break;
if (!no_dq && p == name)
EMIT('"');
EMITBUF(p, len);
EMIT('\\');
p += len;
ch = (unsigned char)*p++;
if (maxlen >= 0)
maxlen -= len + 1;
if (sq_lookup[ch] >= ' ') {
EMIT(sq_lookup[ch]);
} else {
EMIT(((ch >> 6) & 03) + '0');
EMIT(((ch >> 3) & 07) + '0');
EMIT(((ch >> 0) & 07) + '0');
}
}
EMITBUF(p, len);
if (p == name) /* no ending quote needed */
return 0;
if (!no_dq)
EMIT('"');
return count;
}
size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq)
{
return quote_c_style_counted(name, -1, sb, fp, nodq);
}
void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq)
{
if (quote_c_style(prefix, NULL, NULL, 0) ||
quote_c_style(path, NULL, NULL, 0)) {
if (!nodq)
strbuf_addch(sb, '"');
quote_c_style(prefix, sb, NULL, 1);
quote_c_style(path, sb, NULL, 1);
if (!nodq)
strbuf_addch(sb, '"');
} else {
strbuf_addstr(sb, prefix);
strbuf_addstr(sb, path);
}
}
void write_name_quoted(const char *name, FILE *fp, int terminator)
{
if (terminator) {
quote_c_style(name, NULL, fp, 0);
} else {
fputs(name, fp);
}
fputc(terminator, fp);
}
void write_name_quoted_relative(const char *name, const char *prefix,
FILE *fp, int terminator)
{
struct strbuf sb = STRBUF_INIT;
quote.c: substitute path_relative with relative_path Substitute the function path_relative in quote.c with the function relative_path. Function relative_path can be treated as an enhanced and more robust version of path_relative. Outputs of path_relative and it's replacement (relative_path) are the same for the following cases: path prefix output of path_relative output of relative_path ======== ========= ======================= ======================= /a/b/c/ /a/b/ c/ c/ /a/b/c /a/b/ c c /a/ /a/b/ ../ ../ / /a/b/ ../../ ../../ /a/c /a/b/ ../c ../c /x/y /a/b/ ../../x/y ../../x/y a/b/c/ a/b/ c/ c/ a/ a/b/ ../ ../ x/y a/b/ ../../x/y ../../x/y /a/b (empty) /a/b /a/b /a/b (null) /a/b /a/b a/b (empty) a/b a/b a/b (null) a/b a/b But if both of the path and the prefix are the same, or the returned relative path should be the current directory, the outputs of both functions are different. Function relative_path returns "./", while function path_relative returns empty string. path prefix output of path_relative output of relative_path ======== ========= ======================= ======================= /a/b/ /a/b/ (empty) ./ a/b/ a/b/ (empty) ./ (empty) (null) (empty) ./ (empty) (empty) (empty) ./ But the callers of path_relative can handle such cases, or never encounter this issue at all, because: * In function quote_path_relative, if the output of path_relative is empty, append "./" to it, like: if (!out->len) strbuf_addstr(out, "./"); * Another caller is write_name_quoted_relative, which is only used by builtin/ls-files.c. git-ls-files only show files, so path of files will never be identical with the prefix of a directory. The following differences show that path_relative does not handle extra slashes properly: path prefix output of path_relative output of relative_path ======== ========= ======================= ======================= /a//b//c/ //a/b// ../../../../a//b//c/ c/ a/b//c a//b ../b//c c And if prefix has no trailing slash, path_relative does not work properly either. But since prefix always has a trailing slash, it's not a problem. path prefix output of path_relative output of relative_path ======== ========= ======================= ======================= /a/b/c/ /a/b b/c/ c/ /a/b /a/b b ./ /a/b/ /a/b b/ ./ /a /a/b/ ../../a ../ a/b/c/ a/b b/c/ c/ a/b/ a/b b/ ./ a a/b ../a ../ x/y a/b/ ../x/y ../../x/y a/c a/b c ../c /a/ /a/b (empty) ../ (empty) /a/b ../../ ./ One tricky part in this conversion is write_name() function in ls-files.c. It takes a counted string, <name, len>, that is to be made relative to <prefix, prefix_len> and then quoted. Because write_name_quoted_relative() still takes these two parameters as counted string, but ignores the count and treat these two as NUL-terminated strings, this conversion needs to be audited for its callers: - For <name, len>, all three callers of write_name() passes a NUL-terminated string and its true length, so this patch makes "len" unused. - For <prefix, prefix_len>, prefix could be a string that is longer than empty while prefix_len could be 0 when "--full-name" option is used. This is fixed by checking prefix_len in write_name() and calling write_name_quoted_relative() with NULL when prefix_len is set to 0. Again, this makes "prefix_len" given to write_name_quoted_relative() unused, without introducing a bug. Signed-off-by: Jiang Xin <worldhello.net@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-25 17:53:44 +02:00
name = relative_path(name, prefix, &sb);
write_name_quoted(name, fp, terminator);
strbuf_release(&sb);
}
/* quote path as relative to the given prefix */
char *quote_path_relative(const char *in, const char *prefix,
struct strbuf *out)
{
struct strbuf sb = STRBUF_INIT;
quote.c: substitute path_relative with relative_path Substitute the function path_relative in quote.c with the function relative_path. Function relative_path can be treated as an enhanced and more robust version of path_relative. Outputs of path_relative and it's replacement (relative_path) are the same for the following cases: path prefix output of path_relative output of relative_path ======== ========= ======================= ======================= /a/b/c/ /a/b/ c/ c/ /a/b/c /a/b/ c c /a/ /a/b/ ../ ../ / /a/b/ ../../ ../../ /a/c /a/b/ ../c ../c /x/y /a/b/ ../../x/y ../../x/y a/b/c/ a/b/ c/ c/ a/ a/b/ ../ ../ x/y a/b/ ../../x/y ../../x/y /a/b (empty) /a/b /a/b /a/b (null) /a/b /a/b a/b (empty) a/b a/b a/b (null) a/b a/b But if both of the path and the prefix are the same, or the returned relative path should be the current directory, the outputs of both functions are different. Function relative_path returns "./", while function path_relative returns empty string. path prefix output of path_relative output of relative_path ======== ========= ======================= ======================= /a/b/ /a/b/ (empty) ./ a/b/ a/b/ (empty) ./ (empty) (null) (empty) ./ (empty) (empty) (empty) ./ But the callers of path_relative can handle such cases, or never encounter this issue at all, because: * In function quote_path_relative, if the output of path_relative is empty, append "./" to it, like: if (!out->len) strbuf_addstr(out, "./"); * Another caller is write_name_quoted_relative, which is only used by builtin/ls-files.c. git-ls-files only show files, so path of files will never be identical with the prefix of a directory. The following differences show that path_relative does not handle extra slashes properly: path prefix output of path_relative output of relative_path ======== ========= ======================= ======================= /a//b//c/ //a/b// ../../../../a//b//c/ c/ a/b//c a//b ../b//c c And if prefix has no trailing slash, path_relative does not work properly either. But since prefix always has a trailing slash, it's not a problem. path prefix output of path_relative output of relative_path ======== ========= ======================= ======================= /a/b/c/ /a/b b/c/ c/ /a/b /a/b b ./ /a/b/ /a/b b/ ./ /a /a/b/ ../../a ../ a/b/c/ a/b b/c/ c/ a/b/ a/b b/ ./ a a/b ../a ../ x/y a/b/ ../x/y ../../x/y a/c a/b c ../c /a/ /a/b (empty) ../ (empty) /a/b ../../ ./ One tricky part in this conversion is write_name() function in ls-files.c. It takes a counted string, <name, len>, that is to be made relative to <prefix, prefix_len> and then quoted. Because write_name_quoted_relative() still takes these two parameters as counted string, but ignores the count and treat these two as NUL-terminated strings, this conversion needs to be audited for its callers: - For <name, len>, all three callers of write_name() passes a NUL-terminated string and its true length, so this patch makes "len" unused. - For <prefix, prefix_len>, prefix could be a string that is longer than empty while prefix_len could be 0 when "--full-name" option is used. This is fixed by checking prefix_len in write_name() and calling write_name_quoted_relative() with NULL when prefix_len is set to 0. Again, this makes "prefix_len" given to write_name_quoted_relative() unused, without introducing a bug. Signed-off-by: Jiang Xin <worldhello.net@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-25 17:53:44 +02:00
const char *rel = relative_path(in, prefix, &sb);
strbuf_reset(out);
quote_c_style_counted(rel, strlen(rel), out, NULL, 0);
strbuf_release(&sb);
return out->buf;
}
/*
* C-style name unquoting.
*
* Quoted should point at the opening double quote.
* + Returns 0 if it was able to unquote the string properly, and appends the
* result in the strbuf `sb'.
* + Returns -1 in case of error, and doesn't touch the strbuf. Though note
* that this function will allocate memory in the strbuf, so calling
* strbuf_release is mandatory whichever result unquote_c_style returns.
*
* Updates endp pointer to point at one past the ending double quote if given.
*/
int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp)
{
size_t oldlen = sb->len, len;
int ch, ac;
if (*quoted++ != '"')
return -1;
for (;;) {
len = strcspn(quoted, "\"\\");
strbuf_add(sb, quoted, len);
quoted += len;
switch (*quoted++) {
case '"':
if (endp)
*endp = quoted;
return 0;
case '\\':
break;
default:
goto error;
}
switch ((ch = *quoted++)) {
case 'a': ch = '\a'; break;
case 'b': ch = '\b'; break;
case 'f': ch = '\f'; break;
case 'n': ch = '\n'; break;
case 'r': ch = '\r'; break;
case 't': ch = '\t'; break;
case 'v': ch = '\v'; break;
case '\\': case '"':
break; /* verbatim */
/* octal values with first digit over 4 overflow */
case '0': case '1': case '2': case '3':
ac = ((ch - '0') << 6);
if ((ch = *quoted++) < '0' || '7' < ch)
goto error;
ac |= ((ch - '0') << 3);
if ((ch = *quoted++) < '0' || '7' < ch)
goto error;
ac |= (ch - '0');
ch = ac;
break;
default:
goto error;
}
strbuf_addch(sb, ch);
}
error:
strbuf_setlen(sb, oldlen);
return -1;
}
/* quoting as a string literal for other languages */
void perl_quote_buf(struct strbuf *sb, const char *src)
{
const char sq = '\'';
const char bq = '\\';
char c;
strbuf_addch(sb, sq);
while ((c = *src++)) {
if (c == sq || c == bq)
strbuf_addch(sb, bq);
strbuf_addch(sb, c);
}
strbuf_addch(sb, sq);
}
void python_quote_buf(struct strbuf *sb, const char *src)
{
const char sq = '\'';
const char bq = '\\';
const char nl = '\n';
char c;
strbuf_addch(sb, sq);
while ((c = *src++)) {
if (c == nl) {
strbuf_addch(sb, bq);
strbuf_addch(sb, 'n');
continue;
}
if (c == sq || c == bq)
strbuf_addch(sb, bq);
strbuf_addch(sb, c);
}
strbuf_addch(sb, sq);
}
void tcl_quote_buf(struct strbuf *sb, const char *src)
{
char c;
strbuf_addch(sb, '"');
while ((c = *src++)) {
switch (c) {
case '[': case ']':
case '{': case '}':
case '$': case '\\': case '"':
strbuf_addch(sb, '\\');
consistently use "fallthrough" comments in switches Gcc 7 adds -Wimplicit-fallthrough, which can warn when a switch case falls through to the next case. The general idea is that the compiler can't tell if this was intentional or not, so you should annotate any intentional fall-throughs as such, leaving it to complain about any unannotated ones. There's a GNU __attribute__ which can be used for annotation, but of course we'd have to #ifdef it away on non-gcc compilers. Gcc will also recognize specially-formatted comments, which matches our current practice. Let's extend that practice to all of the unannotated sites (which I did look over and verify that they were behaving as intended). Ideally in each case we'd actually give some reasons in the comment about why we're falling through, or what we're falling through to. And gcc does support that with -Wimplicit-fallthrough=2, which relaxes the comment pattern matching to anything that contains "fallthrough" (or a variety of spelling variants). However, this isn't the default for -Wimplicit-fallthrough, nor for -Wextra. In the name of simplicity, it's probably better for us to support the default level, which requires "fallthrough" to be the only thing in the comment (modulo some window dressing like "else" and some punctuation; see the gcc manual for the complete set of patterns). This patch suppresses all warnings due to -Wimplicit-fallthrough. We might eventually want to add that to the DEVELOPER Makefile knob, but we should probably wait until gcc 7 is more widely adopted (since earlier versions will complain about the unknown warning type). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-21 08:25:41 +02:00
/* fallthrough */
default:
strbuf_addch(sb, c);
break;
case '\f':
strbuf_addstr(sb, "\\f");
break;
case '\r':
strbuf_addstr(sb, "\\r");
break;
case '\n':
strbuf_addstr(sb, "\\n");
break;
case '\t':
strbuf_addstr(sb, "\\t");
break;
case '\v':
strbuf_addstr(sb, "\\v");
break;
}
}
strbuf_addch(sb, '"');
}
void basic_regex_quote_buf(struct strbuf *sb, const char *src)
{
char c;
if (*src == '^') {
/* only beginning '^' is special and needs quoting */
strbuf_addch(sb, '\\');
strbuf_addch(sb, *src++);
}
if (*src == '*')
/* beginning '*' is not special, no quoting */
strbuf_addch(sb, *src++);
while ((c = *src++)) {
switch (c) {
case '[':
case '.':
case '\\':
case '*':
strbuf_addch(sb, '\\');
strbuf_addch(sb, c);
break;
case '$':
/* only the end '$' is special and needs quoting */
if (*src == '\0')
strbuf_addch(sb, '\\');
strbuf_addch(sb, c);
break;
default:
strbuf_addch(sb, c);
break;
}
}
}