git-commit-vandalism/t/helper/test-xml-encode.c
Jeff King 126e3b3d2a t/helper: mark unused argv/argc arguments
Many test helper programs do not bother to look at argc or argv, because
they don't take any options. In a user-facing program, it's a good idea
to check for unexpected arguments and complain. But for a test helper,
it's not worth the trouble to enforce this.

But we do want to tell the compiler we're OK with ignoring them, to
silence -Wunused-parameter (and obviously we can't get rid of them,
since we have to conform to the usual cmd__foo() interface).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-28 14:11:24 -07:00

81 lines
1.7 KiB
C

#include "test-tool.h"
/*
 * XML numeric character reference for U+FFFD (REPLACEMENT CHARACTER),
 * written out in place of any input that cannot be represented.
 * Both the pointer and the pointed-to text are constant.
 */
static const char *const utf8_replace_character = "&#xfffd;";
/*
 * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
 * in an XML file.
 *
 * Single-byte (ASCII) input:
 *   - '&', '\'', '"', '<' and '>' are written as the named entities
 *     &amp; &apos; &quot; &lt; and &gt;
 *   - TAB, LF and CR are written as numeric character references
 *     (e.g. &#x0a;)
 *   - any other byte below 0x20 is written as &#xfffd;
 *   - everything else is copied through unchanged.
 *
 * Multi-byte input:
 *   - a well-formed 2-, 3- or 4-byte sequence that decodes to a character
 *     XML 1.0 allows is copied through unchanged
 *   - a sequence that is cut short (by an unexpected byte or by the end of
 *     the input), a stray continuation byte, an invalid lead byte, an
 *     overlong encoding, a UTF-16 surrogate, a value above U+10FFFF, or
 *     U+FFFE/U+FFFF is written as a single &#xfffd;
 *
 * Neither argc nor argv is examined. Returns 0 once the input is exhausted;
 * a failing read is reported through die_errno().
 */
int cmd__xml_encode(int argc UNUSED, const char **argv UNUSED)
{
	unsigned char buf[1024], tmp[4], *tmp2 = NULL;
	ssize_t cur = 0, len = 1, remaining = 0;
	/* Value decoded so far, and the smallest value this length may encode. */
	unsigned long codepoint = 0, min_codepoint = 0;
	unsigned char ch;

	for (;;) {
		/* cur/len start out as 0/1 so the very first pass triggers a read. */
		if (++cur == len) {
			len = xread(0, buf, sizeof(buf));
			if (!len)
				break;
			if (len < 0)
				die_errno("Could not read <stdin>");
			cur = 0;
		}
		ch = buf[cur];

		/* tmp2 != NULL means we are in the middle of a multi-byte sequence. */
		if (tmp2) {
			if ((ch & 0xc0) != 0x80) {
				/*
				 * Not a continuation byte: give up on the pending
				 * sequence and step back so that this byte is looked
				 * at again as the start of something new (the ++cur
				 * at the top of the loop undoes the decrement).
				 */
				fputs(utf8_replace_character, stdout);
				tmp2 = NULL;
				cur--;
				continue;
			}
			*tmp2 = ch;
			tmp2++;
			codepoint = (codepoint << 6) | (unsigned long)(ch & 0x3f);
			if (--remaining == 0) {
				/*
				 * The sequence is complete; only pass it on if it is
				 * the shortest encoding of a character XML accepts.
				 */
				if (codepoint < min_codepoint ||
				    codepoint > 0x10ffff ||
				    (codepoint >= 0xd800 && codepoint <= 0xdfff) ||
				    codepoint == 0xfffe || codepoint == 0xffff)
					fputs(utf8_replace_character, stdout);
				else
					fwrite(tmp, tmp2 - tmp, 1, stdout);
				tmp2 = NULL;
			}
			continue;
		}

		if (!(ch & 0x80)) {
			/* 0xxxxxxx */
			if (ch == '&')
				fputs("&amp;", stdout);
			else if (ch == '\'')
				fputs("&apos;", stdout);
			else if (ch == '"')
				fputs("&quot;", stdout);
			else if (ch == '<')
				fputs("&lt;", stdout);
			else if (ch == '>')
				fputs("&gt;", stdout);
			else if (ch >= 0x20)
				fputc(ch, stdout);
			else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
				fprintf(stdout, "&#x%02x;", ch);
			else
				fputs(utf8_replace_character, stdout);
		} else if ((ch & 0xe0) == 0xc0) {
			/* 110XXXXx 10xxxxxx */
			tmp[0] = ch;
			remaining = 1;
			tmp2 = tmp + 1;
			codepoint = ch & 0x1f;
			min_codepoint = 0x80;
		} else if ((ch & 0xf0) == 0xe0) {
			/* 1110XXXX 10Xxxxxx 10xxxxxx */
			tmp[0] = ch;
			remaining = 2;
			tmp2 = tmp + 1;
			codepoint = ch & 0x0f;
			min_codepoint = 0x800;
		} else if ((ch & 0xf8) == 0xf0) {
			/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
			tmp[0] = ch;
			remaining = 3;
			tmp2 = tmp + 1;
			codepoint = ch & 0x07;
			min_codepoint = 0x10000;
		} else {
			/* Stray continuation byte or an impossible lead byte. */
			fputs(utf8_replace_character, stdout);
		}
	}

	/* Input ended in the middle of a multi-byte sequence. */
	if (tmp2)
		fputs(utf8_replace_character, stdout);
	return 0;
}