2223190815
This will come in handy when publishing the results of Git's test suite during an automated Azure DevOps run. Note: we need to make extra sure that invalid UTF-8 encoding is turned into valid UTF-8 (using the Replacement Character, \uFFFD) because t9902's trace contains such invalid byte sequences, and the task in the Azure Pipeline that uploads the test results would refuse to do anything if it was asked to parse an .xml file with invalid UTF-8 in it. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
81 lines
1.7 KiB
C
81 lines
1.7 KiB
C
#include "test-tool.h"
|
|
|
|
/*
 * U+FFFD REPLACEMENT CHARACTER, spelled out as its three UTF-8 bytes so the
 * value does not depend on the encoding this source file is saved in.
 */
static const char *utf8_replace_character = "\xef\xbf\xbd";
|
|
|
|
/*
|
|
* Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
|
|
* in an XML file.
|
|
*/
|
|
int cmd__xml_encode(int argc, const char **argv)
|
|
{
|
|
unsigned char buf[1024], tmp[4], *tmp2 = NULL;
|
|
ssize_t cur = 0, len = 1, remaining = 0;
|
|
unsigned char ch;
|
|
|
|
for (;;) {
|
|
if (++cur == len) {
|
|
len = xread(0, buf, sizeof(buf));
|
|
if (!len)
|
|
return 0;
|
|
if (len < 0)
|
|
die_errno("Could not read <stdin>");
|
|
cur = 0;
|
|
}
|
|
ch = buf[cur];
|
|
|
|
if (tmp2) {
|
|
if ((ch & 0xc0) != 0x80) {
|
|
fputs(utf8_replace_character, stdout);
|
|
tmp2 = NULL;
|
|
cur--;
|
|
continue;
|
|
}
|
|
*tmp2 = ch;
|
|
tmp2++;
|
|
if (--remaining == 0) {
|
|
fwrite(tmp, tmp2 - tmp, 1, stdout);
|
|
tmp2 = NULL;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
if (!(ch & 0x80)) {
|
|
/* 0xxxxxxx */
|
|
if (ch == '&')
|
|
fputs("&", stdout);
|
|
else if (ch == '\'')
|
|
fputs("'", stdout);
|
|
else if (ch == '"')
|
|
fputs(""", stdout);
|
|
else if (ch == '<')
|
|
fputs("<", stdout);
|
|
else if (ch == '>')
|
|
fputs(">", stdout);
|
|
else if (ch >= 0x20)
|
|
fputc(ch, stdout);
|
|
else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
|
|
fprintf(stdout, "&#x%02x;", ch);
|
|
else
|
|
fputs(utf8_replace_character, stdout);
|
|
} else if ((ch & 0xe0) == 0xc0) {
|
|
/* 110XXXXx 10xxxxxx */
|
|
tmp[0] = ch;
|
|
remaining = 1;
|
|
tmp2 = tmp + 1;
|
|
} else if ((ch & 0xf0) == 0xe0) {
|
|
/* 1110XXXX 10Xxxxxx 10xxxxxx */
|
|
tmp[0] = ch;
|
|
remaining = 2;
|
|
tmp2 = tmp + 1;
|
|
} else if ((ch & 0xf8) == 0xf0) {
|
|
/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
|
|
tmp[0] = ch;
|
|
remaining = 3;
|
|
tmp2 = tmp + 1;
|
|
} else
|
|
fputs(utf8_replace_character, stdout);
|
|
}
|
|
|
|
return 0;
|
|
}
|