Merge branch 'cb/t4210-illseq-auto-detect'

As FreeBSD is not the only platform whose regexp library reports
a REG_ILLSEQ error when fed invalid UTF-8, add logic to detect that
automatically and skip the affected tests.

* cb/t4210-illseq-auto-detect:
  t4210: detect REG_ILLSEQ dynamically and skip affected tests
  t/helper: teach test-regex to report pattern errors (like REG_ILLSEQ)
This commit is contained in:
Junio C Hamano 2020-06-08 18:06:27 -07:00
commit f4cec40dbd
3 changed files with 125 additions and 52 deletions

View File

@ -1,5 +1,4 @@
#include "test-tool.h" #include "test-tool.h"
#include "git-compat-util.h"
#include "gettext.h" #include "gettext.h"
struct reg_flag { struct reg_flag {
@ -8,12 +7,13 @@ struct reg_flag {
}; };
static struct reg_flag reg_flags[] = { static struct reg_flag reg_flags[] = {
{ "EXTENDED", REG_EXTENDED }, { "EXTENDED", REG_EXTENDED },
{ "NEWLINE", REG_NEWLINE }, { "NEWLINE", REG_NEWLINE },
{ "ICASE", REG_ICASE }, { "ICASE", REG_ICASE },
{ "NOTBOL", REG_NOTBOL }, { "NOTBOL", REG_NOTBOL },
{ "NOTEOL", REG_NOTEOL },
#ifdef REG_STARTEND #ifdef REG_STARTEND
{ "STARTEND", REG_STARTEND }, { "STARTEND", REG_STARTEND },
#endif #endif
{ NULL, 0 } { NULL, 0 }
}; };
@ -41,36 +41,74 @@ int cmd__regex(int argc, const char **argv)
{ {
const char *pat; const char *pat;
const char *str; const char *str;
int flags = 0; int ret, silent = 0, flags = 0;
regex_t r; regex_t r;
regmatch_t m[1]; regmatch_t m[1];
char errbuf[64];
if (argc == 2 && !strcmp(argv[1], "--bug"))
return test_regex_bug();
else if (argc < 3)
usage("test-tool regex --bug\n"
"test-tool regex <pattern> <string> [<options>]");
argv++; argv++;
pat = *argv++; argc--;
str = *argv++;
while (*argv) { if (!argc)
struct reg_flag *rf; goto usage;
for (rf = reg_flags; rf->name; rf++)
if (!strcmp(*argv, rf->name)) { if (!strcmp(*argv, "--bug")) {
flags |= rf->flag; if (argc == 1)
break; return test_regex_bug();
} else
if (!rf->name) goto usage;
die("do not recognize %s", *argv); }
if (!strcmp(*argv, "--silent")) {
silent = 1;
argv++; argv++;
argc--;
}
if (!argc)
goto usage;
pat = *argv++;
if (argc == 1)
str = NULL;
else {
str = *argv++;
while (*argv) {
struct reg_flag *rf;
for (rf = reg_flags; rf->name; rf++)
if (!strcmp(*argv, rf->name)) {
flags |= rf->flag;
break;
}
if (!rf->name)
die("do not recognize flag %s", *argv);
argv++;
}
} }
git_setup_gettext(); git_setup_gettext();
if (regcomp(&r, pat, flags)) ret = regcomp(&r, pat, flags);
die("failed regcomp() for pattern '%s'", pat); if (ret) {
if (regexec(&r, str, 1, m, 0)) if (silent)
return 1; return ret;
regerror(ret, &r, errbuf, sizeof(errbuf));
die("failed regcomp() for pattern '%s' (%s)", pat, errbuf);
}
if (!str)
return 0;
ret = regexec(&r, str, 1, m, 0);
if (ret) {
if (silent || ret == REG_NOMATCH)
return ret;
regerror(ret, &r, errbuf, sizeof(errbuf));
die("failed regexec() for subject '%s' (%s)", str, errbuf);
}
return 0; return 0;
usage:
usage("\ttest-tool regex --bug\n"
"\ttest-tool regex [--silent] <pattern>\n"
"\ttest-tool regex [--silent] <pattern> <string> [<options>]");
return -1;
} }

View File

@ -10,6 +10,13 @@ latin1_e=$(printf '\351')
# invalid UTF-8 # invalid UTF-8
invalid_e=$(printf '\303\50)') # ")" at end to close opening "(" invalid_e=$(printf '\303\50)') # ")" at end to close opening "("
have_reg_illseq=
if test_have_prereq GETTEXT_LOCALE &&
! LC_ALL=$is_IS_locale test-tool regex --silent $latin1_e
then
have_reg_illseq=1
fi
test_expect_success 'create commits in different encodings' ' test_expect_success 'create commits in different encodings' '
test_tick && test_tick &&
cat >msg <<-EOF && cat >msg <<-EOF &&
@ -51,43 +58,77 @@ test_expect_success !MINGW 'log --grep does not find non-reencoded values (utf8)
test_must_be_empty actual test_must_be_empty actual
' '
test_expect_success !MINGW 'log --grep does not find non-reencoded values (latin1)' ' test_expect_success 'log --grep does not find non-reencoded values (latin1)' '
git log --encoding=ISO-8859-1 --format=%s --grep=$utf8_e >actual && git log --encoding=ISO-8859-1 --format=%s --grep=$utf8_e >actual &&
test_must_be_empty actual test_must_be_empty actual
' '
# Usage: triggers_undefined_behaviour <engine>
#
# Succeeds (returns 0) when $have_reg_illseq is non-empty and <engine> is
# either "basic" or "extended", or is "fixed" while the LIBPCRE2 prereq is
# not satisfied.  Returns 1 in every other case, including any other
# engine name.
triggers_undefined_behaviour () {
	local engine=$1

	# Nothing can qualify unless the REG_ILLSEQ probe set the flag.
	test -n "$have_reg_illseq" || return 1

	case $engine in
	basic|extended)
		return 0
		;;
	fixed)
		# Only consulted once the flag is known to be set.
		test_have_prereq LIBPCRE2 && return 1
		return 0
		;;
	esac
	return 1
}
# Usage: mismatched_git_log <pattern>
#
# Runs "git log --encoding=ISO-8859-1 --format=%s --grep=<pattern>" with
# LC_ALL set to $is_IS_locale and writes git's output to stdout.  The exit
# status is that of "git log".
mismatched_git_log () {
	# Quote both expansions: callers pass patterns such as ".*<bytes>",
	# and an unquoted expansion would be subject to field splitting and
	# pathname expansion before git ever sees it.
	local pattern="$1"

	LC_ALL=$is_IS_locale git log --encoding=ISO-8859-1 --format=%s \
		--grep="$pattern"
}
for engine in fixed basic extended perl for engine in fixed basic extended perl
do do
prereq= prereq=
if test $engine = "perl" if test $engine = "perl"
then then
prereq="PCRE" prereq=PCRE
else
prereq=""
fi fi
force_regex= force_regex=
if test $engine != "fixed" if test $engine != "fixed"
then then
force_regex=.* force_regex='.*'
fi fi
test_expect_success !MINGW,!REGEX_ILLSEQ,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" "
cat >expect <<-\EOF && test_expect_success $prereq "config grep.patternType=$engine" "
latin1 git config grep.patternType $engine
utf8
EOF
LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$latin1_e\" >actual &&
test_cmp expect actual
" "
test_expect_success !MINGW,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" " test_expect_success GETTEXT_LOCALE,$prereq "log --grep does not find non-reencoded values (latin1 + locale)" "
LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$utf8_e\" >actual && mismatched_git_log '$force_regex$utf8_e' >actual &&
test_must_be_empty actual test_must_be_empty actual
" "
test_expect_success !MINGW,!REGEX_ILLSEQ,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not die on invalid UTF-8 value (latin1 + locale + invalid needle)" " if ! triggers_undefined_behaviour $engine
LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$invalid_e\" >actual && then
test_must_be_empty actual test_expect_success !MINGW,GETTEXT_LOCALE,$prereq "log --grep searches in log output encoding (latin1 + locale)" "
" cat >expect <<-\EOF &&
latin1
utf8
EOF
mismatched_git_log '$force_regex$latin1_e' >actual &&
test_cmp expect actual
"
test_expect_success GETTEXT_LOCALE,$prereq "log --grep does not die on invalid UTF-8 value (latin1 + locale + invalid needle)" "
mismatched_git_log '$force_regex$invalid_e' >actual &&
test_must_be_empty actual
"
fi
done done
test_done test_done

View File

@ -1489,12 +1489,6 @@ case $uname_s in
test_set_prereq SED_STRIPS_CR test_set_prereq SED_STRIPS_CR
test_set_prereq GREP_STRIPS_CR test_set_prereq GREP_STRIPS_CR
;; ;;
FreeBSD)
test_set_prereq REGEX_ILLSEQ
test_set_prereq POSIXPERM
test_set_prereq BSLASHPSPEC
test_set_prereq EXECKEEPSPID
;;
*) *)
test_set_prereq POSIXPERM test_set_prereq POSIXPERM
test_set_prereq BSLASHPSPEC test_set_prereq BSLASHPSPEC