Merge branch 'rs/userdiff-multibyte-regex'
The built-in userdiff regexp patterns for various file types have been updated to avoid triggering regexp errors from UTF-8-aware regex engines.

* rs/userdiff-multibyte-regex:
  userdiff: support regexec(3) with multi-byte support
This commit is contained in:
commit
cbfe844aa1
@ -69,6 +69,10 @@ test_language_driver () {
|
|||||||
echo "* diff='"$lang"'" >.gitattributes &&
|
echo "* diff='"$lang"'" >.gitattributes &&
|
||||||
word_diff --color-words
|
word_diff --color-words
|
||||||
'
|
'
|
||||||
|
test_expect_success "diff driver '$lang' in Islandic" '
|
||||||
|
LANG=is_IS.UTF-8 LANGUAGE=is LC_ALL="$is_IS_locale" \
|
||||||
|
word_diff --color-words
|
||||||
|
'
|
||||||
}
|
}
|
||||||
|
|
||||||
test_expect_success setup '
|
test_expect_success setup '
|
||||||
|
31
userdiff.c
31
userdiff.c
@ -17,6 +17,7 @@ static int drivers_alloc;
|
|||||||
.cflags = REG_EXTENDED, \
|
.cflags = REG_EXTENDED, \
|
||||||
}, \
|
}, \
|
||||||
.word_regex = wrx "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+", \
|
.word_regex = wrx "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+", \
|
||||||
|
.word_regex_multi_byte = wrx "|[^[:space:]]", \
|
||||||
}
|
}
|
||||||
#define IPATTERN(lang, rx, wrx) { \
|
#define IPATTERN(lang, rx, wrx) { \
|
||||||
.name = lang, \
|
.name = lang, \
|
||||||
@ -26,6 +27,7 @@ static int drivers_alloc;
|
|||||||
.cflags = REG_EXTENDED | REG_ICASE, \
|
.cflags = REG_EXTENDED | REG_ICASE, \
|
||||||
}, \
|
}, \
|
||||||
.word_regex = wrx "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+", \
|
.word_regex = wrx "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+", \
|
||||||
|
.word_regex_multi_byte = wrx "|[^[:space:]]", \
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -294,7 +296,7 @@ PATTERNS("scheme",
|
|||||||
/* All other words should be delimited by spaces or parentheses */
|
/* All other words should be delimited by spaces or parentheses */
|
||||||
"|([^][)(}{[ \t])+"),
|
"|([^][)(}{[ \t])+"),
|
||||||
PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
|
PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
|
||||||
"\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+"),
|
"\\\\[a-zA-Z@]+|\\\\.|([a-zA-Z0-9]|[^\x01-\x7f])+"),
|
||||||
{ "default", NULL, NULL, -1, { NULL, 0 } },
|
{ "default", NULL, NULL, -1, { NULL, 0 } },
|
||||||
};
|
};
|
||||||
#undef PATTERNS
|
#undef PATTERNS
|
||||||
@ -330,6 +332,25 @@ static int userdiff_find_by_namelen_cb(struct userdiff_driver *driver,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int regexec_supports_multi_byte_chars(void)
|
||||||
|
{
|
||||||
|
static const char not_space[] = "[^[:space:]]";
|
||||||
|
static const char utf8_multi_byte_char[] = "\xc2\xa3";
|
||||||
|
regex_t re;
|
||||||
|
regmatch_t match;
|
||||||
|
static int result = -1;
|
||||||
|
|
||||||
|
if (result != -1)
|
||||||
|
return result;
|
||||||
|
if (regcomp(&re, not_space, REG_EXTENDED))
|
||||||
|
BUG("invalid regular expression: %s", not_space);
|
||||||
|
result = !regexec(&re, utf8_multi_byte_char, 1, &match, 0) &&
|
||||||
|
match.rm_so == 0 &&
|
||||||
|
match.rm_eo == strlen(utf8_multi_byte_char);
|
||||||
|
regfree(&re);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
static struct userdiff_driver *userdiff_find_by_namelen(const char *name, size_t len)
|
static struct userdiff_driver *userdiff_find_by_namelen(const char *name, size_t len)
|
||||||
{
|
{
|
||||||
struct find_by_namelen_data udcbdata = {
|
struct find_by_namelen_data udcbdata = {
|
||||||
@ -405,7 +426,13 @@ int userdiff_config(const char *k, const char *v)
|
|||||||
struct userdiff_driver *userdiff_find_by_name(const char *name)
|
struct userdiff_driver *userdiff_find_by_name(const char *name)
|
||||||
{
|
{
|
||||||
int len = strlen(name);
|
int len = strlen(name);
|
||||||
return userdiff_find_by_namelen(name, len);
|
struct userdiff_driver *driver = userdiff_find_by_namelen(name, len);
|
||||||
|
if (driver && driver->word_regex_multi_byte) {
|
||||||
|
if (regexec_supports_multi_byte_chars())
|
||||||
|
driver->word_regex = driver->word_regex_multi_byte;
|
||||||
|
driver->word_regex_multi_byte = NULL;
|
||||||
|
}
|
||||||
|
return driver;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct userdiff_driver *userdiff_find_by_path(struct index_state *istate,
|
struct userdiff_driver *userdiff_find_by_path(struct index_state *istate,
|
||||||
|
@ -18,6 +18,7 @@ struct userdiff_driver {
|
|||||||
int binary;
|
int binary;
|
||||||
struct userdiff_funcname funcname;
|
struct userdiff_funcname funcname;
|
||||||
const char *word_regex;
|
const char *word_regex;
|
||||||
|
const char *word_regex_multi_byte;
|
||||||
const char *textconv;
|
const char *textconv;
|
||||||
struct notes_cache *textconv_cache;
|
struct notes_cache *textconv_cache;
|
||||||
int textconv_want_cache;
|
int textconv_want_cache;
|
||||||
|
Loading…
Reference in New Issue
Block a user