Merge branch 'bb/unicode-9.0' into maint
The character width table has been updated to match Unicode 9.0 * bb/unicode-9.0: unicode_width.h: update the width tables to Unicode 9.0 update_unicode.sh: remove the plane filter update_unicode.sh: automatically download newer definition files update_unicode.sh: pin the uniset repo to a known good commit update_unicode.sh: remove an unnecessary subshell level update_unicode.sh: move it into contrib/update-unicode
This commit is contained in:
commit
9d1e8ddc73
1
.gitignore
vendored
1
.gitignore
vendored
@ -203,7 +203,6 @@
|
||||
/config.mak.autogen
|
||||
/config.mak.append
|
||||
/configure
|
||||
/unicode
|
||||
/tags
|
||||
/TAGS
|
||||
/cscope*
|
||||
|
3
contrib/update-unicode/.gitignore
vendored
Normal file
3
contrib/update-unicode/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
uniset/
|
||||
UnicodeData.txt
|
||||
EastAsianWidth.txt
|
20
contrib/update-unicode/README
Normal file
20
contrib/update-unicode/README
Normal file
@ -0,0 +1,20 @@
|
||||
TL;DR: Run update_unicode.sh after the publication of a new Unicode
|
||||
standard and commit the resulting unicode_width.h file.
|
||||
|
||||
The long version
|
||||
================
|
||||
|
||||
The Git source code ships the file unicode_width.h which contains
|
||||
tables of zero-width and double-width Unicode code points.
|
||||
These tables are generated using update_unicode.sh in this directory.
|
||||
update_unicode.sh itself uses a third-party tool, uniset, to query two
|
||||
Unicode data files for the interesting code points.
|
||||
|
||||
On first run, update_unicode.sh clones uniset from GitHub and builds it.
|
||||
This requires a current-ish version of autoconf (2.69 works as of December
|
||||
2016).
|
||||
|
||||
On each run, update_unicode.sh checks whether more recent Unicode data
|
||||
files are available from the Unicode consortium, and rebuilds the header
|
||||
unicode_width.h with the new data. The new header can then be
|
||||
committed.
|
33
contrib/update-unicode/update_unicode.sh
Executable file
33
contrib/update-unicode/update_unicode.sh
Executable file
@ -0,0 +1,33 @@
|
||||
#!/bin/sh
# Regenerate unicode_width.h at the top of the worktree: fetch the Unicode
# data files, build the third-party "uniset" tool if needed, and write the
# zero_width[] and double_width[] interval tables.
#
#See http://www.unicode.org/reports/tr44/
#
#Me Enclosing_Mark an enclosing combining mark
#Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
#Cf Format a format control character
#

# Every path below is relative to this script's directory; stop if we cannot
# get there rather than downloading and cloning into the caller's directory.
cd "$(dirname "$0")" || exit 1

# Outside a git worktree rev-parse fails and prints nothing, which would
# silently turn the output path into "/unicode_width.h".
UNICODEWIDTH_H="$(git rev-parse --show-toplevel)/unicode_width.h" || exit 1

wget -N http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt \
	http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt &&
if ! test -d uniset; then
	# Fresh clone only: pin uniset to the specific commit used here.
	git clone https://github.com/depp/uniset.git &&
	( cd uniset && git checkout 4b186196dd )
fi &&
(
	cd uniset &&
	# Configure only when no uniset executable has been built yet.
	if ! test -x uniset; then
		autoreconf -i &&
		./configure --enable-warnings=-Werror CFLAGS='-O0 -ggdb'
	fi &&
	make
) &&
UNICODE_DIR=. && export UNICODE_DIR &&
cat >"$UNICODEWIDTH_H" <<-EOF
static const struct interval zero_width[] = {
	$(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD)
};
static const struct interval double_width[] = {
	$(uniset/uniset --32 eaw:F,W)
};
EOF
|
131
unicode_width.h
131
unicode_width.h
@ -25,7 +25,7 @@ static const struct interval zero_width[] = {
|
||||
{ 0x0825, 0x0827 },
|
||||
{ 0x0829, 0x082D },
|
||||
{ 0x0859, 0x085B },
|
||||
{ 0x08E4, 0x0902 },
|
||||
{ 0x08D4, 0x0902 },
|
||||
{ 0x093A, 0x093A },
|
||||
{ 0x093C, 0x093C },
|
||||
{ 0x0941, 0x0948 },
|
||||
@ -120,6 +120,7 @@ static const struct interval zero_width[] = {
|
||||
{ 0x17C9, 0x17D3 },
|
||||
{ 0x17DD, 0x17DD },
|
||||
{ 0x180B, 0x180E },
|
||||
{ 0x1885, 0x1886 },
|
||||
{ 0x18A9, 0x18A9 },
|
||||
{ 0x1920, 0x1922 },
|
||||
{ 0x1927, 0x1928 },
|
||||
@ -158,7 +159,7 @@ static const struct interval zero_width[] = {
|
||||
{ 0x1CF4, 0x1CF4 },
|
||||
{ 0x1CF8, 0x1CF9 },
|
||||
{ 0x1DC0, 0x1DF5 },
|
||||
{ 0x1DFC, 0x1DFF },
|
||||
{ 0x1DFB, 0x1DFF },
|
||||
{ 0x200B, 0x200F },
|
||||
{ 0x202A, 0x202E },
|
||||
{ 0x2060, 0x2064 },
|
||||
@ -171,13 +172,13 @@ static const struct interval zero_width[] = {
|
||||
{ 0x3099, 0x309A },
|
||||
{ 0xA66F, 0xA672 },
|
||||
{ 0xA674, 0xA67D },
|
||||
{ 0xA69F, 0xA69F },
|
||||
{ 0xA69E, 0xA69F },
|
||||
{ 0xA6F0, 0xA6F1 },
|
||||
{ 0xA802, 0xA802 },
|
||||
{ 0xA806, 0xA806 },
|
||||
{ 0xA80B, 0xA80B },
|
||||
{ 0xA825, 0xA826 },
|
||||
{ 0xA8C4, 0xA8C4 },
|
||||
{ 0xA8C4, 0xA8C5 },
|
||||
{ 0xA8E0, 0xA8F1 },
|
||||
{ 0xA926, 0xA92D },
|
||||
{ 0xA947, 0xA951 },
|
||||
@ -204,7 +205,7 @@ static const struct interval zero_width[] = {
|
||||
{ 0xABED, 0xABED },
|
||||
{ 0xFB1E, 0xFB1E },
|
||||
{ 0xFE00, 0xFE0F },
|
||||
{ 0xFE20, 0xFE2D },
|
||||
{ 0xFE20, 0xFE2F },
|
||||
{ 0xFEFF, 0xFEFF },
|
||||
{ 0xFFF9, 0xFFFB },
|
||||
{ 0x101FD, 0x101FD },
|
||||
@ -228,16 +229,21 @@ static const struct interval zero_width[] = {
|
||||
{ 0x11173, 0x11173 },
|
||||
{ 0x11180, 0x11181 },
|
||||
{ 0x111B6, 0x111BE },
|
||||
{ 0x111CA, 0x111CC },
|
||||
{ 0x1122F, 0x11231 },
|
||||
{ 0x11234, 0x11234 },
|
||||
{ 0x11236, 0x11237 },
|
||||
{ 0x1123E, 0x1123E },
|
||||
{ 0x112DF, 0x112DF },
|
||||
{ 0x112E3, 0x112EA },
|
||||
{ 0x11301, 0x11301 },
|
||||
{ 0x11300, 0x11301 },
|
||||
{ 0x1133C, 0x1133C },
|
||||
{ 0x11340, 0x11340 },
|
||||
{ 0x11366, 0x1136C },
|
||||
{ 0x11370, 0x11374 },
|
||||
{ 0x11438, 0x1143F },
|
||||
{ 0x11442, 0x11444 },
|
||||
{ 0x11446, 0x11446 },
|
||||
{ 0x114B3, 0x114B8 },
|
||||
{ 0x114BA, 0x114BA },
|
||||
{ 0x114BF, 0x114C0 },
|
||||
@ -245,6 +251,7 @@ static const struct interval zero_width[] = {
|
||||
{ 0x115B2, 0x115B5 },
|
||||
{ 0x115BC, 0x115BD },
|
||||
{ 0x115BF, 0x115C0 },
|
||||
{ 0x115DC, 0x115DD },
|
||||
{ 0x11633, 0x1163A },
|
||||
{ 0x1163D, 0x1163D },
|
||||
{ 0x1163F, 0x11640 },
|
||||
@ -252,6 +259,16 @@ static const struct interval zero_width[] = {
|
||||
{ 0x116AD, 0x116AD },
|
||||
{ 0x116B0, 0x116B5 },
|
||||
{ 0x116B7, 0x116B7 },
|
||||
{ 0x1171D, 0x1171F },
|
||||
{ 0x11722, 0x11725 },
|
||||
{ 0x11727, 0x1172B },
|
||||
{ 0x11C30, 0x11C36 },
|
||||
{ 0x11C38, 0x11C3D },
|
||||
{ 0x11C3F, 0x11C3F },
|
||||
{ 0x11C92, 0x11CA7 },
|
||||
{ 0x11CAA, 0x11CB0 },
|
||||
{ 0x11CB2, 0x11CB3 },
|
||||
{ 0x11CB5, 0x11CB6 },
|
||||
{ 0x16AF0, 0x16AF4 },
|
||||
{ 0x16B30, 0x16B36 },
|
||||
{ 0x16F8F, 0x16F92 },
|
||||
@ -262,31 +279,59 @@ static const struct interval zero_width[] = {
|
||||
{ 0x1D185, 0x1D18B },
|
||||
{ 0x1D1AA, 0x1D1AD },
|
||||
{ 0x1D242, 0x1D244 },
|
||||
{ 0x1DA00, 0x1DA36 },
|
||||
{ 0x1DA3B, 0x1DA6C },
|
||||
{ 0x1DA75, 0x1DA75 },
|
||||
{ 0x1DA84, 0x1DA84 },
|
||||
{ 0x1DA9B, 0x1DA9F },
|
||||
{ 0x1DAA1, 0x1DAAF },
|
||||
{ 0x1E000, 0x1E006 },
|
||||
{ 0x1E008, 0x1E018 },
|
||||
{ 0x1E01B, 0x1E021 },
|
||||
{ 0x1E023, 0x1E024 },
|
||||
{ 0x1E026, 0x1E02A },
|
||||
{ 0x1E8D0, 0x1E8D6 },
|
||||
{ 0x1E944, 0x1E94A },
|
||||
{ 0xE0001, 0xE0001 },
|
||||
{ 0xE0020, 0xE007F },
|
||||
{ 0xE0100, 0xE01EF }
|
||||
};
|
||||
static const struct interval double_width[] = {
|
||||
{ /* plane */ 0x0, 0x1C },
|
||||
{ /* plane */ 0x1C, 0x21 },
|
||||
{ /* plane */ 0x21, 0x22 },
|
||||
{ /* plane */ 0x22, 0x23 },
|
||||
{ /* plane */ 0x0, 0x0 },
|
||||
{ /* plane */ 0x0, 0x0 },
|
||||
{ /* plane */ 0x0, 0x0 },
|
||||
{ /* plane */ 0x0, 0x0 },
|
||||
{ /* plane */ 0x0, 0x0 },
|
||||
{ /* plane */ 0x0, 0x0 },
|
||||
{ /* plane */ 0x0, 0x0 },
|
||||
{ /* plane */ 0x0, 0x0 },
|
||||
{ /* plane */ 0x0, 0x0 },
|
||||
{ /* plane */ 0x0, 0x0 },
|
||||
{ /* plane */ 0x0, 0x0 },
|
||||
{ /* plane */ 0x0, 0x0 },
|
||||
{ /* plane */ 0x0, 0x0 },
|
||||
{ 0x1100, 0x115F },
|
||||
{ 0x231A, 0x231B },
|
||||
{ 0x2329, 0x232A },
|
||||
{ 0x23E9, 0x23EC },
|
||||
{ 0x23F0, 0x23F0 },
|
||||
{ 0x23F3, 0x23F3 },
|
||||
{ 0x25FD, 0x25FE },
|
||||
{ 0x2614, 0x2615 },
|
||||
{ 0x2648, 0x2653 },
|
||||
{ 0x267F, 0x267F },
|
||||
{ 0x2693, 0x2693 },
|
||||
{ 0x26A1, 0x26A1 },
|
||||
{ 0x26AA, 0x26AB },
|
||||
{ 0x26BD, 0x26BE },
|
||||
{ 0x26C4, 0x26C5 },
|
||||
{ 0x26CE, 0x26CE },
|
||||
{ 0x26D4, 0x26D4 },
|
||||
{ 0x26EA, 0x26EA },
|
||||
{ 0x26F2, 0x26F3 },
|
||||
{ 0x26F5, 0x26F5 },
|
||||
{ 0x26FA, 0x26FA },
|
||||
{ 0x26FD, 0x26FD },
|
||||
{ 0x2705, 0x2705 },
|
||||
{ 0x270A, 0x270B },
|
||||
{ 0x2728, 0x2728 },
|
||||
{ 0x274C, 0x274C },
|
||||
{ 0x274E, 0x274E },
|
||||
{ 0x2753, 0x2755 },
|
||||
{ 0x2757, 0x2757 },
|
||||
{ 0x2795, 0x2797 },
|
||||
{ 0x27B0, 0x27B0 },
|
||||
{ 0x27BF, 0x27BF },
|
||||
{ 0x2B1B, 0x2B1C },
|
||||
{ 0x2B50, 0x2B50 },
|
||||
{ 0x2B55, 0x2B55 },
|
||||
{ 0x2E80, 0x2E99 },
|
||||
{ 0x2E9B, 0x2EF3 },
|
||||
{ 0x2F00, 0x2FD5 },
|
||||
@ -313,11 +358,49 @@ static const struct interval double_width[] = {
|
||||
{ 0xFE68, 0xFE6B },
|
||||
{ 0xFF01, 0xFF60 },
|
||||
{ 0xFFE0, 0xFFE6 },
|
||||
{ 0x16FE0, 0x16FE0 },
|
||||
{ 0x17000, 0x187EC },
|
||||
{ 0x18800, 0x18AF2 },
|
||||
{ 0x1B000, 0x1B001 },
|
||||
{ 0x1F004, 0x1F004 },
|
||||
{ 0x1F0CF, 0x1F0CF },
|
||||
{ 0x1F18E, 0x1F18E },
|
||||
{ 0x1F191, 0x1F19A },
|
||||
{ 0x1F200, 0x1F202 },
|
||||
{ 0x1F210, 0x1F23A },
|
||||
{ 0x1F210, 0x1F23B },
|
||||
{ 0x1F240, 0x1F248 },
|
||||
{ 0x1F250, 0x1F251 },
|
||||
{ 0x1F300, 0x1F320 },
|
||||
{ 0x1F32D, 0x1F335 },
|
||||
{ 0x1F337, 0x1F37C },
|
||||
{ 0x1F37E, 0x1F393 },
|
||||
{ 0x1F3A0, 0x1F3CA },
|
||||
{ 0x1F3CF, 0x1F3D3 },
|
||||
{ 0x1F3E0, 0x1F3F0 },
|
||||
{ 0x1F3F4, 0x1F3F4 },
|
||||
{ 0x1F3F8, 0x1F43E },
|
||||
{ 0x1F440, 0x1F440 },
|
||||
{ 0x1F442, 0x1F4FC },
|
||||
{ 0x1F4FF, 0x1F53D },
|
||||
{ 0x1F54B, 0x1F54E },
|
||||
{ 0x1F550, 0x1F567 },
|
||||
{ 0x1F57A, 0x1F57A },
|
||||
{ 0x1F595, 0x1F596 },
|
||||
{ 0x1F5A4, 0x1F5A4 },
|
||||
{ 0x1F5FB, 0x1F64F },
|
||||
{ 0x1F680, 0x1F6C5 },
|
||||
{ 0x1F6CC, 0x1F6CC },
|
||||
{ 0x1F6D0, 0x1F6D2 },
|
||||
{ 0x1F6EB, 0x1F6EC },
|
||||
{ 0x1F6F4, 0x1F6F6 },
|
||||
{ 0x1F910, 0x1F91E },
|
||||
{ 0x1F920, 0x1F927 },
|
||||
{ 0x1F930, 0x1F930 },
|
||||
{ 0x1F933, 0x1F93E },
|
||||
{ 0x1F940, 0x1F94B },
|
||||
{ 0x1F950, 0x1F95E },
|
||||
{ 0x1F980, 0x1F991 },
|
||||
{ 0x1F9C0, 0x1F9C0 },
|
||||
{ 0x20000, 0x2FFFD },
|
||||
{ 0x30000, 0x3FFFD }
|
||||
};
|
||||
|
@ -1,40 +0,0 @@
|
||||
#!/bin/sh
|
||||
#See http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
#Me Enclosing_Mark an enclosing combining mark
|
||||
#Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
|
||||
#Cf Format a format control character
|
||||
#
|
||||
UNICODEWIDTH_H=../unicode_width.h
|
||||
if ! test -d unicode; then
|
||||
mkdir unicode
|
||||
fi &&
|
||||
( cd unicode &&
|
||||
if ! test -f UnicodeData.txt; then
|
||||
wget http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
|
||||
fi &&
|
||||
if ! test -f EastAsianWidth.txt; then
|
||||
wget http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
|
||||
fi &&
|
||||
if ! test -d uniset; then
|
||||
git clone https://github.com/depp/uniset.git
|
||||
fi &&
|
||||
(
|
||||
cd uniset &&
|
||||
if ! test -x uniset; then
|
||||
autoreconf -i &&
|
||||
./configure --enable-warnings=-Werror CFLAGS='-O0 -ggdb'
|
||||
fi &&
|
||||
make
|
||||
) &&
|
||||
UNICODE_DIR=. && export UNICODE_DIR &&
|
||||
cat >$UNICODEWIDTH_H <<-EOF
|
||||
static const struct interval zero_width[] = {
|
||||
$(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD |
|
||||
grep -v plane)
|
||||
};
|
||||
static const struct interval double_width[] = {
|
||||
$(uniset/uniset --32 eaw:F,W)
|
||||
};
|
||||
EOF
|
||||
)
|
Loading…
Reference in New Issue
Block a user