utf8.c: partially update to version 6.3
Unicode 6.3 defines more code points as combining or accents. For example, the character "ö" could be expressed as an "o" followed by U+0308 COMBINING DIARESIS (aka umlaut, double-dot-above). We should consider that such a sequence of two codepoints occupies one display column for the alignment purposes, and for that, git_wcwidth() should return 0 for them. Affected codepoints are: U+0358..U+035C U+0487 U+05A2, U+05BA, U+05C5, U+05C7 U+0604, U+0616..U+061A, U+0659..U+065F Earlier unicode standards had defined these as "reserved". Only the range 0..U+07FF has been checked to see which codepoints need to be marked as 0-width while preparing for this commit; more updates may be needed. Signed-off-by: Torsten Bögershausen <tboegi@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
5f95c9f850
commit
d813ab970d
9
utf8.c
9
utf8.c
@ -84,11 +84,10 @@ static int git_wcwidth(ucs_char_t ch)
|
||||
* "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c".
|
||||
*/
|
||||
static const struct interval combining[] = {
|
||||
{ 0x0300, 0x0357 }, { 0x035D, 0x036F }, { 0x0483, 0x0486 },
|
||||
{ 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
|
||||
{ 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
|
||||
{ 0x05C4, 0x05C4 }, { 0x0600, 0x0603 }, { 0x0610, 0x0615 },
|
||||
{ 0x064B, 0x0658 }, { 0x0670, 0x0670 }, { 0x06D6, 0x06E4 },
|
||||
{ 0x0300, 0x036F }, { 0x0483, 0x0489 }, { 0x0591, 0x05BD },
|
||||
{ 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 },
|
||||
{ 0x05C7, 0x05C7 }, { 0x0600, 0x0604 }, { 0x0610, 0x061A },
|
||||
{ 0x064B, 0x065F }, { 0x0670, 0x0670 }, { 0x06D6, 0x06E4 },
|
||||
{ 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, { 0x070F, 0x070F },
|
||||
{ 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
|
||||
{ 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
|
||||
|
Loading…
Reference in New Issue
Block a user