unicode: exclude FDD0..FDEF range too
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
40e4e88a28
commit
b1edf20f18
@ -90,13 +90,13 @@
|
|||||||
* until Unicode committee assigns something there.
|
* until Unicode committee assigns something there.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR > 0x30000
|
#if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR >= 0x30000
|
||||||
# define LAST_SUPPORTED_WCHAR 0x30000
|
# define LAST_SUPPORTED_WCHAR 0x2ffff
|
||||||
#else
|
#else
|
||||||
# define LAST_SUPPORTED_WCHAR CONFIG_LAST_SUPPORTED_WCHAR
|
# define LAST_SUPPORTED_WCHAR CONFIG_LAST_SUPPORTED_WCHAR
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if LAST_SUPPORTED_WCHAR >= 0x0300
|
#if LAST_SUPPORTED_WCHAR >= 0x300
|
||||||
struct interval {
|
struct interval {
|
||||||
uint16_t first;
|
uint16_t first;
|
||||||
uint16_t last;
|
uint16_t last;
|
||||||
@ -185,7 +185,7 @@ static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
|
|||||||
*/
|
*/
|
||||||
static int wcwidth(unsigned ucs)
|
static int wcwidth(unsigned ucs)
|
||||||
{
|
{
|
||||||
#if LAST_SUPPORTED_WCHAR >= 0x0300
|
#if LAST_SUPPORTED_WCHAR >= 0x300
|
||||||
/* sorted list of non-overlapping intervals of non-spacing characters */
|
/* sorted list of non-overlapping intervals of non-spacing characters */
|
||||||
/* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
|
/* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
|
||||||
static const struct interval combining[] = {
|
static const struct interval combining[] = {
|
||||||
@ -460,75 +460,75 @@ static int wcwidth(unsigned ucs)
|
|||||||
#undef BIG_
|
#undef BIG_
|
||||||
#undef PAIR
|
#undef PAIR
|
||||||
};
|
};
|
||||||
# if LAST_SUPPORTED_WCHAR >= 0x10000
|
|
||||||
/* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
|
|
||||||
static const struct interval combining0x10000[] = {
|
|
||||||
{ 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
|
|
||||||
{ 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
|
|
||||||
{ 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
|
|
||||||
{ 0xD242, 0xD244 }
|
|
||||||
};
|
|
||||||
# endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (ucs == 0)
|
if (ucs == 0)
|
||||||
return 0;
|
return 0;
|
||||||
/* test for 8-bit control characters (00-1f, 80-9f, 7f) */
|
|
||||||
|
/* Test for 8-bit control characters (00-1f, 80-9f, 7f) */
|
||||||
if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
|
if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
|
||||||
return -1;
|
return -1;
|
||||||
if (ucs < 0x0300) /* optimization */
|
/* Quick abort if it is an obviously invalid char */
|
||||||
|
if (ucs > LAST_SUPPORTED_WCHAR)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
/* Optimization: no combining chars below 0x300 */
|
||||||
|
if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
#if LAST_SUPPORTED_WCHAR < 0x0300
|
#if LAST_SUPPORTED_WCHAR >= 0x300
|
||||||
return -1;
|
/* Binary search in table of non-spacing characters */
|
||||||
#else
|
|
||||||
/* binary search in table of non-spacing characters */
|
|
||||||
if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
|
if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
|
||||||
return 0;
|
return 0;
|
||||||
if (in_uint16_table(ucs, combining1, ARRAY_SIZE(combining1) - 1))
|
if (in_uint16_table(ucs, combining1, ARRAY_SIZE(combining1) - 1))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (ucs < 0x1100) /* optimization */
|
/* Optimization: all chars below 0x1100 are not double-width */
|
||||||
|
if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
# if LAST_SUPPORTED_WCHAR < 0x1100
|
# if LAST_SUPPORTED_WCHAR >= 0x1100
|
||||||
return -1;
|
/* Invalid code points: */
|
||||||
# else
|
/* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */
|
||||||
if (ucs >= LAST_SUPPORTED_WCHAR)
|
/* Private Use Area (e000..f8ff) */
|
||||||
return -1;
|
/* Noncharacters fdd0..fdef */
|
||||||
|
if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
|
||||||
/* High (d800..dbff) and low (dc00..dfff) surrogates are invalid (used only by UTF16) */
|
|| (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
|
||||||
/* We also exclude Private Use Area (e000..f8ff) */
|
|
||||||
if (LAST_SUPPORTED_WCHAR >= 0xd800
|
|
||||||
&& (ucs >= 0xd800 || ucs <= 0xf8ff)
|
|
||||||
) {
|
) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 0xfffe and 0xffff in every plane are invalid */
|
/* 0xfffe and 0xffff in every plane are invalid */
|
||||||
if (LAST_SUPPORTED_WCHAR >= 0xfffe
|
if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
|
||||||
&& (ucs & 0xfffe) == 0xfffe
|
|
||||||
) {
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
# if LAST_SUPPORTED_WCHAR >= 0x10000
|
# if LAST_SUPPORTED_WCHAR >= 0x10000
|
||||||
/* binary search in table of non-spacing characters in Supplementary Multilingual Plane */
|
if (ucs >= 0x10000) {
|
||||||
if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
|
/* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
|
||||||
return 0;
|
static const struct interval combining0x10000[] = {
|
||||||
# endif
|
{ 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
|
||||||
/* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
|
{ 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
|
||||||
if (LAST_SUPPORTED_WCHAR >= 0xE0001
|
{ 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
|
||||||
&& ( ucs == 0xE0001
|
{ 0xD242, 0xD244 }
|
||||||
|| (ucs >= 0xE0020 && ucs <= 0xE007F)
|
};
|
||||||
|| (ucs >= 0xE0100 && ucs <= 0xE01EF)
|
/* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */
|
||||||
)
|
if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
|
||||||
) {
|
return 0;
|
||||||
return 0;
|
/* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
|
||||||
|
if (LAST_SUPPORTED_WCHAR >= 0xE0001
|
||||||
|
&& ( ucs == 0xE0001
|
||||||
|
|| (ucs >= 0xE0020 && ucs <= 0xE007F)
|
||||||
|
|| (ucs >= 0xE0100 && ucs <= 0xE01EF)
|
||||||
|
)
|
||||||
|
) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
/* if we arrive here, ucs is not a combining or C0/C1 control character */
|
/* If we arrive here, ucs is not a combining or C0/C1 control character.
|
||||||
|
* Check whether it's 1 char or 2-char wide.
|
||||||
|
*/
|
||||||
return 1 +
|
return 1 +
|
||||||
( (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */
|
( (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */
|
||||||
|| ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
|
|| ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
|
||||||
|
@ -13,7 +13,7 @@ mkdir ls.testdir || exit 1
|
|||||||
|
|
||||||
# With Unicode provided by libc locale, I'm not sure this test can pass.
|
# With Unicode provided by libc locale, I'm not sure this test can pass.
|
||||||
# I suspect we might fail to skip exactly correct number of bytes
|
# I suspect we might fail to skip exactly correct number of bytes
|
||||||
# over broken unicode sequences.
|
# over broken unicode sequences.
|
||||||
test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
|
test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
|
||||||
&& test x"$CONFIG_LOCALE_SUPPORT" != x"y" \
|
&& test x"$CONFIG_LOCALE_SUPPORT" != x"y" \
|
||||||
&& test x"$CONFIG_SUBST_WCHAR" = x"63" \
|
&& test x"$CONFIG_SUBST_WCHAR" = x"63" \
|
||||||
@ -144,7 +144,7 @@ test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
|
|||||||
0003_2.1__First_possible_sequence_of_a_certain_length_____________________|
|
0003_2.1__First_possible_sequence_of_a_certain_length_____________________|
|
||||||
0004_2.1.2__2_bytes__U-00000080_:________"?"______________________________|
|
0004_2.1.2__2_bytes__U-00000080_:________"?"______________________________|
|
||||||
0005_2.1.3__3_bytes__U-00000800_:________"ࠀ"______________________________|
|
0005_2.1.3__3_bytes__U-00000800_:________"ࠀ"______________________________|
|
||||||
0006_2.1.4__4_bytes__U-00010000_:________"?"______________________________|
|
0006_2.1.4__4_bytes__U-00010000_:________"𐀀"______________________________|
|
||||||
0007_2.1.5__5_bytes__U-00200000_:________"?"______________________________|
|
0007_2.1.5__5_bytes__U-00200000_:________"?"______________________________|
|
||||||
0008_2.1.6__6_bytes__U-04000000_:________"?"______________________________|
|
0008_2.1.6__6_bytes__U-04000000_:________"?"______________________________|
|
||||||
0009_2.2__Last_possible_sequence_of_a_certain_length______________________|
|
0009_2.2__Last_possible_sequence_of_a_certain_length______________________|
|
||||||
@ -155,9 +155,9 @@ test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
|
|||||||
0014_2.2.5__5_bytes__U-03FFFFFF_:________"?"______________________________|
|
0014_2.2.5__5_bytes__U-03FFFFFF_:________"?"______________________________|
|
||||||
0015_2.2.6__6_bytes__U-7FFFFFFF_:________"?"______________________________|
|
0015_2.2.6__6_bytes__U-7FFFFFFF_:________"?"______________________________|
|
||||||
0016_2.3__Other_boundary_conditions_______________________________________|
|
0016_2.3__Other_boundary_conditions_______________________________________|
|
||||||
0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"?"___________________________________|
|
0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_""___________________________________|
|
||||||
0018_2.3.2__U-0000E000_=_ee_80_80_=_"?"___________________________________|
|
0018_2.3.2__U-0000E000_=_ee_80_80_=_"?"___________________________________|
|
||||||
0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"?"___________________________________|
|
0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"<EFBFBD>"___________________________________|
|
||||||
0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"?"________________________________|
|
0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"?"________________________________|
|
||||||
0021_2.3.5__U-00110000_=_f4_90_80_80_=_"?"________________________________|
|
0021_2.3.5__U-00110000_=_f4_90_80_80_=_"?"________________________________|
|
||||||
0022_3__Malformed_sequences_______________________________________________|
|
0022_3__Malformed_sequences_______________________________________________|
|
||||||
|
Loading…
Reference in New Issue
Block a user