[USE] Update the data files

This uses the data files from
<c355d04194/USE>.
This commit is contained in:
David Corbett 2021-09-29 19:18:25 -04:00 committed by Behdad Esfahbod
parent 564afc425a
commit f8ebb8a657
3 changed files with 283 additions and 163 deletions

View File

@ -17,17 +17,19 @@
* # Override values For Indic_Syllabic_Category
* # Not derivable
* # Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17
* # Updated for Unicode 10.0 by Andrew Glass 2017-07-25
* # Updated for Unicode 12.1 by Andrew Glass 2019-05-24
* # Updated for Unicode 13.0 by Andrew Glass 2020-07-28
* # Updated for Unicode 10.0 by Andrew Glass 2017-07-25
* # Updated for Unicode 12.1 by Andrew Glass 2019-05-24
* # Updated for Unicode 13.0 by Andrew Glass 2020-07-28
* # Updated for Unicode 14.0 by Andrew Glass 2021-09-25
* # Override values For Indic_Positional_Category
* # Not derivable
* # Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17
* # Updated for Unicode 10.0 by Andrew Glass 2017-07-25
* # Updated for Unicode 10.0 by Andrew Glass 2017-07-25
* # Ammended for Unicode 10.0 by Andrew Glass 2018-09-21
* # Updated for L2/19-083 by Andrew Glass 2019-05-06
* # Updated for Unicode 12.1 by Andrew Glass 2019-05-30
* # Updated for Unicode 13.0 by Andrew Glass 2020-07-28
* # Updated for L2/19-083 by Andrew Glass 2019-05-06
* # Updated for Unicode 12.1 by Andrew Glass 2019-05-30
* # Updated for Unicode 13.0 by Andrew Glass 2020-07-28
* # Updated for Unicode 14.0 by Andrew Glass 2021-09-28
* UnicodeData.txt does not have a header.
*/
@ -552,7 +554,18 @@ static const uint8_t use_table[] = {
/* ABE0 */ B, B, B, VPst, VPst, VAbv, VPst, VPst, VBlw, VPst, VPst, O, VMPst, VBlw, O, O,
/* ABF0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x10a00u 4008
#define use_offset_0x10570u 4008
/* Vithkuqi */
/* 10570 */ B, B, B, B, B, B, B, B, B, B, B, O, B, B, B, B,
/* 10580 */ B, B, B, B, B, B, B, B, B, B, B, O, B, B, B, B,
/* 10590 */ B, B, B, O, B, B, O, B, B, B, B, B, B, B, B, B,
/* 105A0 */ B, B, O, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 105B0 */ B, B, O, B, B, B, B, B, B, B, O, B, B, O, O, O,
#define use_offset_0x10a00u 4088
/* Kharoshthi */
@ -563,7 +576,7 @@ static const uint8_t use_table[] = {
/* 10A30 */ B, B, B, B, B, B, O, O, CMAbv, CMBlw, CMBlw, O, O, O, O, H,
/* 10A40 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O,
#define use_offset_0x10ac0u 4088
#define use_offset_0x10ac0u 4168
/* Manichaean */
@ -572,7 +585,7 @@ static const uint8_t use_table[] = {
/* 10AD0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 10AE0 */ B, B, B, B, B, CMBlw, CMBlw, O,
#define use_offset_0x10b80u 4128
#define use_offset_0x10b80u 4208
/* Psalter Pahlavi */
@ -581,7 +594,7 @@ static const uint8_t use_table[] = {
/* 10B90 */ B, B, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
/* 10BA0 */ O, O, O, O, O, O, O, O, O, B, B, B, B, B, B, O,
#define use_offset_0x10d00u 4176
#define use_offset_0x10d00u 4256
/* Hanifi Rohingya */
@ -591,7 +604,7 @@ static const uint8_t use_table[] = {
/* 10D20 */ B, B, B, B, VMAbv, VMAbv, VMAbv, CMAbv, O, O, O, O, O, O, O, O,
/* 10D30 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x10e80u 4240
#define use_offset_0x10e80u 4320
/* Yezidi */
@ -601,17 +614,22 @@ static const uint8_t use_table[] = {
/* 10EA0 */ B, B, B, B, B, B, B, B, B, B, O, VAbv, VAbv, O, O, O,
/* 10EB0 */ B, B, O, O, O, O, O, O,
#define use_offset_0x10f30u 4296
#define use_offset_0x10f30u 4376
/* Sogdian */
/* 10F30 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 10F40 */ B, B, B, B, B, B, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw,
/* 10F50 */ VMBlw, B, B, B, B, O, O, O,
/* 10F50 */ VMBlw, B, B, B, B, O, O, O, O, O, O, O, O, O, O, O,
/* 10F60 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
/* 10F70 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
#define use_offset_0x10fb0u 4336
/* Old Uyghur */
/* 10F80 */ O, O, CMBlw, CMBlw, CMBlw, CMBlw, O, O, O, O, O, O, O, O, O, O,
/* 10F90 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
/* 10FA0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
/* Chorasmian */
@ -640,7 +658,7 @@ static const uint8_t use_table[] = {
/* 110B0 */ VPst, VPre, VPst, VBlw, VBlw, VAbv, VAbv, VPst, VPst, H, CMBlw, O, O, O, O, O,
/* 110C0 */ O, O, VBlw, O, O, O, O, O,
#define use_offset_0x11100u 4616
#define use_offset_0x11100u 4784
/* Chakma */
@ -678,7 +696,7 @@ static const uint8_t use_table[] = {
/* 11220 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPst, VPst, VBlw,
/* 11230 */ VAbv, VAbv, VAbv, VAbv, VMAbv, H, CMAbv, CMAbv, O, O, O, O, O, O, VMAbv, O,
#define use_offset_0x11280u 4936
#define use_offset_0x11280u 5104
/* Multani */
@ -706,7 +724,7 @@ static const uint8_t use_table[] = {
/* 11360 */ B, B, VPst, VPst, O, O, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O,
/* 11370 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O,
#define use_offset_0x11400u 5184
#define use_offset_0x11400u 5352
/* Newa */
@ -729,7 +747,7 @@ static const uint8_t use_table[] = {
/* 114C0 */ VMAbv, VMAbv, H, CMBlw, B, O, O, O, O, O, O, O, O, O, O, O,
/* 114D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x11580u 5408
#define use_offset_0x11580u 5576
/* Siddham */
@ -773,7 +791,7 @@ static const uint8_t use_table[] = {
/* 11730 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O,
/* 11740 */ B, B, B, B, B, B, B, O,
#define use_offset_0x11800u 5864
#define use_offset_0x11800u 6032
/* Dogra */
@ -783,7 +801,7 @@ static const uint8_t use_table[] = {
/* 11820 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPre, VPst, VBlw,
/* 11830 */ VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VMAbv, VMPst, H, CMBlw, O, O, O, O, O,
#define use_offset_0x11900u 5928
#define use_offset_0x11900u 6096
/* Dives Akuru */
@ -795,7 +813,7 @@ static const uint8_t use_table[] = {
/* 11940 */ MPst, R, MPst, CMBlw, O, O, O, O, O, O, O, O, O, O, O, O,
/* 11950 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x119a0u 6024
#define use_offset_0x119a0u 6192
/* Nandinagari */
@ -823,7 +841,7 @@ static const uint8_t use_table[] = {
/* 11A80 */ B, B, B, B, R, R, R, R, R, R, FBlw, FBlw, FBlw, FBlw, FBlw, FBlw,
/* 11A90 */ FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, VMAbv, VMPst, CMAbv, H, O, O, O, B, O, O,
#define use_offset_0x11c00u 6280
#define use_offset_0x11c00u 6448
/* Bhaiksuki */
@ -844,7 +862,7 @@ static const uint8_t use_table[] = {
/* 11CA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB,
/* 11CB0 */ VBlw, VPre, VBlw, VAbv, VPst, VMAbv, VMAbv, O,
#define use_offset_0x11d00u 6464
#define use_offset_0x11d00u 6632
/* Masaram Gondi */
@ -864,7 +882,7 @@ static const uint8_t use_table[] = {
/* 11D90 */ VAbv, VAbv, O, VPst, VPst, VMAbv, VMPst, H, O, O, O, O, O, O, O, O,
/* 11DA0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x11ee0u 6640
#define use_offset_0x11ee0u 6808
/* Makasar */
@ -872,16 +890,93 @@ static const uint8_t use_table[] = {
/* 11EE0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 11EF0 */ B, B, GB, VAbv, VBlw, VPre, VPst, O,
#define use_offset_0x13430u 6664
#define use_offset_0x13000u 6832
/* Egyptian Hieroglyphs */
/* 13000 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13010 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13020 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13030 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13040 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13050 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13060 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13070 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13080 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13090 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 130A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 130B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 130C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 130D0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 130E0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 130F0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13100 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13110 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13120 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13130 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13140 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13150 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13160 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13170 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13180 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13190 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 131A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 131B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 131C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 131D0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 131E0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 131F0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13200 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13210 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13220 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13230 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13240 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13250 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13260 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13270 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13280 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13290 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 132A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 132B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 132C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 132D0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 132E0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 132F0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13300 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13310 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13320 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13330 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13340 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13350 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13360 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13370 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13380 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13390 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 133A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 133B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 133C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 133D0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 133E0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 133F0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13400 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13410 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 13420 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, O,
/* Egyptian Hieroglyph Format Controls */
/* 13430 */ H, H, H, H, H, H, H, O,
/* 13430 */ H, H, H, H, H, H, H, B, B, O, O, O, O, O, O, O,
#define use_offset_0x16b00u 6672
#define use_offset_0x16ac0u 7920
/* Tangsa */
/* 16AC0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
/* 16AD0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
/* 16AE0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
/* 16AF0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
/* Pahawh Hmong */
/* 16B00 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
@ -889,7 +984,7 @@ static const uint8_t use_table[] = {
/* 16B20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 16B30 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O,
#define use_offset_0x16f00u 6728
#define use_offset_0x16f00u 8040
/* Miao */
@ -905,14 +1000,14 @@ static const uint8_t use_table[] = {
/* 16F80 */ VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, O, O, O, O, O, O, O, VMBlw,
/* 16F90 */ VMBlw, VMBlw, VMBlw, O, O, O, O, O,
#define use_offset_0x16fe0u 6880
#define use_offset_0x16fe0u 8192
/* Ideographic Symbols and Punctuation */
/* 16FE0 */ O, O, O, O, B, O, O, O,
#define use_offset_0x18b00u 6888
#define use_offset_0x18b00u 8200
/* Khitan Small Script */
@ -948,7 +1043,7 @@ static const uint8_t use_table[] = {
/* 18CC0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 18CD0 */ B, B, B, B, B, B, O, O,
#define use_offset_0x1bc00u 7360
#define use_offset_0x1bc00u 8672
/* Duployan */
@ -964,7 +1059,7 @@ static const uint8_t use_table[] = {
/* 1BC80 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O,
/* 1BC90 */ B, B, B, B, B, B, B, B, B, B, O, O, O, CMBlw, CMBlw, O,
#define use_offset_0x1e100u 7520
#define use_offset_0x1e100u 8832
/* Nyiakeng Puachue Hmong */
@ -975,9 +1070,15 @@ static const uint8_t use_table[] = {
/* 1E130 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, B, B, B, B, B, B, B, O, O,
/* 1E140 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, B, B,
#define use_offset_0x1e2c0u 7600
#define use_offset_0x1e290u 8912
/* Toto */
/* 1E290 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 1E2A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, VMAbv, O,
/* 1E2B0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
/* Wancho */
/* 1E2C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
@ -985,7 +1086,7 @@ static const uint8_t use_table[] = {
/* 1E2E0 */ B, B, B, B, B, B, B, B, B, B, B, B, VMAbv, VMAbv, VMAbv, VMAbv,
/* 1E2F0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x1e900u 7664
#define use_offset_0x1e900u 9024
/* Adlam */
@ -997,7 +1098,7 @@ static const uint8_t use_table[] = {
/* 1E940 */ B, B, B, B, CMAbv, CMAbv, CMAbv, CMAbv, CMAbv, CMAbv, CMAbv, B, O, O, O, O,
/* 1E950 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
}; /* Table items: 7760; occupancy: 76% */
}; /* Table items: 9120; occupancy: 78% */
static inline uint8_t
hb_use_get_category (hb_codepoint_t u)
@ -1037,17 +1138,17 @@ hb_use_get_category (hb_codepoint_t u)
break;
case 0x10u:
if (hb_in_range<hb_codepoint_t> (u, 0x10570u, 0x105BFu)) return use_table[u - 0x10570u + use_offset_0x10570u];
if (hb_in_range<hb_codepoint_t> (u, 0x10A00u, 0x10A4Fu)) return use_table[u - 0x10A00u + use_offset_0x10a00u];
if (hb_in_range<hb_codepoint_t> (u, 0x10AC0u, 0x10AE7u)) return use_table[u - 0x10AC0u + use_offset_0x10ac0u];
if (hb_in_range<hb_codepoint_t> (u, 0x10B80u, 0x10BAFu)) return use_table[u - 0x10B80u + use_offset_0x10b80u];
if (hb_in_range<hb_codepoint_t> (u, 0x10D00u, 0x10D3Fu)) return use_table[u - 0x10D00u + use_offset_0x10d00u];
if (hb_in_range<hb_codepoint_t> (u, 0x10E80u, 0x10EB7u)) return use_table[u - 0x10E80u + use_offset_0x10e80u];
if (hb_in_range<hb_codepoint_t> (u, 0x10F30u, 0x10F57u)) return use_table[u - 0x10F30u + use_offset_0x10f30u];
if (hb_in_range<hb_codepoint_t> (u, 0x10FB0u, 0x110C7u)) return use_table[u - 0x10FB0u + use_offset_0x10fb0u];
if (hb_in_range<hb_codepoint_t> (u, 0x10F30u, 0x110C7u)) return use_table[u - 0x10F30u + use_offset_0x10f30u];
break;
case 0x11u:
if (hb_in_range<hb_codepoint_t> (u, 0x10FB0u, 0x110C7u)) return use_table[u - 0x10FB0u + use_offset_0x10fb0u];
if (hb_in_range<hb_codepoint_t> (u, 0x10F30u, 0x110C7u)) return use_table[u - 0x10F30u + use_offset_0x10f30u];
if (hb_in_range<hb_codepoint_t> (u, 0x11100u, 0x1123Fu)) return use_table[u - 0x11100u + use_offset_0x11100u];
if (hb_in_range<hb_codepoint_t> (u, 0x11280u, 0x11377u)) return use_table[u - 0x11280u + use_offset_0x11280u];
if (hb_in_range<hb_codepoint_t> (u, 0x11400u, 0x114DFu)) return use_table[u - 0x11400u + use_offset_0x11400u];
@ -1061,11 +1162,11 @@ hb_use_get_category (hb_codepoint_t u)
break;
case 0x13u:
if (hb_in_range<hb_codepoint_t> (u, 0x13430u, 0x13437u)) return use_table[u - 0x13430u + use_offset_0x13430u];
if (hb_in_range<hb_codepoint_t> (u, 0x13000u, 0x1343Fu)) return use_table[u - 0x13000u + use_offset_0x13000u];
break;
case 0x16u:
if (hb_in_range<hb_codepoint_t> (u, 0x16B00u, 0x16B37u)) return use_table[u - 0x16B00u + use_offset_0x16b00u];
if (hb_in_range<hb_codepoint_t> (u, 0x16AC0u, 0x16B37u)) return use_table[u - 0x16AC0u + use_offset_0x16ac0u];
if (hb_in_range<hb_codepoint_t> (u, 0x16F00u, 0x16F97u)) return use_table[u - 0x16F00u + use_offset_0x16f00u];
if (hb_in_range<hb_codepoint_t> (u, 0x16FE0u, 0x16FE7u)) return use_table[u - 0x16FE0u + use_offset_0x16fe0u];
break;
@ -1080,7 +1181,7 @@ hb_use_get_category (hb_codepoint_t u)
case 0x1Eu:
if (hb_in_range<hb_codepoint_t> (u, 0x1E100u, 0x1E14Fu)) return use_table[u - 0x1E100u + use_offset_0x1e100u];
if (hb_in_range<hb_codepoint_t> (u, 0x1E2C0u, 0x1E2FFu)) return use_table[u - 0x1E2C0u + use_offset_0x1e2c0u];
if (hb_in_range<hb_codepoint_t> (u, 0x1E290u, 0x1E2FFu)) return use_table[u - 0x1E290u + use_offset_0x1e290u];
if (hb_in_range<hb_codepoint_t> (u, 0x1E900u, 0x1E95Fu)) return use_table[u - 0x1E900u + use_offset_0x1e900u];
break;

View File

@ -1,11 +1,12 @@
# Override values For Indic_Positional_Category
# Not derivable
# Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17
# Updated for Unicode 10.0 by Andrew Glass 2017-07-25
# Updated for Unicode 10.0 by Andrew Glass 2017-07-25
# Ammended for Unicode 10.0 by Andrew Glass 2018-09-21
# Updated for L2/19-083 by Andrew Glass 2019-05-06
# Updated for Unicode 12.1 by Andrew Glass 2019-05-30
# Updated for Unicode 13.0 by Andrew Glass 2020-07-28
# Updated for L2/19-083 by Andrew Glass 2019-05-06
# Updated for Unicode 12.1 by Andrew Glass 2019-05-30
# Updated for Unicode 13.0 by Andrew Glass 2020-07-28
# Updated for Unicode 14.0 by Andrew Glass 2021-09-28
# ================================================
# ================================================
@ -14,39 +15,39 @@
# ================================================
# Indic_Positional_Category=Bottom
0F72 ; Bottom # Mn TIBETAN VOWEL SIGN I # Not really below, but need to override to fit into Universal model
0F7A..0F7D ; Bottom # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO # Not really below, but need to override to fit into Universal model
0F80 ; Bottom # Mn TIBETAN VOWEL SIGN REVERSED I # Not really below, but need to override to fit into Universal model
A9BF ; Bottom # Mc JAVANESE CONSONANT SIGN CAKRA
11127..11129; Bottom # Mn [3] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN II
1112D ; Bottom # Mn CHAKMA VOWEL SIGN AI
11130 ; Bottom # Mn CHAKMA VOWEL SIGN OI
0F72 ; Bottom # Mn TIBETAN VOWEL SIGN I # Not really below, but need to override to fit into Universal model
0F7A..0F7D ; Bottom # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO # Not really below, but need to override to fit into Universal model
0F80 ; Bottom # Mn TIBETAN VOWEL SIGN REVERSED I # Not really below, but need to override to fit into Universal model
A9BF ; Bottom # Mc JAVANESE CONSONANT SIGN CAKRA
11127..11129 ; Bottom # Mn [3] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN II
1112D ; Bottom # Mn CHAKMA VOWEL SIGN AI
11130 ; Bottom # Mn CHAKMA VOWEL SIGN OI
# ================================================
# Indic_Positional_Category=Left
1C29 ; Left # Mc LEPCHA VOWEL SIGN OO # Reduced from Top_And_Left
1C29 ; Left # Mc LEPCHA VOWEL SIGN OO # Reduced from Top_And_Left
# ================================================
# Indic_Positional_Category=Right
A9BE ; Right # Mc JAVANESE CONSONANT SIGN PENGKAL # Reduced from Bottom_And_Right
10A0C ; Right # Mn KHAROSHTHI VOWEL LENGTH MARK # Follows vowels and precedes vowel modifiers
11942 ; Right # Mc DIVES AKURU MEDIAL RA # Reduced from Bottom_And_Right
A9BE ; Right # Mc JAVANESE CONSONANT SIGN PENGKAL # Reduced from Bottom_And_Right
10A0C ; Right # Mn KHAROSHTHI VOWEL LENGTH MARK # Follows vowels and precedes vowel modifiers
11942 ; Right # Mc DIVES AKURU MEDIAL RA # Reduced from Bottom_And_Right
# ================================================
# Indic_Positional_Category=Top
0F74 ; Top # Mn TIBETAN VOWEL SIGN U # Not really above, but need to override to fit into Universal model
1A18 ; Top # Mn BUGINESE VOWEL SIGN U # Workaround to allow below to occur before above by treating all below marks as above
AA35   ; Top # Mn       CHAM CONSONANT SIGN
0F74 ; Top # Mn TIBETAN VOWEL SIGN U # Not really above, but need to override to fit into Universal model
1A18 ; Top # Mn BUGINESE VOWEL SIGN U # Workaround to allow below to occur before above by treating all below marks as above
AA35   ; Top # Mn       CHAM CONSONANT SIGN
# ================================================
# Indic_Positional_Category=Top_And_Right
0E33 ; Top_And_Right # Lo THAI CHARACTER SARA AM # IMC has Right, which seems to be a mistake.
0EB3 ; Top_And_Right # Lo LAO VOWEL SIGN AM # IMC has Right, which seems to be a mistake.
0E33 ; Top_And_Right # Lo THAI CHARACTER SARA AM # IMC has Right, which seems to be a mistake.
0EB3 ; Top_And_Right # Lo LAO VOWEL SIGN AM # IMC has Right, which seems to be a mistake.
# ================================================
# ================================================
@ -55,41 +56,46 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN
# ================================================
# Indic_Positional_Category=Bottom
0859..085B ; Bottom # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
18A9 ; Bottom # Mn MONGOLIAN LETTER ALI GALI DAGALGA
10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Not really bottom, but here for ccc to control
10AE6 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK BELOW
10F46..10F47 ; Bottom # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW
10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overriden to below because ccc-based Normalization controls order
10F4B ; Bottom # Mn SOGDIAN COMBINING CURVE BELOW
10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overriden to below because ccc-based Normalization controls order
10F4D..10F50 ; Bottom # Mn [4] SOGDIAN COMBINING HOOK BELOW..SOGDIAN COMBINING STROKE BELOW
16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR
16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
16F8F..16F92 ; Bottom # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
0859..085B ; Bottom # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
18A9 ; Bottom # Mn MONGOLIAN LETTER ALI GALI DAGALGA
10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Overriden, ccc controls order
10AE6 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK BELOW
10F46..10F47 ; Bottom # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW
10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overriden, ccc controls order
10F4B ; Bottom # Mn SOGDIAN COMBINING CURVE BELOW
10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overriden, ccc controls order
10F4D..10F50 ; Bottom # Mn [4] SOGDIAN COMBINING HOOK BELOW..SOGDIAN COMBINING STROKE BELOW
10F82 ; Bottom # Mn OLD UYGHUR COMBINING DOT ABOVE # Overriden, ccc controls order
10F83 ; Bottom # Mn OLD UYGHUR COMBINING DOT BELOW
10F84 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS ABOVE # Overriden, ccc controls order
10F85 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS BELOW
16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR
16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
16F8F..16F92 ; Bottom # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
# ================================================
# Indic_Positional_Category=Left
103C ; Left # Mc MYANMAR CONSONANT SIGN MEDIAL RA
103C ; Left # Mc MYANMAR CONSONANT SIGN MEDIAL RA
# ================================================
# Indic_Positional_Category=Top
07EB..07F3 ; Top # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07FD ; Top # Mn NKO DANTAYALAN # Not really top, but assigned here to allow ccc to control mark order
1885..1886 ; Top # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
1E944..1E94A ; Top # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
16B30..16B36 ; Top # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
1E130..1E136 ; Top # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
1E2EC..1E2EF ; Top # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
07EB..07F3 ; Top # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07FD ; Top # Mn NKO DANTAYALAN # Not really top, but assigned here to allow ccc to control mark order
1885..1886 ; Top # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
16B30..16B36 ; Top # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
1E130..1E136 ; Top # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
1E2AE ; Top # Mn TOTO SIGN RISING TONE
1E2EC..1E2EF ; Top # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
1E944..1E94A ; Top # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
# ================================================
# Indic_Positional_Category=Overstruck
1BC9D..1BC9E ; Overstruck # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
1BC9D..1BC9E ; Overstruck # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
# ================================================
# ================================================
@ -98,5 +104,6 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN
# ================================================
# Indic_Positional_Category=NA
180B..180D ; NA # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
2D7F ; NA # Mn TIFINAGH CONSONANT JOINER
180B..180D ; NA # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
180F ; NA # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR
2D7F ; NA # Mn TIFINAGH CONSONANT JOINER

View File

@ -1,9 +1,10 @@
# Override values For Indic_Syllabic_Category
# Not derivable
# Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17
# Updated for Unicode 10.0 by Andrew Glass 2017-07-25
# Updated for Unicode 12.1 by Andrew Glass 2019-05-24
# Updated for Unicode 13.0 by Andrew Glass 2020-07-28
# Updated for Unicode 10.0 by Andrew Glass 2017-07-25
# Updated for Unicode 12.1 by Andrew Glass 2019-05-24
# Updated for Unicode 13.0 by Andrew Glass 2020-07-28
# Updated for Unicode 14.0 by Andrew Glass 2021-09-25
# ================================================
# OVERRIDES TO ASSIGNED VALUES
@ -17,28 +18,28 @@ AA29 ; Bindu # Mn  CHAM VOWEL SIGN AA
# ================================================
# Indic_Syllabic_Category=Consonant
0840..0858 ; Consonant # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0F00..0F01 ; Consonant # Lo [2] TIBETAN SYLLABLE OM..TIBETAN MARK GTER YIG MGO TRUNCATED
0F04..0F06 ; Consonant # Po TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK CARET YIG MGO PHUR SHAD MA
19C1..19C7 ; Consonant # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B # Reassigned to avoid clustering with a base consonant
25CC ; Consonant # So DOTTED CIRCLE
0840..0858 ; Consonant # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0F00..0F01 ; Consonant # Lo [2] TIBETAN SYLLABLE OM..TIBETAN MARK GTER YIG MGO TRUNCATED
0F04..0F06 ; Consonant # Po TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK CARET YIG MGO PHUR SHAD MA
19C1..19C7 ; Consonant # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B # Reassigned to avoid clustering with a base consonant
25CC ; Consonant # So DOTTED CIRCLE
# ================================================
# Indic_Syllabic_Category=Consonant_Dead
0F7F ; Consonant_Dead # Mc TIBETAN SIGN RNAM BCAD # reassigned so that visarga will form an independent cluster
0F7F ; Consonant_Dead # Mc TIBETAN SIGN RNAM BCAD # reassigned so that visarga will form an independent cluster
# ================================================
# Indic_Syllabic_Category=Consonant_Final
0F35 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
0F37 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
0FC6 ; Consonant_Final # Mn TIBETAN SYMBOL PADMA GDAN
0F35 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
0F37 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
0FC6 ; Consonant_Final # Mn TIBETAN SYMBOL PADMA GDAN
# ================================================
# Indic_Syllabic_Category=Consonant_Final_Modifier
1C36 ; Consonant_Final_Modifier # Mn LEPCHA SIGN RAN
1C36 ; Consonant_Final_Modifier # Mn LEPCHA SIGN RAN
# ================================================
@ -54,9 +55,8 @@ AA29 ; Bindu # Mn  CHAM VOWEL SIGN AA
# ================================================
# Indic_Syllabic_Category=Tone_Mark
A982 ; Tone_Mark # Mn JAVANESE SIGN LAYAR# Not a repha, because it does not reorder to front of cluster
1A7B..1A7C ; Tone_Mark # Mn [2] TAI THAM SIGN MAI SAM..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; Tone_Mark # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1A7B..1A7C ; Tone_Mark # Mn [2] TAI THAM SIGN MAI SAM..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; Tone_Mark # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
# ================================================
@ -72,41 +72,50 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
# ================================================
# Indic_Syllabic_Category=Consonant
0800..0815 ; Consonant # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
1800 ; Consonant # Po MONGOLIAN BIRGA # Reassigned so that legacy Birga + MFVS sequences still work
1807 ; Consonant # Po MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER
180A ; Consonant # Po MONGOLIAN NIRUGU
1820..1878 ; Consonant # Lo [88] MONGOLIAN LETTER A..MONGOLIAN LETTER CHA WITH TWO DOTS
1843 ; Consonant # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
2D30..2D67 ; Consonant # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO
2D6F ; Consonant # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
10AC0..10AC7 ; Consonant # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW
10AC9..10AE4 ; Consonant # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW
10D00..10D23 ; Consonant # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
10E80..10EA9 ; Consonant # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EB0..10EB1 ; Consonant # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
10F30..10F45 ; Consonant # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
111DA ; Consonant # Lo SHARADA EKAM
#HIEROGLYPHS moved to new category
#13000..1342E ; Consonant # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
0800..0815 ; Consonant # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
1800 ; Consonant # Po MONGOLIAN BIRGA # Reassigned so that legacy Birga + MFVS sequences still work
1807 ; Consonant # Po MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER
180A ; Consonant # Po MONGOLIAN NIRUGU
1820..1878 ; Consonant # Lo [88] MONGOLIAN LETTER A..MONGOLIAN LETTER CHA WITH TWO DOTS
1843 ; Consonant # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
2D30..2D67 ; Consonant # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO
2D6F ; Consonant # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
10570..1057A ; Consonant # Lo [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA
1057C..1058A ; Consonant # Lo [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE
1058C..10592 ; Consonant # Lo [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE
10594..10595 ; Consonant # Lo [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE
10597..105A1 ; Consonant # Lo [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA
105A3..105B1 ; Consonant # Lo [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; Consonant # Lo [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; Consonant # Lo [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
10AC0..10AC7 ; Consonant # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW
10AC9..10AE4 ; Consonant # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW
10D00..10D23 ; Consonant # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
10E80..10EA9 ; Consonant # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EB0..10EB1 ; Consonant # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
10F30..10F45 ; Consonant # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
111DA ; Consonant # Lo SHARADA EKAM
#HIEROGLYPHS to be moved to new category
13000..1342E ; Consonant # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
#For the Begin and End segment to be handled fully correctly, the cluster model needs to be modified.
#13437..13438 ; Consonant # Lo [2] EGYPTIAN HIEROGLYPH BEGIN SEGMENT..EGYPTIAN HIEROGLYPH END SEGMENT
16B00..16B2F ; Consonant # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU
16F00..16F4A ; Consonant # Lo [75] MIAO LETTER PA..MIAO LETTER RTE
16FE4 ; Consonant # Mn KHITAN SMALL SCRIPT FILLER
18B00..18CD5 ; Consonant # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5
1BC00..1BC6A ; Consonant # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
1BC70..1BC7C ; Consonant # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
1BC80..1BC88 ; Consonant # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
1BC90..1BC99 ; Consonant # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
1E100..1E12C ; Consonant # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
1E137..1E13D ; Consonant # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
1E14E ; Consonant # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ
1E14F ; Consonant # So NYIAKENG PUACHUE HMONG CIRCLED CA
1E2C0..1E2EB ; Consonant # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH
1E900..1E921 ; Consonant # Lu [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
1E922..1E943 ; Consonant # Ll [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
1E94B ; Consonant # Lm ADLAM NASALIZATION MARK
13437..13438 ; Consonant # Lo [2] EGYPTIAN HIEROGLYPH BEGIN SEGMENT..EGYPTIAN HIEROGLYPH END SEGMENT
16B00..16B2F ; Consonant # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU
16F00..16F4A ; Consonant # Lo [75] MIAO LETTER PA..MIAO LETTER RTE
16FE4 ; Consonant # Mn KHITAN SMALL SCRIPT FILLER # Avoids Mn pushing this into VOWEL class
18B00..18CD5 ; Consonant # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5
1BC00..1BC6A ; Consonant # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
1BC70..1BC7C ; Consonant # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
1BC80..1BC88 ; Consonant # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
1BC90..1BC99 ; Consonant # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
1E100..1E12C ; Consonant # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
1E137..1E13D ; Consonant # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
1E14E ; Consonant # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ
1E14F ; Consonant # So NYIAKENG PUACHUE HMONG CIRCLED CA
1E290..1E2AD ; Consonant # Lo [30] TOTO LETTER PA..TOTO LETTER A
1E2C0..1E2EB ; Consonant # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH
1E900..1E921 ; Consonant # Lu [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
1E922..1E943 ; Consonant # Ll [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
1E94B ; Consonant # Lm ADLAM NASALIZATION MARK
# ================================================
@ -116,13 +125,13 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
# ================================================
# Indic_Syllabic_Category=Gemination_Mark
10D27 ; Gemination_Mark # Mn HANIFI ROHINGYA SIGN TASSI
10D27 ; Gemination_Mark # Mn HANIFI ROHINGYA SIGN TASSI
# ================================================
# Indic_Syllabic_Category=Modifying_Letter
FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16# Need to treat them as isolated bases so they don't merge with a cluster in invalid scenarios
16F50 ; Modifying_Letter # Lo MIAO LETTER NASALIZATION
FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16# Need to treat them as isolated bases so they don't merge with a cluster in invalid scenarios
16F50 ; Modifying_Letter # Lo MIAO LETTER NASALIZATION
# ================================================
@ -136,49 +145,52 @@ FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SELE
16F4F ; Nukta # Mn MIAO SIGN CONSONANT MODIFIER BAR
1BC9D..1BC9E ; Nukta # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
1E944..1E94A ; Nukta # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
10F82..10F85 ; Nukta # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
# ================================================
# Indic_Syllabic_Category=Number
10D30..10D39 ; Number # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
10F51..10F54 ; Number # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
1E140..1E149 ; Number # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
1E2F0..1E2F9 ; Number # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
1E950..1E959 ; Number # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
10D30..10D39 ; Number # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
10F51..10F54 ; Number # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
16AC0..16AC9 ; Number # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE
1E140..1E149 ; Number # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
1E2F0..1E2F9 ; Number # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
1E950..1E959 ; Number # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
# ================================================
# Indic_Syllabic_Category=Tone_Mark
07EB..07F3 ; Tone_Mark # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07FD ; Tone_Mark # Mn NKO DANTAYALAN
0F86..0F87 ; Tone_Mark # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
17CF ; Tone_Mark # Mn KHMER SIGN AHSDA
10D24..10D26 ; Tone_Mark # Mn [3] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TANA
10F46..10F50 ; Tone_Mark # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
16B30..16B36 ; Tone_Mark # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F8F..16F92 ; Tone_Mark # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
1E130..1E136 ; Tone_Mark # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
1E2EC..1E2EF ; Tone_Mark # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
07EB..07F3 ; Tone_Mark # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07FD ; Tone_Mark # Mn NKO DANTAYALAN
0F86..0F87 ; Tone_Mark # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
17CF ; Tone_Mark # Mn KHMER SIGN AHSDA
10D24..10D26 ; Tone_Mark # Mn [3] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TANA
10F46..10F50 ; Tone_Mark # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
16B30..16B36 ; Tone_Mark # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F8F..16F92 ; Tone_Mark # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
1E130..1E136 ; Tone_Mark # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
1E2AE ; Tone_Mark # Mn TOTO SIGN RISING TONE
1E2EC..1E2EF ; Tone_Mark # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
# ================================================
# Indic_Syllabic_Category=Virama
2D7F ; Virama # Mn TIFINAGH CONSONANT JOINER
13430..13436 ; Virama # Cf [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE
2D7F ; Virama # Mn TIFINAGH CONSONANT JOINER
13430..13436 ; Virama # Cf [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE
# ================================================
# Indic_Syllabic_Category=Vowel_Independent
AAB1 ; Vowel_Independent # Lo TAI VIET VOWEL AA
AABA ; Vowel_Independent # Lo TAI VIET VOWEL UA
AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
AAB1 ; Vowel_Independent # Lo TAI VIET VOWEL AA
AABA ; Vowel_Independent # Lo TAI VIET VOWEL UA
AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
# ================================================
# Indic_Syllabic_Category=Vowel_Dependent
0B55 ; Vowel_Dependent # Mn ORIYA SIGN OVERLINE
10EAB..10EAC ; Vowel_Dependent # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
16F51..16F87 ; Vowel_Dependent # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
0B55 ; Vowel_Dependent # Mn ORIYA SIGN OVERLINE
10EAB..10EAC ; Vowel_Dependent # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
16F51..16F87 ; Vowel_Dependent # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
# ================================================
# ================================================