[Indic] Better categorize Register Shifters and Khmer Various signs

This reduces the number of Khmer failures by another 500 or so!
This commit is contained in:
Behdad Esfahbod 2012-07-17 17:53:03 -04:00
parent 39b17837b4
commit 25bc489498
6 changed files with 24 additions and 12 deletions

View File

@ -55,16 +55,16 @@ DOTTEDCIRCLE = 13;
RS = 14; RS = 14;
Coeng = 15; Coeng = 15;
c = C | Ra; c = C | Ra; # is_consonant
n = (N.N? | ZWNJ?.RS); n = (N.N? | ZWNJ?.RS); # is_consonant_modifier
z = ZWJ|ZWNJ; z = ZWJ|ZWNJ; # is_joiner
h = H | Coeng; h = H | Coeng; # is_halant_or_coeng
matra_group = (M | RS) N? H?; matra_group = M.N?.H?;
syllable_tail = SM? (VD VD?)?; syllable_tail = SM? (Coeng (c|V))? (VD VD?)?;
place_holder = NBSP | DOTTEDCIRCLE; place_holder = NBSP | DOTTEDCIRCLE;
consonant_syllable = (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? (Coeng (c|V))? syllable_tail; consonant_syllable = (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
vowel_syllable = (Ra H)? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail; vowel_syllable = (Ra H)? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
standalone_cluster = (Ra H)? place_holder.n? (z? h c)* matra_group* syllable_tail; standalone_cluster = (Ra H)? place_holder.n? (z? h c)* matra_group* syllable_tail;
other = any; other = any;

View File

@ -59,7 +59,7 @@ enum indic_category_t {
OT_A, OT_A,
OT_NBSP, OT_NBSP,
OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */ OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
OT_RS, /* Register Shifter (and other marks), used in Khmer OT spec */ OT_RS, /* Register Shifter, used in Khmer OT spec */
OT_Coeng OT_Coeng
}; };

View File

@ -275,8 +275,8 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,
info.indic_category() = OT_VD; info.indic_category() = OT_VD;
if (info.indic_category() == OT_X && if (info.indic_category() == OT_X &&
unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x17CB, 0x17D0))) unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x17CB, 0x17D2))) /* Khmer Various signs */
info.indic_category() = OT_RS; info.indic_category() = OT_N;
/* Khmer Virama is different since it can be used to form a final consonant. */ /* Khmer Virama is different since it can be used to form a final consonant. */
if (unlikely (info.codepoint == 0x17D2)) if (unlikely (info.codepoint == 0x17D2))
@ -488,7 +488,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
{ {
/* Please update the Uniscribe branch when touching this! */ /* Please update the Uniscribe branch when touching this! */
for (unsigned int i = start + 1; i < end; i++) for (unsigned int i = start + 1; i < end; i++)
if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_H)))) if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H))))
info[i].indic_position() = info[i - 1].indic_position(); info[i].indic_position() = info[i - 1].indic_position();
} else { } else {
/* /*
@ -497,7 +497,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
*/ */
/* Please update the non-Uniscribe branch when touching this! */ /* Please update the non-Uniscribe branch when touching this! */
for (unsigned int i = start + 1; i < end; i++) for (unsigned int i = start + 1; i < end; i++)
if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_H)))) { if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H)))) {
info[i].indic_position() = info[i - 1].indic_position(); info[i].indic_position() = info[i - 1].indic_position();
if (info[i].indic_category() == OT_H && info[i].indic_position() == POS_PRE_M) if (info[i].indic_category() == OT_H && info[i].indic_position() == POS_PRE_M)
for (unsigned int j = i; j > start; j--) for (unsigned int j = i; j > start; j--)

View File

@ -1 +1,3 @@
misc.txt misc.txt
other-marks-invalid.txt
other-marks.txt

View File

@ -0,0 +1,4 @@
ព់្ឈា
ព្ឈា៉
ព្ឈា៌
ព្ឈ៌ា

View File

@ -0,0 +1,6 @@
ព្ឈា
ព្ឈា់
ព្ឈ់ា
ព្ឈ៉ា
ព៉្ឈា
ព៌្ឈា