[Indic] Improve Sinhala base algorithm and reph positioning
Sinhala does not have half forms, and most (all?) consonants can be the base, except when preceded by a ZWJ, which would request a subjoined form. Hence switch the base algorithm to categorize Sinhala with Khmer: start the search at the start, and stop at a ZWJ. Also, mark all pos=base consonants after the base as subjoined, and mark the base itself as pos=base. Finally, adjust Sinhala's reph position to after-main. Brings down Sinhala failures from 455 to 328 (0.120656%).
This commit is contained in:
parent
2ec934c6c2
commit
34c215036f
|
@ -501,6 +501,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
|
||||||
|
|
||||||
switch ((hb_tag_t) buffer->props.script)
|
switch ((hb_tag_t) buffer->props.script)
|
||||||
{
|
{
|
||||||
|
case HB_SCRIPT_SINHALA:
|
||||||
case HB_SCRIPT_KHMER:
|
case HB_SCRIPT_KHMER:
|
||||||
base_pos = BASE_FIRST;
|
base_pos = BASE_FIRST;
|
||||||
break;
|
break;
|
||||||
|
@ -557,6 +558,19 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
|
||||||
|
|
||||||
if (!has_reph)
|
if (!has_reph)
|
||||||
base = limit;
|
base = limit;
|
||||||
|
|
||||||
|
/* Find the last base consonant that is not blocked by ZWJ. If there is
|
||||||
|
* a ZWJ before a base consonant, that would request a subjoined form. */
|
||||||
|
for (unsigned int i = limit; i < end; i++)
|
||||||
|
if (is_consonant (info[i]) && info[i].indic_position() == POS_BASE_C)
|
||||||
|
base = i;
|
||||||
|
else if (info[i].indic_category() == OT_ZWJ)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Mark all subsequent consonants as below. */
|
||||||
|
for (unsigned int i = base + 1; i < end; i++)
|
||||||
|
if (is_consonant (info[i]) && info[i].indic_position() == POS_BASE_C)
|
||||||
|
info[i].indic_position() = POS_BELOW_C;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (base < start)
|
if (base < start)
|
||||||
|
@ -570,6 +584,8 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
|
||||||
has_reph = false;
|
has_reph = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (base < end)
|
||||||
|
info[base].indic_position() = POS_BASE_C;
|
||||||
|
|
||||||
|
|
||||||
/* 2. Decompose and reorder Matras:
|
/* 2. Decompose and reorder Matras:
|
||||||
|
@ -931,6 +947,7 @@ final_reordering_syllable (hb_buffer_t *buffer,
|
||||||
{
|
{
|
||||||
case HB_SCRIPT_MALAYALAM:
|
case HB_SCRIPT_MALAYALAM:
|
||||||
case HB_SCRIPT_ORIYA:
|
case HB_SCRIPT_ORIYA:
|
||||||
|
case HB_SCRIPT_SINHALA:
|
||||||
reph_pos = REPH_AFTER_MAIN;
|
reph_pos = REPH_AFTER_MAIN;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -945,7 +962,6 @@ final_reordering_syllable (hb_buffer_t *buffer,
|
||||||
default:
|
default:
|
||||||
case HB_SCRIPT_DEVANAGARI:
|
case HB_SCRIPT_DEVANAGARI:
|
||||||
case HB_SCRIPT_GUJARATI:
|
case HB_SCRIPT_GUJARATI:
|
||||||
case HB_SCRIPT_SINHALA:
|
|
||||||
reph_pos = REPH_BEFORE_POSTSCRIPT;
|
reph_pos = REPH_BEFORE_POSTSCRIPT;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
|
@ -30,3 +30,4 @@
|
||||||
ශී්ර
|
ශී්ර
|
||||||
ස්ට්රේ
|
ස්ට්රේ
|
||||||
ග්යෙ
|
ග්යෙ
|
||||||
|
ර්ය්ය
|
||||||
|
|
Loading…
Reference in New Issue