[khmer] Remove indic_config->reph_pos
This commit is contained in:
parent
ffcd6c7efd
commit
c135324af1
|
@ -110,36 +110,6 @@ matra_position (hb_codepoint_t u, khmer_position_t side)
|
||||||
return side;
|
return side;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* XXX
|
|
||||||
* This is a hack for now. We should move this data into the main Indic table.
|
|
||||||
* Or completely remove it and just check in the tables.
|
|
||||||
*/
|
|
||||||
static const hb_codepoint_t ra_chars[] = {
|
|
||||||
0x0930u, /* Devanagari */
|
|
||||||
0x09B0u, /* Bengali */
|
|
||||||
0x09F0u, /* Bengali */
|
|
||||||
0x0A30u, /* Gurmukhi */ /* No Reph */
|
|
||||||
0x0AB0u, /* Gujarati */
|
|
||||||
0x0B30u, /* Oriya */
|
|
||||||
0x0BB0u, /* Tamil */ /* No Reph */
|
|
||||||
0x0C30u, /* Telugu */ /* Reph formed only with ZWJ */
|
|
||||||
0x0CB0u, /* Kannada */
|
|
||||||
0x0D30u, /* Malayalam */ /* No Reph, Logical Repha */
|
|
||||||
|
|
||||||
0x0DBBu, /* Sinhala */ /* Reph formed only with ZWJ */
|
|
||||||
|
|
||||||
0x179Au, /* Khmer */ /* No Reph, Visual Repha */
|
|
||||||
};
|
|
||||||
|
|
||||||
static inline bool
|
|
||||||
is_ra (hb_codepoint_t u)
|
|
||||||
{
|
|
||||||
for (unsigned int i = 0; i < ARRAY_LENGTH (ra_chars); i++)
|
|
||||||
if (u == ra_chars[i])
|
|
||||||
return true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
is_one_of (const hb_glyph_info_t &info, unsigned int flags)
|
is_one_of (const hb_glyph_info_t &info, unsigned int flags)
|
||||||
{
|
{
|
||||||
|
@ -240,7 +210,7 @@ set_khmer_properties (hb_glyph_info_t &info)
|
||||||
if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS))
|
if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS))
|
||||||
{
|
{
|
||||||
pos = POS_BASE_C;
|
pos = POS_BASE_C;
|
||||||
if (is_ra (u))
|
if (u == 0x179Au)
|
||||||
cat = OT_Ra;
|
cat = OT_Ra;
|
||||||
}
|
}
|
||||||
else if (cat == OT_M)
|
else if (cat == OT_M)
|
||||||
|
@ -273,14 +243,6 @@ set_khmer_properties (hb_glyph_info_t &info)
|
||||||
* instead of adding a new flag in these structs.
|
* instead of adding a new flag in these structs.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
enum reph_position_t {
|
|
||||||
REPH_POS_AFTER_MAIN = POS_AFTER_MAIN,
|
|
||||||
REPH_POS_BEFORE_SUB = POS_BEFORE_SUB,
|
|
||||||
REPH_POS_AFTER_SUB = POS_AFTER_SUB,
|
|
||||||
REPH_POS_BEFORE_POST = POS_BEFORE_POST,
|
|
||||||
REPH_POS_AFTER_POST = POS_AFTER_POST,
|
|
||||||
REPH_POS_DONT_CARE = POS_RA_TO_BECOME_REPH
|
|
||||||
};
|
|
||||||
enum reph_mode_t {
|
enum reph_mode_t {
|
||||||
REPH_MODE_IMPLICIT, /* Reph formed out of initial Ra,H sequence. */
|
REPH_MODE_IMPLICIT, /* Reph formed out of initial Ra,H sequence. */
|
||||||
REPH_MODE_EXPLICIT, /* Reph formed out of initial Ra,H,ZWJ sequence. */
|
REPH_MODE_EXPLICIT, /* Reph formed out of initial Ra,H,ZWJ sequence. */
|
||||||
|
@ -293,14 +255,13 @@ enum blwf_mode_t {
|
||||||
};
|
};
|
||||||
struct indic_config_t
|
struct indic_config_t
|
||||||
{
|
{
|
||||||
reph_position_t reph_pos;
|
|
||||||
reph_mode_t reph_mode;
|
reph_mode_t reph_mode;
|
||||||
blwf_mode_t blwf_mode;
|
blwf_mode_t blwf_mode;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const indic_config_t indic_configs[] =
|
static const indic_config_t indic_configs[] =
|
||||||
{
|
{
|
||||||
{REPH_POS_DONT_CARE, REPH_MODE_VIS_REPHA,BLWF_MODE_PRE_AND_POST},
|
{REPH_MODE_VIS_REPHA,BLWF_MODE_PRE_AND_POST},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -590,120 +551,17 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
|
||||||
const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data;
|
const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data;
|
||||||
hb_glyph_info_t *info = buffer->info;
|
hb_glyph_info_t *info = buffer->info;
|
||||||
|
|
||||||
/* 1. Find base consonant:
|
/* 1. Khmer shaping assumes that a syllable will begin with a Cons, IndV, or Number. */
|
||||||
*
|
|
||||||
* The shaping engine finds the base consonant of the syllable, using the
|
|
||||||
* following algorithm: starting from the end of the syllable, move backwards
|
|
||||||
* until a consonant is found that does not have a below-base or post-base
|
|
||||||
* form (post-base forms have to follow below-base forms), or that is not a
|
|
||||||
* pre-base-reordering Ra, or arrive at the first consonant. The consonant
|
|
||||||
* stopped at will be the base.
|
|
||||||
*
|
|
||||||
* o If the syllable starts with Ra + Halant (in a script that has Reph)
|
|
||||||
* and has more than one consonant, Ra is excluded from candidates for
|
|
||||||
* base consonants.
|
|
||||||
*/
|
|
||||||
|
|
||||||
unsigned int base = end;
|
|
||||||
bool has_reph = false;
|
|
||||||
|
|
||||||
{
|
|
||||||
/* -> If the syllable starts with Ra + Halant (in a script that has Reph)
|
|
||||||
* and has more than one consonant, Ra is excluded from candidates for
|
|
||||||
* base consonants. */
|
|
||||||
unsigned int limit = start;
|
|
||||||
if (khmer_plan->config->reph_pos != REPH_POS_DONT_CARE &&
|
|
||||||
khmer_plan->mask_array[RPHF] &&
|
|
||||||
start + 3 <= end &&
|
|
||||||
(
|
|
||||||
(khmer_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info[start + 2])) ||
|
|
||||||
(khmer_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].khmer_category() == OT_ZWJ)
|
|
||||||
))
|
|
||||||
{
|
|
||||||
/* See if it matches the 'rphf' feature. */
|
|
||||||
hb_codepoint_t glyphs[3] = {info[start].codepoint,
|
|
||||||
info[start + 1].codepoint,
|
|
||||||
khmer_plan->config->reph_mode == REPH_MODE_EXPLICIT ?
|
|
||||||
info[start + 2].codepoint : 0};
|
|
||||||
if (khmer_plan->rphf.would_substitute (glyphs, 2, face) ||
|
|
||||||
(khmer_plan->config->reph_mode == REPH_MODE_EXPLICIT &&
|
|
||||||
khmer_plan->rphf.would_substitute (glyphs, 3, face)))
|
|
||||||
{
|
|
||||||
limit += 2;
|
|
||||||
while (limit < end && is_joiner (info[limit]))
|
|
||||||
limit++;
|
|
||||||
base = start;
|
|
||||||
has_reph = true;
|
|
||||||
}
|
|
||||||
} else if (khmer_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].khmer_category() == OT_Repha)
|
|
||||||
{
|
|
||||||
limit += 1;
|
|
||||||
while (limit < end && is_joiner (info[limit]))
|
|
||||||
limit++;
|
|
||||||
base = start;
|
|
||||||
has_reph = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The first consonant is always the base. */
|
/* The first consonant is always the base. */
|
||||||
base = start;
|
unsigned int base = start;
|
||||||
|
info[base].khmer_position() = POS_BASE_C;
|
||||||
|
|
||||||
/* Mark all subsequent consonants as below. */
|
/* Mark all subsequent consonants as below. */
|
||||||
for (unsigned int i = base + 1; i < end; i++)
|
for (unsigned int i = base + 1; i < end; i++)
|
||||||
if (is_consonant (info[i]))
|
if (is_consonant (info[i]))
|
||||||
info[i].khmer_position() = POS_BELOW_C;
|
info[i].khmer_position() = POS_BELOW_C;
|
||||||
|
|
||||||
/* -> If the syllable starts with Ra + Halant (in a script that has Reph)
|
|
||||||
* and has more than one consonant, Ra is excluded from candidates for
|
|
||||||
* base consonants.
|
|
||||||
*
|
|
||||||
* Only do this for unforced Reph. (ie. not for Ra,H,ZWJ. */
|
|
||||||
if (has_reph && base == start && limit - base <= 2) {
|
|
||||||
/* Have no other consonant, so Reph is not formed and Ra becomes base. */
|
|
||||||
has_reph = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* 2. Decompose and reorder Matras:
|
|
||||||
*
|
|
||||||
* Each matra and any syllable modifier sign in the syllable are moved to the
|
|
||||||
* appropriate position relative to the consonant(s) in the syllable. The
|
|
||||||
* shaping engine decomposes two- or three-part matras into their constituent
|
|
||||||
* parts before any repositioning. Matra characters are classified by which
|
|
||||||
* consonant in a conjunct they have affinity for and are reordered to the
|
|
||||||
* following positions:
|
|
||||||
*
|
|
||||||
* o Before first half form in the syllable
|
|
||||||
* o After subjoined consonants
|
|
||||||
* o After post-form consonant
|
|
||||||
* o After main consonant (for above marks)
|
|
||||||
*
|
|
||||||
* IMPLEMENTATION NOTES:
|
|
||||||
*
|
|
||||||
* The normalize() routine has already decomposed matras for us, so we don't
|
|
||||||
* need to worry about that.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/* 3. Reorder marks to canonical order:
|
|
||||||
*
|
|
||||||
* Adjacent nukta and halant or nukta and vedic sign are always repositioned
|
|
||||||
* if necessary, so that the nukta is first.
|
|
||||||
*
|
|
||||||
* IMPLEMENTATION NOTES:
|
|
||||||
*
|
|
||||||
* We don't need to do this: the normalize() routine already did this for us.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/* Reorder characters */
|
|
||||||
|
|
||||||
for (unsigned int i = start; i < base; i++)
|
|
||||||
info[i].khmer_position() = MIN (POS_PRE_C, (khmer_position_t) info[i].khmer_position());
|
|
||||||
|
|
||||||
if (base < end)
|
|
||||||
info[base].khmer_position() = POS_BASE_C;
|
|
||||||
|
|
||||||
/* Mark final consonants. A final consonant is one appearing after a matra,
|
/* Mark final consonants. A final consonant is one appearing after a matra,
|
||||||
* like in Khmer. */
|
* like in Khmer. */
|
||||||
for (unsigned int i = base + 1; i < end; i++)
|
for (unsigned int i = base + 1; i < end; i++)
|
||||||
|
@ -716,10 +574,6 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Handle beginning Ra */
|
|
||||||
if (has_reph)
|
|
||||||
info[start].khmer_position() = POS_RA_TO_BECOME_REPH;
|
|
||||||
|
|
||||||
/* Attach misc marks to previous char to move with them. */
|
/* Attach misc marks to previous char to move with them. */
|
||||||
{
|
{
|
||||||
khmer_position_t last_pos = POS_START;
|
khmer_position_t last_pos = POS_START;
|
||||||
|
@ -766,7 +620,6 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
|
||||||
last = i;
|
last = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
{
|
{
|
||||||
/* Use syllable() for sort accounting temporarily. */
|
/* Use syllable() for sort accounting temporarily. */
|
||||||
unsigned int syllable = info[start].syllable();
|
unsigned int syllable = info[start].syllable();
|
||||||
|
@ -811,20 +664,6 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
|
||||||
{
|
{
|
||||||
hb_mask_t mask;
|
hb_mask_t mask;
|
||||||
|
|
||||||
/* Reph */
|
|
||||||
for (unsigned int i = start; i < end && info[i].khmer_position() == POS_RA_TO_BECOME_REPH; i++)
|
|
||||||
info[i].mask |= khmer_plan->mask_array[RPHF];
|
|
||||||
|
|
||||||
/* Pre-base */
|
|
||||||
mask = khmer_plan->mask_array[HALF];
|
|
||||||
if (khmer_plan->config->blwf_mode == BLWF_MODE_PRE_AND_POST)
|
|
||||||
mask |= khmer_plan->mask_array[BLWF];
|
|
||||||
for (unsigned int i = start; i < base; i++)
|
|
||||||
info[i].mask |= mask;
|
|
||||||
/* Base */
|
|
||||||
mask = 0;
|
|
||||||
if (base < end)
|
|
||||||
info[base].mask |= mask;
|
|
||||||
/* Post-base */
|
/* Post-base */
|
||||||
mask = khmer_plan->mask_array[BLWF] | khmer_plan->mask_array[ABVF] | khmer_plan->mask_array[PSTF];
|
mask = khmer_plan->mask_array[BLWF] | khmer_plan->mask_array[ABVF] | khmer_plan->mask_array[PSTF];
|
||||||
for (unsigned int i = base + 1; i < end; i++)
|
for (unsigned int i = base + 1; i < end; i++)
|
||||||
|
@ -1138,158 +977,6 @@ final_reordering_syllable (const hb_ot_shape_plan_t *plan,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* o Reorder reph:
|
|
||||||
*
|
|
||||||
* Reph’s original position is always at the beginning of the syllable,
|
|
||||||
* (i.e. it is not reordered at the character reordering stage). However,
|
|
||||||
* it will be reordered according to the basic-forms shaping results.
|
|
||||||
* Possible positions for reph, depending on the script, are; after main,
|
|
||||||
* before post-base consonant forms, and after post-base consonant forms.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Two cases:
|
|
||||||
*
|
|
||||||
* - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then
|
|
||||||
* we should only move it if the sequence ligated to the repha form.
|
|
||||||
*
|
|
||||||
* - If repha is encoded separately and in the logical position, we should only
|
|
||||||
* move it if it did NOT ligate. If it ligated, it's probably the font trying
|
|
||||||
* to make it work without the reordering.
|
|
||||||
*/
|
|
||||||
if (start + 1 < end &&
|
|
||||||
info[start].khmer_position() == POS_RA_TO_BECOME_REPH &&
|
|
||||||
((info[start].khmer_category() == OT_Repha) ^
|
|
||||||
_hb_glyph_info_ligated_and_didnt_multiply (&info[start])))
|
|
||||||
{
|
|
||||||
unsigned int new_reph_pos;
|
|
||||||
reph_position_t reph_pos = khmer_plan->config->reph_pos;
|
|
||||||
|
|
||||||
assert (reph_pos != REPH_POS_DONT_CARE);
|
|
||||||
|
|
||||||
/* 1. If reph should be positioned after post-base consonant forms,
|
|
||||||
* proceed to step 5.
|
|
||||||
*/
|
|
||||||
if (reph_pos == REPH_POS_AFTER_POST)
|
|
||||||
{
|
|
||||||
goto reph_step_5;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* 2. If the reph repositioning class is not after post-base: target
|
|
||||||
* position is after the first explicit halant glyph between the
|
|
||||||
* first post-reph consonant and last main consonant. If ZWJ or ZWNJ
|
|
||||||
* are following this halant, position is moved after it. If such
|
|
||||||
* position is found, this is the target position. Otherwise,
|
|
||||||
* proceed to the next step.
|
|
||||||
*
|
|
||||||
* Note: in old-implementation fonts, where classifications were
|
|
||||||
* fixed in shaping engine, there was no case where reph position
|
|
||||||
* will be found on this step.
|
|
||||||
*/
|
|
||||||
{
|
|
||||||
new_reph_pos = start + 1;
|
|
||||||
while (new_reph_pos < base && !is_coeng (info[new_reph_pos]))
|
|
||||||
new_reph_pos++;
|
|
||||||
|
|
||||||
if (new_reph_pos < base && is_coeng (info[new_reph_pos]))
|
|
||||||
{
|
|
||||||
/* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
|
|
||||||
if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
|
|
||||||
new_reph_pos++;
|
|
||||||
goto reph_move;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* 3. If reph should be repositioned after the main consonant: find the
|
|
||||||
* first consonant not ligated with main, or find the first
|
|
||||||
* consonant that is not a potential pre-base-reordering Ra.
|
|
||||||
*/
|
|
||||||
if (reph_pos == REPH_POS_AFTER_MAIN)
|
|
||||||
{
|
|
||||||
new_reph_pos = base;
|
|
||||||
while (new_reph_pos + 1 < end && info[new_reph_pos + 1].khmer_position() <= POS_AFTER_MAIN)
|
|
||||||
new_reph_pos++;
|
|
||||||
if (new_reph_pos < end)
|
|
||||||
goto reph_move;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* 4. If reph should be positioned before post-base consonant, find
|
|
||||||
* first post-base classified consonant not ligated with main. If no
|
|
||||||
* consonant is found, the target position should be before the
|
|
||||||
* first matra, syllable modifier sign or vedic sign.
|
|
||||||
*/
|
|
||||||
/* This is our take on what step 4 is trying to say (and failing, BADLY). */
|
|
||||||
if (reph_pos == REPH_POS_AFTER_SUB)
|
|
||||||
{
|
|
||||||
new_reph_pos = base;
|
|
||||||
while (new_reph_pos + 1 < end &&
|
|
||||||
!( FLAG_UNSAFE (info[new_reph_pos + 1].khmer_position()) & (FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD))))
|
|
||||||
new_reph_pos++;
|
|
||||||
if (new_reph_pos < end)
|
|
||||||
goto reph_move;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* 5. If no consonant is found in steps 3 or 4, move reph to a position
|
|
||||||
* immediately before the first post-base matra, syllable modifier
|
|
||||||
* sign or vedic sign that has a reordering class after the intended
|
|
||||||
* reph position. For example, if the reordering position for reph
|
|
||||||
* is post-main, it will skip above-base matras that also have a
|
|
||||||
* post-main position.
|
|
||||||
*/
|
|
||||||
reph_step_5:
|
|
||||||
{
|
|
||||||
/* Copied from step 2. */
|
|
||||||
new_reph_pos = start + 1;
|
|
||||||
while (new_reph_pos < base && !is_coeng (info[new_reph_pos]))
|
|
||||||
new_reph_pos++;
|
|
||||||
|
|
||||||
if (new_reph_pos < base && is_coeng (info[new_reph_pos]))
|
|
||||||
{
|
|
||||||
/* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
|
|
||||||
if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
|
|
||||||
new_reph_pos++;
|
|
||||||
goto reph_move;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* 6. Otherwise, reorder reph to the end of the syllable.
|
|
||||||
*/
|
|
||||||
{
|
|
||||||
new_reph_pos = end - 1;
|
|
||||||
while (new_reph_pos > start && info[new_reph_pos].khmer_position() == POS_SMVD)
|
|
||||||
new_reph_pos--;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If the Reph is to be ending up after a Matra,Halant sequence,
|
|
||||||
* position it before that Halant so it can interact with the Matra.
|
|
||||||
* However, if it's a plain Consonant,Halant we shouldn't do that.
|
|
||||||
* Uniscribe doesn't do this.
|
|
||||||
* TEST: U+0930,U+094D,U+0915,U+094B,U+094D
|
|
||||||
*/
|
|
||||||
if (!hb_options ().uniscribe_bug_compatible &&
|
|
||||||
unlikely (is_coeng (info[new_reph_pos]))) {
|
|
||||||
for (unsigned int i = base + 1; i < new_reph_pos; i++)
|
|
||||||
if (info[i].khmer_category() == OT_M) {
|
|
||||||
/* Ok, got it. */
|
|
||||||
new_reph_pos--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
goto reph_move;
|
|
||||||
}
|
|
||||||
|
|
||||||
reph_move:
|
|
||||||
{
|
|
||||||
/* Move */
|
|
||||||
buffer->merge_clusters (start, new_reph_pos + 1);
|
|
||||||
hb_glyph_info_t reph = info[start];
|
|
||||||
memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (info[0]));
|
|
||||||
info[new_reph_pos] = reph;
|
|
||||||
|
|
||||||
if (start < base && base <= new_reph_pos)
|
|
||||||
base--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* o Reorder pre-base-reordering consonants:
|
/* o Reorder pre-base-reordering consonants:
|
||||||
*
|
*
|
||||||
* If a pre-base-reordering consonant is found, reorder it according to
|
* If a pre-base-reordering consonant is found, reorder it according to
|
||||||
|
|
Loading…
Reference in New Issue