[indic] Misc harmless fixes!

First, we were abusing OT_VD instead of OT_A. Fix that by moving OT_A in the grammar to where it belongs (which is different from what the spec says). Also, only allow medial consonants after all other consonants. This doesn't affect any current character. Finally, fix Halant attachment in the presence of medial consonants. Again, this currently doesn't affect any sequence. I lied. There's Gurmukhi U+0A75, which is Consonant_Medial. Uniscribe allows one of those in each of these positions: before matras, after matras and before syllable modifiers, and after syllable modifiers! We currently just allow an unlimited number of them, before matras.
2013-10-16 19:06:29 +02:00 · 2013-10-16 19:06:29 +02:00 · 3756efaf4e
parent c52ddab72e
commit 3756efaf4e
3 changed files with 11 additions and 10 deletions
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@ -58,7 +58,7 @@ Ra    = 16;
 CM    = 17;
 Avag  = 18;

-c = (C | Ra)CM*;		# is_consonant
+c = (C | Ra);			# is_consonant
 n = ((ZWNJ?.RS)? (N.N?)?);	# is_consonant_modifier
 z = ZWJ|ZWNJ;			# is_joiner
 h = H | Coeng;			# is_halant_or_coeng
@ -67,14 +67,14 @@ reph = (Ra H | Repha);		# possible reph
 cn = c.ZWJ?.n?;
 forced_rakar = ZWJ H ZWJ Ra;
 matra_group = z{0,3}.M.N?.(H | forced_rakar)?;
-syllable_tail =  (Coeng (cn|V))? (Avag.N?)? (SM.SM?.ZWNJ?)? (VD.VD?)?;
+syllable_tail =  (Coeng (cn|V))? (Avag.N?)? (SM.SM?.ZWNJ?)? (A.A?)? VD?;
 place_holder = NBSP | DOTTEDCIRCLE;
 halant_group = (z?.h.(ZWJ.N?)?);
 final_halant_group = halant_group | h.ZWNJ;
-halant_or_matra_group = (final_halant_group | (h.ZWJ)? matra_group{0,4});
+halant_or_matra_group = (CM.CM* | final_halant_group | (h.ZWJ)? matra_group{0,4});


-consonant_syllable =	Repha? (cn.halant_group){0,4} cn A? halant_or_matra_group? syllable_tail;
+consonant_syllable =	Repha? (cn.halant_group){0,4} cn halant_or_matra_group? syllable_tail;
 vowel_syllable =	reph? V.n? (ZWJ | (halant_group.cn){0,4} halant_or_matra_group? syllable_tail);
 standalone_cluster =	reph? place_holder.n? (halant_group.cn){0,4} halant_or_matra_group? syllable_tail;
 avagraha_cluster = 	Avag.N? (SM.ZWNJ?)? (VD VD?)?;
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@ -102,7 +102,7 @@ enum indic_syllabic_category_t {
  INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER	= OT_C,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL	= OT_CM,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER	= OT_NBSP,
-  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED	= OT_C,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED	= OT_CM,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA	= OT_Repha,
  INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER	= OT_X,
  INDIC_SYLLABIC_CATEGORY_NUKTA			= OT_N,
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@ -194,15 +194,15 @@ set_indic_properties (hb_glyph_info_t &info)


  /* The spec says U+0952 is OT_A.  However, testing shows that Uniscribe
-   * treats U+0951..U+0952 all as OT_VD.
+   * treats U+0951..U+0954 all similarly.
   * TESTS:
   * U+092E,U+0947,U+0952
   * U+092E,U+0952,U+0947
   * U+092E,U+0947,U+0951
   * U+092E,U+0951,U+0947
-   * */
+   */
  if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0951, 0x0954)))
-    cat = OT_VD;
+    cat = OT_A;

  if (unlikely (u == 0x17D1))
    cat = OT_X;
@ -220,7 +220,7 @@ set_indic_properties (hb_glyph_info_t &info)
  else if (unlikely (u == 0x200C)) cat = OT_ZWNJ;
  else if (unlikely (u == 0x200D)) cat = OT_ZWJ;
  else if (unlikely (u == 0x25CC)) cat = OT_DOTTEDCIRCLE;
-  else if (unlikely (u == 0x0A71)) cat = OT_SM; /* GURMUKHI ADDAK.  More like consonant medial. like 0A75. */
+  else if (unlikely (u == 0x0A71)) cat = OT_SM; /* GURMUKHI ADDAK.  Move it to the end. */

  if (cat == OT_Repha) {
    /* There are two kinds of characters marked as Repha:
@ -249,7 +249,7 @@ set_indic_properties (hb_glyph_info_t &info)
  {
    pos = matra_position (u, pos);
  }
-  else if (cat == OT_SM || cat == OT_VD || cat == OT_Avag)
+  else if ((FLAG (cat) & (FLAG (OT_SM) | FLAG (OT_VD) | FLAG (OT_A) | FLAG (OT_Avag))))
  {
    pos = POS_SMVD;
  }
@ -933,6 +933,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
 	for (unsigned int j = last_halant; j < i; j++)
 	  if (info[j].indic_position() != POS_SMVD)
 	    info[j].indic_position() = info[i].indic_position();
+	last_halant = end;
      }
  }