[Indic] Position Khmer Robat

It's a visual Repha. Still not positioning logical Repha as occurs in Malayalam. Another 200 Khmer failures fixed. 547 to go. That's better than Devanagari!
2012-07-17 18:17:30 -04:00 · 2012-07-17 18:17:30 -04:00 · db8981f1e0
parent 25bc489498
commit db8981f1e0
3 changed files with 36 additions and 20 deletions
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@ -40,33 +40,35 @@
 # Same order as enum indic_category_t.  Not sure how to avoid duplication.
 X    = 0;
 C    = 1;
-Ra   = 2;
-V    = 3;
-N    = 4;
-H    = 5;
-ZWNJ = 6;
-ZWJ  = 7;
-M    = 8;
-SM   = 9;
-VD   = 10;
-A    = 11;
-NBSP = 12;
-DOTTEDCIRCLE = 13;
-RS   = 14;
-Coeng = 15;
+V    = 2;
+N    = 3;
+H    = 4;
+ZWNJ = 5;
+ZWJ  = 6;
+M    = 7;
+SM   = 8;
+VD   = 9;
+A    = 10;
+NBSP = 11;
+DOTTEDCIRCLE = 12;
+RS   = 13;
+Coeng = 14;
+Repha = 15;
+Ra    = 16;

 c = C | Ra;			# is_consonant
 n = (N.N? | ZWNJ?.RS);		# is_consonant_modifier
 z = ZWJ|ZWNJ;			# is_joiner
 h = H | Coeng;			# is_halant_or_coeng
+reph = (Ra H | Repha);		# possible reph
 matra_group = M.N?.H?;
 syllable_tail = SM? (Coeng (c|V))? (VD VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;


-consonant_syllable =	(c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
-vowel_syllable =	(Ra H)? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
-standalone_cluster =	(Ra H)? place_holder.n? (z? h c)* matra_group* syllable_tail;
+consonant_syllable =	Repha? (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
+vowel_syllable =	reph? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
+standalone_cluster =	reph? place_holder.n? (z? h c)* matra_group* syllable_tail;
 other =			any;

 main := |*
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@ -47,7 +47,6 @@
 enum indic_category_t {
  OT_X = 0,
  OT_C,
-  OT_Ra, /* Not explicitly listed in the OT spec, but used in the grammar. */
  OT_V,
  OT_N,
  OT_H,
@ -60,7 +59,9 @@ enum indic_category_t {
  OT_NBSP,
  OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
  OT_RS, /* Register Shifter, used in Khmer OT spec */
-  OT_Coeng
+  OT_Coeng,
+  OT_Repha,
+  OT_Ra /* Not explicitly listed in the OT spec, but used in the grammar. */
 };

 /* Visual positions in a syllable from left to right. */
@ -92,7 +93,7 @@ enum indic_syllabic_category_t {
  INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL	= OT_C,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER	= OT_NBSP,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED	= OT_C,
-  INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA	= OT_C,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA	= OT_Repha,
  INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER	= OT_X,
  INDIC_SYLLABIC_CATEGORY_NUKTA			= OT_N,
  INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER	= OT_RS,
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@ -282,6 +282,19 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,
    if (unlikely (info.codepoint == 0x17D2))
      info.indic_category() = OT_Coeng;

+    if (info.indic_category() == OT_Repha) {
+      /* There are two kinds of characters marked as Repha:
+       * - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer)
+       * - The ones that are GenCat=Lo is encoded logically, ie. beginning of syllable. (eg. Malayalam)
+       *
+       * We recategorize the first kind to look like a Nukta and attached to the base directly.
+       */
+      if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
+        info.indic_category() = OT_N;
+    }
+
+
+    /* Assign positions... */
    if (is_consonant (info)) {
      info.indic_position() = consonant_position (info.codepoint);
      if (is_ra (info.codepoint))