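More refactoring: move shaper-specific compose/decompose special cases from hb-unicode-private.hh into hb-ot-shape-normalize.cc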

2012-08-07 16:57:02 -04:00 · 2012-08-07 16:57:02 -04:00 · 0f8881d6bb
parent 428dfcab66
commit 0f8881d6bb
2 changed files with 162 additions and 163 deletions
--- a/src/hb-ot-shape-normalize.cc
+++ b/src/hb-ot-shape-normalize.cc
@@ -87,6 +87,39 @@ decompose_func (hb_unicode_funcs_t *unicode,
 		hb_codepoint_t *a,
 		hb_codepoint_t *b)
 {
+  /* XXX FIXME, move these to complex shapers and propagate to normalizer.*/
+  switch (ab) {
+    case 0x0AC9  : return false;
+
+    case 0x0931  : return false;
+    case 0x0B94  : return false;
+
+    /* These ones have Unicode decompositions, but we do it
+     * this way to be close to what Uniscribe does. */
+    case 0x0DDA  : *a = 0x0DD9; *b= 0x0DDA; return true;
+    case 0x0DDC  : *a = 0x0DD9; *b= 0x0DDC; return true;
+    case 0x0DDD  : *a = 0x0DD9; *b= 0x0DDD; return true;
+    case 0x0DDE  : *a = 0x0DD9; *b= 0x0DDE; return true;
+
+    case 0x0F77  : *a = 0x0FB2; *b= 0x0F81; return true;
+    case 0x0F79  : *a = 0x0FB3; *b= 0x0F81; return true;
+    case 0x17BE  : *a = 0x17C1; *b= 0x17BE; return true;
+    case 0x17BF  : *a = 0x17C1; *b= 0x17BF; return true;
+    case 0x17C0  : *a = 0x17C1; *b= 0x17C0; return true;
+    case 0x17C4  : *a = 0x17C1; *b= 0x17C4; return true;
+    case 0x17C5  : *a = 0x17C1; *b= 0x17C5; return true;
+    case 0x1925  : *a = 0x1920; *b= 0x1923; return true;
+    case 0x1926  : *a = 0x1920; *b= 0x1924; return true;
+    case 0x1B3C  : *a = 0x1B42; *b= 0x1B3C; return true;
+    case 0x1112E  : *a = 0x11127; *b= 0x11131; return true;
+    case 0x1112F  : *a = 0x11127; *b= 0x11132; return true;
+#if 0
+    case 0x0B57  : *a = 0xno decomp, -> RIGHT; return true;
+    case 0x1C29  : *a = 0xno decomp, -> LEFT; return true;
+    case 0xA9C0  : *a = 0xno decomp, -> RIGHT; return true;
+    case 0x111BF  : *a = 0xno decomp, -> ABOVE; return true;
+#endif
+  }
  return unicode->decompose (ab, a, b);
 }

@@ -96,7 +129,133 @@ compose_func (hb_unicode_funcs_t *unicode,
 	      hb_codepoint_t  b,
 	      hb_codepoint_t *ab)
 {
-  return unicode->compose (a, b, ab);
+  /* XXX, this belongs to indic normalizer. */
+  if ((FLAG (unicode->general_category (a)) &
+       (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
+	FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
+	FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
+    return false;
+  /* XXX, add composition-exclusion exceptions to Indic shaper. */
+  if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
+
+  /* XXX, these belong to the Hebrew / default shaper. */
+  /* Hebrew presentation-form shaping.
+   * https://bugzilla.mozilla.org/show_bug.cgi?id=728866 */
+  // Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
+  // note that some letters do not have a dagesh presForm encoded
+  static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = {
+    0xFB30, // ALEF
+    0xFB31, // BET
+    0xFB32, // GIMEL
+    0xFB33, // DALET
+    0xFB34, // HE
+    0xFB35, // VAV
+    0xFB36, // ZAYIN
+    0, // HET
+    0xFB38, // TET
+    0xFB39, // YOD
+    0xFB3A, // FINAL KAF
+    0xFB3B, // KAF
+    0xFB3C, // LAMED
+    0, // FINAL MEM
+    0xFB3E, // MEM
+    0, // FINAL NUN
+    0xFB40, // NUN
+    0xFB41, // SAMEKH
+    0, // AYIN
+    0xFB43, // FINAL PE
+    0xFB44, // PE
+    0, // FINAL TSADI
+    0xFB46, // TSADI
+    0xFB47, // QOF
+    0xFB48, // RESH
+    0xFB49, // SHIN
+    0xFB4A // TAV
+  };
+
+  hb_bool_t found = unicode->compose (a, b, ab);
+
+  if (!found && (b & ~0x7F) == 0x0580) {
+      // special-case Hebrew presentation forms that are excluded from
+      // standard normalization, but wanted for old fonts
+      switch (b) {
+      case 0x05B4: // HIRIQ
+	  if (a == 0x05D9) { // YOD
+	      *ab = 0xFB1D;
+	      found = true;
+	  }
+	  break;
+      case 0x05B7: // PATAH
+	  if (a == 0x05F2) { // YIDDISH YOD YOD
+	      *ab = 0xFB1F;
+	      found = true;
+	  } else if (a == 0x05D0) { // ALEF
+	      *ab = 0xFB2E;
+	      found = true;
+	  }
+	  break;
+      case 0x05B8: // QAMATS
+	  if (a == 0x05D0) { // ALEF
+	      *ab = 0xFB2F;
+	      found = true;
+	  }
+	  break;
+      case 0x05B9: // HOLAM
+	  if (a == 0x05D5) { // VAV
+	      *ab = 0xFB4B;
+	      found = true;
+	  }
+	  break;
+      case 0x05BC: // DAGESH
+	  if (a >= 0x05D0 && a <= 0x05EA) {
+	      *ab = sDageshForms[a - 0x05D0];
+	      found = (*ab != 0);
+	  } else if (a == 0xFB2A) { // SHIN WITH SHIN DOT
+	      *ab = 0xFB2C;
+	      found = true;
+	  } else if (a == 0xFB2B) { // SHIN WITH SIN DOT
+	      *ab = 0xFB2D;
+	      found = true;
+	  }
+	  break;
+      case 0x05BF: // RAFE
+	  switch (a) {
+	  case 0x05D1: // BET
+	      *ab = 0xFB4C;
+	      found = true;
+	      break;
+	  case 0x05DB: // KAF
+	      *ab = 0xFB4D;
+	      found = true;
+	      break;
+	  case 0x05E4: // PE
+	      *ab = 0xFB4E;
+	      found = true;
+	      break;
+	  }
+	  break;
+      case 0x05C1: // SHIN DOT
+	  if (a == 0x05E9) { // SHIN
+	      *ab = 0xFB2A;
+	      found = true;
+	  } else if (a == 0xFB49) { // SHIN WITH DAGESH
+	      *ab = 0xFB2C;
+	      found = true;
+	  }
+	  break;
+      case 0x05C2: // SIN DOT
+	  if (a == 0x05E9) { // SHIN
+	      *ab = 0xFB2B;
+	      found = true;
+	  } else if (a == 0xFB49) { // SHIN WITH DAGESH
+	      *ab = 0xFB2D;
+	      found = true;
+	  }
+	  break;
+      }
+  }
+
+  return found;
 }

 static void
--- a/src/hb-unicode-private.hh
+++ b/src/hb-unicode-private.hh
@@ -80,173 +80,13 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
 			    hb_codepoint_t *ab)
  {
    *ab = 0;
-
-    /* XXX, this belongs to indic normalizer. */
-    if ((FLAG (general_category (a)) &
-	 (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
-	  FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
-	  FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
-      return false;
-    /* XXX, add composition-exclusion exceptions to Indic shaper. */
-    if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
-
-    /* XXX, these belong to the hebew / default shaper. */
-    /* Hebrew presentation-form shaping.
-     * https://bugzilla.mozilla.org/show_bug.cgi?id=728866 */
-    // Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
-    // note that some letters do not have a dagesh presForm encoded
-    static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = {
-      0xFB30, // ALEF
-      0xFB31, // BET
-      0xFB32, // GIMEL
-      0xFB33, // DALET
-      0xFB34, // HE
-      0xFB35, // VAV
-      0xFB36, // ZAYIN
-      0, // HET
-      0xFB38, // TET
-      0xFB39, // YOD
-      0xFB3A, // FINAL KAF
-      0xFB3B, // KAF
-      0xFB3C, // LAMED
-      0, // FINAL MEM
-      0xFB3E, // MEM
-      0, // FINAL NUN
-      0xFB40, // NUN
-      0xFB41, // SAMEKH
-      0, // AYIN
-      0xFB43, // FINAL PE
-      0xFB44, // PE
-      0, // FINAL TSADI
-      0xFB46, // TSADI
-      0xFB47, // QOF
-      0xFB48, // RESH
-      0xFB49, // SHIN
-      0xFB4A // TAV
-    };
-
-    hb_bool_t found = func.compose (this, a, b, ab, user_data.compose);
-
-    if (!found && (b & ~0x7F) == 0x0580) {
-	// special-case Hebrew presentation forms that are excluded from
-	// standard normalization, but wanted for old fonts
-	switch (b) {
-	case 0x05B4: // HIRIQ
-	    if (a == 0x05D9) { // YOD
-		*ab = 0xFB1D;
-		found = true;
-	    }
-	    break;
-	case 0x05B7: // patah
-	    if (a == 0x05F2) { // YIDDISH YOD YOD
-		*ab = 0xFB1F;
-		found = true;
-	    } else if (a == 0x05D0) { // ALEF
-		*ab = 0xFB2E;
-		found = true;
-	    }
-	    break;
-	case 0x05B8: // QAMATS
-	    if (a == 0x05D0) { // ALEF
-		*ab = 0xFB2F;
-		found = true;
-	    }
-	    break;
-	case 0x05B9: // HOLAM
-	    if (a == 0x05D5) { // VAV
-		*ab = 0xFB4B;
-		found = true;
-	    }
-	    break;
-	case 0x05BC: // DAGESH
-	    if (a >= 0x05D0 && a <= 0x05EA) {
-		*ab = sDageshForms[a - 0x05D0];
-		found = (*ab != 0);
-	    } else if (a == 0xFB2A) { // SHIN WITH SHIN DOT
-		*ab = 0xFB2C;
-		found = true;
-	    } else if (a == 0xFB2B) { // SHIN WITH SIN DOT
-		*ab = 0xFB2D;
-		found = true;
-	    }
-	    break;
-	case 0x05BF: // RAFE
-	    switch (a) {
-	    case 0x05D1: // BET
-		*ab = 0xFB4C;
-		found = true;
-		break;
-	    case 0x05DB: // KAF
-		*ab = 0xFB4D;
-		found = true;
-		break;
-	    case 0x05E4: // PE
-		*ab = 0xFB4E;
-		found = true;
-		break;
-	    }
-	    break;
-	case 0x05C1: // SHIN DOT
-	    if (a == 0x05E9) { // SHIN
-		*ab = 0xFB2A;
-		found = true;
-	    } else if (a == 0xFB49) { // SHIN WITH DAGESH
-		*ab = 0xFB2C;
-		found = true;
-	    }
-	    break;
-	case 0x05C2: // SIN DOT
-	    if (a == 0x05E9) { // SHIN
-		*ab = 0xFB2B;
-		found = true;
-	    } else if (a == 0xFB49) { // SHIN WITH DAGESH
-		*ab = 0xFB2D;
-		found = true;
-	    }
-	    break;
-	}
-    }
-
-    return found;
+    return func.compose (this, a, b, ab, user_data.compose);
  }

  inline hb_bool_t decompose (hb_codepoint_t ab,
 			      hb_codepoint_t *a, hb_codepoint_t *b)
  {
-    /* XXX FIXME, move these to complex shapers and propagage to normalizer.*/
-    switch (ab) {
-      case 0x0AC9  : return false;
-
-      case 0x0931  : return false;
-      case 0x0B94  : return false;
-
-      /* These ones have Unicode decompositions, but we do it
-       * this way to be close to what Uniscribe does. */
-      case 0x0DDA  : *a = 0x0DD9; *b= 0x0DDA; return true;
-      case 0x0DDC  : *a = 0x0DD9; *b= 0x0DDC; return true;
-      case 0x0DDD  : *a = 0x0DD9; *b= 0x0DDD; return true;
-      case 0x0DDE  : *a = 0x0DD9; *b= 0x0DDE; return true;
-
-      case 0x0F77  : *a = 0x0FB2; *b= 0x0F81; return true;
-      case 0x0F79  : *a = 0x0FB3; *b= 0x0F81; return true;
-      case 0x17BE  : *a = 0x17C1; *b= 0x17BE; return true;
-      case 0x17BF  : *a = 0x17C1; *b= 0x17BF; return true;
-      case 0x17C0  : *a = 0x17C1; *b= 0x17C0; return true;
-      case 0x17C4  : *a = 0x17C1; *b= 0x17C4; return true;
-      case 0x17C5  : *a = 0x17C1; *b= 0x17C5; return true;
-      case 0x1925  : *a = 0x1920; *b= 0x1923; return true;
-      case 0x1926  : *a = 0x1920; *b= 0x1924; return true;
-      case 0x1B3C  : *a = 0x1B42; *b= 0x1B3C; return true;
-      case 0x1112E  : *a = 0x11127; *b= 0x11131; return true;
-      case 0x1112F  : *a = 0x11127; *b= 0x11132; return true;
-#if 0
-      case 0x0B57  : *a = 0xno decomp, -> RIGHT; return true;
-      case 0x1C29  : *a = 0xno decomp, -> LEFT; return true;
-      case 0xA9C0  : *a = 0xno decomp, -> RIGHT; return true;
-      case 0x111BF  : *a = 0xno decomp, -> ABOVE; return true;
-#endif
-    }
-    *a = ab; *b = 0;
+    *a = *b = 0;
    return func.decompose (this, ab, a, b, user_data.decompose);
  }