Use a lookup table for modified_combining_class
This commit is contained in:
parent
208f70f055
commit
6adf417bc1
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
* Copyright © 2009 Red Hat, Inc.
|
||||
* Copyright © 2011 Codethink Limited
|
||||
* Copyright © 2010,2011 Google, Inc.
|
||||
* Copyright © 2010,2011,2012 Google, Inc.
|
||||
*
|
||||
* This is part of HarfBuzz, a text shaping library.
|
||||
*
|
||||
|
@ -37,6 +37,7 @@
|
|||
#include "hb-object-private.hh"
|
||||
|
||||
|
||||
extern HB_INTERNAL const uint8_t _hb_modified_combining_class[256];
|
||||
|
||||
/*
|
||||
* hb_unicode_funcs_t
|
||||
|
@ -143,8 +144,11 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
|
|||
}
|
||||
|
||||
|
||||
HB_INTERNAL unsigned int
|
||||
modified_combining_class (hb_codepoint_t unicode);
|
||||
/* Returns the modified combining class for `unicode`: the result of
 * combining_class (unicode) is used directly as an index into the
 * 256-entry _hb_modified_combining_class lookup table declared above.
 * NOTE(review): assumes combining_class () never yields a value above 255;
 * nothing here range-checks the index -- confirm against the callback contract. */
unsigned int
|
||||
modified_combining_class (hb_codepoint_t unicode)
|
||||
{
|
||||
return _hb_modified_combining_class[combining_class (unicode)];
|
||||
}
|
||||
|
||||
inline hb_bool_t
|
||||
is_variation_selector (hb_codepoint_t unicode)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
* Copyright © 2009 Red Hat, Inc.
|
||||
* Copyright © 2011 Codethink Limited
|
||||
* Copyright © 2010,2011 Google, Inc.
|
||||
* Copyright © 2011 Codethink Limited
|
||||
* Copyright © 2010,2011,2012 Google, Inc.
|
||||
*
|
||||
* This is part of HarfBuzz, a text shaping library.
|
||||
*
|
||||
|
@ -287,69 +287,148 @@ hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
|
|||
}
|
||||
|
||||
|
||||
unsigned int
|
||||
hb_unicode_funcs_t::modified_combining_class (hb_codepoint_t unicode)
|
||||
const uint8_t
|
||||
_hb_modified_combining_class[256] =
|
||||
{
|
||||
int c = combining_class (unicode);
|
||||
0, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */
|
||||
1, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */
|
||||
2, 3, 4, 5, 6,
|
||||
7, /* HB_UNICODE_COMBINING_CLASS_NUKTA */
|
||||
8, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */
|
||||
9, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */
|
||||
|
||||
if (unlikely (hb_in_range<int> (c, 27, 33)))
|
||||
{
|
||||
/* Modify the combining-class to suit Arabic better. See:
|
||||
* http://unicode.org/faq/normalization.html#8
|
||||
* http://unicode.org/faq/normalization.html#9
|
||||
*/
|
||||
c = c == 33 ? 27 : c + 1;
|
||||
}
|
||||
else if (unlikely (hb_in_range<int> (c, 10, 26)))
|
||||
{
|
||||
/* The equivalent fix for Hebrew is more complex.
|
||||
*
|
||||
* We permute the "fixed-position" classes 10-26 into the order
|
||||
* described in the SBL Hebrew manual:
|
||||
*
|
||||
* http://www.sbl-site.org/Fonts/SBLHebrewUserManual1.5x.pdf
|
||||
*
|
||||
* (as recommended by:
|
||||
* http://forum.fontlab.com/archive-old-microsoft-volt-group/vista-and-diacritic-ordering-t6751.0.html)
|
||||
*
|
||||
* More details here:
|
||||
* https://bugzilla.mozilla.org/show_bug.cgi?id=662055
|
||||
*/
|
||||
static const int permuted_hebrew_classes[26 - 10 + 1] = {
|
||||
/* 10 sheva */ 22,
|
||||
/* 11 hataf segol */ 15,
|
||||
/* 12 hataf patah */ 16,
|
||||
/* 13 hataf qamats */ 17,
|
||||
/* 14 hiriq */ 23,
|
||||
/* 15 tsere */ 18,
|
||||
/* 16 segol */ 19,
|
||||
/* 17 patah */ 20,
|
||||
/* 18 qamats */ 21,
|
||||
/* 19 holam */ 14,
|
||||
/* 20 qubuts */ 24,
|
||||
/* 21 dagesh */ 12,
|
||||
/* 22 meteg */ 25,
|
||||
/* 23 rafe */ 13,
|
||||
/* 24 shin dot */ 10,
|
||||
/* 25 sin dot */ 11,
|
||||
/* 26 point varika */ 26,
|
||||
};
|
||||
c = permuted_hebrew_classes[c - 10];
|
||||
}
|
||||
else if (unlikely (unicode == 0x0E3A)) /* THAI VOWEL SIGN PHINTHU */
|
||||
{
|
||||
/* Assign 104, so it reorders after the THAI ccc=103 marks.
|
||||
* Uniscribe does this. */
|
||||
c = 104;
|
||||
}
|
||||
else if (unlikely (hb_in_range<hb_codepoint_t> (unicode, 0x0C55, 0x0C56)))
|
||||
{
|
||||
/* Telugu length marks.
|
||||
* These are the only matras in the main Indic script range that have
|
||||
* a non-zero ccc. That makes them reorder with the Halant that is
|
||||
* ccc=9. Just zero them, we don't need them in our Indic shaper. */
|
||||
c = 0;
|
||||
}
|
||||
/* Hebrew */
|
||||
|
||||
return c;
|
||||
}
|
||||
/*
|
||||
* We permute the "fixed-position" classes 10-26 into the order
|
||||
* described in the SBL Hebrew manual:
|
||||
*
|
||||
* http://www.sbl-site.org/Fonts/SBLHebrewUserManual1.5x.pdf
|
||||
*
|
||||
* (as recommended by:
|
||||
* http://forum.fontlab.com/archive-old-microsoft-volt-group/vista-and-diacritic-ordering-t6751.0.html)
|
||||
*
|
||||
* More details here:
|
||||
* https://bugzilla.mozilla.org/show_bug.cgi?id=662055
|
||||
*/
|
||||
22, /* HB_UNICODE_COMBINING_CLASS_CCC10 sheva */
|
||||
15, /* HB_UNICODE_COMBINING_CLASS_CCC11 hataf segol */
|
||||
16, /* HB_UNICODE_COMBINING_CLASS_CCC12 hataf patah */
|
||||
17, /* HB_UNICODE_COMBINING_CLASS_CCC13 hataf qamats */
|
||||
23, /* HB_UNICODE_COMBINING_CLASS_CCC14 hiriq */
|
||||
18, /* HB_UNICODE_COMBINING_CLASS_CCC15 tsere */
|
||||
19, /* HB_UNICODE_COMBINING_CLASS_CCC16 segol */
|
||||
20, /* HB_UNICODE_COMBINING_CLASS_CCC17 patah */
|
||||
21, /* HB_UNICODE_COMBINING_CLASS_CCC18 qamats */
|
||||
14, /* HB_UNICODE_COMBINING_CLASS_CCC19 holam */
|
||||
24, /* HB_UNICODE_COMBINING_CLASS_CCC20 qubuts */
|
||||
12, /* HB_UNICODE_COMBINING_CLASS_CCC21 dagesh */
|
||||
25, /* HB_UNICODE_COMBINING_CLASS_CCC22 meteg */
|
||||
13, /* HB_UNICODE_COMBINING_CLASS_CCC23 rafe */
|
||||
10, /* HB_UNICODE_COMBINING_CLASS_CCC24 shin dot */
|
||||
11, /* HB_UNICODE_COMBINING_CLASS_CCC25 sin dot */
|
||||
|
||||
26, /* HB_UNICODE_COMBINING_CLASS_CCC26 */
|
||||
|
||||
/* Arabic */
|
||||
|
||||
/*
|
||||
* Modify to move Shadda (ccc=33) before other marks. See:
|
||||
* http://unicode.org/faq/normalization.html#8
|
||||
* http://unicode.org/faq/normalization.html#9
|
||||
*/
|
||||
28, /* HB_UNICODE_COMBINING_CLASS_CCC27 */
|
||||
29, /* HB_UNICODE_COMBINING_CLASS_CCC28 */
|
||||
30, /* HB_UNICODE_COMBINING_CLASS_CCC29 */
|
||||
31, /* HB_UNICODE_COMBINING_CLASS_CCC30 */
|
||||
32, /* HB_UNICODE_COMBINING_CLASS_CCC31 */
|
||||
33, /* HB_UNICODE_COMBINING_CLASS_CCC32 */
|
||||
27, /* HB_UNICODE_COMBINING_CLASS_CCC33 shadda */
|
||||
|
||||
34, /* HB_UNICODE_COMBINING_CLASS_CCC34 */
|
||||
35, /* HB_UNICODE_COMBINING_CLASS_CCC35 */
|
||||
|
||||
/* Syriac */
|
||||
36, /* HB_UNICODE_COMBINING_CLASS_CCC36 */
|
||||
|
||||
37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
|
||||
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83,
|
||||
|
||||
/* Telugu */
|
||||
|
||||
/*
|
||||
* Modify Telugu length marks (ccc=84, ccc=91).
|
||||
* These are the only matras in the main Indic scripts range that have
|
||||
* a non-zero ccc. That makes them reorder with the Halant that is
|
||||
* ccc=9. Just zero them, we don't need them in our Indic shaper.
|
||||
*/
|
||||
0, /* HB_UNICODE_COMBINING_CLASS_CCC84 */
|
||||
85, 86, 87, 88, 89, 90,
|
||||
0, /* HB_UNICODE_COMBINING_CLASS_CCC91 */
|
||||
92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,
|
||||
|
||||
/* Thai */
|
||||
|
||||
/*
|
||||
* Modify U+0E38 and U+0E39 (ccc=103) to be reordered before U+0E3A (ccc=9).
|
||||
* Uniscribe does this too.
|
||||
*/
|
||||
3, /* HB_UNICODE_COMBINING_CLASS_CCC103 */
|
||||
|
||||
104, 105, 106,
|
||||
107, /* HB_UNICODE_COMBINING_CLASS_CCC107 */
|
||||
108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
|
||||
|
||||
/* Lao */
|
||||
118, /* HB_UNICODE_COMBINING_CLASS_CCC118 */
|
||||
119, 120, 121,
|
||||
122, /* HB_UNICODE_COMBINING_CLASS_CCC122 */
|
||||
123, 124, 125, 126, 127, 128,
|
||||
|
||||
/* Tibetan */
|
||||
129, /* HB_UNICODE_COMBINING_CLASS_CCC129 */
|
||||
130, /* HB_UNICODE_COMBINING_CLASS_CCC130 */
|
||||
131,
|
||||
132, /* HB_UNICODE_COMBINING_CLASS_CCC132 */
|
||||
133, 134, 135, 136, 137, 138, 139,
|
||||
|
||||
|
||||
140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
|
||||
150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
|
||||
160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
|
||||
170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
|
||||
180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
|
||||
190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
|
||||
|
||||
200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */
|
||||
201,
|
||||
202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */
|
||||
203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
|
||||
214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */
|
||||
215,
|
||||
216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */
|
||||
217,
|
||||
218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */
|
||||
219,
|
||||
220, /* HB_UNICODE_COMBINING_CLASS_BELOW */
|
||||
221,
|
||||
222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */
|
||||
223,
|
||||
224, /* HB_UNICODE_COMBINING_CLASS_LEFT */
|
||||
225,
|
||||
226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */
|
||||
227,
|
||||
228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */
|
||||
229,
|
||||
230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */
|
||||
231,
|
||||
232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */
|
||||
233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */
|
||||
234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */
|
||||
235, 236, 237, 238, 239,
|
||||
240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */
|
||||
241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
|
||||
255, /* HB_UNICODE_COMBINING_CLASS_INVALID */
|
||||
};
|
||||
|
|
|
@ -79,6 +79,10 @@ typedef enum
|
|||
|
||||
/* hb_unicode_combining_class_t */
|
||||
|
||||
/* Note: newer versions of Unicode may add new values. Clients should be ready to handle
|
||||
* any value in the 0..254 range being returned from hb_unicode_combining_class().
|
||||
*/
|
||||
|
||||
/* Unicode Character Database property: Canonical_Combining_Class (ccc) */
|
||||
typedef enum
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue