[ICU] Use new normalizer2 compose/decompose API

It's considerably faster than the fallback implementation we had
previously!
This commit is contained in:
Behdad Esfahbod 2012-08-11 21:26:25 -04:00
parent 2b73a1f112
commit d5045a5f40
6 changed files with 112 additions and 58 deletions

View File

@ -37,8 +37,6 @@
#define HB_DEBUG_BUFFER (HB_DEBUG+0)
#endif
#define _HB_BUFFER_UNICODE_FUNCS_DEFAULT (const_cast<hb_unicode_funcs_t *> (&_hb_unicode_funcs_default))
/* Here is how the buffer works internally:
*
* There are two info pointers: info and out_info. They always have
@ -144,7 +142,7 @@ hb_buffer_t::reset (void)
return;
hb_unicode_funcs_destroy (unicode);
unicode = _HB_BUFFER_UNICODE_FUNCS_DEFAULT;
unicode = hb_unicode_funcs_get_default ();
hb_segment_properties_t default_props = _HB_BUFFER_PROPS_DEFAULT;
props = default_props;
@ -552,7 +550,7 @@ hb_buffer_get_empty (void)
static const hb_buffer_t _hb_buffer_nil = {
HB_OBJECT_HEADER_STATIC,
_HB_BUFFER_UNICODE_FUNCS_DEFAULT,
const_cast<hb_unicode_funcs_t *> (&_hb_unicode_funcs_nil),
_HB_BUFFER_PROPS_DEFAULT,
true, /* in_error */
@ -608,7 +606,8 @@ hb_buffer_set_unicode_funcs (hb_buffer_t *buffer,
return;
if (!unicode)
unicode = _HB_BUFFER_UNICODE_FUNCS_DEFAULT;
unicode = hb_unicode_funcs_get_default ();
hb_unicode_funcs_reference (unicode);
hb_unicode_funcs_destroy (buffer->unicode);

View File

@ -250,9 +250,6 @@ hb_glib_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
/* We don't ifdef-out the fallback code such that compiler always
* sees it and makes sure it's compilable. */
if (!a || !b)
return false;
gchar utf8[12];
gchar *normalized;
int len;
@ -367,22 +364,21 @@ hb_glib_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
return utf8_decomposed_len;
}
extern HB_INTERNAL const hb_unicode_funcs_t _hb_glib_unicode_funcs;
const hb_unicode_funcs_t _hb_glib_unicode_funcs = {
HB_OBJECT_HEADER_STATIC,
NULL, /* parent */
true, /* immutable */
{
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_glib_unicode_##name,
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT
}
};
/* Returns the GLib-backed Unicode functions table.
 *
 * The table is a function-local static with no parent and the immutable
 * flag set.  Its callback slots are produced by expanding
 * HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS with every entry mapped to the
 * matching hb_glib_unicode_<name> function. */
hb_unicode_funcs_t *
hb_glib_get_unicode_funcs (void)
{
static const hb_unicode_funcs_t _hb_glib_unicode_funcs = {
HB_OBJECT_HEADER_STATIC,
NULL, /* parent */
true, /* immutable */
{
/* One initializer per callback, in the order the list macro yields them. */
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_glib_unicode_##name,
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT
}
};
/* The table itself is const; the cast only adapts it to the non-const
 * return type of this getter. */
return const_cast<hb_unicode_funcs_t *> (&_hb_glib_unicode_funcs);
}

View File

@ -33,7 +33,7 @@
#include "hb-unicode-private.hh"
#include <unicode/uversion.h>
#include <unicode/uvernum.h>
#include <unicode/uchar.h>
#include <unicode/unorm.h>
#include <unicode/ustring.h>
@ -164,6 +164,10 @@ hb_icu_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
return hb_icu_script_to_script (scriptCode);
}
#if U_ICU_VERSION_MAJOR_NUM >= 49
static const UNormalizer2 *normalizer;
#endif
static hb_bool_t
hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t a,
@ -171,8 +175,17 @@ hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t *ab,
void *user_data HB_UNUSED)
{
if (!a || !b)
return false;
#if U_ICU_VERSION_MAJOR_NUM >= 49
{
UChar32 ret = unorm2_composePair (normalizer, a, b);
if (ret < 0) return false;
*ab = ret;
return true;
}
#endif
/* We don't ifdef-out the fallback code such that compiler always
* sees it and makes sure it's compilable. */
UChar utf16[4], normalized[5];
unsigned int len;
@ -207,6 +220,32 @@ hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t *b,
void *user_data HB_UNUSED)
{
#if U_ICU_VERSION_MAJOR_NUM >= 49
{
UChar decomposed[4];
int len;
UErrorCode icu_err = U_ZERO_ERROR;
len = unorm2_getRawDecomposition (normalizer, ab, decomposed,
ARRAY_LENGTH (decomposed), &icu_err);
if (U_FAILURE (icu_err) || len < 0) return false;
len = u_countChar32 (decomposed, len);
if (len == 1) {
U16_GET_UNSAFE (decomposed, 0, *a);
*b = 0;
return *a != ab;
} else if (len == 2) {
len =0;
U16_NEXT_UNSAFE (decomposed, len, *a);
U16_NEXT_UNSAFE (decomposed, len, *b);
}
return true;
}
#endif
/* We don't ifdef-out the fallback code such that compiler always
* sees it and makes sure it's compilable. */
UChar utf16[2], normalized[2 * HB_UNICODE_MAX_DECOMPOSITION_LEN + 1];
unsigned int len;
hb_bool_t ret, err;
@ -306,22 +345,28 @@ hb_icu_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs HB_UNUSED,
}
extern HB_INTERNAL const hb_unicode_funcs_t _hb_icu_unicode_funcs;
const hb_unicode_funcs_t _hb_icu_unicode_funcs = {
HB_OBJECT_HEADER_STATIC,
NULL, /* parent */
true, /* immutable */
{
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_icu_unicode_##name,
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT
}
};
/* Returns the ICU-backed Unicode functions table.
 *
 * The table is a function-local static with no parent and the immutable
 * flag set; its callback slots are the hb_icu_unicode_<name> functions.
 * When built against ICU 49 or newer, this getter also fetches the NFC
 * normalizer instance that the compose/decompose callbacks pass to the
 * unorm2_* API. */
hb_unicode_funcs_t *
hb_icu_get_unicode_funcs (void)
{
static const hb_unicode_funcs_t _hb_icu_unicode_funcs = {
HB_OBJECT_HEADER_STATIC,
NULL, /* parent */
true, /* immutable */
{
/* One initializer per callback, in the order the list macro yields them. */
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_icu_unicode_##name,
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT
}
};
#if U_ICU_VERSION_MAJOR_NUM >= 49
/* Only look the normalizer up while the file-level pointer is still unset. */
if (!hb_atomic_ptr_get (&normalizer)) {
UErrorCode icu_err = U_ZERO_ERROR;
/* We ignore failure in getNFCInstance(). */
/* NOTE(review): icu_err is never inspected.  If the lookup fails,
 * normalizer presumably stays NULL and is later handed to
 * unorm2_composePair () / unorm2_getRawDecomposition () without a
 * check -- confirm that this is acceptable. */
hb_atomic_ptr_cmpexch (&normalizer, NULL, unorm2_getNFCInstance (&icu_err));
}
#endif
/* The table itself is const; the cast only adapts it to the non-const
 * return type of this getter. */
return const_cast<hb_unicode_funcs_t *> (&_hb_icu_unicode_funcs);
}

View File

@ -80,13 +80,14 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
hb_codepoint_t *ab)
{
*ab = 0;
if (unlikely (!a || !b)) return false;
return func.compose (this, a, b, ab, user_data.compose);
}
/* Decomposes `ab` into the pair (`*a`, `*b`) via the installed callback.
 *
 * The outputs are pre-filled with (ab, 0) before the callback runs, so a
 * callback that leaves them untouched reports `ab` as its own
 * decomposition.  Returns whatever the callback returns. */
inline hb_bool_t decompose (hb_codepoint_t ab,
hb_codepoint_t *a, hb_codepoint_t *b)
{
  *a = ab;
  *b = 0;
  return func.decompose (this, ab, a, b, user_data.decompose);
}
@ -182,17 +183,7 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
};
#ifdef HAVE_GLIB
extern HB_INTERNAL const hb_unicode_funcs_t _hb_glib_unicode_funcs;
#define _hb_unicode_funcs_default _hb_glib_unicode_funcs
#elif defined(HAVE_ICU)
extern HB_INTERNAL const hb_unicode_funcs_t _hb_icu_unicode_funcs;
#define _hb_unicode_funcs_default _hb_icu_unicode_funcs
#else
#define HB_UNICODE_FUNCS_NIL 1
extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil;
#define _hb_unicode_funcs_default _hb_unicode_funcs_nil
#endif
/* Modified combining marks */

View File

@ -109,12 +109,44 @@ hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED
}
/* List of Unicode-funcs backends.  Each entry is wrapped in
 * HB_UNICODE_FUNCS_IMPLEMENT, which must be defined by the user of this
 * list before expanding it and undefined afterwards. */
#define HB_UNICODE_FUNCS_IMPLEMENT_SET \
HB_UNICODE_FUNCS_IMPLEMENT (glib) \
HB_UNICODE_FUNCS_IMPLEMENT (icu) \
HB_UNICODE_FUNCS_IMPLEMENT (nil) \
/* ^--- Add new callbacks before nil */
/* The "nil" backend has no getter of its own; the name generated for it
 * is mapped onto hb_unicode_funcs_get_empty. */
#define hb_nil_get_unicode_funcs hb_unicode_funcs_get_empty
/* Prototype them all: expanding the list here declares
 * hb_<set>_get_unicode_funcs () for every listed backend. */
#define HB_UNICODE_FUNCS_IMPLEMENT(set) \
extern "C" hb_unicode_funcs_t *hb_##set##_get_unicode_funcs (void);
HB_UNICODE_FUNCS_IMPLEMENT_SET
#undef HB_UNICODE_FUNCS_IMPLEMENT
/* Returns the default Unicode functions for this build.
 *
 * Exactly one backend getter is called, chosen at compile time:
 * GLib when HAVE_GLIB is defined, otherwise ICU when HAVE_ICU is defined,
 * otherwise the "nil" getter.  In the last case HB_UNICODE_FUNCS_NIL is
 * also defined so that later code can tell that no real implementation
 * was found.
 *
 * The result always comes from the backend's own getter rather than from
 * the address of a table, so any setup that getter performs (the ICU one
 * fetches its normalizer there) is not skipped. */
hb_unicode_funcs_t *
hb_unicode_funcs_get_default (void)
{
#define HB_UNICODE_FUNCS_IMPLEMENT(set) \
  return hb_##set##_get_unicode_funcs ();

#ifdef HAVE_GLIB
  HB_UNICODE_FUNCS_IMPLEMENT(glib)
#elif defined(HAVE_ICU)
  HB_UNICODE_FUNCS_IMPLEMENT(icu)
#else
#define HB_UNICODE_FUNCS_NIL 1
  HB_UNICODE_FUNCS_IMPLEMENT(nil)
#endif

#undef HB_UNICODE_FUNCS_IMPLEMENT
}
#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL)
#pragma message("Could not find any Unicode functions implementation, you have to provide your own.")
#pragma message("To suppress this warnings, define HB_NO_UNICODE_FUNCS.")
#endif
hb_unicode_funcs_t *
hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
{
@ -140,7 +172,6 @@ hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
}
extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil;
const hb_unicode_funcs_t _hb_unicode_funcs_nil = {
HB_OBJECT_HEADER_STATIC,

View File

@ -37,11 +37,3 @@
#if defined(HB_ATOMIC_INT_NIL) || defined(HB_MUTEX_IMPL_NIL)
#pragma message("To suppress these warnings, define HB_NO_MT.")
#endif
#include "hb-unicode-private.hh"
#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL)
#pragma message("Could not find any Unicode functions implementation, you have to provide your own.")
#pragma message("To suppress this warnings, define HB_NO_UNICODE_FUNCS.")
#endif