[hb-old] Shovel out the line-breaking / word-segmentation stuff
This commit is contained in:
parent
0bcbe88cf3
commit
4a31166b28
|
@ -18,10 +18,7 @@ MAINSOURCES = \
|
||||||
harfbuzz-hebrew.c \
|
harfbuzz-hebrew.c \
|
||||||
harfbuzz-arabic.c \
|
harfbuzz-arabic.c \
|
||||||
harfbuzz-hangul.c \
|
harfbuzz-hangul.c \
|
||||||
harfbuzz-myanmar.c \
|
harfbuzz-myanmar.c
|
||||||
harfbuzz-thai.c
|
|
||||||
|
|
||||||
EXTRA_SOURCES = harfbuzz.c
|
|
||||||
|
|
||||||
PUBLICHEADERS = \
|
PUBLICHEADERS = \
|
||||||
harfbuzz.h \
|
harfbuzz.h \
|
||||||
|
@ -50,7 +47,4 @@ libhb_old_la_SOURCES = \
|
||||||
$(PUBLICHEADERS) \
|
$(PUBLICHEADERS) \
|
||||||
$(PRIVATEHEADERS)
|
$(PRIVATEHEADERS)
|
||||||
|
|
||||||
EXTRA_DIST = \
|
EXTRA_DIST = README COPYING
|
||||||
README \
|
|
||||||
COPYING \
|
|
||||||
$(EXTRA_SOURCES)
|
|
||||||
|
|
|
@ -52,7 +52,7 @@ am__objects_1 = harfbuzz-buffer.lo harfbuzz-stream.lo harfbuzz-gdef.lo \
|
||||||
harfbuzz-open.lo harfbuzz-shaper.lo harfbuzz-greek.lo \
|
harfbuzz-open.lo harfbuzz-shaper.lo harfbuzz-greek.lo \
|
||||||
harfbuzz-tibetan.lo harfbuzz-khmer.lo harfbuzz-indic.lo \
|
harfbuzz-tibetan.lo harfbuzz-khmer.lo harfbuzz-indic.lo \
|
||||||
harfbuzz-hebrew.lo harfbuzz-arabic.lo harfbuzz-hangul.lo \
|
harfbuzz-hebrew.lo harfbuzz-arabic.lo harfbuzz-hangul.lo \
|
||||||
harfbuzz-myanmar.lo harfbuzz-thai.lo
|
harfbuzz-myanmar.lo
|
||||||
am__objects_2 =
|
am__objects_2 =
|
||||||
am_libhb_old_la_OBJECTS = $(am__objects_1) $(am__objects_2) \
|
am_libhb_old_la_OBJECTS = $(am__objects_1) $(am__objects_2) \
|
||||||
$(am__objects_2)
|
$(am__objects_2)
|
||||||
|
@ -268,10 +268,8 @@ MAINSOURCES = \
|
||||||
harfbuzz-hebrew.c \
|
harfbuzz-hebrew.c \
|
||||||
harfbuzz-arabic.c \
|
harfbuzz-arabic.c \
|
||||||
harfbuzz-hangul.c \
|
harfbuzz-hangul.c \
|
||||||
harfbuzz-myanmar.c \
|
harfbuzz-myanmar.c
|
||||||
harfbuzz-thai.c
|
|
||||||
|
|
||||||
EXTRA_SOURCES = harfbuzz.c
|
|
||||||
PUBLICHEADERS = \
|
PUBLICHEADERS = \
|
||||||
harfbuzz.h \
|
harfbuzz.h \
|
||||||
harfbuzz-buffer.h \
|
harfbuzz-buffer.h \
|
||||||
|
@ -299,11 +297,7 @@ libhb_old_la_SOURCES = \
|
||||||
$(PUBLICHEADERS) \
|
$(PUBLICHEADERS) \
|
||||||
$(PRIVATEHEADERS)
|
$(PRIVATEHEADERS)
|
||||||
|
|
||||||
EXTRA_DIST = \
|
EXTRA_DIST = README COPYING
|
||||||
README \
|
|
||||||
COPYING \
|
|
||||||
$(EXTRA_SOURCES)
|
|
||||||
|
|
||||||
all: all-am
|
all: all-am
|
||||||
|
|
||||||
.SUFFIXES:
|
.SUFFIXES:
|
||||||
|
@ -371,7 +365,6 @@ distclean-compile:
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-open.Plo@am__quote@
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-open.Plo@am__quote@
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-shaper.Plo@am__quote@
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-shaper.Plo@am__quote@
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-stream.Plo@am__quote@
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-stream.Plo@am__quote@
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-thai.Plo@am__quote@
|
|
||||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-tibetan.Plo@am__quote@
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-tibetan.Plo@am__quote@
|
||||||
|
|
||||||
.c.o:
|
.c.o:
|
||||||
|
|
|
@ -35,21 +35,6 @@ HB_BEGIN_HEADER
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
see http://www.unicode.org/reports/tr14/tr14-19.html
|
|
||||||
we don't use the XX, AI and CB properties and map them to AL instead.
|
|
||||||
as we don't support any EBCDIC-based OSes, NL is ignored and mapped to AL as well.
|
|
||||||
*/
|
|
||||||
typedef enum {
|
|
||||||
HB_LineBreak_OP, HB_LineBreak_CL, HB_LineBreak_QU, HB_LineBreak_GL, HB_LineBreak_NS,
|
|
||||||
HB_LineBreak_EX, HB_LineBreak_SY, HB_LineBreak_IS, HB_LineBreak_PR, HB_LineBreak_PO,
|
|
||||||
HB_LineBreak_NU, HB_LineBreak_AL, HB_LineBreak_ID, HB_LineBreak_IN, HB_LineBreak_HY,
|
|
||||||
HB_LineBreak_BA, HB_LineBreak_BB, HB_LineBreak_B2, HB_LineBreak_ZW, HB_LineBreak_CM,
|
|
||||||
HB_LineBreak_WJ, HB_LineBreak_H2, HB_LineBreak_H3, HB_LineBreak_JL, HB_LineBreak_JV,
|
|
||||||
HB_LineBreak_JT, HB_LineBreak_SA, HB_LineBreak_SG,
|
|
||||||
HB_LineBreak_SP, HB_LineBreak_CR, HB_LineBreak_LF, HB_LineBreak_BK
|
|
||||||
} HB_LineBreakClass;
|
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
HB_Mark_NonSpacing, /* Mn */
|
HB_Mark_NonSpacing, /* Mn */
|
||||||
|
@ -90,62 +75,11 @@ typedef enum
|
||||||
HB_Symbol_Other /* So */
|
HB_Symbol_Other /* So */
|
||||||
} HB_CharCategory;
|
} HB_CharCategory;
|
||||||
|
|
||||||
typedef enum
|
|
||||||
{
|
|
||||||
HB_Grapheme_Other,
|
|
||||||
HB_Grapheme_CR,
|
|
||||||
HB_Grapheme_LF,
|
|
||||||
HB_Grapheme_Control,
|
|
||||||
HB_Grapheme_Extend,
|
|
||||||
HB_Grapheme_L,
|
|
||||||
HB_Grapheme_V,
|
|
||||||
HB_Grapheme_T,
|
|
||||||
HB_Grapheme_LV,
|
|
||||||
HB_Grapheme_LVT
|
|
||||||
} HB_GraphemeClass;
|
|
||||||
|
|
||||||
|
|
||||||
typedef enum
|
|
||||||
{
|
|
||||||
HB_Word_Other,
|
|
||||||
HB_Word_Format,
|
|
||||||
HB_Word_Katakana,
|
|
||||||
HB_Word_ALetter,
|
|
||||||
HB_Word_MidLetter,
|
|
||||||
HB_Word_MidNum,
|
|
||||||
HB_Word_Numeric,
|
|
||||||
HB_Word_ExtendNumLet
|
|
||||||
} HB_WordClass;
|
|
||||||
|
|
||||||
|
|
||||||
typedef enum
|
|
||||||
{
|
|
||||||
HB_Sentence_Other,
|
|
||||||
HB_Sentence_Sep,
|
|
||||||
HB_Sentence_Format,
|
|
||||||
HB_Sentence_Sp,
|
|
||||||
HB_Sentence_Lower,
|
|
||||||
HB_Sentence_Upper,
|
|
||||||
HB_Sentence_OLetter,
|
|
||||||
HB_Sentence_Numeric,
|
|
||||||
HB_Sentence_ATerm,
|
|
||||||
HB_Sentence_STerm,
|
|
||||||
HB_Sentence_Close
|
|
||||||
} HB_SentenceClass;
|
|
||||||
|
|
||||||
HB_GraphemeClass HB_GetGraphemeClass(HB_UChar32 ch);
|
|
||||||
HB_WordClass HB_GetWordClass(HB_UChar32 ch);
|
|
||||||
HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch);
|
|
||||||
HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch);
|
|
||||||
|
|
||||||
void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak);
|
|
||||||
void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass);
|
void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass);
|
||||||
HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch);
|
HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch);
|
||||||
int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch);
|
int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch);
|
||||||
HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch);
|
HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch);
|
||||||
|
|
||||||
void *HB_Library_Resolve(const char *library, int version, const char *symbol);
|
|
||||||
|
|
||||||
HB_END_HEADER
|
HB_END_HEADER
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1866,29 +1866,3 @@ HB_Bool HB_IndicShape(HB_ShaperItem *item)
|
||||||
item->num_glyphs = first_glyph;
|
item->num_glyphs = first_glyph;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HB_IndicAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
|
|
||||||
{
|
|
||||||
int end = from + len;
|
|
||||||
const HB_UChar16 *uc = text + from;
|
|
||||||
attributes += from;
|
|
||||||
hb_uint32 i = 0;
|
|
||||||
while (i < len) {
|
|
||||||
bool invalid;
|
|
||||||
hb_uint32 boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from;
|
|
||||||
attributes[i].charStop = true;
|
|
||||||
|
|
||||||
if (boundary > len-1) boundary = len;
|
|
||||||
i++;
|
|
||||||
while (i < boundary) {
|
|
||||||
attributes[i].charStop = false;
|
|
||||||
++uc;
|
|
||||||
++i;
|
|
||||||
}
|
|
||||||
assert(i == boundary);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -640,28 +640,3 @@ HB_Bool HB_KhmerShape(HB_ShaperItem *item)
|
||||||
item->num_glyphs = first_glyph;
|
item->num_glyphs = first_glyph;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HB_KhmerAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
|
|
||||||
{
|
|
||||||
int end = from + len;
|
|
||||||
const HB_UChar16 *uc = text + from;
|
|
||||||
hb_uint32 i = 0;
|
|
||||||
HB_UNUSED(script);
|
|
||||||
attributes += from;
|
|
||||||
while ( i < len ) {
|
|
||||||
HB_Bool invalid;
|
|
||||||
hb_uint32 boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from;
|
|
||||||
|
|
||||||
attributes[i].charStop = TRUE;
|
|
||||||
|
|
||||||
if ( boundary > len-1 ) boundary = len;
|
|
||||||
i++;
|
|
||||||
while ( i < boundary ) {
|
|
||||||
attributes[i].charStop = FALSE;
|
|
||||||
++uc;
|
|
||||||
++i;
|
|
||||||
}
|
|
||||||
assert( i == boundary );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
|
@ -509,31 +509,3 @@ HB_Bool HB_MyanmarShape(HB_ShaperItem *item)
|
||||||
item->num_glyphs = first_glyph;
|
item->num_glyphs = first_glyph;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HB_MyanmarAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
|
|
||||||
{
|
|
||||||
int end = from + len;
|
|
||||||
const HB_UChar16 *uc = text + from;
|
|
||||||
hb_uint32 i = 0;
|
|
||||||
HB_UNUSED(script);
|
|
||||||
attributes += from;
|
|
||||||
while (i < len) {
|
|
||||||
HB_Bool invalid;
|
|
||||||
hb_uint32 boundary = myanmar_nextSyllableBoundary(text, from+i, end, &invalid) - from;
|
|
||||||
|
|
||||||
attributes[i].charStop = TRUE;
|
|
||||||
if (i)
|
|
||||||
attributes[i-1].lineBreakType = HB_Break;
|
|
||||||
|
|
||||||
if (boundary > len-1)
|
|
||||||
boundary = len;
|
|
||||||
i++;
|
|
||||||
while (i < boundary) {
|
|
||||||
attributes[i].charStop = FALSE;
|
|
||||||
++uc;
|
|
||||||
++i;
|
|
||||||
}
|
|
||||||
assert(i == boundary);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
|
@ -93,11 +93,9 @@ typedef enum {
|
||||||
|
|
||||||
/* return true if ok. */
|
/* return true if ok. */
|
||||||
typedef HB_Bool (*HB_ShapeFunction)(HB_ShaperItem *shaper_item);
|
typedef HB_Bool (*HB_ShapeFunction)(HB_ShaperItem *shaper_item);
|
||||||
typedef void (*HB_AttributeFunction)(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
HB_ShapeFunction shape;
|
HB_ShapeFunction shape;
|
||||||
HB_AttributeFunction charAttributes;
|
|
||||||
} HB_ScriptEngine;
|
} HB_ScriptEngine;
|
||||||
|
|
||||||
extern const HB_ScriptEngine hb_scriptEngines[];
|
extern const HB_ScriptEngine hb_scriptEngines[];
|
||||||
|
@ -112,16 +110,6 @@ extern HB_Bool HB_MyanmarShape(HB_ShaperItem *shaper_item);
|
||||||
extern HB_Bool HB_KhmerShape(HB_ShaperItem *shaper_item);
|
extern HB_Bool HB_KhmerShape(HB_ShaperItem *shaper_item);
|
||||||
extern HB_Bool HB_IndicShape(HB_ShaperItem *shaper_item);
|
extern HB_Bool HB_IndicShape(HB_ShaperItem *shaper_item);
|
||||||
|
|
||||||
extern void HB_TibetanAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
|
|
||||||
|
|
||||||
extern void HB_MyanmarAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
|
|
||||||
|
|
||||||
extern void HB_KhmerAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
|
|
||||||
|
|
||||||
extern void HB_IndicAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
|
|
||||||
|
|
||||||
extern void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
hb_uint32 tag;
|
hb_uint32 tag;
|
||||||
hb_uint32 property;
|
hb_uint32 property;
|
||||||
|
|
|
@ -32,205 +32,6 @@
|
||||||
#define HB_MIN(a, b) ((a) < (b) ? (a) : (b))
|
#define HB_MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||||
#define HB_MAX(a, b) ((a) > (b) ? (a) : (b))
|
#define HB_MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// The line break algorithm. See http://www.unicode.org/reports/tr14/tr14-13.html
|
|
||||||
//
|
|
||||||
// -----------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/* The Unicode algorithm does in our opinion allow line breaks at some
|
|
||||||
places they shouldn't be allowed. The following changes were thus
|
|
||||||
made in comparison to the Unicode reference:
|
|
||||||
|
|
||||||
EX->AL from DB to IB
|
|
||||||
SY->AL from DB to IB
|
|
||||||
SY->PO from DB to IB
|
|
||||||
SY->PR from DB to IB
|
|
||||||
SY->OP from DB to IB
|
|
||||||
AL->PR from DB to IB
|
|
||||||
AL->PO from DB to IB
|
|
||||||
PR->PR from DB to IB
|
|
||||||
PO->PO from DB to IB
|
|
||||||
PR->PO from DB to IB
|
|
||||||
PO->PR from DB to IB
|
|
||||||
HY->PO from DB to IB
|
|
||||||
HY->PR from DB to IB
|
|
||||||
HY->OP from DB to IB
|
|
||||||
NU->EX from PB to IB
|
|
||||||
EX->PO from DB to IB
|
|
||||||
*/
|
|
||||||
|
|
||||||
// The following line break classes are not treated by the table:
|
|
||||||
// AI, BK, CB, CR, LF, NL, SA, SG, SP, XX
|
|
||||||
|
|
||||||
enum break_class {
|
|
||||||
// the first 4 values have to agree with the enum in QCharAttributes
|
|
||||||
ProhibitedBreak, // PB in table
|
|
||||||
DirectBreak, // DB in table
|
|
||||||
IndirectBreak, // IB in table
|
|
||||||
CombiningIndirectBreak, // CI in table
|
|
||||||
CombiningProhibitedBreak // CP in table
|
|
||||||
};
|
|
||||||
#define DB DirectBreak
|
|
||||||
#define IB IndirectBreak
|
|
||||||
#define CI CombiningIndirectBreak
|
|
||||||
#define CP CombiningProhibitedBreak
|
|
||||||
#define PB ProhibitedBreak
|
|
||||||
|
|
||||||
static const hb_uint8 breakTable[HB_LineBreak_JT+1][HB_LineBreak_JT+1] =
|
|
||||||
{
|
|
||||||
/* OP CL QU GL NS EX SY IS PR PO NU AL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT */
|
|
||||||
/* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB },
|
|
||||||
/* CL */ { DB, PB, IB, IB, PB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
|
||||||
/* QU */ { PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
|
||||||
/* GL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
|
||||||
/* NS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
|
||||||
/* EX */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
|
||||||
/* SY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
|
||||||
/* IS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
|
||||||
/* PR */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB },
|
|
||||||
/* PO */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
|
||||||
/* NU */ { IB, PB, IB, IB, IB, IB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
|
||||||
/* AL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
|
||||||
/* ID */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
|
||||||
/* IN */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
|
||||||
/* HY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
|
||||||
/* BA */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
|
||||||
/* BB */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
|
||||||
/* B2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB },
|
|
||||||
/* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB },
|
|
||||||
/* CM */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
|
||||||
/* WJ */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
|
||||||
/* H2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
|
|
||||||
/* H3 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB },
|
|
||||||
/* JL */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, DB },
|
|
||||||
/* JV */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
|
|
||||||
/* JT */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB }
|
|
||||||
};
|
|
||||||
#undef DB
|
|
||||||
#undef IB
|
|
||||||
#undef CI
|
|
||||||
#undef CP
|
|
||||||
#undef PB
|
|
||||||
|
|
||||||
static const hb_uint8 graphemeTable[HB_Grapheme_LVT + 1][HB_Grapheme_LVT + 1] =
|
|
||||||
{
|
|
||||||
// Other, CR, LF, Control,Extend,L, V, T, LV, LVT
|
|
||||||
{ true , true , true , true , true , true , true , true , true , true }, // Other,
|
|
||||||
{ true , true , true , true , true , true , true , true , true , true }, // CR,
|
|
||||||
{ true , false, true , true , true , true , true , true , true , true }, // LF,
|
|
||||||
{ true , true , true , true , true , true , true , true , true , true }, // Control,
|
|
||||||
{ false, true , true , true , false, false, false, false, false, false }, // Extend,
|
|
||||||
{ true , true , true , true , true , false, true , true , true , true }, // L,
|
|
||||||
{ true , true , true , true , true , false, false, true , false, true }, // V,
|
|
||||||
{ true , true , true , true , true , true , false, false, false, false }, // T,
|
|
||||||
{ true , true , true , true , true , false, true , true , true , true }, // LV,
|
|
||||||
{ true , true , true , true , true , false, true , true , true , true }, // LVT
|
|
||||||
};
|
|
||||||
|
|
||||||
static void calcLineBreaks(const HB_UChar16 *uc, hb_uint32 len, HB_CharAttributes *charAttributes)
|
|
||||||
{
|
|
||||||
if (!len)
|
|
||||||
return;
|
|
||||||
|
|
||||||
// ##### can this fail if the first char is a surrogate?
|
|
||||||
HB_LineBreakClass cls;
|
|
||||||
HB_GraphemeClass grapheme;
|
|
||||||
HB_GetGraphemeAndLineBreakClass(*uc, &grapheme, &cls);
|
|
||||||
// handle case where input starts with an LF
|
|
||||||
if (cls == HB_LineBreak_LF)
|
|
||||||
cls = HB_LineBreak_BK;
|
|
||||||
|
|
||||||
charAttributes[0].whiteSpace = (cls == HB_LineBreak_SP || cls == HB_LineBreak_BK);
|
|
||||||
charAttributes[0].charStop = true;
|
|
||||||
|
|
||||||
int lcls = cls;
|
|
||||||
for (hb_uint32 i = 1; i < len; ++i) {
|
|
||||||
charAttributes[i].whiteSpace = false;
|
|
||||||
charAttributes[i].charStop = true;
|
|
||||||
|
|
||||||
HB_UChar32 code = uc[i];
|
|
||||||
HB_GraphemeClass ngrapheme;
|
|
||||||
HB_LineBreakClass ncls;
|
|
||||||
HB_GetGraphemeAndLineBreakClass(code, &ngrapheme, &ncls);
|
|
||||||
charAttributes[i].charStop = graphemeTable[ngrapheme][grapheme];
|
|
||||||
// handle surrogates
|
|
||||||
if (ncls == HB_LineBreak_SG) {
|
|
||||||
if (HB_IsHighSurrogate(uc[i]) && i < len - 1 && HB_IsLowSurrogate(uc[i+1])) {
|
|
||||||
continue;
|
|
||||||
} else if (HB_IsLowSurrogate(uc[i]) && HB_IsHighSurrogate(uc[i-1])) {
|
|
||||||
code = HB_SurrogateToUcs4(uc[i-1], uc[i]);
|
|
||||||
HB_GetGraphemeAndLineBreakClass(code, &ngrapheme, &ncls);
|
|
||||||
charAttributes[i].charStop = false;
|
|
||||||
} else {
|
|
||||||
ncls = HB_LineBreak_AL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// set white space and char stop flag
|
|
||||||
if (ncls >= HB_LineBreak_SP)
|
|
||||||
charAttributes[i].whiteSpace = true;
|
|
||||||
|
|
||||||
HB_LineBreakType lineBreakType = HB_NoBreak;
|
|
||||||
if (cls >= HB_LineBreak_LF) {
|
|
||||||
lineBreakType = HB_ForcedBreak;
|
|
||||||
} else if(cls == HB_LineBreak_CR) {
|
|
||||||
lineBreakType = (ncls == HB_LineBreak_LF) ? HB_NoBreak : HB_ForcedBreak;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ncls == HB_LineBreak_SP)
|
|
||||||
goto next_no_cls_update;
|
|
||||||
if (ncls >= HB_LineBreak_CR)
|
|
||||||
goto next;
|
|
||||||
|
|
||||||
{
|
|
||||||
int tcls = ncls;
|
|
||||||
// for South East Asian chars that require complex (dictionary-based) analysis, the Unicode
|
|
||||||
// standard recommends treating them as AL. thai_attributes and other attribute methods that
|
|
||||||
// do dictionary analysis can override
|
|
||||||
if (tcls >= HB_LineBreak_SA)
|
|
||||||
tcls = HB_LineBreak_AL;
|
|
||||||
if (cls >= HB_LineBreak_SA)
|
|
||||||
cls = HB_LineBreak_AL;
|
|
||||||
|
|
||||||
int brk = breakTable[cls][tcls];
|
|
||||||
switch (brk) {
|
|
||||||
case DirectBreak:
|
|
||||||
lineBreakType = HB_Break;
|
|
||||||
if (uc[i-1] == 0xad) // soft hyphen
|
|
||||||
lineBreakType = HB_SoftHyphen;
|
|
||||||
break;
|
|
||||||
case IndirectBreak:
|
|
||||||
lineBreakType = (lcls == HB_LineBreak_SP) ? HB_Break : HB_NoBreak;
|
|
||||||
break;
|
|
||||||
case CombiningIndirectBreak:
|
|
||||||
lineBreakType = HB_NoBreak;
|
|
||||||
if (lcls == HB_LineBreak_SP){
|
|
||||||
if (i > 1)
|
|
||||||
charAttributes[i-2].lineBreakType = HB_Break;
|
|
||||||
} else {
|
|
||||||
goto next_no_cls_update;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case CombiningProhibitedBreak:
|
|
||||||
lineBreakType = HB_NoBreak;
|
|
||||||
if (lcls != HB_LineBreak_SP)
|
|
||||||
goto next_no_cls_update;
|
|
||||||
case ProhibitedBreak:
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
next:
|
|
||||||
cls = ncls;
|
|
||||||
next_no_cls_update:
|
|
||||||
lcls = ncls;
|
|
||||||
grapheme = ngrapheme;
|
|
||||||
charAttributes[i-1].lineBreakType = lineBreakType;
|
|
||||||
}
|
|
||||||
charAttributes[len-1].lineBreakType = HB_ForcedBreak;
|
|
||||||
}
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Basic processing
|
// Basic processing
|
||||||
|
@ -582,210 +383,63 @@ HB_Bool HB_BasicShape(HB_ShaperItem *shaper_item)
|
||||||
|
|
||||||
const HB_ScriptEngine HB_ScriptEngines[] = {
|
const HB_ScriptEngine HB_ScriptEngines[] = {
|
||||||
// Common
|
// Common
|
||||||
{ HB_BasicShape, 0},
|
{ HB_BasicShape},
|
||||||
// Greek
|
// Greek
|
||||||
{ HB_GreekShape, 0},
|
{ HB_GreekShape},
|
||||||
// Cyrillic
|
// Cyrillic
|
||||||
{ HB_BasicShape, 0},
|
{ HB_BasicShape},
|
||||||
// Armenian
|
// Armenian
|
||||||
{ HB_BasicShape, 0},
|
{ HB_BasicShape},
|
||||||
// Hebrew
|
// Hebrew
|
||||||
{ HB_HebrewShape, 0 },
|
{ HB_HebrewShape},
|
||||||
// Arabic
|
// Arabic
|
||||||
{ HB_ArabicShape, 0},
|
{ HB_ArabicShape},
|
||||||
// Syriac
|
// Syriac
|
||||||
{ HB_ArabicShape, 0},
|
{ HB_ArabicShape},
|
||||||
// Thaana
|
// Thaana
|
||||||
{ HB_BasicShape, 0 },
|
{ HB_BasicShape},
|
||||||
// Devanagari
|
// Devanagari
|
||||||
{ HB_IndicShape, HB_IndicAttributes },
|
{ HB_IndicShape},
|
||||||
// Bengali
|
// Bengali
|
||||||
{ HB_IndicShape, HB_IndicAttributes },
|
{ HB_IndicShape},
|
||||||
// Gurmukhi
|
// Gurmukhi
|
||||||
{ HB_IndicShape, HB_IndicAttributes },
|
{ HB_IndicShape},
|
||||||
// Gujarati
|
// Gujarati
|
||||||
{ HB_IndicShape, HB_IndicAttributes },
|
{ HB_IndicShape},
|
||||||
// Oriya
|
// Oriya
|
||||||
{ HB_IndicShape, HB_IndicAttributes },
|
{ HB_IndicShape},
|
||||||
// Tamil
|
// Tamil
|
||||||
{ HB_IndicShape, HB_IndicAttributes },
|
{ HB_IndicShape},
|
||||||
// Telugu
|
// Telugu
|
||||||
{ HB_IndicShape, HB_IndicAttributes },
|
{ HB_IndicShape},
|
||||||
// Kannada
|
// Kannada
|
||||||
{ HB_IndicShape, HB_IndicAttributes },
|
{ HB_IndicShape},
|
||||||
// Malayalam
|
// Malayalam
|
||||||
{ HB_IndicShape, HB_IndicAttributes },
|
{ HB_IndicShape},
|
||||||
// Sinhala
|
// Sinhala
|
||||||
{ HB_IndicShape, HB_IndicAttributes },
|
{ HB_IndicShape},
|
||||||
// Thai
|
// Thai
|
||||||
{ HB_BasicShape, HB_ThaiAttributes },
|
{ HB_BasicShape},
|
||||||
// Lao
|
// Lao
|
||||||
{ HB_BasicShape, 0 },
|
{ HB_BasicShape},
|
||||||
// Tibetan
|
// Tibetan
|
||||||
{ HB_TibetanShape, HB_TibetanAttributes },
|
{ HB_TibetanShape},
|
||||||
// Myanmar
|
// Myanmar
|
||||||
{ HB_MyanmarShape, HB_MyanmarAttributes },
|
{ HB_MyanmarShape},
|
||||||
// Georgian
|
// Georgian
|
||||||
{ HB_BasicShape, 0 },
|
{ HB_BasicShape},
|
||||||
// Hangul
|
// Hangul
|
||||||
{ HB_HangulShape, 0 },
|
{ HB_HangulShape},
|
||||||
// Ogham
|
// Ogham
|
||||||
{ HB_BasicShape, 0 },
|
{ HB_BasicShape},
|
||||||
// Runic
|
// Runic
|
||||||
{ HB_BasicShape, 0 },
|
{ HB_BasicShape},
|
||||||
// Khmer
|
// Khmer
|
||||||
{ HB_KhmerShape, HB_KhmerAttributes },
|
{ HB_KhmerShape},
|
||||||
// N'Ko
|
// N'Ko
|
||||||
{ HB_ArabicShape, 0}
|
{ HB_ArabicShape}
|
||||||
};
|
};
|
||||||
|
|
||||||
void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
|
|
||||||
const HB_ScriptItem *items, hb_uint32 numItems,
|
|
||||||
HB_CharAttributes *attributes)
|
|
||||||
{
|
|
||||||
calcLineBreaks(string, stringLength, attributes);
|
|
||||||
|
|
||||||
for (hb_uint32 i = 0; i < numItems; ++i) {
|
|
||||||
HB_Script script = items[i].script;
|
|
||||||
if (script == HB_Script_Inherited)
|
|
||||||
script = HB_Script_Common;
|
|
||||||
HB_AttributeFunction attributeFunction = HB_ScriptEngines[script].charAttributes;
|
|
||||||
if (!attributeFunction)
|
|
||||||
continue;
|
|
||||||
attributeFunction(script, string, items[i].pos, items[i].length, attributes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
enum BreakRule { NoBreak = 0, Break = 1, Middle = 2 };
|
|
||||||
|
|
||||||
static const hb_uint8 wordbreakTable[HB_Word_ExtendNumLet + 1][HB_Word_ExtendNumLet + 1] = {
|
|
||||||
// Other Format Katakana ALetter MidLetter MidNum Numeric ExtendNumLet
|
|
||||||
{ Break, Break, Break, Break, Break, Break, Break, Break }, // Other
|
|
||||||
{ Break, Break, Break, Break, Break, Break, Break, Break }, // Format
|
|
||||||
{ Break, Break, NoBreak, Break, Break, Break, Break, NoBreak }, // Katakana
|
|
||||||
{ Break, Break, Break, NoBreak, Middle, Break, NoBreak, NoBreak }, // ALetter
|
|
||||||
{ Break, Break, Break, Break, Break, Break, Break, Break }, // MidLetter
|
|
||||||
{ Break, Break, Break, Break, Break, Break, Break, Break }, // MidNum
|
|
||||||
{ Break, Break, Break, NoBreak, Break, Middle, NoBreak, NoBreak }, // Numeric
|
|
||||||
{ Break, Break, NoBreak, NoBreak, Break, Break, NoBreak, NoBreak }, // ExtendNumLet
|
|
||||||
};
|
|
||||||
|
|
||||||
void HB_GetWordBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
|
|
||||||
const HB_ScriptItem * /*items*/, hb_uint32 /*numItems*/,
|
|
||||||
HB_CharAttributes *attributes)
|
|
||||||
{
|
|
||||||
if (stringLength == 0)
|
|
||||||
return;
|
|
||||||
unsigned int brk = HB_GetWordClass(string[0]);
|
|
||||||
attributes[0].wordBoundary = true;
|
|
||||||
for (hb_uint32 i = 1; i < stringLength; ++i) {
|
|
||||||
if (!attributes[i].charStop) {
|
|
||||||
attributes[i].wordBoundary = false;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
hb_uint32 nbrk = HB_GetWordClass(string[i]);
|
|
||||||
if (nbrk == HB_Word_Format) {
|
|
||||||
attributes[i].wordBoundary = (HB_GetSentenceClass(string[i-1]) == HB_Sentence_Sep);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
BreakRule rule = (BreakRule)wordbreakTable[brk][nbrk];
|
|
||||||
if (rule == Middle) {
|
|
||||||
rule = Break;
|
|
||||||
hb_uint32 lookahead = i + 1;
|
|
||||||
while (lookahead < stringLength) {
|
|
||||||
hb_uint32 testbrk = HB_GetWordClass(string[lookahead]);
|
|
||||||
if (testbrk == HB_Word_Format && HB_GetSentenceClass(string[lookahead]) != HB_Sentence_Sep) {
|
|
||||||
++lookahead;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (testbrk == brk) {
|
|
||||||
rule = NoBreak;
|
|
||||||
while (i < lookahead)
|
|
||||||
attributes[i++].wordBoundary = false;
|
|
||||||
nbrk = testbrk;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
attributes[i].wordBoundary = (rule == Break);
|
|
||||||
brk = nbrk;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
enum SentenceBreakStates {
|
|
||||||
SB_Initial,
|
|
||||||
SB_Upper,
|
|
||||||
SB_UpATerm,
|
|
||||||
SB_ATerm,
|
|
||||||
SB_ATermC,
|
|
||||||
SB_ACS,
|
|
||||||
SB_STerm,
|
|
||||||
SB_STermC,
|
|
||||||
SB_SCS,
|
|
||||||
SB_BAfter,
|
|
||||||
SB_Break,
|
|
||||||
SB_Look
|
|
||||||
};
|
|
||||||
|
|
||||||
static const hb_uint8 sentenceBreakTable[HB_Sentence_Close + 1][HB_Sentence_Close + 1] = {
|
|
||||||
// Other Sep Format Sp Lower Upper OLetter Numeric ATerm STerm Close
|
|
||||||
{ SB_Initial, SB_BAfter , SB_Initial, SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_ATerm , SB_STerm , SB_Initial }, // SB_Initial,
|
|
||||||
{ SB_Initial, SB_BAfter , SB_Upper , SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_UpATerm, SB_STerm , SB_Initial }, // SB_Upper
|
|
||||||
|
|
||||||
{ SB_Look , SB_BAfter , SB_UpATerm, SB_ACS , SB_Initial, SB_Upper , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_UpATerm
|
|
||||||
{ SB_Look , SB_BAfter , SB_ATerm , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATerm
|
|
||||||
{ SB_Look , SB_BAfter , SB_ATermC , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Look , SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATermC,
|
|
||||||
{ SB_Look , SB_BAfter , SB_ACS , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Look , SB_ATerm , SB_STerm , SB_Look }, // SB_ACS,
|
|
||||||
|
|
||||||
{ SB_Break , SB_BAfter , SB_STerm , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STerm,
|
|
||||||
{ SB_Break , SB_BAfter , SB_STermC , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STermC,
|
|
||||||
{ SB_Break , SB_BAfter , SB_SCS , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_Break }, // SB_SCS,
|
|
||||||
{ SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break }, // SB_BAfter,
|
|
||||||
};
|
|
||||||
|
|
||||||
void HB_GetSentenceBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
|
|
||||||
const HB_ScriptItem * /*items*/, hb_uint32 /*numItems*/,
|
|
||||||
HB_CharAttributes *attributes)
|
|
||||||
{
|
|
||||||
if (stringLength == 0)
|
|
||||||
return;
|
|
||||||
hb_uint32 brk = sentenceBreakTable[SB_Initial][HB_GetSentenceClass(string[0])];
|
|
||||||
attributes[0].sentenceBoundary = true;
|
|
||||||
for (hb_uint32 i = 1; i < stringLength; ++i) {
|
|
||||||
if (!attributes[i].charStop) {
|
|
||||||
attributes[i].sentenceBoundary = false;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
brk = sentenceBreakTable[brk][HB_GetSentenceClass(string[i])];
|
|
||||||
if (brk == SB_Look) {
|
|
||||||
brk = SB_Break;
|
|
||||||
hb_uint32 lookahead = i + 1;
|
|
||||||
while (lookahead < stringLength) {
|
|
||||||
hb_uint32 sbrk = HB_GetSentenceClass(string[lookahead]);
|
|
||||||
if (sbrk != HB_Sentence_Other && sbrk != HB_Sentence_Numeric && sbrk != HB_Sentence_Close) {
|
|
||||||
break;
|
|
||||||
} else if (sbrk == HB_Sentence_Lower) {
|
|
||||||
brk = SB_Initial;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
++lookahead;
|
|
||||||
}
|
|
||||||
if (brk == SB_Initial) {
|
|
||||||
while (i < lookahead)
|
|
||||||
attributes[i++].sentenceBoundary = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (brk == SB_Break) {
|
|
||||||
attributes[i].sentenceBoundary = true;
|
|
||||||
brk = sentenceBreakTable[SB_Initial][HB_GetSentenceClass(string[i])];
|
|
||||||
} else {
|
|
||||||
attributes[i].sentenceBoundary = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static inline char *tag_to_string(HB_UInt tag)
|
static inline char *tag_to_string(HB_UInt tag)
|
||||||
{
|
{
|
||||||
|
@ -1335,4 +989,3 @@ HB_Bool HB_ShapeItem(HB_ShaperItem *shaper_item)
|
||||||
shaper_item->glyphIndicesPresent = false;
|
shaper_item->glyphIndicesPresent = false;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -130,37 +130,6 @@ typedef struct
|
||||||
hb_uint8 bidiLevel;
|
hb_uint8 bidiLevel;
|
||||||
} HB_ScriptItem;
|
} HB_ScriptItem;
|
||||||
|
|
||||||
/*
 * Kind of line break associated with a character.
 * The four values fit the 2-bit lineBreakType field of HB_CharAttributes.
 */
typedef enum {
    HB_NoBreak     = 0,
    HB_SoftHyphen  = 1,
    HB_Break       = 2,
    HB_ForcedBreak = 3
} HB_LineBreakType;
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
/*HB_LineBreakType*/ hb_bitfield lineBreakType :2;
|
|
||||||
/*HB_Bool*/ hb_bitfield whiteSpace :1; /* A unicode whitespace character, except NBSP, ZWNBSP */
|
|
||||||
/*HB_Bool*/ hb_bitfield charStop :1; /* Valid cursor position (for left/right arrow) */
|
|
||||||
/*HB_Bool*/ hb_bitfield wordBoundary :1;
|
|
||||||
/*HB_Bool*/ hb_bitfield sentenceBoundary :1;
|
|
||||||
hb_bitfield unused :2;
|
|
||||||
} HB_CharAttributes;
|
|
||||||
|
|
||||||
void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
|
|
||||||
const HB_ScriptItem *items, hb_uint32 numItems,
|
|
||||||
HB_CharAttributes *attributes);
|
|
||||||
|
|
||||||
/* requires HB_GetCharAttributes to be called before */
|
|
||||||
void HB_GetWordBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
|
|
||||||
const HB_ScriptItem *items, hb_uint32 numItems,
|
|
||||||
HB_CharAttributes *attributes);
|
|
||||||
|
|
||||||
/* requires HB_GetCharAttributes to be called before */
|
|
||||||
void HB_GetSentenceBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
|
|
||||||
const HB_ScriptItem *items, hb_uint32 numItems,
|
|
||||||
HB_CharAttributes *attributes);
|
|
||||||
|
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
HB_LeftToRight = 0,
|
HB_LeftToRight = 0,
|
||||||
|
|
|
@ -1,111 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
|
|
||||||
*
|
|
||||||
* This is part of HarfBuzz, an OpenType Layout engine library.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, without written agreement and without
|
|
||||||
* license or royalty fees, to use, copy, modify, and distribute this
|
|
||||||
* software and its documentation for any purpose, provided that the
|
|
||||||
* above copyright notice and the following two paragraphs appear in
|
|
||||||
* all copies of this software.
|
|
||||||
*
|
|
||||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
|
||||||
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
|
||||||
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
|
||||||
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
|
||||||
* DAMAGE.
|
|
||||||
*
|
|
||||||
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
|
||||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
|
||||||
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
|
||||||
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "harfbuzz-shaper.h"
|
|
||||||
#include "harfbuzz-shaper-private.h"
|
|
||||||
#include "harfbuzz-external.h"
|
|
||||||
|
|
||||||
#include <assert.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
/* Signature of the th_brk word-break function looked up at run time. */
typedef int (*th_brk_def)(const char*, int[], int);

/* Resolved th_brk entry point, or NULL when it is not (yet) available. */
static th_brk_def th_brk = NULL;
/* Set once a lookup has been attempted, so a failed lookup is not retried. */
static int libthai_resolved = 0;

/*
 * Looks up "th_brk" in the "thai" library through HB_Library_Resolve and
 * caches the result in th_brk. Always marks the lookup as attempted,
 * whether or not it succeeded.
 */
static void resolve_libthai(void)
{
    if (!th_brk)
        th_brk = (th_brk_def)HB_Library_Resolve("thai", 0, "th_brk");
    libthai_resolved = 1;
}
|
|
||||||
|
|
||||||
/*
 * Converts len UTF-16 code units from string into a NUL-terminated
 * single-byte string written to cstr.
 *
 *   - code units <= 0xa0 are copied unchanged,
 *   - U+0E01..U+0E5B are mapped to 0xa1..0xfb (code unit - 0xe00 + 0xa0),
 *   - everything else becomes '?'.
 *
 * cstr is the OUTPUT buffer despite its const qualifier (kept so the
 * signature stays as callers expect); it must have room for len + 1 bytes.
 */
static void to_tis620(const HB_UChar16 *string, hb_uint32 len, const char *cstr)
{
    hb_uint32 i;
    unsigned char *result = (unsigned char *)cstr;

    for (i = 0; i < len; ++i) {
        if (string[i] <= 0xa0)
            result[i] = (unsigned char)string[i];
        /* "else if": without the else, the low range copied above fell
         * through to the final else and was overwritten with '?'. */
        else if (string[i] >= 0xe01 && string[i] <= 0xe5b)
            result[i] = (unsigned char)(string[i] - 0xe00 + 0xa0);
        else
            result[i] = '?';
    }

    result[len] = 0;
}
|
|
||||||
|
|
||||||
/*
 * Computes word/line break attributes for a run of len code units using the
 * run-time resolved th_brk function.
 *
 * The text is first converted with to_tis620(). For every position p > 0
 * reported by th_brk, attributes[p - 1] gets lineBreakType = HB_Break and
 * wordBoundary = TRUE; all other entries in [0, len) are reset to
 * HB_NoBreak / FALSE.
 *
 * If th_brk could not be resolved, or a temporary buffer cannot be
 * allocated, attributes is left untouched.
 */
static void thaiWordBreaks(const HB_UChar16 *string, hb_uint32 len, HB_CharAttributes *attributes)
{
    char s[128];
    char *cstr = s;
    int brp[128];
    int *break_positions = brp;
    int capacity = 128;
    int found;
    hb_uint32 numbreaks;
    hb_uint32 i;

    if (!libthai_resolved)
        resolve_libthai();

    if (!th_brk)
        return;

    /* The stack buffer holds up to 127 characters plus the terminator. */
    if (len >= 128) {
        cstr = (char *)malloc(len*sizeof(char) + 1);
        if (!cstr)
            return;
    }

    to_tis620(string, len, cstr);

    found = th_brk(cstr, break_positions, capacity);
    if (found > capacity) {
        /* More breaks than the stack array can hold: retry with a heap
         * array sized to the reported count. */
        capacity = found;
        break_positions = (int *)malloc(capacity * sizeof(int));
        if (!break_positions)
            goto cleanup;
        found = th_brk(cstr, break_positions, capacity);
    }

    /* Never read more entries than the array holds, and treat a negative
     * return value as "no breaks". */
    if (found < 0)
        found = 0;
    if (found > capacity)
        found = capacity;
    numbreaks = (hb_uint32)found;

    for (i = 0; i < len; ++i) {
        attributes[i].lineBreakType = HB_NoBreak;
        attributes[i].wordBoundary = FALSE;
    }

    for (i = 0; i < numbreaks; ++i) {
        /* Ignore positions that would index outside attributes[0..len-1]. */
        if (break_positions[i] > 0 && (hb_uint32)break_positions[i] <= len) {
            attributes[break_positions[i]-1].lineBreakType = HB_Break;
            attributes[break_positions[i]-1].wordBoundary = TRUE;
        }
    }

cleanup:
    if (break_positions != brp)
        free(break_positions);

    if (cstr != s)
        free(cstr);
}
|
|
||||||
|
|
||||||
/*
 * Computes Thai break attributes for the len code units of text starting at
 * offset from; results go to the matching entries of attributes
 * (attributes[from] onwards). script is expected to be HB_Script_Thai and is
 * only checked through assert().
 */
void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
{
    const HB_UChar16 *run = text + from;
    HB_CharAttributes *run_attributes = attributes + from;

    assert(script == HB_Script_Thai);
    thaiWordBreaks(run, len, run_attributes);
}
|
|
||||||
|
|
|
@ -246,29 +246,3 @@ HB_Bool HB_TibetanShape(HB_ShaperItem *item)
|
||||||
item->num_glyphs = first_glyph;
|
item->num_glyphs = first_glyph;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Sets the charStop attribute for the len code units of text starting at
 * offset from: the first code unit of each syllable reported by
 * tibetan_nextSyllableBoundary() gets charStop = TRUE, every other code unit
 * of that syllable gets charStop = FALSE. Entries are written relative to
 * attributes[from]. script is unused.
 */
void HB_TibetanAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
{
    int end = from + len;
    hb_uint32 i = 0;
    HB_UNUSED(script);
    attributes += from;
    while (i < len) {
        HB_Bool invalid;
        /* Boundary is returned as an index into text; make it run-relative. */
        hb_uint32 boundary = tibetan_nextSyllableBoundary(text, from+i, end, &invalid) - from;

        attributes[i].charStop = TRUE;

        /* Clamp to the end of the run (len > 0 inside this loop). */
        if (boundary > len-1) boundary = len;
        i++;
        while (i < boundary) {
            attributes[i].charStop = FALSE;
            ++i;
        }
        /* Every syllable must advance by at least one code unit. */
        assert(i == boundary);
    }
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,32 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2006 Behdad Esfahbod
|
|
||||||
*
|
|
||||||
* This is part of HarfBuzz, an OpenType Layout engine library.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, without written agreement and without
|
|
||||||
* license or royalty fees, to use, copy, modify, and distribute this
|
|
||||||
* software and its documentation for any purpose, provided that the
|
|
||||||
* above copyright notice and the following two paragraphs appear in
|
|
||||||
* all copies of this software.
|
|
||||||
*
|
|
||||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
|
||||||
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
|
||||||
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
|
||||||
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
|
||||||
* DAMAGE.
|
|
||||||
*
|
|
||||||
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
|
||||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
|
||||||
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
|
||||||
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define HB_INTERNAL static
|
|
||||||
#include "harfbuzz-buffer.c"
|
|
||||||
#include "harfbuzz-gdef.c"
|
|
||||||
#include "harfbuzz-gsub.c"
|
|
||||||
#include "harfbuzz-gpos.c"
|
|
||||||
#include "harfbuzz-impl.c"
|
|
||||||
#include "harfbuzz-open.c"
|
|
||||||
#include "harfbuzz-stream.c"
|
|
Loading…
Reference in New Issue