[hb-old] Shovel out the line-breaking / word-segmentation stuff

2012-07-24 19:49:48 -04:00 · 2012-07-24 19:49:48 -04:00 · 4a31166b28
parent 0bcbe88cf3
commit 4a31166b28
12 changed files with 33 additions and 750 deletions
--- a/src/hb-old/Makefile.am
+++ b/src/hb-old/Makefile.am
@ -18,10 +18,7 @@ MAINSOURCES =  \
 	harfbuzz-hebrew.c \
 	harfbuzz-arabic.c \
 	harfbuzz-hangul.c \
-	harfbuzz-myanmar.c \
-	harfbuzz-thai.c
-
-EXTRA_SOURCES = harfbuzz.c
+	harfbuzz-myanmar.c

 PUBLICHEADERS = \
 	harfbuzz.h \
@ -50,7 +47,4 @@ libhb_old_la_SOURCES = \
 	$(PUBLICHEADERS) \
 	$(PRIVATEHEADERS)

-EXTRA_DIST = 		\
-	README		\
-	COPYING		\
-	$(EXTRA_SOURCES)
+EXTRA_DIST = README COPYING
--- a/src/hb-old/Makefile.in
+++ b/src/hb-old/Makefile.in
@ -52,7 +52,7 @@ am__objects_1 = harfbuzz-buffer.lo harfbuzz-stream.lo harfbuzz-gdef.lo \
 	harfbuzz-open.lo harfbuzz-shaper.lo harfbuzz-greek.lo \
 	harfbuzz-tibetan.lo harfbuzz-khmer.lo harfbuzz-indic.lo \
 	harfbuzz-hebrew.lo harfbuzz-arabic.lo harfbuzz-hangul.lo \
-	harfbuzz-myanmar.lo harfbuzz-thai.lo
+	harfbuzz-myanmar.lo
 am__objects_2 =
 am_libhb_old_la_OBJECTS = $(am__objects_1) $(am__objects_2) \
 	$(am__objects_2)
@ -268,10 +268,8 @@ MAINSOURCES = \
 	harfbuzz-hebrew.c \
 	harfbuzz-arabic.c \
 	harfbuzz-hangul.c \
-	harfbuzz-myanmar.c \
-	harfbuzz-thai.c
+	harfbuzz-myanmar.c

-EXTRA_SOURCES = harfbuzz.c
 PUBLICHEADERS = \
 	harfbuzz.h \
 	harfbuzz-buffer.h \
@ -299,11 +297,7 @@ libhb_old_la_SOURCES = \
 	$(PUBLICHEADERS) \
 	$(PRIVATEHEADERS)

-EXTRA_DIST = \
-	README		\
-	COPYING		\
-	$(EXTRA_SOURCES)
-
+EXTRA_DIST = README COPYING
 all: all-am

 .SUFFIXES:
@ -371,7 +365,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-open.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-shaper.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-stream.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-thai.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-tibetan.Plo@am__quote@

 .c.o:
--- a/src/hb-old/harfbuzz-external.h
+++ b/src/hb-old/harfbuzz-external.h
@ -35,21 +35,6 @@ HB_BEGIN_HEADER
 */


-/*
- see http://www.unicode.org/reports/tr14/tr14-19.html
- we don't use the XX, AI and CB properties and map them to AL instead.
- as we don't support any EBDIC based OS'es, NL is ignored and mapped to AL as well.
-*/
-typedef enum {
-    HB_LineBreak_OP, HB_LineBreak_CL, HB_LineBreak_QU, HB_LineBreak_GL, HB_LineBreak_NS,
-    HB_LineBreak_EX, HB_LineBreak_SY, HB_LineBreak_IS, HB_LineBreak_PR, HB_LineBreak_PO,
-    HB_LineBreak_NU, HB_LineBreak_AL, HB_LineBreak_ID, HB_LineBreak_IN, HB_LineBreak_HY,
-    HB_LineBreak_BA, HB_LineBreak_BB, HB_LineBreak_B2, HB_LineBreak_ZW, HB_LineBreak_CM,
-    HB_LineBreak_WJ, HB_LineBreak_H2, HB_LineBreak_H3, HB_LineBreak_JL, HB_LineBreak_JV,
-    HB_LineBreak_JT, HB_LineBreak_SA, HB_LineBreak_SG,
-    HB_LineBreak_SP, HB_LineBreak_CR, HB_LineBreak_LF, HB_LineBreak_BK
-} HB_LineBreakClass;
-
 typedef enum 
 {
    HB_Mark_NonSpacing,          /*   Mn */
@ -90,62 +75,11 @@ typedef enum
    HB_Symbol_Other              /*   So */
 } HB_CharCategory;

-typedef enum
-{
-    HB_Grapheme_Other, 
-    HB_Grapheme_CR,
-    HB_Grapheme_LF,
-    HB_Grapheme_Control,
-    HB_Grapheme_Extend,
-    HB_Grapheme_L, 
-    HB_Grapheme_V, 
-    HB_Grapheme_T, 
-    HB_Grapheme_LV, 
-    HB_Grapheme_LVT
-} HB_GraphemeClass;
-
-
-typedef enum
-{
-    HB_Word_Other,
-    HB_Word_Format,
-    HB_Word_Katakana,
-    HB_Word_ALetter,
-    HB_Word_MidLetter,
-    HB_Word_MidNum,
-    HB_Word_Numeric,
-    HB_Word_ExtendNumLet
-} HB_WordClass;
-
-
-typedef enum
-{
-    HB_Sentence_Other,
-    HB_Sentence_Sep,
-    HB_Sentence_Format,
-    HB_Sentence_Sp,
-    HB_Sentence_Lower,
-    HB_Sentence_Upper,
-    HB_Sentence_OLetter,
-    HB_Sentence_Numeric,
-    HB_Sentence_ATerm,
-    HB_Sentence_STerm,
-    HB_Sentence_Close
-} HB_SentenceClass;
-
-HB_GraphemeClass HB_GetGraphemeClass(HB_UChar32 ch);
-HB_WordClass HB_GetWordClass(HB_UChar32 ch);
-HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch);
-HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch);
-
-void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak);
 void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass);
 HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch);
 int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch);
 HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch);

-void *HB_Library_Resolve(const char *library, int version, const char *symbol);
-
 HB_END_HEADER

 #endif
--- a/src/hb-old/harfbuzz-indic.cpp
+++ b/src/hb-old/harfbuzz-indic.cpp
@ -1866,29 +1866,3 @@ HB_Bool HB_IndicShape(HB_ShaperItem *item)
    item->num_glyphs = first_glyph;
    return true;
 }
-
-void HB_IndicAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
-{
-    int end = from + len;
-    const HB_UChar16 *uc = text + from;
-    attributes += from;
-    hb_uint32 i = 0;
-    while (i < len) {
-        bool invalid;
-        hb_uint32 boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from;
-         attributes[i].charStop = true;
-
-        if (boundary > len-1) boundary = len;
-        i++;
-        while (i < boundary) {
-            attributes[i].charStop = false;
-            ++uc;
-            ++i;
-        }
-        assert(i == boundary);
-    }
-
-
-}
-
-
--- a/src/hb-old/harfbuzz-khmer.c
+++ b/src/hb-old/harfbuzz-khmer.c
@ -640,28 +640,3 @@ HB_Bool HB_KhmerShape(HB_ShaperItem *item)
    item->num_glyphs = first_glyph;
    return TRUE;
 }
-
-void HB_KhmerAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
-{
-    int end = from + len;
-    const HB_UChar16 *uc = text + from;
-    hb_uint32 i = 0;
-    HB_UNUSED(script);
-    attributes += from;
-    while ( i < len ) {
-	HB_Bool invalid;
-	hb_uint32 boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from;
-
-	attributes[i].charStop = TRUE;
-
-	if ( boundary > len-1 ) boundary = len;
-	i++;
-	while ( i < boundary ) {
-	    attributes[i].charStop = FALSE;
-	    ++uc;
-	    ++i;
-	}
-	assert( i == boundary );
-    }
-}
-
--- a/src/hb-old/harfbuzz-myanmar.c
+++ b/src/hb-old/harfbuzz-myanmar.c
@ -509,31 +509,3 @@ HB_Bool HB_MyanmarShape(HB_ShaperItem *item)
    item->num_glyphs = first_glyph;
    return TRUE;
 }
-
-void HB_MyanmarAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
-{
-    int end = from + len;
-    const HB_UChar16 *uc = text + from;
-    hb_uint32 i = 0;
-    HB_UNUSED(script);
-    attributes += from;
-    while (i < len) {
-	HB_Bool invalid;
-	hb_uint32 boundary = myanmar_nextSyllableBoundary(text, from+i, end, &invalid) - from;
-
-	attributes[i].charStop = TRUE;
-        if (i)
-            attributes[i-1].lineBreakType = HB_Break;
-
-	if (boundary > len-1)
-            boundary = len;
-	i++;
-	while (i < boundary) {
-	    attributes[i].charStop = FALSE;
-	    ++uc;
-	    ++i;
-	}
-	assert(i == boundary);
-    }
-}
-
--- a/src/hb-old/harfbuzz-shaper-private.h
+++ b/src/hb-old/harfbuzz-shaper-private.h
@ -93,11 +93,9 @@ typedef enum {

 /* return true if ok. */
 typedef HB_Bool (*HB_ShapeFunction)(HB_ShaperItem *shaper_item);
-typedef void (*HB_AttributeFunction)(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);

 typedef struct {
    HB_ShapeFunction shape;
-    HB_AttributeFunction charAttributes;
 } HB_ScriptEngine;

 extern const HB_ScriptEngine hb_scriptEngines[];
@ -112,16 +110,6 @@ extern HB_Bool HB_MyanmarShape(HB_ShaperItem *shaper_item);
 extern HB_Bool HB_KhmerShape(HB_ShaperItem *shaper_item);
 extern HB_Bool HB_IndicShape(HB_ShaperItem *shaper_item);

-extern void HB_TibetanAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
-
-extern void HB_MyanmarAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
-
-extern void HB_KhmerAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
-
-extern void HB_IndicAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
-
-extern void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
-
 typedef struct {
    hb_uint32 tag;
    hb_uint32 property;
--- a/src/hb-old/harfbuzz-shaper.cpp
+++ b/src/hb-old/harfbuzz-shaper.cpp
@ -32,205 +32,6 @@
 #define HB_MIN(a, b) ((a) < (b) ? (a) : (b))
 #define HB_MAX(a, b) ((a) > (b) ? (a) : (b))

-// -----------------------------------------------------------------------------------------------------
-//
-// The line break algorithm. See http://www.unicode.org/reports/tr14/tr14-13.html
-//
-// -----------------------------------------------------------------------------------------------------
-
-/* The Unicode algorithm does in our opinion allow line breaks at some
-   places they shouldn't be allowed. The following changes were thus
-   made in comparison to the Unicode reference:
-
-   EX->AL from DB to IB
-   SY->AL from DB to IB
-   SY->PO from DB to IB
-   SY->PR from DB to IB
-   SY->OP from DB to IB
-   AL->PR from DB to IB
-   AL->PO from DB to IB
-   PR->PR from DB to IB
-   PO->PO from DB to IB
-   PR->PO from DB to IB
-   PO->PR from DB to IB
-   HY->PO from DB to IB
-   HY->PR from DB to IB
-   HY->OP from DB to IB
-   NU->EX from PB to IB
-   EX->PO from DB to IB
-*/
-
-// The following line break classes are not treated by the table:
-//  AI, BK, CB, CR, LF, NL, SA, SG, SP, XX
-
-enum break_class {
-    // the first 4 values have to agree with the enum in QCharAttributes
-    ProhibitedBreak,            // PB in table
-    DirectBreak,                // DB in table
-    IndirectBreak,              // IB in table
-    CombiningIndirectBreak,     // CI in table
-    CombiningProhibitedBreak    // CP in table
-};
-#define DB DirectBreak
-#define IB IndirectBreak
-#define CI CombiningIndirectBreak
-#define CP CombiningProhibitedBreak
-#define PB ProhibitedBreak
-
-static const hb_uint8 breakTable[HB_LineBreak_JT+1][HB_LineBreak_JT+1] =
-{
-/*          OP  CL  QU  GL  NS  EX  SY  IS  PR  PO  NU  AL  ID  IN  HY  BA  BB  B2  ZW  CM  WJ  H2  H3  JL  JV  JT */
-/* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB },
-/* CL */ { DB, PB, IB, IB, PB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* QU */ { PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
-/* GL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
-/* NS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* EX */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* SY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* IS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* PR */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB },
-/* PO */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* NU */ { IB, PB, IB, IB, IB, IB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* AL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* ID */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* IN */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* HY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* BA */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* BB */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
-/* B2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB },
-/* CM */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* WJ */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
-/* H2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
-/* H3 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB },
-/* JL */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, DB },
-/* JV */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
-/* JT */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB }
-};
-#undef DB
-#undef IB
-#undef CI
-#undef CP
-#undef PB
-
-static const hb_uint8 graphemeTable[HB_Grapheme_LVT + 1][HB_Grapheme_LVT + 1] =
-{
-//      Other, CR,    LF,    Control,Extend,L,    V,     T,     LV,    LVT
-    { true , true , true , true , true , true , true , true , true , true  }, // Other, 
-    { true , true , true , true , true , true , true , true , true , true  }, // CR,
-    { true , false, true , true , true , true , true , true , true , true  }, // LF,
-    { true , true , true , true , true , true , true , true , true , true  }, // Control,
-    { false, true , true , true , false, false, false, false, false, false }, // Extend,
-    { true , true , true , true , true , false, true , true , true , true  }, // L, 
-    { true , true , true , true , true , false, false, true , false, true  }, // V, 
-    { true , true , true , true , true , true , false, false, false, false }, // T, 
-    { true , true , true , true , true , false, true , true , true , true  }, // LV, 
-    { true , true , true , true , true , false, true , true , true , true  }, // LVT
-};
-    
-static void calcLineBreaks(const HB_UChar16 *uc, hb_uint32 len, HB_CharAttributes *charAttributes)
-{
-    if (!len)
-        return;
-
-    // ##### can this fail if the first char is a surrogate?
-    HB_LineBreakClass cls;
-    HB_GraphemeClass grapheme;
-    HB_GetGraphemeAndLineBreakClass(*uc, &grapheme, &cls);
-    // handle case where input starts with an LF
-    if (cls == HB_LineBreak_LF)
-        cls = HB_LineBreak_BK;
-
-    charAttributes[0].whiteSpace = (cls == HB_LineBreak_SP || cls == HB_LineBreak_BK);
-    charAttributes[0].charStop = true;
-
-    int lcls = cls;
-    for (hb_uint32 i = 1; i < len; ++i) {
-        charAttributes[i].whiteSpace = false;
-        charAttributes[i].charStop = true;
-
-        HB_UChar32 code = uc[i];
-        HB_GraphemeClass ngrapheme;
-        HB_LineBreakClass ncls;
-        HB_GetGraphemeAndLineBreakClass(code, &ngrapheme, &ncls);
-        charAttributes[i].charStop = graphemeTable[ngrapheme][grapheme];
-        // handle surrogates
-        if (ncls == HB_LineBreak_SG) {
-            if (HB_IsHighSurrogate(uc[i]) && i < len - 1 && HB_IsLowSurrogate(uc[i+1])) {
-                continue;
-            } else if (HB_IsLowSurrogate(uc[i]) && HB_IsHighSurrogate(uc[i-1])) {
-                code = HB_SurrogateToUcs4(uc[i-1], uc[i]);
-                HB_GetGraphemeAndLineBreakClass(code, &ngrapheme, &ncls);
-                charAttributes[i].charStop = false;
-            } else {
-                ncls = HB_LineBreak_AL;
-            }
-        }
-
-        // set white space and char stop flag
-        if (ncls >= HB_LineBreak_SP)
-            charAttributes[i].whiteSpace = true;
-
-        HB_LineBreakType lineBreakType = HB_NoBreak;
-        if (cls >= HB_LineBreak_LF) {
-            lineBreakType = HB_ForcedBreak;
-        } else if(cls == HB_LineBreak_CR) {
-            lineBreakType = (ncls == HB_LineBreak_LF) ? HB_NoBreak : HB_ForcedBreak;
-        }
-
-        if (ncls == HB_LineBreak_SP)
-            goto next_no_cls_update;
-        if (ncls >= HB_LineBreak_CR)
-            goto next;
-
-        {
-            int tcls = ncls;
-            // for south east asian chars that require a complex (dictionary analysis), the unicode
-            // standard recommends to treat them as AL. thai_attributes and other attribute methods that
-            // do dictionary analysis can override
-            if (tcls >= HB_LineBreak_SA)
-                tcls = HB_LineBreak_AL;
-            if (cls >= HB_LineBreak_SA)
-                cls = HB_LineBreak_AL;
-
-            int brk = breakTable[cls][tcls];
-            switch (brk) {
-            case DirectBreak:
-                lineBreakType = HB_Break;
-                if (uc[i-1] == 0xad) // soft hyphen
-                    lineBreakType = HB_SoftHyphen;
-                break;
-            case IndirectBreak:
-                lineBreakType = (lcls == HB_LineBreak_SP) ? HB_Break : HB_NoBreak;
-                break;
-            case CombiningIndirectBreak:
-                lineBreakType = HB_NoBreak;
-                if (lcls == HB_LineBreak_SP){
-                    if (i > 1)
-                        charAttributes[i-2].lineBreakType = HB_Break;
-                } else {
-                    goto next_no_cls_update;
-                }
-                break;
-            case CombiningProhibitedBreak:
-                lineBreakType = HB_NoBreak;
-                if (lcls != HB_LineBreak_SP)
-                    goto next_no_cls_update;
-            case ProhibitedBreak:
-            default:
-                break;
-            }
-        }
-    next:
-        cls = ncls;
-    next_no_cls_update:
-        lcls = ncls;
-        grapheme = ngrapheme;
-        charAttributes[i-1].lineBreakType = lineBreakType;
-    }
-    charAttributes[len-1].lineBreakType = HB_ForcedBreak;
-}
-
 // --------------------------------------------------------------------------------------------------------------------------------------------
 //
 // Basic processing
@ -582,210 +383,63 @@ HB_Bool HB_BasicShape(HB_ShaperItem *shaper_item)

 const HB_ScriptEngine HB_ScriptEngines[] = {
    // Common
-    { HB_BasicShape, 0},
+    { HB_BasicShape},
    // Greek
-    { HB_GreekShape, 0},
+    { HB_GreekShape},
    // Cyrillic
-    { HB_BasicShape, 0},
+    { HB_BasicShape},
    // Armenian
-    { HB_BasicShape, 0},
+    { HB_BasicShape},
    // Hebrew
-    { HB_HebrewShape, 0 },
+    { HB_HebrewShape},
    // Arabic
-    { HB_ArabicShape, 0},
+    { HB_ArabicShape},
    // Syriac
-    { HB_ArabicShape, 0},
+    { HB_ArabicShape},
    // Thaana
-    { HB_BasicShape, 0 },
+    { HB_BasicShape},
    // Devanagari
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Bengali
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Gurmukhi
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Gujarati
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Oriya
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Tamil
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Telugu
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Kannada
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Malayalam
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Sinhala
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Thai
-    { HB_BasicShape, HB_ThaiAttributes },
+    { HB_BasicShape},
    // Lao
-    { HB_BasicShape, 0 },
+    { HB_BasicShape},
    // Tibetan
-    { HB_TibetanShape, HB_TibetanAttributes },
+    { HB_TibetanShape},
    // Myanmar
-    { HB_MyanmarShape, HB_MyanmarAttributes },
+    { HB_MyanmarShape},
    // Georgian
-    { HB_BasicShape, 0 },
+    { HB_BasicShape},
    // Hangul
-    { HB_HangulShape, 0 },
+    { HB_HangulShape},
    // Ogham
-    { HB_BasicShape, 0 },
+    { HB_BasicShape},
    // Runic
-    { HB_BasicShape, 0 },
+    { HB_BasicShape},
    // Khmer
-    { HB_KhmerShape, HB_KhmerAttributes },
+    { HB_KhmerShape},
    // N'Ko
-    { HB_ArabicShape, 0}
+    { HB_ArabicShape}
 };

-void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
-                          const HB_ScriptItem *items, hb_uint32 numItems,
-                          HB_CharAttributes *attributes)
-{
-    calcLineBreaks(string, stringLength, attributes);
-
-    for (hb_uint32 i = 0; i < numItems; ++i) {
-        HB_Script script = items[i].script;
-        if (script == HB_Script_Inherited)
-            script = HB_Script_Common;
-        HB_AttributeFunction attributeFunction = HB_ScriptEngines[script].charAttributes;
-        if (!attributeFunction)
-            continue;
-        attributeFunction(script, string, items[i].pos, items[i].length, attributes);
-    }
-}
-
-
-enum BreakRule { NoBreak = 0, Break = 1, Middle = 2 };
-
-static const hb_uint8 wordbreakTable[HB_Word_ExtendNumLet + 1][HB_Word_ExtendNumLet + 1] = {
-//        Other    Format   Katakana ALetter  MidLetter MidNum  Numeric  ExtendNumLet
-    {   Break,   Break,   Break,   Break,   Break,   Break,   Break,   Break }, // Other
-    {   Break,   Break,   Break,   Break,   Break,   Break,   Break,   Break }, // Format 
-    {   Break,   Break, NoBreak,   Break,   Break,   Break,   Break, NoBreak }, // Katakana
-    {   Break,   Break,   Break, NoBreak,  Middle,   Break, NoBreak, NoBreak }, // ALetter
-    {   Break,   Break,   Break,   Break,   Break,   Break,   Break,   Break }, // MidLetter
-    {   Break,   Break,   Break,   Break,   Break,   Break,   Break,   Break }, // MidNum
-    {   Break,   Break,   Break, NoBreak,   Break,  Middle, NoBreak, NoBreak }, // Numeric
-    {   Break,   Break, NoBreak, NoBreak,   Break,   Break, NoBreak, NoBreak }, // ExtendNumLet
-};
-
-void HB_GetWordBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
-                          const HB_ScriptItem * /*items*/, hb_uint32 /*numItems*/,
-                          HB_CharAttributes *attributes)
-{
-    if (stringLength == 0)
-        return;
-    unsigned int brk = HB_GetWordClass(string[0]);
-    attributes[0].wordBoundary = true;
-    for (hb_uint32 i = 1; i < stringLength; ++i) {
-        if (!attributes[i].charStop) {
-            attributes[i].wordBoundary = false;
-            continue;
-        }
-        hb_uint32 nbrk = HB_GetWordClass(string[i]);
-        if (nbrk == HB_Word_Format) {
-            attributes[i].wordBoundary = (HB_GetSentenceClass(string[i-1]) == HB_Sentence_Sep);
-            continue;
-        }
-        BreakRule rule = (BreakRule)wordbreakTable[brk][nbrk];
-        if (rule == Middle) {
-            rule = Break;
-            hb_uint32 lookahead = i + 1;
-            while (lookahead < stringLength) {
-                hb_uint32 testbrk = HB_GetWordClass(string[lookahead]);
-                if (testbrk == HB_Word_Format && HB_GetSentenceClass(string[lookahead]) != HB_Sentence_Sep) {
-                    ++lookahead;
-                    continue;
-                }
-                if (testbrk == brk) {
-                    rule = NoBreak;
-                    while (i < lookahead)
-                        attributes[i++].wordBoundary = false;
-                    nbrk = testbrk;
-                }
-                break;
-            }
-        }
-        attributes[i].wordBoundary = (rule == Break);
-        brk = nbrk;
-    }
-}
-
-
-enum SentenceBreakStates {
-    SB_Initial,
-    SB_Upper,
-    SB_UpATerm, 
-    SB_ATerm,
-    SB_ATermC, 
-    SB_ACS, 
-    SB_STerm, 
-    SB_STermC, 
-    SB_SCS,
-    SB_BAfter, 
-    SB_Break,
-    SB_Look
-};
-
-static const hb_uint8 sentenceBreakTable[HB_Sentence_Close + 1][HB_Sentence_Close + 1] = {
-//        Other       Sep         Format      Sp          Lower       Upper       OLetter     Numeric     ATerm       STerm       Close
-      { SB_Initial, SB_BAfter , SB_Initial, SB_Initial, SB_Initial, SB_Upper  , SB_Initial, SB_Initial, SB_ATerm  , SB_STerm  , SB_Initial }, // SB_Initial,
-      { SB_Initial, SB_BAfter , SB_Upper  , SB_Initial, SB_Initial, SB_Upper  , SB_Initial, SB_Initial, SB_UpATerm, SB_STerm  , SB_Initial }, // SB_Upper
-      
-      { SB_Look   , SB_BAfter , SB_UpATerm, SB_ACS    , SB_Initial, SB_Upper  , SB_Break  , SB_Initial, SB_ATerm  , SB_STerm  , SB_ATermC  }, // SB_UpATerm
-      { SB_Look   , SB_BAfter , SB_ATerm  , SB_ACS    , SB_Initial, SB_Break  , SB_Break  , SB_Initial, SB_ATerm  , SB_STerm  , SB_ATermC  }, // SB_ATerm
-      { SB_Look   , SB_BAfter , SB_ATermC , SB_ACS    , SB_Initial, SB_Break  , SB_Break  , SB_Look   , SB_ATerm  , SB_STerm  , SB_ATermC  }, // SB_ATermC,
-      { SB_Look   , SB_BAfter , SB_ACS    , SB_ACS    , SB_Initial, SB_Break  , SB_Break  , SB_Look   , SB_ATerm  , SB_STerm  , SB_Look    }, // SB_ACS,
-      
-      { SB_Break  , SB_BAfter , SB_STerm  , SB_SCS    , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_ATerm  , SB_STerm  , SB_STermC  }, // SB_STerm,
-      { SB_Break  , SB_BAfter , SB_STermC , SB_SCS    , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_ATerm  , SB_STerm  , SB_STermC  }, // SB_STermC,
-      { SB_Break  , SB_BAfter , SB_SCS    , SB_SCS    , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_ATerm  , SB_STerm  , SB_Break   }, // SB_SCS,
-      { SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break   }, // SB_BAfter,
-};
-
-void HB_GetSentenceBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
-                              const HB_ScriptItem * /*items*/, hb_uint32 /*numItems*/,
-                              HB_CharAttributes *attributes)
-{
-    if (stringLength == 0)
-        return;
-    hb_uint32 brk = sentenceBreakTable[SB_Initial][HB_GetSentenceClass(string[0])];
-    attributes[0].sentenceBoundary = true;
-    for (hb_uint32 i = 1; i < stringLength; ++i) {
-        if (!attributes[i].charStop) {
-            attributes[i].sentenceBoundary = false;
-            continue;
-        }
-        brk = sentenceBreakTable[brk][HB_GetSentenceClass(string[i])];
-        if (brk == SB_Look) {
-            brk = SB_Break;
-            hb_uint32 lookahead = i + 1;
-            while (lookahead < stringLength) {
-                hb_uint32 sbrk = HB_GetSentenceClass(string[lookahead]);
-                if (sbrk != HB_Sentence_Other && sbrk != HB_Sentence_Numeric && sbrk != HB_Sentence_Close) {
-                    break;
-                } else if (sbrk == HB_Sentence_Lower) {
-                    brk = SB_Initial;
-                    break;
-                }
-                ++lookahead;
-            }
-            if (brk == SB_Initial) {
-                while (i < lookahead)
-                    attributes[i++].sentenceBoundary = false;
-            }
-        }
-        if (brk == SB_Break) {
-            attributes[i].sentenceBoundary = true;
-            brk = sentenceBreakTable[SB_Initial][HB_GetSentenceClass(string[i])];
-        } else {
-            attributes[i].sentenceBoundary = false;
-        }
-    }
-}
-

 static inline char *tag_to_string(HB_UInt tag)
 {
@ -1335,4 +989,3 @@ HB_Bool HB_ShapeItem(HB_ShaperItem *shaper_item)
    shaper_item->glyphIndicesPresent = false;
    return result;
 }
-
--- a/src/hb-old/harfbuzz-shaper.h
+++ b/src/hb-old/harfbuzz-shaper.h
@ -130,37 +130,6 @@ typedef struct
    hb_uint8 bidiLevel;
 } HB_ScriptItem;

-typedef enum {
-    HB_NoBreak,
-    HB_SoftHyphen,
-    HB_Break,
-    HB_ForcedBreak
-} HB_LineBreakType;
-
-
-typedef struct {
-    /*HB_LineBreakType*/ hb_bitfield lineBreakType  :2;
-    /*HB_Bool*/ hb_bitfield whiteSpace              :1;     /* A unicode whitespace character, except NBSP, ZWNBSP */
-    /*HB_Bool*/ hb_bitfield charStop                :1;     /* Valid cursor position (for left/right arrow) */
-    /*HB_Bool*/ hb_bitfield wordBoundary            :1;
-    /*HB_Bool*/ hb_bitfield sentenceBoundary        :1;
-    hb_bitfield unused                  :2;
-} HB_CharAttributes;
-
-void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
-                          const HB_ScriptItem *items, hb_uint32 numItems,
-                          HB_CharAttributes *attributes);
-
-/* requires HB_GetCharAttributes to be called before */
-void HB_GetWordBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
-                          const HB_ScriptItem *items, hb_uint32 numItems,
-                          HB_CharAttributes *attributes);
-
-/* requires HB_GetCharAttributes to be called before */
-void HB_GetSentenceBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
-                              const HB_ScriptItem *items, hb_uint32 numItems,
-                              HB_CharAttributes *attributes);
-

 typedef enum {
    HB_LeftToRight = 0,
--- a/src/hb-old/harfbuzz-thai.c
+++ b/src/hb-old/harfbuzz-thai.c
@ -1,111 +0,0 @@
-/*
- * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
- *
- * This is part of HarfBuzz, an OpenType Layout engine library.
- *
- * Permission is hereby granted, without written agreement and without
- * license or royalty fees, to use, copy, modify, and distribute this
- * software and its documentation for any purpose, provided that the
- * above copyright notice and the following two paragraphs appear in
- * all copies of this software.
- *
- * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
- * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
- * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- *
- * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
- * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
- * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
- */
-
-#include "harfbuzz-shaper.h"
-#include "harfbuzz-shaper-private.h"
-#include "harfbuzz-external.h"
-
-#include <assert.h>
-#include <stdio.h>
-
-typedef int (*th_brk_def)(const char*, int[], int);
-static th_brk_def th_brk = 0;
-static int libthai_resolved = 0;
-
-static void resolve_libthai()
-{
-    if (!th_brk)
-        th_brk = (th_brk_def)HB_Library_Resolve("thai", 0, "th_brk");
-    libthai_resolved = 1;
-}
-
-static void to_tis620(const HB_UChar16 *string, hb_uint32 len, const char *cstr)
-{
-    hb_uint32 i;
-    unsigned char *result = (unsigned char *)cstr;
-
-    for (i = 0; i < len; ++i) {
-        if (string[i] <= 0xa0)
-            result[i] = (unsigned char)string[i];
-        if (string[i] >= 0xe01 && string[i] <= 0xe5b)
-            result[i] = (unsigned char)(string[i] - 0xe00 + 0xa0);
-        else
-            result[i] = '?';
-    }
-
-    result[len] = 0;
-}
-
-static void thaiWordBreaks(const HB_UChar16 *string, hb_uint32 len, HB_CharAttributes *attributes)
-{
-    char s[128];
-    char *cstr = s;
-    int brp[128];
-    int *break_positions = brp;
-    hb_uint32 numbreaks;
-    hb_uint32 i;
-
-    if (!libthai_resolved)
-        resolve_libthai();
-
-    if (!th_brk)
-        return;
-
-    if (len >= 128)
-        cstr = (char *)malloc(len*sizeof(char) + 1);
-
-    to_tis620(string, len, cstr);
-
-    numbreaks = th_brk(cstr, break_positions, 128);
-    if (numbreaks > 128) {
-        break_positions = (int *)malloc(numbreaks * sizeof(int));
-        numbreaks = th_brk(cstr, break_positions, numbreaks);
-    }
-
-    for (i = 0; i < len; ++i) {
-        attributes[i].lineBreakType = HB_NoBreak;
-        attributes[i].wordBoundary = FALSE;
-    }
-
-    for (i = 0; i < numbreaks; ++i) {
-        if (break_positions[i] > 0) {
-            attributes[break_positions[i]-1].lineBreakType = HB_Break;
-            attributes[break_positions[i]-1].wordBoundary = TRUE;
-        }
-    }
-
-    if (break_positions != brp)
-        free(break_positions);
-
-    if (len >= 128)
-        free(cstr);
-}
-
-void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
-{
-    assert(script == HB_Script_Thai);
-    attributes += from;
-    thaiWordBreaks(text + from, len, attributes);
-}
-
--- a/src/hb-old/harfbuzz-tibetan.c
+++ b/src/hb-old/harfbuzz-tibetan.c
@ -246,29 +246,3 @@ HB_Bool HB_TibetanShape(HB_ShaperItem *item)
    item->num_glyphs = first_glyph;
    return TRUE;
 }
-
-void HB_TibetanAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
-{
-    int end = from + len;
-    const HB_UChar16 *uc = text + from;
-    hb_uint32 i = 0;
-    HB_UNUSED(script);
-    attributes += from;
-    while (i < len) {
-        HB_Bool invalid;
-        hb_uint32 boundary = tibetan_nextSyllableBoundary(text, from+i, end, &invalid) - from;
-
-        attributes[i].charStop = TRUE;
-
-        if (boundary > len-1) boundary = len;
-        i++;
-        while (i < boundary) {
-            attributes[i].charStop = FALSE;
-            ++uc;
-            ++i;
-        }
-        assert(i == boundary);
-    }
-}
-
-
--- a/src/hb-old/harfbuzz.c
+++ b/src/hb-old/harfbuzz.c
@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2006  Behdad Esfahbod
- *
- * This is part of HarfBuzz, an OpenType Layout engine library.
- *
- * Permission is hereby granted, without written agreement and without
- * license or royalty fees, to use, copy, modify, and distribute this
- * software and its documentation for any purpose, provided that the
- * above copyright notice and the following two paragraphs appear in
- * all copies of this software.
- *
- * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
- * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
- * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- *
- * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
- * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
- * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
- */
-
-#define HB_INTERNAL static
-#include "harfbuzz-buffer.c"
-#include "harfbuzz-gdef.c"
-#include "harfbuzz-gsub.c"
-#include "harfbuzz-gpos.c"
-#include "harfbuzz-impl.c"
-#include "harfbuzz-open.c"
-#include "harfbuzz-stream.c"