[hb-old] Shovel out the line-breaking / word-segmentation stuff

2012-07-24 19:49:48 -04:00 · 2012-07-24 19:49:48 -04:00 · 4a31166b28
parent 0bcbe88cf3
commit 4a31166b28
12 changed files with 33 additions and 750 deletions
--- a/src/hb-old/Makefile.am
+++ b/src/hb-old/Makefile.am
@ -18,10 +18,7 @@ MAINSOURCES =  \
 	harfbuzz-hebrew.c \
 	harfbuzz-arabic.c \
 	harfbuzz-hangul.c \
-	harfbuzz-myanmar.c \
+	harfbuzz-myanmar.c
 	harfbuzz-thai.c
 EXTRA_SOURCES = harfbuzz.c
 PUBLICHEADERS = \
 	harfbuzz.h \
@ -50,7 +47,4 @@ libhb_old_la_SOURCES = \
 	$(PUBLICHEADERS) \
 	$(PRIVATEHEADERS)
-EXTRA_DIST = 		\
+EXTRA_DIST = README COPYING
 	README		\
 	COPYING		\
 	$(EXTRA_SOURCES)
--- a/src/hb-old/Makefile.in
+++ b/src/hb-old/Makefile.in
@ -52,7 +52,7 @@ am__objects_1 = harfbuzz-buffer.lo harfbuzz-stream.lo harfbuzz-gdef.lo \
 	harfbuzz-open.lo harfbuzz-shaper.lo harfbuzz-greek.lo \
 	harfbuzz-tibetan.lo harfbuzz-khmer.lo harfbuzz-indic.lo \
 	harfbuzz-hebrew.lo harfbuzz-arabic.lo harfbuzz-hangul.lo \
-	harfbuzz-myanmar.lo harfbuzz-thai.lo
+	harfbuzz-myanmar.lo
 am__objects_2 =
 am_libhb_old_la_OBJECTS = $(am__objects_1) $(am__objects_2) \
 	$(am__objects_2)
@ -268,10 +268,8 @@ MAINSOURCES = \
 	harfbuzz-hebrew.c \
 	harfbuzz-arabic.c \
 	harfbuzz-hangul.c \
-	harfbuzz-myanmar.c \
+	harfbuzz-myanmar.c
 	harfbuzz-thai.c
 EXTRA_SOURCES = harfbuzz.c
 PUBLICHEADERS = \
 	harfbuzz.h \
 	harfbuzz-buffer.h \
@ -299,11 +297,7 @@ libhb_old_la_SOURCES = \
 	$(PUBLICHEADERS) \
 	$(PRIVATEHEADERS)
-EXTRA_DIST = \
+EXTRA_DIST = README COPYING
 	README		\
 	COPYING		\
 	$(EXTRA_SOURCES)
 all: all-am
 .SUFFIXES:
@ -371,7 +365,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-open.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-shaper.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-stream.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-thai.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/harfbuzz-tibetan.Plo@am__quote@
 .c.o:
--- a/src/hb-old/harfbuzz-external.h
+++ b/src/hb-old/harfbuzz-external.h
@ -35,21 +35,6 @@ HB_BEGIN_HEADER
 */
 /*
 see http://www.unicode.org/reports/tr14/tr14-19.html
 We don't use the XX, AI and CB properties and map them to AL instead.
 As we don't support any EBCDIC-based OSes, NL is ignored and mapped to AL as well.
 */
 /*
  * Line-break classes.
  *
  * The enumerator order is significant: calcLineBreaks() compares classes
  * with >= (">= HB_LineBreak_SA", ">= HB_LineBreak_SP", ">= HB_LineBreak_CR",
  * ">= HB_LineBreak_LF"), and breakTable is dimensioned HB_LineBreak_JT+1,
  * so only OP..JT may be used as table indices.
  */
 typedef enum {
    HB_LineBreak_OP, HB_LineBreak_CL, HB_LineBreak_QU, HB_LineBreak_GL, HB_LineBreak_NS,
    HB_LineBreak_EX, HB_LineBreak_SY, HB_LineBreak_IS, HB_LineBreak_PR, HB_LineBreak_PO,
    HB_LineBreak_NU, HB_LineBreak_AL, HB_LineBreak_ID, HB_LineBreak_IN, HB_LineBreak_HY,
    HB_LineBreak_BA, HB_LineBreak_BB, HB_LineBreak_B2, HB_LineBreak_ZW, HB_LineBreak_CM,
    HB_LineBreak_WJ, HB_LineBreak_H2, HB_LineBreak_H3, HB_LineBreak_JL, HB_LineBreak_JV,
    HB_LineBreak_JT, HB_LineBreak_SA, HB_LineBreak_SG,
    HB_LineBreak_SP, HB_LineBreak_CR, HB_LineBreak_LF, HB_LineBreak_BK
 } HB_LineBreakClass;
 typedef enum 
 {
    HB_Mark_NonSpacing,          /*   Mn */
@ -90,62 +75,11 @@ typedef enum
    HB_Symbol_Other              /*   So */
 } HB_CharCategory;
 /*
  * Grapheme-cluster classes. Values are used directly as indices into
  * graphemeTable, which is dimensioned HB_Grapheme_LVT + 1, so HB_Grapheme_LVT
  * must remain the last enumerator.
  */
 typedef enum
 {
    HB_Grapheme_Other, 
    HB_Grapheme_CR,
    HB_Grapheme_LF,
    HB_Grapheme_Control,
    HB_Grapheme_Extend,
    HB_Grapheme_L, 
    HB_Grapheme_V, 
    HB_Grapheme_T, 
    HB_Grapheme_LV, 
    HB_Grapheme_LVT
 } HB_GraphemeClass;
 /*
  * Word-break classes. Values are used directly as indices into
  * wordbreakTable, which is dimensioned HB_Word_ExtendNumLet + 1, so
  * HB_Word_ExtendNumLet must remain the last enumerator.
  */
 typedef enum
 {
    HB_Word_Other,
    HB_Word_Format,
    HB_Word_Katakana,
    HB_Word_ALetter,
    HB_Word_MidLetter,
    HB_Word_MidNum,
    HB_Word_Numeric,
    HB_Word_ExtendNumLet
 } HB_WordClass;
 /*
  * Sentence-break classes. Values are used as the column index of
  * sentenceBreakTable (dimensioned HB_Sentence_Close + 1), so the order here
  * must match the column order of that table.
  */
 typedef enum
 {
    HB_Sentence_Other,
    HB_Sentence_Sep,
    HB_Sentence_Format,
    HB_Sentence_Sp,
    HB_Sentence_Lower,
    HB_Sentence_Upper,
    HB_Sentence_OLetter,
    HB_Sentence_Numeric,
    HB_Sentence_ATerm,
    HB_Sentence_STerm,
    HB_Sentence_Close
 } HB_SentenceClass;
 /*
  * Unicode property lookups and library resolution. Only the declarations are
  * visible here.
  * NOTE(review): the header name suggests the embedding application supplies
  * the definitions - confirm.
  */
 /* Per-code-point class lookups used by the segmentation code. */
 HB_GraphemeClass HB_GetGraphemeClass(HB_UChar32 ch);
 HB_WordClass HB_GetWordClass(HB_UChar32 ch);
 HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch);
 HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch);
 /* Combined lookup: writes both classes for ch through the two out-pointers. */
 void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak);
 /* Combined lookup: writes category and combining class through the out-pointers. */
 void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass);
 HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch);
 int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch);
 /* Takes and returns a 16-bit code unit, unlike the HB_UChar32 lookups above. */
 HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch);
 /* Called by harfbuzz-thai.c as HB_Library_Resolve("thai", 0, "th_brk"); a null result is treated there as "unavailable". */
 void *HB_Library_Resolve(const char *library, int version, const char *symbol);
 HB_END_HEADER
 #endif
--- a/src/hb-old/harfbuzz-indic.cpp
+++ b/src/hb-old/harfbuzz-indic.cpp
@ -1866,29 +1866,3 @@ HB_Bool HB_IndicShape(HB_ShaperItem *item)
    item->num_glyphs = first_glyph;
    return true;
 }
 /*
  * Marks cursor stops for an Indic run: within text[from .. from+len) the
  * first code unit of every span reported by indic_nextSyllableBoundary()
  * gets charStop = true and the remaining units of that span get false.
  * The `invalid` flag reported by the boundary helper is ignored.
  */
 void HB_IndicAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
 {
     const int runEnd = from + len;
     HB_CharAttributes *run = attributes + from;   // run[k] describes text[from + k]

     hb_uint32 pos = 0;
     while (pos < len) {
         bool invalid;
         // The helper returns an absolute index; make it relative to the run.
         hb_uint32 next = indic_nextSyllableBoundary(script, text, from + pos, runEnd, &invalid) - from;

         run[pos].charStop = true;

         // Never step past the end of the run.
         if (next > len - 1)
             next = len;

         for (++pos; pos < next; ++pos)
             run[pos].charStop = false;

         assert(pos == next);
     }
 }
--- a/src/hb-old/harfbuzz-khmer.c
+++ b/src/hb-old/harfbuzz-khmer.c
@ -640,28 +640,3 @@ HB_Bool HB_KhmerShape(HB_ShaperItem *item)
    item->num_glyphs = first_glyph;
    return TRUE;
 }
 /*
  * Marks cursor stops for a Khmer run: within text[from .. from+len) the
  * first code unit of every span reported by khmer_nextSyllableBoundary()
  * gets charStop = TRUE and the remaining units of that span get FALSE.
  * `script` is unused; the `invalid` flag from the helper is ignored.
  */
 void HB_KhmerAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
 {
     const int run_end = from + len;
     HB_CharAttributes *run = attributes + from;   /* run[k] describes text[from + k] */
     hb_uint32 pos = 0;

     HB_UNUSED(script);

     while (pos < len) {
         HB_Bool invalid;
         /* The helper returns an absolute index; make it relative to the run. */
         hb_uint32 next = khmer_nextSyllableBoundary(text, from + pos, run_end, &invalid) - from;

         run[pos].charStop = TRUE;

         /* Never step past the end of the run. */
         if (next > len - 1)
             next = len;

         for (++pos; pos < next; ++pos)
             run[pos].charStop = FALSE;

         assert(pos == next);
     }
 }
--- a/src/hb-old/harfbuzz-myanmar.c
+++ b/src/hb-old/harfbuzz-myanmar.c
@ -509,31 +509,3 @@ HB_Bool HB_MyanmarShape(HB_ShaperItem *item)
    item->num_glyphs = first_glyph;
    return TRUE;
 }
 /*
  * Marks cursor stops and break opportunities for a Myanmar run. Within
  * text[from .. from+len) the first code unit of every span reported by
  * myanmar_nextSyllableBoundary() gets charStop = TRUE, the remaining units
  * of that span get FALSE, and the unit just before each span start (except
  * the first one) gets lineBreakType = HB_Break.
  * `script` is unused; the `invalid` flag from the helper is ignored.
  */
 void HB_MyanmarAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
 {
     const int run_end = from + len;
     HB_CharAttributes *run = attributes + from;   /* run[k] describes text[from + k] */
     hb_uint32 pos = 0;

     HB_UNUSED(script);

     while (pos < len) {
         HB_Bool invalid;
         /* The helper returns an absolute index; make it relative to the run. */
         hb_uint32 next = myanmar_nextSyllableBoundary(text, from + pos, run_end, &invalid) - from;

         run[pos].charStop = TRUE;
         if (pos != 0)
             run[pos - 1].lineBreakType = HB_Break;

         /* Never step past the end of the run. */
         if (next > len - 1)
             next = len;

         for (++pos; pos < next; ++pos)
             run[pos].charStop = FALSE;

         assert(pos == next);
     }
 }
--- a/src/hb-old/harfbuzz-shaper-private.h
+++ b/src/hb-old/harfbuzz-shaper-private.h
@ -93,11 +93,9 @@ typedef enum {
 /* return true if ok. */
 /* Shaping entry point for one script; returns true if ok. */
 typedef HB_Bool (*HB_ShapeFunction)(HB_ShaperItem *shaper_item);
 /*
  * Per-script attribute hook. HB_GetCharAttributes() calls it with the item's
  * script, the whole string, the item's pos/length as from/len, and the
  * attribute array for the whole string.
  */
 typedef void (*HB_AttributeFunction)(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
 /* One entry per script; charAttributes may be 0, meaning no script-specific pass. */
 typedef struct {
    HB_ShapeFunction shape;
    HB_AttributeFunction charAttributes;
 } HB_ScriptEngine;
 /* NOTE(review): harfbuzz-shaper.cpp defines and uses HB_ScriptEngines (capital HB_); this lower-case name does not match it - confirm which is intended. */
 extern const HB_ScriptEngine hb_scriptEngines[];
@ -112,16 +110,6 @@ extern HB_Bool HB_MyanmarShape(HB_ShaperItem *shaper_item);
 extern HB_Bool HB_KhmerShape(HB_ShaperItem *shaper_item);
 extern HB_Bool HB_IndicShape(HB_ShaperItem *shaper_item);
 /* Script-specific attribute hooks; each has the HB_AttributeFunction signature. */
 extern void HB_TibetanAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
 extern void HB_MyanmarAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
 extern void HB_KhmerAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
 extern void HB_IndicAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
 extern void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *string, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes);
 typedef struct {
    hb_uint32 tag;
    hb_uint32 property;
--- a/src/hb-old/harfbuzz-shaper.cpp
+++ b/src/hb-old/harfbuzz-shaper.cpp
@ -32,205 +32,6 @@
 #define HB_MIN(a, b) ((a) < (b) ? (a) : (b))
 #define HB_MAX(a, b) ((a) > (b) ? (a) : (b))
 // -----------------------------------------------------------------------------------------------------
 //
 // The line break algorithm. See http://www.unicode.org/reports/tr14/tr14-13.html
 //
 // -----------------------------------------------------------------------------------------------------
 /* The Unicode algorithm does in our opinion allow line breaks at some
   places they shouldn't be allowed. The following changes were thus
   made in comparison to the Unicode reference:
   EX->AL from DB to IB
   SY->AL from DB to IB
   SY->PO from DB to IB
   SY->PR from DB to IB
   SY->OP from DB to IB
   AL->PR from DB to IB
   AL->PO from DB to IB
   PR->PR from DB to IB
   PO->PO from DB to IB
   PR->PO from DB to IB
   PO->PR from DB to IB
   HY->PO from DB to IB
   HY->PR from DB to IB
   HY->OP from DB to IB
   NU->EX from PB to IB
   EX->PO from DB to IB
 */
 // The following line break classes are not treated by the table:
 //  AI, BK, CB, CR, LF, NL, SA, SG, SP, XX
 // Cell values of breakTable. calcLineBreaks() switches on these to decide the
 // HB_LineBreakType stored for the position before the current code unit.
 enum break_class {
    // the first 4 values have to agree with the enum in QCharAttributes
    ProhibitedBreak,            // PB in table
    DirectBreak,                // DB in table
    IndirectBreak,              // IB in table
    CombiningIndirectBreak,     // CI in table
    CombiningProhibitedBreak    // CP in table
 };
 // Two-letter aliases so the table below stays readable; they are #undef'd
 // again right after the table.
 #define DB DirectBreak
 #define IB IndirectBreak
 #define CI CombiningIndirectBreak
 #define CP CombiningProhibitedBreak
 #define PB ProhibitedBreak
 // Pair table indexed as breakTable[cls][tcls] in calcLineBreaks(): the row is
 // the class carried over from earlier text, the column is the class of the
 // current code unit. Only classes OP..JT are valid indices; calcLineBreaks()
 // maps classes >= HB_LineBreak_SA to AL before the lookup.
 static const hb_uint8 breakTable[HB_LineBreak_JT+1][HB_LineBreak_JT+1] =
 {
 /*          OP  CL  QU  GL  NS  EX  SY  IS  PR  PO  NU  AL  ID  IN  HY  BA  BB  B2  ZW  CM  WJ  H2  H3  JL  JV  JT */
 /* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB },
 /* CL */ { DB, PB, IB, IB, PB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
 /* QU */ { PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
 /* GL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
 /* NS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
 /* EX */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
 /* SY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
 /* IS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
 /* PR */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB },
 /* PO */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
 /* NU */ { IB, PB, IB, IB, IB, IB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
 /* AL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
 /* ID */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
 /* IN */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
 /* HY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
 /* BA */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
 /* BB */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
 /* B2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB },
 /* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB },
 /* CM */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
 /* WJ */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
 /* H2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
 /* H3 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB },
 /* JL */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, DB },
 /* JV */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
 /* JT */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB }
 };
 #undef DB
 #undef IB
 #undef CI
 #undef CP
 #undef PB
 // Cursor-stop table, read in calcLineBreaks() as graphemeTable[ngrapheme][grapheme]:
 // the row is the class of the current code unit, the column is the class of the
 // previous one. The cell value is stored as the current unit's charStop flag.
 static const hb_uint8 graphemeTable[HB_Grapheme_LVT + 1][HB_Grapheme_LVT + 1] =
 {
 //      Other, CR,    LF,    Control,Extend,L,    V,     T,     LV,    LVT
    { true , true , true , true , true , true , true , true , true , true  }, // Other, 
    { true , true , true , true , true , true , true , true , true , true  }, // CR,
    { true , false, true , true , true , true , true , true , true , true  }, // LF,
    { true , true , true , true , true , true , true , true , true , true  }, // Control,
    { false, true , true , true , false, false, false, false, false, false }, // Extend,
    { true , true , true , true , true , false, true , true , true , true  }, // L, 
    { true , true , true , true , true , false, false, true , false, true  }, // V, 
    { true , true , true , true , true , true , false, false, false, false }, // T, 
    { true , true , true , true , true , false, true , true , true , true  }, // LV, 
    { true , true , true , true , true , false, true , true , true , true  }, // LVT
 };
 // Generic (script-independent) pass over len UTF-16 code units in uc.
 // For every unit it fills whiteSpace, charStop and lineBreakType in
 // charAttributes. The lineBreakType written at index i-1 describes the break
 // opportunity between unit i-1 and unit i; the last unit always receives
 // HB_ForcedBreak. Does nothing when len is 0.
 static void calcLineBreaks(const HB_UChar16 *uc, hb_uint32 len, HB_CharAttributes *charAttributes)
 {
    if (!len)
        return;
    // ##### can this fail if the first char is a surrogate?
    HB_LineBreakClass cls;
    HB_GraphemeClass grapheme;
    HB_GetGraphemeAndLineBreakClass(*uc, &grapheme, &cls);
    // handle case where input starts with an LF
    if (cls == HB_LineBreak_LF)
        cls = HB_LineBreak_BK;
    charAttributes[0].whiteSpace = (cls == HB_LineBreak_SP || cls == HB_LineBreak_BK);
    charAttributes[0].charStop = true;
    // cls  : class used as the row of breakTable; not updated for spaces or
    //        for the "goto next_no_cls_update" combining cases below.
    // lcls : class of the immediately preceding unit, updated on every iteration.
    int lcls = cls;
    for (hb_uint32 i = 1; i < len; ++i) {
        charAttributes[i].whiteSpace = false;
        charAttributes[i].charStop = true;
        HB_UChar32 code = uc[i];
        HB_GraphemeClass ngrapheme;
        HB_LineBreakClass ncls;
        HB_GetGraphemeAndLineBreakClass(code, &ngrapheme, &ncls);
        charAttributes[i].charStop = graphemeTable[ngrapheme][grapheme];
        // handle surrogates
        if (ncls == HB_LineBreak_SG) {
            if (HB_IsHighSurrogate(uc[i]) && i < len - 1 && HB_IsLowSurrogate(uc[i+1])) {
                // High half of a valid pair: skip it entirely. Note that this
                // also skips the state updates and the lineBreakType store at
                // the bottom of the loop for this iteration.
                continue;
            } else if (HB_IsLowSurrogate(uc[i]) && HB_IsHighSurrogate(uc[i-1])) {
                // Low half of a valid pair: classify the combined code point
                // and forbid a cursor stop inside the pair.
                code = HB_SurrogateToUcs4(uc[i-1], uc[i]);
                HB_GetGraphemeAndLineBreakClass(code, &ngrapheme, &ncls);
                charAttributes[i].charStop = false;
            } else {
                // Unpaired surrogate: treat as AL.
                ncls = HB_LineBreak_AL;
            }
        }
        // set white space and char stop flag
        // (classes >= SP are SP, CR, LF and BK, per the HB_LineBreakClass order)
        if (ncls >= HB_LineBreak_SP)
            charAttributes[i].whiteSpace = true;
        HB_LineBreakType lineBreakType = HB_NoBreak;
        if (cls >= HB_LineBreak_LF) {
            lineBreakType = HB_ForcedBreak;
        } else if(cls == HB_LineBreak_CR) {
            // CR followed by LF is kept together; a lone CR forces a break.
            lineBreakType = (ncls == HB_LineBreak_LF) ? HB_NoBreak : HB_ForcedBreak;
        }
        if (ncls == HB_LineBreak_SP)
            goto next_no_cls_update;
        if (ncls >= HB_LineBreak_CR)
            goto next;
        {
            int tcls = ncls;
            // for south east asian chars that require a complex (dictionary analysis), the unicode
            // standard recommends to treat them as AL. thai_attributes and other attribute methods that
            // do dictionary analysis can override
            if (tcls >= HB_LineBreak_SA)
                tcls = HB_LineBreak_AL;
            if (cls >= HB_LineBreak_SA)
                cls = HB_LineBreak_AL;
            int brk = breakTable[cls][tcls];
            switch (brk) {
            case DirectBreak:
                lineBreakType = HB_Break;
                if (uc[i-1] == 0xad) // soft hyphen
                    lineBreakType = HB_SoftHyphen;
                break;
            case IndirectBreak:
                // A break is only allowed if a space separates the two units.
                lineBreakType = (lcls == HB_LineBreak_SP) ? HB_Break : HB_NoBreak;
                break;
            case CombiningIndirectBreak:
                lineBreakType = HB_NoBreak;
                if (lcls == HB_LineBreak_SP){
                    // Retroactively allow a break two units back.
                    if (i > 1)
                        charAttributes[i-2].lineBreakType = HB_Break;
                } else {
                    goto next_no_cls_update;
                }
                break;
            case CombiningProhibitedBreak:
                lineBreakType = HB_NoBreak;
                if (lcls != HB_LineBreak_SP)
                    goto next_no_cls_update;
                // falls through to the break below when the previous unit was a space
            case ProhibitedBreak:
            default:
                break;
            }
        }
    next:
        cls = ncls;
    next_no_cls_update:
        lcls = ncls;
        grapheme = ngrapheme;
        charAttributes[i-1].lineBreakType = lineBreakType;
    }
    charAttributes[len-1].lineBreakType = HB_ForcedBreak;
 }
 // --------------------------------------------------------------------------------------------------------------------------------------------
 //
 // Basic processing
@ -582,210 +383,63 @@ HB_Bool HB_BasicShape(HB_ShaperItem *shaper_item)
 const HB_ScriptEngine HB_ScriptEngines[] = {
    // Common
-    { HB_BasicShape, 0},
+    { HB_BasicShape},
    // Greek
-    { HB_GreekShape, 0},
+    { HB_GreekShape},
    // Cyrillic
-    { HB_BasicShape, 0},
+    { HB_BasicShape},
    // Armenian
-    { HB_BasicShape, 0},
+    { HB_BasicShape},
    // Hebrew
-    { HB_HebrewShape, 0 },
+    { HB_HebrewShape},
    // Arabic
-    { HB_ArabicShape, 0},
+    { HB_ArabicShape},
    // Syriac
-    { HB_ArabicShape, 0},
+    { HB_ArabicShape},
    // Thaana
-    { HB_BasicShape, 0 },
+    { HB_BasicShape},
    // Devanagari
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Bengali
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Gurmukhi
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Gujarati
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Oriya
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Tamil
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Telugu
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Kannada
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Malayalam
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Sinhala
-    { HB_IndicShape, HB_IndicAttributes },
+    { HB_IndicShape},
    // Thai
-    { HB_BasicShape, HB_ThaiAttributes },
+    { HB_BasicShape},
    // Lao
-    { HB_BasicShape, 0 },
+    { HB_BasicShape},
    // Tibetan
-    { HB_TibetanShape, HB_TibetanAttributes },
+    { HB_TibetanShape},
    // Myanmar
-    { HB_MyanmarShape, HB_MyanmarAttributes },
+    { HB_MyanmarShape},
    // Georgian
-    { HB_BasicShape, 0 },
+    { HB_BasicShape},
    // Hangul
-    { HB_HangulShape, 0 },
+    { HB_HangulShape},
    // Ogham
-    { HB_BasicShape, 0 },
+    { HB_BasicShape},
    // Runic
-    { HB_BasicShape, 0 },
+    { HB_BasicShape},
    // Khmer
-    { HB_KhmerShape, HB_KhmerAttributes },
+    { HB_KhmerShape},
    // N'Ko
-    { HB_ArabicShape, 0}
+    { HB_ArabicShape}
 };
 void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
                          const HB_ScriptItem *items, hb_uint32 numItems,
                          HB_CharAttributes *attributes)
 {
    calcLineBreaks(string, stringLength, attributes);
    for (hb_uint32 i = 0; i < numItems; ++i) {
        HB_Script script = items[i].script;
        if (script == HB_Script_Inherited)
            script = HB_Script_Common;
        HB_AttributeFunction attributeFunction = HB_ScriptEngines[script].charAttributes;
        if (!attributeFunction)
            continue;
        attributeFunction(script, string, items[i].pos, items[i].length, attributes);
    }
 }
 // Cell values of wordbreakTable. Middle means the decision depends on what
 // follows: HB_GetWordBoundaries() looks ahead and resolves it to Break or NoBreak.
 enum BreakRule { NoBreak = 0, Break = 1, Middle = 2 };
 // Read in HB_GetWordBoundaries() as wordbreakTable[brk][nbrk]: the row is the
 // word class carried from the previous position, the column is the class of
 // the current code unit.
 static const hb_uint8 wordbreakTable[HB_Word_ExtendNumLet + 1][HB_Word_ExtendNumLet + 1] = {
 //        Other    Format   Katakana ALetter  MidLetter MidNum  Numeric  ExtendNumLet
    {   Break,   Break,   Break,   Break,   Break,   Break,   Break,   Break }, // Other
    {   Break,   Break,   Break,   Break,   Break,   Break,   Break,   Break }, // Format 
    {   Break,   Break, NoBreak,   Break,   Break,   Break,   Break, NoBreak }, // Katakana
    {   Break,   Break,   Break, NoBreak,  Middle,   Break, NoBreak, NoBreak }, // ALetter
    {   Break,   Break,   Break,   Break,   Break,   Break,   Break,   Break }, // MidLetter
    {   Break,   Break,   Break,   Break,   Break,   Break,   Break,   Break }, // MidNum
    {   Break,   Break,   Break, NoBreak,   Break,  Middle, NoBreak, NoBreak }, // Numeric
    {   Break,   Break, NoBreak, NoBreak,   Break,   Break, NoBreak, NoBreak }, // ExtendNumLet
 };
 void HB_GetWordBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
                          const HB_ScriptItem * /*items*/, hb_uint32 /*numItems*/,
                          HB_CharAttributes *attributes)
 {
    if (stringLength == 0)
        return;
    unsigned int brk = HB_GetWordClass(string[0]);
    attributes[0].wordBoundary = true;
    for (hb_uint32 i = 1; i < stringLength; ++i) {
        if (!attributes[i].charStop) {
            attributes[i].wordBoundary = false;
            continue;
        }
        hb_uint32 nbrk = HB_GetWordClass(string[i]);
        if (nbrk == HB_Word_Format) {
            attributes[i].wordBoundary = (HB_GetSentenceClass(string[i-1]) == HB_Sentence_Sep);
            continue;
        }
        BreakRule rule = (BreakRule)wordbreakTable[brk][nbrk];
        if (rule == Middle) {
            rule = Break;
            hb_uint32 lookahead = i + 1;
            while (lookahead < stringLength) {
                hb_uint32 testbrk = HB_GetWordClass(string[lookahead]);
                if (testbrk == HB_Word_Format && HB_GetSentenceClass(string[lookahead]) != HB_Sentence_Sep) {
                    ++lookahead;
                    continue;
                }
                if (testbrk == brk) {
                    rule = NoBreak;
                    while (i < lookahead)
                        attributes[i++].wordBoundary = false;
                    nbrk = testbrk;
                }
                break;
            }
        }
        attributes[i].wordBoundary = (rule == Break);
        brk = nbrk;
    }
 }
 // States of the sentence-boundary state machine. SB_Initial..SB_BAfter are
 // rows of sentenceBreakTable. SB_Break and SB_Look only appear as table
 // results and are resolved by HB_GetSentenceBoundaries() before the next lookup.
 enum SentenceBreakStates {
    SB_Initial,
    SB_Upper,
    SB_UpATerm, 
    SB_ATerm,
    SB_ATermC, 
    SB_ACS, 
    SB_STerm, 
    SB_STermC, 
    SB_SCS,
    SB_BAfter, 
    SB_Break,
    SB_Look
 };
 // Transition table, read as sentenceBreakTable[state][HB_SentenceClass]: the
 // row is the current SentenceBreakStates value, the column is the sentence
 // class of the current code unit, and the cell is the next state.
 // Only ten rows (SB_Initial..SB_BAfter) are initialised. The first dimension
 // is HB_Sentence_Close + 1 (eleven), so the last row is implicitly zero.
 static const hb_uint8 sentenceBreakTable[HB_Sentence_Close + 1][HB_Sentence_Close + 1] = {
 //        Other       Sep         Format      Sp          Lower       Upper       OLetter     Numeric     ATerm       STerm       Close
      { SB_Initial, SB_BAfter , SB_Initial, SB_Initial, SB_Initial, SB_Upper  , SB_Initial, SB_Initial, SB_ATerm  , SB_STerm  , SB_Initial }, // SB_Initial,
      { SB_Initial, SB_BAfter , SB_Upper  , SB_Initial, SB_Initial, SB_Upper  , SB_Initial, SB_Initial, SB_UpATerm, SB_STerm  , SB_Initial }, // SB_Upper
      { SB_Look   , SB_BAfter , SB_UpATerm, SB_ACS    , SB_Initial, SB_Upper  , SB_Break  , SB_Initial, SB_ATerm  , SB_STerm  , SB_ATermC  }, // SB_UpATerm
      { SB_Look   , SB_BAfter , SB_ATerm  , SB_ACS    , SB_Initial, SB_Break  , SB_Break  , SB_Initial, SB_ATerm  , SB_STerm  , SB_ATermC  }, // SB_ATerm
      { SB_Look   , SB_BAfter , SB_ATermC , SB_ACS    , SB_Initial, SB_Break  , SB_Break  , SB_Look   , SB_ATerm  , SB_STerm  , SB_ATermC  }, // SB_ATermC,
      { SB_Look   , SB_BAfter , SB_ACS    , SB_ACS    , SB_Initial, SB_Break  , SB_Break  , SB_Look   , SB_ATerm  , SB_STerm  , SB_Look    }, // SB_ACS,
      { SB_Break  , SB_BAfter , SB_STerm  , SB_SCS    , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_ATerm  , SB_STerm  , SB_STermC  }, // SB_STerm,
      { SB_Break  , SB_BAfter , SB_STermC , SB_SCS    , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_ATerm  , SB_STerm  , SB_STermC  }, // SB_STermC,
      { SB_Break  , SB_BAfter , SB_SCS    , SB_SCS    , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_ATerm  , SB_STerm  , SB_Break   }, // SB_SCS,
      { SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break   }, // SB_BAfter,
 };
 // Sets attributes[i].sentenceBoundary for every code unit of `string`.
 // Relies on attributes[i].charStop already being filled in: positions that
 // are not cursor stops never start a sentence. Index 0 is always a boundary.
 // The two script-item parameters are unused.
 //
 // The state machine is driven by sentenceBreakTable. A SB_Look result means
 // "break here unless, after skipping Other/Numeric/Close characters, a
 // lower-case letter follows".
 void HB_GetSentenceBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
                               const HB_ScriptItem * /*items*/, hb_uint32 /*numItems*/,
                               HB_CharAttributes *attributes)
 {
     if (stringLength == 0)
         return;

     hb_uint32 brk = sentenceBreakTable[SB_Initial][HB_GetSentenceClass(string[0])];
     attributes[0].sentenceBoundary = true;

     for (hb_uint32 i = 1; i < stringLength; ++i) {
         if (!attributes[i].charStop) {
             attributes[i].sentenceBoundary = false;
             continue;
         }

         brk = sentenceBreakTable[brk][HB_GetSentenceClass(string[i])];

         if (brk == SB_Look) {
             brk = SB_Break;
             hb_uint32 lookahead = i + 1;
             while (lookahead < stringLength) {
                 hb_uint32 sbrk = HB_GetSentenceClass(string[lookahead]);
                 // The Lower test must come first: Lower is itself "not
                 // Other/Numeric/Close", so testing it second made this branch
                 // unreachable and every SB_Look became a break.
                 if (sbrk == HB_Sentence_Lower) {
                     brk = SB_Initial;
                     break;
                 }
                 if (sbrk != HB_Sentence_Other && sbrk != HB_Sentence_Numeric && sbrk != HB_Sentence_Close)
                     break;
                 ++lookahead;
             }
             if (brk == SB_Initial) {
                 // No break anywhere up to the lower-case letter; i ends on
                 // that letter and is cleared by the else branch below.
                 while (i < lookahead)
                     attributes[i++].sentenceBoundary = false;
             }
         }

         if (brk == SB_Break) {
             attributes[i].sentenceBoundary = true;
             // Restart the machine with the character that begins the new sentence.
             brk = sentenceBreakTable[SB_Initial][HB_GetSentenceClass(string[i])];
         } else {
             attributes[i].sentenceBoundary = false;
         }
     }
 }
 static inline char *tag_to_string(HB_UInt tag)
 {
@ -1335,4 +989,3 @@ HB_Bool HB_ShapeItem(HB_ShaperItem *shaper_item)
    shaper_item->glyphIndicesPresent = false;
    return result;
 }
--- a/src/hb-old/harfbuzz-shaper.h
+++ b/src/hb-old/harfbuzz-shaper.h
@ -130,37 +130,6 @@ typedef struct
    hb_uint8 bidiLevel;
 } HB_ScriptItem;
 /*
  * Kind of break opportunity after a code unit. There are four values, so the
  * type fits the 2-bit lineBreakType field of HB_CharAttributes.
  * calcLineBreaks() stores HB_SoftHyphen when a direct break follows U+00AD,
  * and HB_ForcedBreak after line terminators and on the last code unit.
  */
 typedef enum {
    HB_NoBreak,
    HB_SoftHyphen,
    HB_Break,
    HB_ForcedBreak
 } HB_LineBreakType;
 /*
  * Per-code-unit text attributes, packed into bit-fields totalling 8 bits.
  * lineBreakType, whiteSpace and charStop are written by HB_GetCharAttributes();
  * wordBoundary and sentenceBoundary are written by HB_GetWordBoundaries() and
  * HB_GetSentenceBoundaries() respectively.
  */
 typedef struct {
    /*HB_LineBreakType*/ hb_bitfield lineBreakType  :2;
    /*HB_Bool*/ hb_bitfield whiteSpace              :1;     /* A unicode whitespace character, except NBSP, ZWNBSP */
    /*HB_Bool*/ hb_bitfield charStop                :1;     /* Valid cursor position (for left/right arrow) */
    /*HB_Bool*/ hb_bitfield wordBoundary            :1;
    /*HB_Bool*/ hb_bitfield sentenceBoundary        :1;
    hb_bitfield unused                  :2;
 } HB_CharAttributes;
 /*
  * Fills lineBreakType, whiteSpace and charStop for each of the stringLength
  * code units, then applies script-specific refinements per item.
  * attributes must have room for stringLength entries.
  */
 void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
                           const HB_ScriptItem *items, hb_uint32 numItems,
                           HB_CharAttributes *attributes);
 /* requires HB_GetCharAttributes to be called before */
 /* (the implementation reads attributes[i].charStop; items and numItems are unused) */
 void HB_GetWordBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
                           const HB_ScriptItem *items, hb_uint32 numItems,
                           HB_CharAttributes *attributes);
 /* requires HB_GetCharAttributes to be called before */
 /* (the implementation reads attributes[i].charStop; items and numItems are unused) */
 void HB_GetSentenceBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
                               const HB_ScriptItem *items, hb_uint32 numItems,
                               HB_CharAttributes *attributes);
 typedef enum {
    HB_LeftToRight = 0,
--- a/src/hb-old/harfbuzz-thai.c
+++ b/src/hb-old/harfbuzz-thai.c
@ -1,111 +0,0 @@
 /*
 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
 *
 * This is part of HarfBuzz, an OpenType Layout engine library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 */
 #include "harfbuzz-shaper.h"
 #include "harfbuzz-shaper-private.h"
 #include "harfbuzz-external.h"
 #include <assert.h>
 #include <stdio.h>
 typedef int (*th_brk_def)(const char*, int[], int);
 static th_brk_def th_brk = 0;
 static int libthai_resolved = 0;
 static void resolve_libthai()
 {
    if (!th_brk)
        th_brk = (th_brk_def)HB_Library_Resolve("thai", 0, "th_brk");
    libthai_resolved = 1;
 }
 /*
  * Converts len UTF-16 code units to a NUL-terminated single-byte string for
  * libthai:
  *   - units <= 0xA0 are copied unchanged,
  *   - U+0E01..U+0E5B are mapped to 0xA1..0xFB,
  *   - everything else becomes '?'.
  *
  * cstr is the output buffer and must hold at least len + 1 bytes. It is
  * declared const char * for historical reasons but is written through.
  */
 static void to_tis620(const HB_UChar16 *string, hb_uint32 len, const char *cstr)
 {
     hb_uint32 i;
     unsigned char *result = (unsigned char *)cstr;

     for (i = 0; i < len; ++i) {
         /* This must be one if / else-if / else chain: without the first
          * "else", every unit <= 0xA0 was overwritten with '?' by the final
          * else branch. */
         if (string[i] <= 0xa0)
             result[i] = (unsigned char)string[i];
         else if (string[i] >= 0xe01 && string[i] <= 0xe5b)
             result[i] = (unsigned char)(string[i] - 0xe00 + 0xa0);
         else
             result[i] = '?';
     }

     result[len] = 0;
 }
 /*
  * Uses libthai's th_brk (resolved lazily) to mark word/line breaks in a Thai
  * run of len code units. On success, lineBreakType and wordBoundary are first
  * cleared for the whole run, then set on the unit just before each reported
  * break position.
  *
  * If libthai is unavailable or a temporary buffer cannot be allocated, the
  * attributes are left untouched.
  */
 static void thaiWordBreaks(const HB_UChar16 *string, hb_uint32 len, HB_CharAttributes *attributes)
 {
     char s[128];
     char *cstr = s;
     int brp[128];
     int *break_positions = brp;
     hb_uint32 numbreaks;
     hb_uint32 i;

     if (!libthai_resolved)
         resolve_libthai();

     if (!th_brk)
         return;

     /* The stack buffer holds up to 127 characters plus the terminator. */
     if (len >= 128) {
         cstr = (char *)malloc(len*sizeof(char) + 1);
         if (!cstr)
             return;
     }

     to_tis620(string, len, cstr);

     numbreaks = th_brk(cstr, break_positions, 128);
     if (numbreaks > 128) {
         break_positions = (int *)malloc(numbreaks * sizeof(int));
         if (!break_positions) {
             if (cstr != s)
                 free(cstr);
             return;
         }
         numbreaks = th_brk(cstr, break_positions, numbreaks);
     }

     for (i = 0; i < len; ++i) {
         attributes[i].lineBreakType = HB_NoBreak;
         attributes[i].wordBoundary = FALSE;
     }

     for (i = 0; i < numbreaks; ++i) {
         int pos = break_positions[i];
         /* Ignore positions outside the run so a bad value from the library
          * cannot write past the attributes array. */
         if (pos > 0 && (hb_uint32)pos <= len) {
             attributes[pos-1].lineBreakType = HB_Break;
             attributes[pos-1].wordBoundary = TRUE;
         }
     }

     if (break_positions != brp)
         free(break_positions);
     if (cstr != s)
         free(cstr);
 }
 /*
  * Attribute hook for Thai runs: delegates to thaiWordBreaks() for the `len`
  * code units starting at text[from], with the attribute array offset to match.
  * Asserts that it is only invoked for HB_Script_Thai.
  */
 void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
 {
     assert(script == HB_Script_Thai);
     thaiWordBreaks(&text[from], len, &attributes[from]);
 }
--- a/src/hb-old/harfbuzz-tibetan.c
+++ b/src/hb-old/harfbuzz-tibetan.c
@ -246,29 +246,3 @@ HB_Bool HB_TibetanShape(HB_ShaperItem *item)
    item->num_glyphs = first_glyph;
    return TRUE;
 }
 /*
  * Marks cursor stops for a Tibetan run: within text[from .. from+len) the
  * first code unit of every span reported by tibetan_nextSyllableBoundary()
  * gets charStop = TRUE and the remaining units of that span get FALSE.
  * `script` is unused; the `invalid` flag from the helper is ignored.
  */
 void HB_TibetanAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
 {
     const int run_end = from + len;
     HB_CharAttributes *run = attributes + from;   /* run[k] describes text[from + k] */
     hb_uint32 pos = 0;

     HB_UNUSED(script);

     while (pos < len) {
         HB_Bool invalid;
         /* The helper returns an absolute index; make it relative to the run. */
         hb_uint32 next = tibetan_nextSyllableBoundary(text, from + pos, run_end, &invalid) - from;

         run[pos].charStop = TRUE;

         /* Never step past the end of the run. */
         if (next > len - 1)
             next = len;

         for (++pos; pos < next; ++pos)
             run[pos].charStop = FALSE;

         assert(pos == next);
     }
 }
--- a/src/hb-old/harfbuzz.c
+++ b/src/hb-old/harfbuzz.c
@ -1,32 +0,0 @@
 /*
 * Copyright (C) 2006  Behdad Esfahbod
 *
 * This is part of HarfBuzz, an OpenType Layout engine library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 */
 #define HB_INTERNAL static
 #include "harfbuzz-buffer.c"
 #include "harfbuzz-gdef.c"
 #include "harfbuzz-gsub.c"
 #include "harfbuzz-gpos.c"
 #include "harfbuzz-impl.c"
 #include "harfbuzz-open.c"
 #include "harfbuzz-stream.c"