// Flowchart of the order in which OpenType layout features are applied.
// The flow branches first on script direction, then on script (one cluster
// per script group), and rejoins at the BUZZ node for the features shared by
// all scripts.
//
// NOTE: DOT `//` comments run to the end of the physical line, so every
// comment in this file must sit on a line of its own.
digraph {
    graph [outputorder=edgefirst];
    node [shape="record", fontname="Noto Sans Mono SemiBold", fontsize=15];
    edge [fontname="Verdana", fontsize=12, labeldistance=7.5];
    fontname="Verdana";
    ranksep=0.02;
    nodesep=0.5;

    // Legend: a chain of colour-coded stage nodes. The same fill colours are
    // reused on the feature nodes below.
    subgraph {
        ranksep="0.02 equally";
        preprocessing [style=filled, fillcolor="lightgreen", fontname="Verdana", label="Glyph pre-processing"];
        orthographic [style=filled, fillcolor="lightblue", fontname="Verdana", label="Orthographic Unit Shaping"];
        reordering [style=filled, fillcolor="lightcoral", fontname="Verdana", label="Reordering group (USE)"];
        topographic [style=filled, fillcolor="lightgoldenrod", fontname="Verdana", label="Topographical Features‡"];
        typographic [style=filled, fillcolor="lightpink", fontname="Verdana", label="Typographic Presentation"];
        positioning [style=filled, fillcolor="lightsalmon", fontname="Verdana", label="Positioning"];
        preprocessing->reordering->orthographic->topographic->typographic->positioning;
    }

    // First decision: script direction.
    decision1 [shape="diamond", label="Script\ndirection?", fontname="Verdana"];
    rvrn->decision1;

    ltrfeatures [label="{ltra|ltrm}", fillcolor="lightgreen", style="filled"];
    {
        rtlfeatures [label="{rtla|rtlm¹}", fillcolor="lightgreen", style="filled"];
    }

    // Fraction features, with their footnotes kept on the same rank.
    // Raw newlines are ignored inside HTML-like labels, so each footnote ends
    // with an explicit left-aligned line break.
    {
        rank=same;
        fracfeatures [label="frac²|numr³|dnom⁴", fillcolor="lightpink", style="filled"];
        fracnotes [fontname="Verdana", shape=plaintext, label=<
¹ rtlm is scoped to characters with a Unicode mirroring property<br align="left"/>
² frac is scoped to numr + the slash + dnom<br align="left"/>
³ numr is scoped to all decimal numbers before a U+2044 FRACTION SLASH.<br align="left"/>
⁴ dnom is scoped to all decimal numbers after a U+2044 FRACTION SLASH.<br align="left"/>
>];
    }

    rand [fillcolor="lightpink", style="filled"];

    decision1 -> ltrfeatures [label="Left-to-right"];
    decision1 -> rtlfeatures [label="Right-to-left"];
    decision1 -> fracfeatures [label="Other"];
    ltrfeatures -> fracfeatures;
    rtlfeatures -> fracfeatures;
    fracfeatures->rand;

    // Second decision: which script-specific path to take.
    decision2 [shape="diamond", label="Script?", fontname="Verdana"];
    {
        rank=same;
        HARF [label="{Harf|HARF}"];
        notes;
    }
    rand -> trak -> HARF -> decision2;

    // Features reached from BUZZ, i.e. after every script-specific path.
    commonfeatures [shape=none, label=<
abvm blwm ccmp locl mark mkmk rlig
>];

    decision3 [shape="diamond", label="Script\ndirection?", fontname="Verdana"];
    BUZZ [label="{Buzz|BUZZ}"];
    BUZZ -> commonfeatures -> decision3;

    horizontalfeatures [shape=none, label=<
calt (not Hangul)<br/>
clig (not Khmer)<br/>
curs<br/>
dist<br/>
kern<br/>
liga (not Khmer)<br/>
rclt
>];
    vert [label="vert", style=filled, fillcolor="lightpink"];

    decision3 -> horizontalfeatures [label="Horizontal"];
    decision3 -> vert [label="Vertical"];

    discretionary [label="User-selected\ndiscretionary\nfeatures", fontname="Verdana"];
    horizontalfeatures -> discretionary;
    vert -> discretionary;

    decision2->stch;
    BUZZ;

    // One cluster per script group; each cluster's last node feeds BUZZ.
    subgraph complexshapers {

        subgraph cluster_arabic {
            bgcolor="lightyellow";
            label="Arabic, Syriac";
            stch [style="filled", fillcolor="lightgreen", label="stch"];
            ccmplocl [style="filled", label="ccmp|locl", fillcolor="lightgreen"];
            arabicfeatures [label="isol|fina|fin2|fin3|medi|med2|init", style="filled", fillcolor="lightgoldenrod"];
            arabicfeatures2 [label="rclt|calt", style="filled", fillcolor="lightpink"];
            rlig [style="filled", fillcolor="lightpink"];
            mset [fillcolor="lightpink", style="filled"];
            stch->ccmplocl->arabicfeatures->rlig->arabicfeatures2->mset;
        }
        mset->BUZZ:n;

        subgraph cluster_hangul {
            bgcolor="lightyellow";
            label="Hangul";
            hangulfeatures [label="ljmo|vjmo|tjmo", style="filled", fillcolor="lightgoldenrod"];
        }
        hangulfeatures->BUZZ:n;

        subgraph cluster_indic {
            label="Indic";
            bgcolor="lightyellow";

            // Preprocessing
            // Statement order matters in this cluster: the `node[...]`
            // defaults apply only to nodes first mentioned after them.
            loclccmpindic [label="locl†|ccmp†", style=filled, fillcolor="lightgreen"];
            node [style=filled, fillcolor="lightgreen"];
            nukt [label="nukt†"];
            akhn [label="akhn†"];
            loclccmpindic->indic_reorder_1->nukt->akhn;
            indic_reorder_1 [label="Initial reordering", fontname="Verdana", fillcolor="lightgrey", shape=ellipse, style=filled];

            // Orthographic
            node [style=filled, fillcolor="lightblue"];
            rphf [label="rphf⁵"];
            rkrf [label="rkrf†"];
            pref [label="pref⁶"];
            blwf [label="blwf⁷"];
            abvf [label="abvf⁸"];
            half [label="half⁹"];
            pstf [label="pstf⁸"];
            vatu [label="vatu†"];
            cjct [label="cjct†"];
            akhn -> rphf -> rkrf -> pref -> blwf -> abvf -> half -> pstf -> vatu -> cjct;

            // Typographic presentation
            indic_typographic [style=filled, fillcolor="lightpink", label="init|pres|abvs|blws|psts|haln"];
            indic_reorder_2 [label="Final reordering", fillcolor="lightgrey", fontname="Verdana", shape=ellipse, style=filled];
            cjct->indic_reorder_2->indic_typographic;

            // style="" clears the cluster-wide `filled` default for the notes.
            notes2 [fontname="Verdana", shape=plaintext, style="", label=<
⁵ rphf is scoped to pre-base ra+halant sequences<br align="left"/>
⁶ pref is scoped to the two glyphs after the base; outputs are reordered<br align="left"/>
⁷ blwf is usually scoped to the whole syllable, except in Telugu and Kannada where it is post-base<br align="left"/>
⁸ abvf and pstf are scoped to post-base<br align="left"/>
⁹ half is scoped to pre-base<br align="left"/>
>];
            // Invisible edge: places the notes below the last Indic node.
            indic_typographic -> notes2 [style=invis];
        }

        subgraph cluster_khmer {
            label="Khmer";
            bgcolor="lightyellow";
            khmerbasic [style=filled, fillcolor="lightgreen", label="locl†|ccmp†|pref†|blwf†|abvf†|pstf†|cfar†"];
            khmerother [style=filled, fillcolor="lightpink", label="pres|abvs|blws|psts"];
            khmerbasic -> khmerother -> khmerclig;
            khmerclig [label="clig", style=filled, fillcolor="lightpink"];
        }

        subgraph cluster_myanmar {
            label="Myanmar";
            bgcolor="lightyellow";
            loclccmpmyanmar [label="locl†|ccmp†", style=filled, fillcolor="lightgreen"];
            rphfmymr [label="rphf†", style=filled, fillcolor="lightblue"];
            prefmymr [label="pref†", style=filled, fillcolor="lightblue"];
            blwfmymr [label="blwf†", style=filled, fillcolor="lightblue"];
            pstfmymr [label="pstf†", style=filled, fillcolor="lightblue"];
            myanmarother [label="pres|abvs|blws|psts", style=filled, fillcolor="lightpink"];
            reorder_myanmar [label="Reordering", shape=ellipse, style=filled, fontname="Verdana"];
            loclccmpmyanmar -> reorder_myanmar -> rphfmymr -> prefmymr -> blwfmymr -> pstfmymr -> myanmarother;
        }

        subgraph cluster_use {
            label="Universal Shaping Engine";
            bgcolor="lightyellow";
            use_preprocessing [style=filled, label="locl†|ccmp†|nukt†|akhn†", fillcolor="lightgreen"];

            // Reordering
            rphfuse [label="rphf¹⁰", style=filled, fillcolor="lightcoral"];
            prefuse [label="pref¹¹", style=filled, fillcolor="lightcoral"];

            // Orthographic
            orthographicuse [label="rkrf†|abvf†|blwf†|half†|pstf†|vatu†|cjct†", style="filled", fillcolor="lightblue"];
            topographicaluse [label="isol|init|medi|fina", style="filled", fillcolor="lightgoldenrod"];
            typographicaluse [label="abvs|blws|haln|pres|psts", style="filled", fillcolor="lightpink"];
            reorder_use [label="Reordering", shape=ellipse, style=filled, fontname="Verdana"];
            use_preprocessing -> rphfuse -> prefuse -> orthographicuse -> reorder_use -> topographicaluse -> typographicaluse;

            notes3 [fontname="Verdana", shape=plaintext, label=<
¹⁰ Outputs are reordered as category R<br align="left"/>
¹¹ Outputs are reordered to before base<br align="left"/>
>];
            // Invisible edge: places the notes below the last USE node.
            typographicaluse -> notes3 [style=invis];
        }
    }

    // Every script-specific path rejoins at the top of BUZZ.
    indic_typographic->BUZZ:n;
    typographicaluse->BUZZ:n;
    khmerclig -> BUZZ:n;
    myanmarother -> BUZZ:n;

    // Fan-out from the script decision into each cluster. The labelled edge
    // goes straight to BUZZ without passing through a cluster.
    decision2->hangulfeatures;
    decision2->loclccmpindic;
    decision2->khmerbasic;
    decision2->loclccmpmyanmar;
    decision2->use_preprocessing;
    decision2->BUZZ [label=" Hebrew, Thai,\n Lao, other"];

    notes [fontname="Verdana", shape=box, label=<
Indic scripts are: Bengali, Devanagari, Gujarati, Gurmukhi, Kannada, Malayalam, Oriya, Tamil, Telugu, Sinhala<br align="left"/>
USE scripts are: Adlam, Ahom, Balinese, Batak, Bhaiksuki, Brahmi, Buginese, Buhid, Chakma, Cham, Chorasmian, Dives Akuru, Dogra, Duployan,<br align="left"/>
Egyptian hieroglyphs, Elymaic, Grantha, Gunjala Gondi, Hanifi Rohingya, Hanunoo, Javanese, Kaithi, Kayah li, Kharoshthi, Khojki,<br align="left"/>
Khudawadi, Lepcha, Limbu, Mahajani, Makasar, Mandaic, Manichaean, Marchen, Masaram Gondi, Medefaidrin, Meetei Mayek, Miao, Modi,<br align="left"/>
Mongolian, Multani, Nandinagari, Newa, Nko, Nyiakeng Puachue Hmong, Old Sogdian, Pahawh Hmong, Phags Pa, Psalter Pahlavi, Rejang,<br align="left"/>
Saurashtra, Sharada, Siddham, Sogdian, Soyombo, Sundanese, Syloti Nagri, Tagalog, Tagbanwa, Tai Le, Tai Tham, Tai Viet,<br align="left"/>
Takri, Tibetan, Tifinagh, Tirhuta, Wancho, Zanabazar square<br align="left"/>
>];

    // Key to the † and ‡ markers used in the node labels above.
    footnote [fontname="Verdana", label=<
† Feature is scoped to each syllable<br align="left"/>
‡ All topographic features are scoped based on topographic position<br align="left"/>
>];
    // Invisible edge: places the footnote below the USE notes.
    notes3->footnote [style=invis];
}