2022-04-19 16:20:53 +02:00
|
|
|
digraph {
|
|
|
|
// Paint edges before nodes so that node shapes are drawn on top of edge lines.
// Graphviz accepts "breadthfirst", "nodesfirst" or "edgesfirst" for outputorder.
graph [outputorder=edgesfirst];
|
2022-04-20 17:55:31 +02:00
|
|
|
node [shape="record", fontname="Noto Sans Mono SemiBold", fontsize=15];
|
2022-04-19 16:20:53 +02:00
|
|
|
edge [fontname="Verdana", fontsize=12,labeldistance=7.5 ];
|
|
|
|
fontname="Verdana";
|
|
|
|
ranksep=0.02; nodesep=0.5;
|
|
|
|
|
|
|
|
subgraph {
|
|
|
|
// Rank spacing requested for the colour-key subgraph only.
// NOTE(review): Graphviz documents ranksep as a root-graph attribute; confirm
// that this per-subgraph value has any effect on the layout.
ranksep="0.02 equally";
|
|
|
|
preprocessing[style=filled,fillcolor="lightgreen",fontname="Verdana",label="Glyph pre-processing"];
|
|
|
|
orthographic[style=filled,fillcolor="lightblue",fontname="Verdana",label="Orthographic Unit Shaping"];
|
|
|
|
reordering[style=filled, fillcolor="lightcoral",fontname="Verdana",label="Reordering group (USE)"];
|
2022-04-20 17:55:31 +02:00
|
|
|
topographic[style=filled,fillcolor="lightgoldenrod",fontname="Verdana",label="Topographical Features‡"];
|
2022-04-19 16:20:53 +02:00
|
|
|
typographic[style=filled,fillcolor="lightpink",fontname="Verdana",label="Typographic Presentation"];
|
|
|
|
positioning[style=filled,fillcolor="lightsalmon",fontname="Verdana",label="Positioning"];
|
|
|
|
preprocessing->reordering->orthographic->topographic->typographic->positioning;
|
|
|
|
}
|
|
|
|
|
|
|
|
decision1 [shape="diamond", label="Script\ndirection?",fontname="Verdana"];
|
|
|
|
rvrn->decision1;
|
|
|
|
|
|
|
|
ltrfeatures [label="{ltra|ltrm}", fillcolor="lightgreen",style="filled"];
|
2022-04-20 17:55:31 +02:00
|
|
|
{
|
|
|
|
rtlfeatures [label="{rtla|rtlm¹}", fillcolor="lightgreen",style="filled"];
|
|
|
|
}
|
|
|
|
{
|
|
|
|
rank=same;
|
|
|
|
fracfeatures [label="frac²|numr³|dnom⁴", fillcolor="lightpink",style="filled"];
|
|
|
|
fracnotes [fontname="Verdana",shape=plaintext,label=<<table border="0" cellborder="0" cellspacing="0">
|
|
|
|
<tr><td align="left">¹ rtlm is scoped to characters with a Unicode mirroring property</td></tr>
|
|
|
|
<tr><td align="left">² frac is scoped to numr + the slash + dnom</td></tr>
|
|
|
|
<tr><td align="left">³ numr is scoped to all decimal numbers before a U+2044 FRACTION SLASH.</td></tr>
|
|
|
|
<tr><td align="left">⁴ dnom is scoped to all decimal numbers after a U+2044 FRACTION SLASH.</td></tr>
|
|
|
|
</table>
|
|
|
|
>];
|
|
|
|
|
|
|
|
}
|
2022-04-19 16:20:53 +02:00
|
|
|
rand [fillcolor="lightpink",style="filled"];
|
|
|
|
|
|
|
|
decision1 -> ltrfeatures [label="Left-to-right"];
|
|
|
|
decision1 -> rtlfeatures [label="Right-to-left"];
|
|
|
|
|
|
|
|
decision1 -> fracfeatures [label="Other"];
|
|
|
|
|
|
|
|
ltrfeatures -> fracfeatures;
|
|
|
|
rtlfeatures -> fracfeatures;
|
|
|
|
fracfeatures->rand;
|
|
|
|
|
|
|
|
decision2 [shape="diamond", label="Script?",fontname="Verdana"];
|
|
|
|
|
2022-04-20 17:55:31 +02:00
|
|
|
{rank=same; HARF [label="{Harf|HARF}"]; notes;}
|
2022-04-19 16:20:53 +02:00
|
|
|
rand -> trak -> HARF -> decision2;
|
|
|
|
|
|
|
|
commonfeatures [shape=none,label=<<table border="0" cellspacing="0">
|
|
|
|
<tr>
|
|
|
|
<td border="1" bgcolor="lightsalmon">abvm</td>
|
|
|
|
<td border="1" bgcolor="lightsalmon">blwm</td>
|
|
|
|
<td border="1" bgcolor="lightgreen">ccmp</td>
|
|
|
|
<td border="1" bgcolor="lightgreen">locl</td>
|
|
|
|
<td border="1" bgcolor="lightsalmon">mark</td>
|
|
|
|
<td border="1" bgcolor="lightsalmon">mkmk</td>
|
|
|
|
<td border="1" bgcolor="lightpink">rlig</td>
|
|
|
|
</tr>
|
|
|
|
</table>>
|
|
|
|
];
|
2022-04-20 17:55:31 +02:00
|
|
|
|
2022-04-19 16:20:53 +02:00
|
|
|
decision3 [shape="diamond", label="Script\ndirection?",fontname="Verdana"];
|
2022-04-20 17:55:31 +02:00
|
|
|
|
|
|
|
BUZZ [label="{Buzz|BUZZ}"];
|
2022-04-19 16:20:53 +02:00
|
|
|
BUZZ -> commonfeatures -> decision3;
|
|
|
|
|
|
|
|
horizontalfeatures [
|
|
|
|
shape=none,label=<<table border="0" cellspacing="0">
|
|
|
|
<tr><td border="1" bgcolor="lightpink">calt <font face="Verdana">(not Hangul)</font></td></tr>
|
|
|
|
<tr><td border="1" bgcolor="lightpink">clig <font face="Verdana">(not Khmer)</font></td></tr>
|
|
|
|
<tr><td border="1" bgcolor="lightsalmon">curs</td></tr>
|
|
|
|
<tr><td border="1" bgcolor="lightsalmon">dist</td></tr>
|
|
|
|
<tr><td border="1" bgcolor="lightsalmon">kern</td></tr>
|
|
|
|
<tr><td border="1" bgcolor="lightpink">liga <font face="Verdana">(not Khmer)</font></td></tr>
|
|
|
|
<tr><td border="1" bgcolor="lightpink">rclt</td></tr>
|
|
|
|
</table>>
|
|
|
|
];
|
|
|
|
vert [label="vert",style=filled,fillcolor="lightpink"];
|
|
|
|
|
|
|
|
decision3 -> horizontalfeatures [label="Horizontal"];
|
|
|
|
decision3 -> vert [label="Vertical"];
|
|
|
|
|
|
|
|
discretionary [label="User-selected\ndiscretionary\nfeatures",fontname="Verdana"];
|
|
|
|
|
|
|
|
horizontalfeatures -> discretionary;
|
|
|
|
vert -> discretionary;
|
|
|
|
|
|
|
|
decision2->stch;
|
|
|
|
|
|
|
|
// Bare reference to the BUZZ node (declared earlier with its record label);
// this statement adds no attributes of its own.
BUZZ;
|
|
|
|
|
2022-06-03 20:24:32 +02:00
|
|
|
subgraph shapers {
|
2022-04-19 16:20:53 +02:00
|
|
|
subgraph cluster_arabic {
|
|
|
|
bgcolor="lightyellow"
|
|
|
|
label="Arabic, Syriac";
|
2022-04-20 17:55:31 +02:00
|
|
|
stch [ style="filled", fillcolor="lightgreen",label="stch"];
|
2022-04-19 16:20:53 +02:00
|
|
|
ccmplocl [ style="filled", label="ccmp|locl", fillcolor="lightgreen"];
|
|
|
|
arabicfeatures [label="isol|fina|fin2|fin3|medi|med2|init", style="filled", fillcolor="lightgoldenrod"];
|
|
|
|
arabicfeatures2 [label="rclt|calt", style="filled",fillcolor="lightpink"];
|
|
|
|
rlig[style="filled",fillcolor="lightpink"];
|
|
|
|
mset [fillcolor="lightpink",style="filled"]
|
|
|
|
stch->ccmplocl->arabicfeatures->rlig->arabicfeatures2->mset;
|
|
|
|
}
|
|
|
|
mset->BUZZ:n;
|
|
|
|
|
|
|
|
subgraph cluster_hangul {
|
|
|
|
bgcolor="lightyellow"
|
|
|
|
label="Hangul";
|
2022-04-20 17:55:31 +02:00
|
|
|
hangulfeatures [label="ljmo|vjmo|tjmo", style="filled",fillcolor="lightgoldenrod"]
|
2022-04-19 16:20:53 +02:00
|
|
|
}
|
|
|
|
hangulfeatures->BUZZ:n;
|
|
|
|
|
|
|
|
subgraph cluster_indic {
|
|
|
|
label="Indic";
|
|
|
|
bgcolor="lightyellow"
|
|
|
|
// Preprocessing
|
2022-04-20 17:55:31 +02:00
|
|
|
loclccmpindic [label="locl†|ccmp†",style=filled,fillcolor="lightgreen"];
|
2022-04-19 16:20:53 +02:00
|
|
|
// Default attributes for nodes created after this point inside the Indic
// cluster: filled light green, the pre-processing colour used in the key.
// Nodes that must not inherit the fill (e.g. notes2) reset style explicitly.
node[style=filled,fillcolor="lightgreen"];
|
2022-04-20 17:55:31 +02:00
|
|
|
nukt [label="nukt†"];
|
|
|
|
akhn [label="akhn†"];
|
2022-04-19 16:20:53 +02:00
|
|
|
loclccmpindic->indic_reorder_1->nukt->akhn;
|
|
|
|
indic_reorder_1[label="Initial reordering", fontname="Verdana",fillcolor="lightgrey",shape=ellipse,style=filled]
|
|
|
|
|
|
|
|
// Orthographic
|
|
|
|
node[style=filled,fillcolor="lightblue"]
|
2022-04-20 17:55:31 +02:00
|
|
|
rphf [label="rphf⁵"];
|
|
|
|
// Rakar forms. The registered OpenType tag is "rkrf" (the Universal Shaping
// Engine cluster in this graph uses the same tag). The node ID is left as-is
// so the existing edge chain keeps resolving; only the displayed tag changes.
rkpf [label="rkrf†"];
|
|
|
|
pref [label="pref⁶"];
|
|
|
|
blwf [label="blwf⁷"];
|
|
|
|
abvf [label="abvf⁸"];
|
|
|
|
half [label="half⁹"];
|
|
|
|
pstf [label="pstf⁸"];
|
|
|
|
vatu [label="vatu†"];
|
|
|
|
cjct [label="cjct†"];
|
2022-04-19 16:20:53 +02:00
|
|
|
akhn ->rphf -> rkpf -> pref -> blwf -> abvf -> half -> pstf -> vatu -> cjct;
|
|
|
|
// Typographic presentation
|
|
|
|
indic_typographic[style=filled,fillcolor="lightpink",label="init|pres|abvs|blws|psts|haln"]
|
|
|
|
indic_reorder_2[label="Final reordering",fillcolor="lightgrey",fontname="Verdana", shape=ellipse,style=filled]
|
|
|
|
cjct->indic_reorder_2->indic_typographic;
|
2022-04-20 17:55:31 +02:00
|
|
|
|
|
|
|
notes2 [fontname="Verdana",shape=plaintext,style="",label=<<table border="0" cellborder="0" cellspacing="0">
|
|
|
|
<tr><td align="right">⁵ rphf is scoped to pre-base ra+halant sequences</td></tr>
|
|
|
|
<tr><td align="right">⁶ pref is scoped to the two glyphs after the base; outputs are reordered</td></tr>
|
|
|
|
<tr><td align="right">⁷ blwf is usually scoped to the whole syllable, except in Telugu and Kannada where it is post-base</td></tr>
|
|
|
|
<tr><td align="right">⁸ abvf and pstf are scoped to post-base</td></tr>
|
|
|
|
<tr><td align="right">⁹ half is scoped to pre-base</td></tr>
|
|
|
|
</table>
|
|
|
|
>];
|
|
|
|
// Layout-only edge: it is invisible and exists to rank the footnote table
// (notes2) after the last feature node of the Indic cluster.
indic_typographic -> notes2 [style=invis];
|
2022-04-19 16:20:53 +02:00
|
|
|
}
|
|
|
|
|
2022-04-20 17:55:31 +02:00
|
|
|
|
2022-04-19 16:20:53 +02:00
|
|
|
subgraph cluster_khmer {
|
|
|
|
label="Khmer";
|
|
|
|
bgcolor="lightyellow"
|
|
|
|
|
2022-04-20 17:55:31 +02:00
|
|
|
// Khmer basic shaping features, each scoped per syllable (†).
// The below-base forms tag is "blwf".
khmerbasic [style=filled,fillcolor="lightgreen",label="locl†|ccmp†|pref†|blwf†|abvf†|pstf†|cfar†"]
|
2022-04-19 16:20:53 +02:00
|
|
|
khmerother [style=filled,fillcolor="lightpink",label="pres|abvs|blws|psts"]
|
|
|
|
khmerbasic -> khmerother -> khmerclig;
|
|
|
|
khmerclig [label="clig",style=filled,fillcolor="lightpink"];
|
|
|
|
}
|
|
|
|
|
|
|
|
subgraph cluster_myanmar {
|
|
|
|
label="Myanmar";
|
|
|
|
bgcolor="lightyellow"
|
2022-04-20 17:55:31 +02:00
|
|
|
loclccmpmyanmar [label="locl†|ccmp†",style=filled,fillcolor="lightgreen"];
|
|
|
|
rphfmymr [label="rphf†",style=filled,fillcolor="lightblue"]
|
|
|
|
prefmymr [label="pref†",style=filled,fillcolor="lightblue"]
|
|
|
|
blwfmymr [label="blwf†",style=filled,fillcolor="lightblue"]
|
|
|
|
pstfmymr [label="pstf†",style=filled,fillcolor="lightblue"]
|
2022-04-19 16:20:53 +02:00
|
|
|
myanmarother [label="pres|abvs|blws|psts",style=filled,fillcolor="lightpink"];
|
|
|
|
reorder_myanmar[label="Reordering", shape=ellipse,style=filled,fontname="Verdana"]
|
|
|
|
loclccmpmyanmar -> reorder_myanmar-> rphfmymr -> prefmymr -> blwfmymr -> pstfmymr -> myanmarother;
|
|
|
|
}
|
|
|
|
|
|
|
|
subgraph cluster_use {
|
|
|
|
label="Universal Shaping Engine"
|
|
|
|
bgcolor="lightyellow"
|
2022-04-20 17:55:31 +02:00
|
|
|
use_preprocessing [style=filled, label="locl†|ccmp†|nukt†|akhn†", fillcolor="lightgreen"];
|
2022-04-19 16:20:53 +02:00
|
|
|
// Reordering
|
2022-04-20 17:55:31 +02:00
|
|
|
rphfuse [label="rphf¹⁰", style=filled, fillcolor="lightcoral"];
|
|
|
|
prefuse [label="pref¹¹", style=filled, fillcolor="lightcoral"];
|
2022-04-19 16:20:53 +02:00
|
|
|
// Orthographic
|
2022-04-20 17:55:31 +02:00
|
|
|
orthographicuse [label="rkrf†|abvf†|blwf†|half†|pstf†|vatu†|cjct†", style="filled", fillcolor="lightblue"];
|
2022-04-19 16:20:53 +02:00
|
|
|
topographicaluse [label="isol|init|medi|fina", style="filled", fillcolor="lightgoldenrod"];
|
|
|
|
typographicaluse [label="abvs|blws|haln|pres|psts", style="filled", fillcolor="lightpink"];
|
|
|
|
reorder_use[label="Reordering", shape=ellipse,style=filled,fontname="Verdana"]
|
|
|
|
use_preprocessing -> rphfuse -> prefuse->orthographicuse ->reorder_use -> topographicaluse -> typographicaluse;
|
2022-04-20 17:55:31 +02:00
|
|
|
notes3 [fontname="Verdana",shape=plaintext,label=<<table border="0" cellborder="0" cellspacing="0">
|
|
|
|
<tr><td align="left">¹⁰ Outputs are reordered as category R</td></tr>
|
|
|
|
<tr><td align="left">¹¹ Outputs are reordered to before base</td></tr>
|
|
|
|
</table>
|
|
|
|
>];
|
|
|
|
typographicaluse -> notes3 [style=invis];
|
2022-04-19 16:20:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
indic_typographic->BUZZ:n;
|
|
|
|
typographicaluse->BUZZ:n;
|
|
|
|
khmerclig -> BUZZ:n;
|
|
|
|
myanmarother -> BUZZ:n;
|
|
|
|
|
|
|
|
|
|
|
|
decision2->hangulfeatures;
|
|
|
|
decision2->loclccmpindic;
|
|
|
|
decision2->khmerbasic;
|
|
|
|
decision2->loclccmpmyanmar;
|
|
|
|
decision2->use_preprocessing;
|
|
|
|
decision2->BUZZ [label=" Hebrew, Thai,\n Lao, other"];
|
|
|
|
|
|
|
|
// Key listing which scripts are routed to the Indic shaper and which to the
// Universal Shaping Engine (USE). The label is an HTML-like table with one
// row for the Indic list and several rows carrying the USE list.
notes [fontname="Verdana",shape=box,label=<<table border="0" cellborder="0" cellspacing="0">
<tr><td align="left">
<b>Indic</b> scripts are: Bengali, Devanagari,
Gujarati, Gurmukhi, Kannada,
Malayalam, Oriya, Tamil,
Telugu
</td></tr>

<tr><td align="left">
<b>USE</b> scripts are:
Adlam, Ahom, Balinese, Batak, Bhaiksuki, Brahmi, Buginese,
Buhid, Chakma, Cham, Chorasmian, Dives Akuru, Dogra, Duployan,
</td></tr>
<tr><td align="left">
Egyptian hieroglyphs, Elymaic, Grantha, Gunjala Gondi, Hanifi Rohingya,
Hanunoo, Javanese, Kaithi, Kayah li, Kharoshthi, Khojki,
</td></tr>
<tr><td align="left">
Khudawadi, Lepcha, Limbu, Mahajani, Makasar, Mandaic, Manichaean,
Marchen, Masaram Gondi, Medefaidrin, Meetei Mayek, Miao, Modi,
</td></tr>
<tr><td align="left">
Mongolian, Multani, Nandinagari, Newa, Nko, Nyiakeng Puachue Hmong,
Old Sogdian, Pahawh Hmong, Phags Pa, Psalter Pahlavi, Rejang,
</td></tr>
<tr><td align="left">
Saurashtra, Sharada, Siddham, Sinhala, Sogdian, Soyombo, Sundanese,
Syloti Nagri, Tagalog, Tagbanwa, Tai Le, Tai Tham, Tai Viet,
</td></tr>
<tr><td align="left">
Takri, Tibetan, Tifinagh, Tirhuta, Wancho, Zanabazar square
</td></tr>

</table>>]
|
2022-04-20 17:55:31 +02:00
|
|
|
|
|
|
|
|
|
|
|
footnote[fontname="Verdana",label=<<table border="0" cellborder="0" cellspacing="0">
|
|
|
|
<tr><td align="left">† Feature is scoped to each syllable</td></tr>
|
|
|
|
<tr><td align="left">‡ All topographic features are scoped based on topographic position</td></tr>
|
|
|
|
</table>>];
|
|
|
|
notes3->footnote[style=invis];
|
|
|
|
|
2022-04-19 16:20:53 +02:00
|
|
|
}
|