[indic] Disallow vowel mark combinations that spoof other vowel marks

Fixes https://github.com/harfbuzz/harfbuzz/issues/1019

New numbers:

BENGALI: 353725 out of 354188 tests passed. 463 failed (0.130722%)
DEVANAGARI: 707261 out of 707394 tests passed. 133 failed (0.0188014%)
GUJARATI: 366353 out of 366457 tests passed. 104 failed (0.0283799%)
GURMUKHI: 60729 out of 60747 tests passed. 18 failed (0.0296311%)
KANNADA: 951300 out of 951913 tests passed. 613 failed (0.0643966%)
MALAYALAM: 1048136 out of 1048334 tests passed. 198 failed (0.0188871%)
ORIYA: 42327 out of 42329 tests passed. 2 failed (0.00472489%)
SINHALA: 271596 out of 271847 tests passed. 251 failed (0.0923313%)
TAMIL: 1091754 out of 1091754 tests passed. 0 failed (0%)
TELUGU: 970555 out of 970573 tests passed. 18 failed (0.00185457%)

Devanagari regressed because Uniscribe doesn't enforce the full set.

Tests were added using the *-vowel-letters.txt files in the tree and Noto fonts.
This commit is contained in:
Behdad Esfahbod 2018-10-03 14:44:25 +02:00
parent 1b8d5e9991
commit df32eaae42
11 changed files with 309 additions and 1 deletions

View File

@ -331,6 +331,260 @@ data_destroy_indic (void *data)
free (data);
}
static void
preprocess_text_indic (const hb_ot_shape_plan_t *plan,
hb_buffer_t *buffer,
hb_font_t *font)
{
/* UGLY UGLY UGLY business of adding dotted-circle in the middle of
* vowel-sequences that look like another vowel. Data for each script
* collected from Unicode 11 book, tables named "Vowel Letters" with
* "Use" and "Do Not Use" columns.
*
* https://github.com/harfbuzz/harfbuzz/issues/1019
*/
bool processed = false;
buffer->clear_output ();
unsigned int count = buffer->len;
switch ((unsigned) buffer->props.script)
{
case HB_SCRIPT_DEVANAGARI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur().codepoint)
{
case 0x0905u:
switch (buffer->cur(1).codepoint)
{
case 0x093Au: case 0x093Bu: case 0x093Eu: case 0x0945u:
case 0x0946u: case 0x0949u: case 0x094Au: case 0x094Bu:
case 0x094Cu: case 0x094Fu: case 0x0956u: case 0x0957u:
matched = true;
break;
}
break;
case 0x0906u:
switch (buffer->cur(1).codepoint)
{
case 0x093Au: case 0x0945u: case 0x0946u: case 0x0947u:
case 0x0948u:
matched = true;
break;
}
break;
case 0x0909u:
switch (buffer->cur(1).codepoint)
{
case 0x0941u:
matched = true;
break;
}
break;
case 0x090Fu:
switch (buffer->cur(1).codepoint)
{
case 0x0945u: case 0x0946u: case 0x0947u:
matched = true;
break;
}
break;
case 0x0930u:
if (0x094Du == buffer->cur(1).codepoint &&
buffer->idx + 2 < count &&
0x0907u == buffer->cur(2).codepoint)
{
buffer->next_glyph ();
buffer->next_glyph ();
buffer->output_glyph (0x25CCu);
}
break;
}
buffer->next_glyph ();
if (matched) { buffer->output_glyph (0x25CCu); buffer->next_glyph (); }
}
processed = true;
break;
case HB_SCRIPT_BENGALI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur().codepoint)
{
case 0x0985u: matched = 0x09BE == buffer->cur(1).codepoint; break;
case 0x098Bu: matched = 0x09C3 == buffer->cur(1).codepoint; break;
case 0x098Cu: matched = 0x09E2 == buffer->cur(1).codepoint; break;
}
buffer->next_glyph ();
if (matched) { buffer->output_glyph (0x25CCu); buffer->next_glyph (); }
}
processed = true;
break;
case HB_SCRIPT_GURMUKHI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur().codepoint)
{
case 0x0A05u:
switch (buffer->cur(1).codepoint)
{
case 0x0A3Eu: case 0x0A48u: case 0x0A4Cu:
matched = true;
break;
}
break;
case 0x0A72u:
switch (buffer->cur(1).codepoint)
{
case 0x0A3Fu: case 0x0A40u: case 0x0A47u:
matched = true;
break;
}
break;
case 0x0A73u:
switch (buffer->cur(1).codepoint)
{
case 0x0A41u: case 0x0A42u: case 0x0A4Bu:
matched = true;
break;
}
break;
}
buffer->next_glyph ();
if (matched) { buffer->output_glyph (0x25CCu); buffer->next_glyph (); }
}
processed = true;
break;
case HB_SCRIPT_GUJARATI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur().codepoint)
{
case 0x0A85u:
switch (buffer->cur(1).codepoint)
{
case 0x0ABEu: case 0x0AC5u: case 0x0AC7u: case 0x0AC8u:
case 0x0AC9u: case 0x0ACBu: case 0x0ACCu:
matched = true;
break;
}
break;
case 0x0AC5u:
matched = 0x0ABE == buffer->cur(1).codepoint; break;
break;
}
buffer->next_glyph ();
if (matched) { buffer->output_glyph (0x25CCu); buffer->next_glyph (); }
}
processed = true;
break;
case HB_SCRIPT_ORIYA:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur().codepoint)
{
case 0x0B05u:
matched = 0x0B3E == buffer->cur(1).codepoint;
break;
case 0x0B0Fu: case 0x0B13u:
matched = 0x0B57 == buffer->cur(1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) { buffer->output_glyph (0x25CCu); buffer->next_glyph (); }
}
processed = true;
break;
case HB_SCRIPT_TELUGU:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur().codepoint)
{
case 0x0C12u:
switch (buffer->cur(1).codepoint)
{
case 0x0C4Cu: case 0x0C55u:
matched = true;
break;
}
break;
case 0x0C3Fu: case 0x0C46u: case 0xC4Au:
matched = 0x0C55 == buffer->cur(1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) { buffer->output_glyph (0x25CCu); buffer->next_glyph (); }
}
processed = true;
break;
case HB_SCRIPT_KANNADA:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur().codepoint)
{
case 0x0C89u: case 0x0C8Bu:
matched = 0x0CBE == buffer->cur(1).codepoint;
break;
case 0x0C92u:
matched = 0x0CCC == buffer->cur(1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) { buffer->output_glyph (0x25CCu); buffer->next_glyph (); }
}
processed = true;
break;
case HB_SCRIPT_MALAYALAM:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur().codepoint)
{
case 0x0D07u: case 0x0D09u:
matched = 0x0D57 == buffer->cur(1).codepoint;
break;
case 0x0D0Eu:
matched = 0x0D46 == buffer->cur(1).codepoint;
break;
case 0x0D12u:
switch (buffer->cur(1).codepoint)
{
case 0x0D3Eu: case 0x0D57u:
matched = true;
break;
}
break;
}
buffer->next_glyph ();
if (matched) { buffer->output_glyph (0x25CCu); buffer->next_glyph (); }
}
processed = true;
break;
default:
break;
}
if (processed)
{
if (buffer->idx < count)
buffer->next_glyph ();
if (likely (buffer->successful))
buffer->swap_buffers ();
}
}
static indic_position_t
consonant_position_from_face (const indic_shape_plan_t *indic_plan,
const hb_codepoint_t consonant,
@ -1615,7 +1869,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
override_features_indic,
data_create_indic,
data_destroy_indic,
nullptr, /* preprocess_text */
preprocess_text_indic,
nullptr, /* postprocess_glyphs */
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
decompose_indic,

View File

@ -27,6 +27,7 @@ TESTS = \
tests/indic-script-extensions.tests \
tests/indic-special-cases.tests \
tests/indic-syllable.tests \
tests/indic-vowel-letter-spoofing.tests \
tests/khmer-mark-order.tests \
tests/khmer-misc.tests \
tests/language-tags.tests \

View File

@ -0,0 +1,53 @@
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+0904,U+0020,U+0905,U+0946:[ashortdeva=0+764|space=1+260|adeva=2+764|uni25CC=2+510|eshortvowelsigndeva=2+0]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+0906,U+0020,U+0905,U+093E:[aadeva=0+1023|space=1+260|adeva=2+764|uni25CC=2+510|aavowelsigndeva=2+259]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+0908,U+0020,U+0930,U+094D,U+0907:[iideva=0+491|space=1+260|uni25CC=2+510|rephdeva=2+0|ideva=2+491]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+090A,U+0020,U+0909,U+0941:[uudeva=0+765|space=1+260|udeva=2+548|uni25CC=2+510|uvowelsigndeva=2+0]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+090D,U+0020,U+090F,U+0945:[ecandradeva=0+553|space=1+260|edeva=2+553|uni25CC=2+510|ecandravowelsigndeva=2+0]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+090E,U+0020,U+090F,U+0946:[eshortdeva=0+553|space=1+260|edeva=2+553|uni25CC=2+510|eshortvowelsigndeva=2+0]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+0910,U+0020,U+090F,U+0947:[aideva=0+553|space=1+260|edeva=2+553|uni25CC=2+510|evowelsigndeva=2+0]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+0911,U+0020,U+0905,U+0949,U+0020,U+0906,U+0945:[ocandradeva=0+1023|space=1+260|adeva=2+764|uni25CC=2+510|ocandravowelsigndeva=2+259|space=4+260|aadeva=5+1023|uni25CC=5+510|ecandravowelsigndeva=5+0]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+0912,U+0020,U+0905,U+094A,U+0020,U+0906,U+0946:[oshortdeva=0+1023|space=1+260|adeva=2+764|uni25CC=2+510|oshortvowelsigndeva=2+259|space=4+260|aadeva=5+1023|uni25CC=5+510|eshortvowelsigndeva=5+0]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+0913,U+0020,U+0905,U+094B,U+0020,U+0906,U+0947:[odeva=0+1023|space=1+260|adeva=2+764|uni25CC=2+510|ovowelsigndeva=2+259|space=4+260|aadeva=5+1023|uni25CC=5+510|evowelsigndeva=5+0]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+0914,U+0020,U+0905,U+094C,U+0020,U+0906,U+0948:[audeva=0+1023|space=1+260|adeva=2+764|uni25CC=2+510|auvowelsigndeva=2+259|space=4+260|aadeva=5+1023|uni25CC=5+510|aivowelsigndeva=5+0]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+0972,U+0020,U+0905,U+0945:[acandradeva=0+764|space=1+260|adeva=2+764|uni25CC=2+510|ecandravowelsigndeva=2+0]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+0973,U+0020,U+0905,U+093A:[oedeva=0+764|space=1+260|adeva=2+764|uni25CC=2+510|oevowelsigndeva=2+0]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+0974,U+0020,U+0905,U+093B,U+0020,U+0906,U+093A:[ooedeva=0+1023|space=1+260|adeva=2+764|uni25CC=2+510|ooevowelsigndeva=2+259|space=4+260|aadeva=5+1023|uni25CC=5+510|oevowelsigndeva=5+0]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+0975,U+0020,U+0905,U+094F:[awdeva=0+1023|space=1+260|adeva=2+764|uni25CC=2+510|awvowelsigndeva=2+259]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+0976,U+0020,U+0905,U+0956:[uedeva=0+764|space=1+260|adeva=2+764|uni25CC=2+510|uevowelsigndeva=2@50,0+0]
../fonts/1a5face3fcbd929d228235c2f72bbd6f8eb37424.ttf::U+0977,U+0020,U+0905,U+0957:[uuedeva=0+764|space=1+260|adeva=2+764|uni25CC=2+510|uuevowelsigndeva=2@50,0+0]
../fonts/881642af1667ae30a54e58de8be904566d00508f.ttf::U+0986,U+0020,U+0985,U+09BE:[aabeng=0+1158|space=1+260|abeng=2+893|uni25CC=2+510|aavowelsignbeng=2+266]
../fonts/881642af1667ae30a54e58de8be904566d00508f.ttf::U+09E0,U+0020,U+098B,U+09C3:[rrvocalicbeng=0+853|space=1+260|rvocalicbeng=2+853|uni25CC=2+510|rvocalicvowelsignbeng=2+0]
../fonts/881642af1667ae30a54e58de8be904566d00508f.ttf::U+09E1,U+0020,U+098C,U+09E2:[llvocalicbeng=0+639|space=1+260|lvocalicbeng=2+639|uni25CC=2+510|lvocalicvowelsignbeng=2+0]
../fonts/604026ae5aaca83c49cd8416909d71ba3e1c1194.ttf::U+0A06,U+0020,U+0A05,U+0A3E:[aaguru=0+2001|space=1+532|aguru=2+1520|uni25CC=2+1044|aamatraguru=2+481]
../fonts/604026ae5aaca83c49cd8416909d71ba3e1c1194.ttf::U+0A07,U+0020,U+0A72,U+0A3F:[iguru=0+1671|space=1+532|iriguru=2+1141|imatraguru=2+530|uni25CC=2+1044]
../fonts/604026ae5aaca83c49cd8416909d71ba3e1c1194.ttf::U+0A08,U+0020,U+0A72,U+0A40:[iiguru=0+1671|space=1+532|iriguru=2+1141|uni25CC=2+1044|iimatraguru=2+530]
../fonts/604026ae5aaca83c49cd8416909d71ba3e1c1194.ttf::U+0A09,U+0020,U+0A73,U+0A41:[uguru=0+1356|space=1+532|uraguru=2+1356|uni25CC=2+1044|umatraguru=2@102,0+0]
../fonts/604026ae5aaca83c49cd8416909d71ba3e1c1194.ttf::U+0A0A,U+0020,U+0A73,U+0A42:[uuguru=0+1356|space=1+532|uraguru=2+1356|uni25CC=2+1044|uumatraguru=2@102,0+0]
../fonts/604026ae5aaca83c49cd8416909d71ba3e1c1194.ttf::U+0A0F,U+0020,U+0A72,U+0A47:[eeguru=0+1141|space=1+532|iriguru=2+1141|uni25CC=2+1044|eematraguru=2+0]
../fonts/604026ae5aaca83c49cd8416909d71ba3e1c1194.ttf::U+0A10,U+0020,U+0A05,U+0A48:[aiguru=0+1520|space=1+532|aguru=2+1520|uni25CC=2+1044|aimatraguru=2+0]
../fonts/604026ae5aaca83c49cd8416909d71ba3e1c1194.ttf::U+0A13,U+0020,U+0A73,U+0A4B:[ooguru=0+1356|space=1+532|uraguru=2+1356|uni25CC=2+1044|oomatraguru=2+0]
../fonts/604026ae5aaca83c49cd8416909d71ba3e1c1194.ttf::U+0A14,U+0020,U+0A05,U+0A4C:[auguru=0+1520|space=1+532|aguru=2+1520|uni25CC=2+1044|aumatraguru=2+0]
../fonts/738d9f3b8c2dfd03875bf35a61d28fd78faf17c8.ttf::U+0A86,U+0020,U+0A85,U+0ABE:[gid3=0+2351|gid1=1+612|gid2=2+1808|gid17=2+1044|gid10=2+543]
../fonts/738d9f3b8c2dfd03875bf35a61d28fd78faf17c8.ttf::U+0A8D,U+0020,U+0A85,U+0AC5:[gid4=0+1808|gid1=1+612|gid2=2+1808|gid17=2+1044|gid11=2+0]
../fonts/738d9f3b8c2dfd03875bf35a61d28fd78faf17c8.ttf::U+0A8F,U+0020,U+0A85,U+0AC7:[gid5=0+1808|gid1=1+612|gid2=2+1808|gid17=2+1044|gid12=2+0]
../fonts/738d9f3b8c2dfd03875bf35a61d28fd78faf17c8.ttf::U+0A90,U+0020,U+0A85,U+0AC8:[gid6=0+1808|gid1=1+612|gid2=2+1808|gid17=2+1044|gid13=2+0]
../fonts/738d9f3b8c2dfd03875bf35a61d28fd78faf17c8.ttf::U+0A91,U+0020,U+0A85,U+0AC9:[gid7=0+2351|gid1=1+612|gid2=2+1808|gid17=2+1044|gid14=2+543]
../fonts/738d9f3b8c2dfd03875bf35a61d28fd78faf17c8.ttf::U+0A93,U+0020,U+0A85,U+0ACB,U+0020,U+0A85,U+0ABE,U+0AC5:[gid8=0+2351|gid1=1+612|gid2=2+1808|gid17=2+1044|gid15=2+543|gid1=4+612|gid2=5+1808|gid17=5+1044|gid11=5+0|gid10=5+543]
../fonts/738d9f3b8c2dfd03875bf35a61d28fd78faf17c8.ttf::U+0A94,U+0020,U+0A85,U+0ACC,U+0020,U+0A85,U+0ABE,U+0AC8:[gid9=0+2351|gid1=1+612|gid2=2+1808|gid17=2+1044|gid16=2+543|gid1=4+612|gid2=5+1808|gid17=5+1044|gid13=5+0|gid10=5+543]
../fonts/738d9f3b8c2dfd03875bf35a61d28fd78faf17c8.ttf::U+0AC9,U+0020,U+0AC5,U+0ABE:[gid17=0+1044|gid14=0+543|gid1=1+612|gid17=1+1044|gid11=1+0|gid17=1+1044|gid10=1+543]
../fonts/2c25beb56d9c556622d56b0b5d02b4670c034f89.ttf::U+0B06,U+0020,U+0B05,U+0B3E:[aaorya=0+1681|space=1+881|aorya=2+1284|uni25CC=2+1044|aavowelsignorya=2+387]
../fonts/2c25beb56d9c556622d56b0b5d02b4670c034f89.ttf::U+0B10,U+0020,U+0B0F,U+0B57:[aiorya=0+1681|space=1+881|eorya=2+1315|uni25CC=2+1044|aulengthmarkorya=2+387]
../fonts/2c25beb56d9c556622d56b0b5d02b4670c034f89.ttf::U+0B14,U+0020,U+0B13,U+0B57:[auorya=0+1679|space=1+881|oorya=2+1309|uni25CC=2+1044|aulengthmarkorya=2+387]
../fonts/03e3f463c3a985bc42096620cc415342818454fb.ttf::U+0C13,U+0020,U+0C12,U+0C55:[gid3=0+1497|gid1=1+580|gid2=2+1497|gid13=2+1184|gid12=2+0]
../fonts/03e3f463c3a985bc42096620cc415342818454fb.ttf::U+0C14,U+0020,U+0C12,U+0C4C:[gid4=0+1497|gid1=1+580|gid2=2+1497|gid13=2+1184|gid11=2+634]
../fonts/03e3f463c3a985bc42096620cc415342818454fb.ttf::U+0C40,U+0020,U+0C3F,U+0C55:[gid13=0+1184|gid6=0+0|gid1=1+580|gid13=1+1184|gid5=1+0|gid13=1+1184|gid12=1+0]
../fonts/03e3f463c3a985bc42096620cc415342818454fb.ttf::U+0C47,U+0020,U+0C46,U+0C55:[gid13=0+1184|gid8=0+0|gid1=1+580|gid13=1+1184|gid7=1+0|gid13=1+1184|gid12=1+0]
../fonts/03e3f463c3a985bc42096620cc415342818454fb.ttf::U+0C4B,U+0020,U+0C4A,U+0C55:[gid13=0+1184|gid10=0+634|gid1=1+580|gid13=1+1184|gid9=1+634|gid13=1+1184|gid12=1+0]
../fonts/7d18685e1529e4ceaad5b6095dfab2f9789e5bce.ttf::U+0C8A,U+0020,U+0C89,U+0CBE:[gid3=0+3269|gid1=1+590|gid2=2+2502|gid10=2+1184|gid7=2+919]
../fonts/7d18685e1529e4ceaad5b6095dfab2f9789e5bce.ttf::U+0C94,U+0020,U+0C92,U+0CCC:[gid6=0+1596|gid1=1+590|gid5=2+1590|gid10=2+1184|gid8=2+880]
../fonts/7d18685e1529e4ceaad5b6095dfab2f9789e5bce.ttf::U+0CE0,U+0020,U+0C8B,U+0CBE:[gid9=0+3214|gid1=1+590|gid4=2+2440|gid10=2+1184|gid7=2+919]
../fonts/af85624080af5627fb050f570d148a62f04fda74.ttf::U+0D08,U+0020,U+0D07,U+0D57:[gid3=0+3574|gid1=1+632|gid2=2+2019|gid14=2+1184|gid13=2+1555]
../fonts/af85624080af5627fb050f570d148a62f04fda74.ttf::U+0D0A,U+0020,U+0D09,U+0D57:[gid5=0+2972|gid1=1+632|gid4=2+1417|gid14=2+1184|gid13=2+1555]
../fonts/af85624080af5627fb050f570d148a62f04fda74.ttf::U+0D10,U+0020,U+0D0E,U+0D46:[gid7=0+4073|gid1=1+632|gid6=2+2608|gid12=2+1465|gid14=2+1184]
../fonts/af85624080af5627fb050f570d148a62f04fda74.ttf::U+0D13,U+0020,U+0D12,U+0D3E:[gid9=0+2557|gid1=1+632|gid8=2+1524|gid14=2+1184|gid11=2+1033]
../fonts/af85624080af5627fb050f570d148a62f04fda74.ttf::U+0D14,U+0020,U+0D12,U+0D57:[gid10=0+3073|gid1=1+632|gid8=2+1524|gid14=2+1184|gid13=2+1555]