From 8e81799b32f3dfaca000fa5d42943ceed9af8d17 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Fri, 23 Feb 2018 18:35:41 -0800 Subject: [PATCH 01/15] [subset] Add hb-ot-os2-unicode-ranges.hh, a map of os2 unicode ranges. --- src/hb-ot-os2-unicode-ranges.hh | 220 ++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 src/hb-ot-os2-unicode-ranges.hh diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh new file mode 100644 index 000000000..94daad2f1 --- /dev/null +++ b/src/hb-ot-os2-unicode-ranges.hh @@ -0,0 +1,220 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
+ * + * Google Author(s): Garret Rieger + */ + + + +#ifndef HB_OT_OS2_UNICODE_RANGES_HH +#define HB_OT_OS2_UNICODE_RANGES_HH + +namespace OT { + +struct Range +{ + unsigned int start; + unsigned int end; + unsigned int bit; +}; + +static Range os2UnicodeRangesSorted[] = { + { 0x0, 0x7F, 0}, // Basic Latin + { 0x80, 0xFF, 1}, // Latin-1 Supplement + { 0x100, 0x17F, 2}, // Latin Extended-A + { 0x180, 0x24F, 3}, // Latin Extended-B + { 0x250, 0x2AF, 4}, // IPA Extensions + { 0x2B0, 0x2FF, 5}, // Spacing Modifier Letters + { 0x300, 0x36F, 6}, // Combining Diacritical Marks + { 0x370, 0x3FF, 7}, // Greek and Coptic + { 0x400, 0x4FF, 9}, // Cyrillic + { 0x500, 0x52F, 9}, // Cyrillic Supplement + { 0x530, 0x58F, 10}, // Armenian + { 0x590, 0x5FF, 11}, // Hebrew + { 0x600, 0x6FF, 13}, // Arabic + { 0x700, 0x74F, 71}, // Syriac + { 0x750, 0x77F, 13}, // Arabic Supplement + { 0x780, 0x7BF, 72}, // Thaana + { 0x7C0, 0x7FF, 14}, // NKo + { 0x900, 0x97F, 15}, // Devanagari + { 0x980, 0x9FF, 16}, // Bengali + { 0xA00, 0xA7F, 17}, // Gurmukhi + { 0xA80, 0xAFF, 18}, // Gujarati + { 0xB00, 0xB7F, 19}, // Oriya + { 0xB80, 0xBFF, 20}, // Tamil + { 0xC00, 0xC7F, 21}, // Telugu + { 0xC80, 0xCFF, 22}, // Kannada + { 0xD00, 0xD7F, 23}, // Malayalam + { 0xD80, 0xDFF, 73}, // Sinhala + { 0xE00, 0xE7F, 24}, // Thai + { 0xE80, 0xEFF, 25}, // Lao + { 0xF00, 0xFFF, 70}, // Tibetan + { 0x1000, 0x109F, 74}, // Myanmar + { 0x10A0, 0x10FF, 26}, // Georgian + { 0x1100, 0x11FF, 28}, // Hangul Jamo + { 0x1200, 0x137F, 75}, // Ethiopic + { 0x1380, 0x139F, 75}, // Ethiopic Supplement + { 0x13A0, 0x13FF, 76}, // Cherokee + { 0x1400, 0x167F, 77}, // Unified Canadian Aboriginal Syllabics + { 0x1680, 0x169F, 78}, // Ogham + { 0x16A0, 0x16FF, 79}, // Runic + { 0x1700, 0x171F, 84}, // Tagalog + { 0x1720, 0x173F, 84}, // Hanunoo + { 0x1740, 0x175F, 84}, // Buhid + { 0x1760, 0x177F, 84}, // Tagbanwa + { 0x1780, 0x17FF, 80}, // Khmer + { 0x1800, 0x18AF, 81}, // Mongolian + { 0x1900, 0x194F, 93}, // Limbu + { 
0x1950, 0x197F, 94}, // Tai Le + { 0x1980, 0x19DF, 95}, // New Tai Lue + { 0x19E0, 0x19FF, 80}, // Khmer Symbols + { 0x1A00, 0x1A1F, 96}, // Buginese + { 0x1B00, 0x1B7F, 27}, // Balinese + { 0x1B80, 0x1BBF, 112}, // Sundanese + { 0x1C00, 0x1C4F, 113}, // Lepcha + { 0x1C50, 0x1C7F, 114}, // Ol Chiki + { 0x1D00, 0x1D7F, 4}, // Phonetic Extensions + { 0x1D80, 0x1DBF, 4}, // Phonetic Extensions Supplement + { 0x1DC0, 0x1DFF, 6}, // Combining Diacritical Marks Supplement + { 0x1E00, 0x1EFF, 29}, // Latin Extended Additional + { 0x1F00, 0x1FFF, 30}, // Greek Extended + { 0x2000, 0x206F, 31}, // General Punctuation + { 0x2070, 0x209F, 32}, // Superscripts And Subscripts + { 0x20A0, 0x20CF, 33}, // Currency Symbols + { 0x20D0, 0x20FF, 34}, // Combining Diacritical Marks For Symbols + { 0x2100, 0x214F, 35}, // Letterlike Symbols + { 0x2150, 0x218F, 36}, // Number Forms + { 0x2190, 0x21FF, 37}, // Arrows + { 0x2200, 0x22FF, 38}, // Mathematical Operators + { 0x2300, 0x23FF, 39}, // Miscellaneous Technical + { 0x2400, 0x243F, 40}, // Control Pictures + { 0x2440, 0x245F, 41}, // Optical Character Recognition + { 0x2460, 0x24FF, 42}, // Enclosed Alphanumerics + { 0x2500, 0x257F, 43}, // Box Drawing + { 0x2580, 0x259F, 44}, // Block Elements + { 0x25A0, 0x25FF, 45}, // Geometric Shapes + { 0x2600, 0x26FF, 46}, // Miscellaneous Symbols + { 0x2700, 0x27BF, 47}, // Dingbats + { 0x27C0, 0x27EF, 38}, // Miscellaneous Mathematical Symbols-A + { 0x27F0, 0x27FF, 37}, // Supplemental Arrows-A + { 0x2800, 0x28FF, 82}, // Braille Patterns + { 0x2900, 0x297F, 37}, // Supplemental Arrows-B + { 0x2980, 0x29FF, 38}, // Miscellaneous Mathematical Symbols-B + { 0x2A00, 0x2AFF, 38}, // Supplemental Mathematical Operators + { 0x2B00, 0x2BFF, 37}, // Miscellaneous Symbols and Arrows + { 0x2C00, 0x2C5F, 97}, // Glagolitic + { 0x2C60, 0x2C7F, 29}, // Latin Extended-C + { 0x2C80, 0x2CFF, 8}, // Coptic + { 0x2D00, 0x2D2F, 26}, // Georgian Supplement + { 0x2D30, 0x2D7F, 98}, // Tifinagh + { 0x2D80, 
0x2DDF, 75}, // Ethiopic Extended + { 0x2DE0, 0x2DFF, 9}, // Cyrillic Extended-A + { 0x2E00, 0x2E7F, 31}, // Supplemental Punctuation + { 0x2E80, 0x2EFF, 59}, // CJK Radicals Supplement + { 0x2F00, 0x2FDF, 59}, // Kangxi Radicals + { 0x2FF0, 0x2FFF, 59}, // Ideographic Description Characters + { 0x3000, 0x303F, 48}, // CJK Symbols And Punctuation + { 0x3040, 0x309F, 49}, // Hiragana + { 0x30A0, 0x30FF, 50}, // Katakana + { 0x3100, 0x312F, 51}, // Bopomofo + { 0x3130, 0x318F, 52}, // Hangul Compatibility Jamo + { 0x3190, 0x319F, 59}, // Kanbun + { 0x31A0, 0x31BF, 51}, // Bopomofo Extended + { 0x31C0, 0x31EF, 61}, // CJK Strokes + { 0x31F0, 0x31FF, 50}, // Katakana Phonetic Extensions + { 0x3200, 0x32FF, 54}, // Enclosed CJK Letters And Months + { 0x3300, 0x33FF, 55}, // CJK Compatibility + { 0x3400, 0x4DBF, 59}, // CJK Unified Ideographs Extension A + { 0x4DC0, 0x4DFF, 99}, // Yijing Hexagram Symbols + { 0x4E00, 0x9FFF, 59}, // CJK Unified Ideographs + { 0xA000, 0xA48F, 83}, // Yi Syllables + { 0xA490, 0xA4CF, 83}, // Yi Radicals + { 0xA500, 0xA63F, 12}, // Vai + { 0xA640, 0xA69F, 9}, // Cyrillic Extended-B + { 0xA700, 0xA71F, 5}, // Modifier Tone Letters + { 0xA720, 0xA7FF, 29}, // Latin Extended-D + { 0xA800, 0xA82F, 100}, // Syloti Nagri + { 0xA840, 0xA87F, 53}, // Phags-pa + { 0xA880, 0xA8DF, 115}, // Saurashtra + { 0xA900, 0xA92F, 116}, // Kayah Li + { 0xA930, 0xA95F, 117}, // Rejang + { 0xAA00, 0xAA5F, 118}, // Cham + { 0xAC00, 0xD7AF, 56}, // Hangul Syllables + { 0xD800, 0xDFFF, 57}, // Non-Plane 0 * + { 0xE000, 0xF8FF, 60}, // Private Use Area (plane 0) + { 0xF900, 0xFAFF, 61}, // CJK Compatibility Ideographs + { 0xFB00, 0xFB4F, 62}, // Alphabetic Presentation Forms + { 0xFB50, 0xFDFF, 63}, // Arabic Presentation Forms-A + { 0xFE00, 0xFE0F, 91}, // Variation Selectors + { 0xFE10, 0xFE1F, 65}, // Vertical Forms + { 0xFE20, 0xFE2F, 64}, // Combining Half Marks + { 0xFE30, 0xFE4F, 65}, // CJK Compatibility Forms + { 0xFE50, 0xFE6F, 66}, // Small Form Variants + 
{ 0xFE70, 0xFEFF, 67}, // Arabic Presentation Forms-B + { 0xFF00, 0xFFEF, 68}, // Halfwidth And Fullwidth Forms + { 0xFFF0, 0xFFFF, 69}, // Specials + { 0x10000, 0x1007F, 101}, // Linear B Syllabary + { 0x10080, 0x100FF, 101}, // Linear B Ideograms + { 0x10100, 0x1013F, 101}, // Aegean Numbers + { 0x10140, 0x1018F, 102}, // Ancient Greek Numbers + { 0x10190, 0x101CF, 119}, // Ancient Symbols + { 0x101D0, 0x101FF, 120}, // Phaistos Disc + { 0x10280, 0x1029F, 121}, // Lycian + { 0x102A0, 0x102DF, 121}, // Carian + { 0x10300, 0x1032F, 85}, // Old Italic + { 0x10330, 0x1034F, 86}, // Gothic + { 0x10380, 0x1039F, 103}, // Ugaritic + { 0x103A0, 0x103DF, 104}, // Old Persian + { 0x10400, 0x1044F, 87}, // Deseret + { 0x10450, 0x1047F, 105}, // Shavian + { 0x10480, 0x104AF, 106}, // Osmanya + { 0x10800, 0x1083F, 107}, // Cypriot Syllabary + { 0x10900, 0x1091F, 58}, // Phoenician + { 0x10920, 0x1093F, 121}, // Lydian + { 0x10A00, 0x10A5F, 108}, // Kharoshthi + { 0x12000, 0x123FF, 110}, // Cuneiform + { 0x12400, 0x1247F, 110}, // Cuneiform Numbers and Punctuation + { 0x1D000, 0x1D0FF, 88}, // Byzantine Musical Symbols + { 0x1D100, 0x1D1FF, 88}, // Musical Symbols + { 0x1D200, 0x1D24F, 88}, // Ancient Greek Musical Notation + { 0x1D300, 0x1D35F, 109}, // Tai Xuan Jing Symbols + { 0x1D360, 0x1D37F, 111}, // Counting Rod Numerals + { 0x1D400, 0x1D7FF, 89}, // Mathematical Alphanumeric Symbols + { 0x1F000, 0x1F02F, 122}, // Mahjong Tiles + { 0x1F030, 0x1F09F, 122}, // Domino Tiles + { 0x20000, 0x2A6DF, 59}, // CJK Unified Ideographs Extension B + { 0x2F800, 0x2FA1F, 61}, // CJK Compatibility Ideographs Supplement + { 0xE0000, 0xE007F, 92}, // Tags + { 0xE0100, 0xE01EF, 91}, // Variation Selectors Supplement + { 0xF0000, 0xFFFFD, 90}, // Private Use (plane 15) + {0x100000, 0x10FFFD, 90}, // Private Use (plane 16) +}; + +static unsigned int hb_get_unicode_range_bit (hb_codepoint_t cp) +{ + +} + +} /* namespace OT */ + +#endif /* HB_OT_OS2_UNICODE_RANGES_HH */ From 
a570142d0c6d819feeeeb9e209fc90a33c1c2b48 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 26 Feb 2018 15:20:23 -0800 Subject: [PATCH 02/15] [subset] Move hb-ot-os2-unicode-ranges into a cc file with helper methods and tests. --- src/Makefile.am | 9 +- src/Makefile.sources | 2 + src/hb-ot-os2-unicode-ranges.cc | 279 ++++++++++++++++++++++++++++++++ src/hb-ot-os2-unicode-ranges.hh | 189 +--------------------- 4 files changed, 292 insertions(+), 187 deletions(-) create mode 100644 src/hb-ot-os2-unicode-ranges.cc diff --git a/src/Makefile.am b/src/Makefile.am index 3f98e1db6..6044366da 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -381,12 +381,17 @@ dump_use_data_SOURCES = dump-use-data.cc hb-ot-shape-complex-use-table.cc dump_use_data_CPPFLAGS = $(HBCFLAGS) dump_use_data_LDADD = libharfbuzz.la $(HBLIBS) -check_PROGRAMS += test-ot-tag -TESTS += test-ot-tag +check_PROGRAMS += test-ot-tag test-unicode-ranges +TESTS += test-ot-tag test-unicode-ranges + test_ot_tag_SOURCES = hb-ot-tag.cc test_ot_tag_CPPFLAGS = $(HBCFLAGS) -DMAIN test_ot_tag_LDADD = libharfbuzz.la $(HBLIBS) +test_unicode_ranges_SOURCES = hb-ot-os2-unicode-ranges.cc +test_unicode_ranges_CPPFLAGS = $(HBCFLAGS) -DMAIN +test_unicode_ranges_LDADD = libharfbuzz.la $(HBLIBS) + TESTS_ENVIRONMENT = \ srcdir="$(srcdir)" \ MAKE="$(MAKE) $(AM_MAKEFLAGS)" \ diff --git a/src/Makefile.sources b/src/Makefile.sources index 7883412ca..c20716434 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -28,6 +28,8 @@ HB_BASE_sources = \ hb-ot-maxp-table.hh \ hb-ot-name-table.hh \ hb-ot-os2-table.hh \ + hb-ot-os2-unicode-ranges.hh \ + hb-ot-os2-unicode-ranges.cc \ hb-ot-post-macroman.hh \ hb-ot-post-table.hh \ hb-ot-tag.cc \ diff --git a/src/hb-ot-os2-unicode-ranges.cc b/src/hb-ot-os2-unicode-ranges.cc new file mode 100644 index 000000000..f2c063976 --- /dev/null +++ b/src/hb-ot-os2-unicode-ranges.cc @@ -0,0 +1,279 @@ +/* + * Copyright © 2018 Google, Inc. 
+ * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
+ * + * Google Author(s): Garret Rieger + */ + +#include "hb-private.hh" + +#include "hb-dsalgs.hh" + +struct Range { + unsigned int start; + unsigned int end; + unsigned int bit; +}; + +#define NUM_RANGES 169 +static Range os2UnicodeRangesSorted[NUM_RANGES] = { + { 0x0, 0x7F, 0}, // Basic Latin + { 0x80, 0xFF, 1}, // Latin-1 Supplement + { 0x100, 0x17F, 2}, // Latin Extended-A + { 0x180, 0x24F, 3}, // Latin Extended-B + { 0x250, 0x2AF, 4}, // IPA Extensions + { 0x2B0, 0x2FF, 5}, // Spacing Modifier Letters + { 0x300, 0x36F, 6}, // Combining Diacritical Marks + { 0x370, 0x3FF, 7}, // Greek and Coptic + { 0x400, 0x4FF, 9}, // Cyrillic + { 0x500, 0x52F, 9}, // Cyrillic Supplement + { 0x530, 0x58F, 10}, // Armenian + { 0x590, 0x5FF, 11}, // Hebrew + { 0x600, 0x6FF, 13}, // Arabic + { 0x700, 0x74F, 71}, // Syriac + { 0x750, 0x77F, 13}, // Arabic Supplement + { 0x780, 0x7BF, 72}, // Thaana + { 0x7C0, 0x7FF, 14}, // NKo + { 0x900, 0x97F, 15}, // Devanagari + { 0x980, 0x9FF, 16}, // Bengali + { 0xA00, 0xA7F, 17}, // Gurmukhi + { 0xA80, 0xAFF, 18}, // Gujarati + { 0xB00, 0xB7F, 19}, // Oriya + { 0xB80, 0xBFF, 20}, // Tamil + { 0xC00, 0xC7F, 21}, // Telugu + { 0xC80, 0xCFF, 22}, // Kannada + { 0xD00, 0xD7F, 23}, // Malayalam + { 0xD80, 0xDFF, 73}, // Sinhala + { 0xE00, 0xE7F, 24}, // Thai + { 0xE80, 0xEFF, 25}, // Lao + { 0xF00, 0xFFF, 70}, // Tibetan + { 0x1000, 0x109F, 74}, // Myanmar + { 0x10A0, 0x10FF, 26}, // Georgian + { 0x1100, 0x11FF, 28}, // Hangul Jamo + { 0x1200, 0x137F, 75}, // Ethiopic + { 0x1380, 0x139F, 75}, // Ethiopic Supplement + { 0x13A0, 0x13FF, 76}, // Cherokee + { 0x1400, 0x167F, 77}, // Unified Canadian Aboriginal Syllabics + { 0x1680, 0x169F, 78}, // Ogham + { 0x16A0, 0x16FF, 79}, // Runic + { 0x1700, 0x171F, 84}, // Tagalog + { 0x1720, 0x173F, 84}, // Hanunoo + { 0x1740, 0x175F, 84}, // Buhid + { 0x1760, 0x177F, 84}, // Tagbanwa + { 0x1780, 0x17FF, 80}, // Khmer + { 0x1800, 0x18AF, 81}, // Mongolian + { 0x1900, 0x194F, 93}, // Limbu + { 0x1950, 
0x197F, 94}, // Tai Le + { 0x1980, 0x19DF, 95}, // New Tai Lue + { 0x19E0, 0x19FF, 80}, // Khmer Symbols + { 0x1A00, 0x1A1F, 96}, // Buginese + { 0x1B00, 0x1B7F, 27}, // Balinese + { 0x1B80, 0x1BBF, 112}, // Sundanese + { 0x1C00, 0x1C4F, 113}, // Lepcha + { 0x1C50, 0x1C7F, 114}, // Ol Chiki + { 0x1D00, 0x1D7F, 4}, // Phonetic Extensions + { 0x1D80, 0x1DBF, 4}, // Phonetic Extensions Supplement + { 0x1DC0, 0x1DFF, 6}, // Combining Diacritical Marks Supplement + { 0x1E00, 0x1EFF, 29}, // Latin Extended Additional + { 0x1F00, 0x1FFF, 30}, // Greek Extended + { 0x2000, 0x206F, 31}, // General Punctuation + { 0x2070, 0x209F, 32}, // Superscripts And Subscripts + { 0x20A0, 0x20CF, 33}, // Currency Symbols + { 0x20D0, 0x20FF, 34}, // Combining Diacritical Marks For Symbols + { 0x2100, 0x214F, 35}, // Letterlike Symbols + { 0x2150, 0x218F, 36}, // Number Forms + { 0x2190, 0x21FF, 37}, // Arrows + { 0x2200, 0x22FF, 38}, // Mathematical Operators + { 0x2300, 0x23FF, 39}, // Miscellaneous Technical + { 0x2400, 0x243F, 40}, // Control Pictures + { 0x2440, 0x245F, 41}, // Optical Character Recognition + { 0x2460, 0x24FF, 42}, // Enclosed Alphanumerics + { 0x2500, 0x257F, 43}, // Box Drawing + { 0x2580, 0x259F, 44}, // Block Elements + { 0x25A0, 0x25FF, 45}, // Geometric Shapes + { 0x2600, 0x26FF, 46}, // Miscellaneous Symbols + { 0x2700, 0x27BF, 47}, // Dingbats + { 0x27C0, 0x27EF, 38}, // Miscellaneous Mathematical Symbols-A + { 0x27F0, 0x27FF, 37}, // Supplemental Arrows-A + { 0x2800, 0x28FF, 82}, // Braille Patterns + { 0x2900, 0x297F, 37}, // Supplemental Arrows-B + { 0x2980, 0x29FF, 38}, // Miscellaneous Mathematical Symbols-B + { 0x2A00, 0x2AFF, 38}, // Supplemental Mathematical Operators + { 0x2B00, 0x2BFF, 37}, // Miscellaneous Symbols and Arrows + { 0x2C00, 0x2C5F, 97}, // Glagolitic + { 0x2C60, 0x2C7F, 29}, // Latin Extended-C + { 0x2C80, 0x2CFF, 8}, // Coptic + { 0x2D00, 0x2D2F, 26}, // Georgian Supplement + { 0x2D30, 0x2D7F, 98}, // Tifinagh + { 0x2D80, 0x2DDF, 75}, 
// Ethiopic Extended + { 0x2DE0, 0x2DFF, 9}, // Cyrillic Extended-A + { 0x2E00, 0x2E7F, 31}, // Supplemental Punctuation + { 0x2E80, 0x2EFF, 59}, // CJK Radicals Supplement + { 0x2F00, 0x2FDF, 59}, // Kangxi Radicals + { 0x2FF0, 0x2FFF, 59}, // Ideographic Description Characters + { 0x3000, 0x303F, 48}, // CJK Symbols And Punctuation + { 0x3040, 0x309F, 49}, // Hiragana + { 0x30A0, 0x30FF, 50}, // Katakana + { 0x3100, 0x312F, 51}, // Bopomofo + { 0x3130, 0x318F, 52}, // Hangul Compatibility Jamo + { 0x3190, 0x319F, 59}, // Kanbun + { 0x31A0, 0x31BF, 51}, // Bopomofo Extended + { 0x31C0, 0x31EF, 61}, // CJK Strokes + { 0x31F0, 0x31FF, 50}, // Katakana Phonetic Extensions + { 0x3200, 0x32FF, 54}, // Enclosed CJK Letters And Months + { 0x3300, 0x33FF, 55}, // CJK Compatibility + { 0x3400, 0x4DBF, 59}, // CJK Unified Ideographs Extension A + { 0x4DC0, 0x4DFF, 99}, // Yijing Hexagram Symbols + { 0x4E00, 0x9FFF, 59}, // CJK Unified Ideographs + { 0xA000, 0xA48F, 83}, // Yi Syllables + { 0xA490, 0xA4CF, 83}, // Yi Radicals + { 0xA500, 0xA63F, 12}, // Vai + { 0xA640, 0xA69F, 9}, // Cyrillic Extended-B + { 0xA700, 0xA71F, 5}, // Modifier Tone Letters + { 0xA720, 0xA7FF, 29}, // Latin Extended-D + { 0xA800, 0xA82F, 100}, // Syloti Nagri + { 0xA840, 0xA87F, 53}, // Phags-pa + { 0xA880, 0xA8DF, 115}, // Saurashtra + { 0xA900, 0xA92F, 116}, // Kayah Li + { 0xA930, 0xA95F, 117}, // Rejang + { 0xAA00, 0xAA5F, 118}, // Cham + { 0xAC00, 0xD7AF, 56}, // Hangul Syllables + { 0xD800, 0xDFFF, 57}, // Non-Plane 0 * + { 0xE000, 0xF8FF, 60}, // Private Use Area (plane 0) + { 0xF900, 0xFAFF, 61}, // CJK Compatibility Ideographs + { 0xFB00, 0xFB4F, 62}, // Alphabetic Presentation Forms + { 0xFB50, 0xFDFF, 63}, // Arabic Presentation Forms-A + { 0xFE00, 0xFE0F, 91}, // Variation Selectors + { 0xFE10, 0xFE1F, 65}, // Vertical Forms + { 0xFE20, 0xFE2F, 64}, // Combining Half Marks + { 0xFE30, 0xFE4F, 65}, // CJK Compatibility Forms + { 0xFE50, 0xFE6F, 66}, // Small Form Variants + { 0xFE70, 
0xFEFF, 67}, // Arabic Presentation Forms-B + { 0xFF00, 0xFFEF, 68}, // Halfwidth And Fullwidth Forms + { 0xFFF0, 0xFFFF, 69}, // Specials + { 0x10000, 0x1007F, 101}, // Linear B Syllabary + { 0x10080, 0x100FF, 101}, // Linear B Ideograms + { 0x10100, 0x1013F, 101}, // Aegean Numbers + { 0x10140, 0x1018F, 102}, // Ancient Greek Numbers + { 0x10190, 0x101CF, 119}, // Ancient Symbols + { 0x101D0, 0x101FF, 120}, // Phaistos Disc + { 0x10280, 0x1029F, 121}, // Lycian + { 0x102A0, 0x102DF, 121}, // Carian + { 0x10300, 0x1032F, 85}, // Old Italic + { 0x10330, 0x1034F, 86}, // Gothic + { 0x10380, 0x1039F, 103}, // Ugaritic + { 0x103A0, 0x103DF, 104}, // Old Persian + { 0x10400, 0x1044F, 87}, // Deseret + { 0x10450, 0x1047F, 105}, // Shavian + { 0x10480, 0x104AF, 106}, // Osmanya + { 0x10800, 0x1083F, 107}, // Cypriot Syllabary + { 0x10900, 0x1091F, 58}, // Phoenician + { 0x10920, 0x1093F, 121}, // Lydian + { 0x10A00, 0x10A5F, 108}, // Kharoshthi + { 0x12000, 0x123FF, 110}, // Cuneiform + { 0x12400, 0x1247F, 110}, // Cuneiform Numbers and Punctuation + { 0x1D000, 0x1D0FF, 88}, // Byzantine Musical Symbols + { 0x1D100, 0x1D1FF, 88}, // Musical Symbols + { 0x1D200, 0x1D24F, 88}, // Ancient Greek Musical Notation + { 0x1D300, 0x1D35F, 109}, // Tai Xuan Jing Symbols + { 0x1D360, 0x1D37F, 111}, // Counting Rod Numerals + { 0x1D400, 0x1D7FF, 89}, // Mathematical Alphanumeric Symbols + { 0x1F000, 0x1F02F, 122}, // Mahjong Tiles + { 0x1F030, 0x1F09F, 122}, // Domino Tiles + { 0x20000, 0x2A6DF, 59}, // CJK Unified Ideographs Extension B + { 0x2F800, 0x2FA1F, 61}, // CJK Compatibility Ideographs Supplement + { 0xE0000, 0xE007F, 92}, // Tags + { 0xE0100, 0xE01EF, 91}, // Variation Selectors Supplement + { 0xF0000, 0xFFFFD, 90}, // Private Use (plane 15) + {0x100000, 0x10FFFD, 90}, // Private Use (plane 16) +}; + +static int +_compare_range (const void *_key, const void *_item, void *_arg) +{ + hb_codepoint_t *cp = (hb_codepoint_t *) _key; + Range *range = (Range *) _item; + + if (*cp 
< range->start) + return -1; + else if (*cp <= range->end) + return 0; + else + return 1; +} + +/** + * hb_get_unicode_range_bit: + * Returns the bit to be set in os/2 ulUnicodeRange for a given codepoint. + **/ +static int +hb_get_unicode_range_bit (hb_codepoint_t cp) +{ + Range *range = (Range*) hb_bsearch_r (&cp, os2UnicodeRangesSorted, NUM_RANGES, sizeof(Range), + _compare_range, nullptr); + if (range != NULL) + return range->bit; + return -1; +} + + +#ifdef MAIN + +void +test (hb_codepoint_t cp, int bit) +{ + if (hb_get_unicode_range_bit (cp) != bit) + { + fprintf (stderr, "got incorrect bit (%d) for cp 0x%X. Should have been %d.", + hb_get_unicode_range_bit (cp), + cp, + bit); + abort(); + } +} + +void +test_get_unicode_range_bit (void) +{ + test (0x0000, 0); + test (0x0042, 0); + test (0x007F, 0); + test (0x0080, 1); + + test (0x30A0, 50); + test (0x30B1, 50); + test (0x30FF, 50); + + test (0x10FFFD, 90); + + test (0x30000, -1); + test (0x110000, -1); +} + +int +main (void) +{ + test_get_unicode_range_bit (); + return 0; +} + +#endif diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh index 94daad2f1..5f36b20ce 100644 --- a/src/hb-ot-os2-unicode-ranges.hh +++ b/src/hb-ot-os2-unicode-ranges.hh @@ -24,196 +24,15 @@ * Google Author(s): Garret Rieger */ - - #ifndef HB_OT_OS2_UNICODE_RANGES_HH #define HB_OT_OS2_UNICODE_RANGES_HH +#include "hb-private.hh" + namespace OT { -struct Range -{ - unsigned int start; - unsigned int end; - unsigned int bit; -}; - -static Range os2UnicodeRangesSorted[] = { - { 0x0, 0x7F, 0}, // Basic Latin - { 0x80, 0xFF, 1}, // Latin-1 Supplement - { 0x100, 0x17F, 2}, // Latin Extended-A - { 0x180, 0x24F, 3}, // Latin Extended-B - { 0x250, 0x2AF, 4}, // IPA Extensions - { 0x2B0, 0x2FF, 5}, // Spacing Modifier Letters - { 0x300, 0x36F, 6}, // Combining Diacritical Marks - { 0x370, 0x3FF, 7}, // Greek and Coptic - { 0x400, 0x4FF, 9}, // Cyrillic - { 0x500, 0x52F, 9}, // Cyrillic Supplement - { 0x530, 0x58F, 10}, 
// Armenian - { 0x590, 0x5FF, 11}, // Hebrew - { 0x600, 0x6FF, 13}, // Arabic - { 0x700, 0x74F, 71}, // Syriac - { 0x750, 0x77F, 13}, // Arabic Supplement - { 0x780, 0x7BF, 72}, // Thaana - { 0x7C0, 0x7FF, 14}, // NKo - { 0x900, 0x97F, 15}, // Devanagari - { 0x980, 0x9FF, 16}, // Bengali - { 0xA00, 0xA7F, 17}, // Gurmukhi - { 0xA80, 0xAFF, 18}, // Gujarati - { 0xB00, 0xB7F, 19}, // Oriya - { 0xB80, 0xBFF, 20}, // Tamil - { 0xC00, 0xC7F, 21}, // Telugu - { 0xC80, 0xCFF, 22}, // Kannada - { 0xD00, 0xD7F, 23}, // Malayalam - { 0xD80, 0xDFF, 73}, // Sinhala - { 0xE00, 0xE7F, 24}, // Thai - { 0xE80, 0xEFF, 25}, // Lao - { 0xF00, 0xFFF, 70}, // Tibetan - { 0x1000, 0x109F, 74}, // Myanmar - { 0x10A0, 0x10FF, 26}, // Georgian - { 0x1100, 0x11FF, 28}, // Hangul Jamo - { 0x1200, 0x137F, 75}, // Ethiopic - { 0x1380, 0x139F, 75}, // Ethiopic Supplement - { 0x13A0, 0x13FF, 76}, // Cherokee - { 0x1400, 0x167F, 77}, // Unified Canadian Aboriginal Syllabics - { 0x1680, 0x169F, 78}, // Ogham - { 0x16A0, 0x16FF, 79}, // Runic - { 0x1700, 0x171F, 84}, // Tagalog - { 0x1720, 0x173F, 84}, // Hanunoo - { 0x1740, 0x175F, 84}, // Buhid - { 0x1760, 0x177F, 84}, // Tagbanwa - { 0x1780, 0x17FF, 80}, // Khmer - { 0x1800, 0x18AF, 81}, // Mongolian - { 0x1900, 0x194F, 93}, // Limbu - { 0x1950, 0x197F, 94}, // Tai Le - { 0x1980, 0x19DF, 95}, // New Tai Lue - { 0x19E0, 0x19FF, 80}, // Khmer Symbols - { 0x1A00, 0x1A1F, 96}, // Buginese - { 0x1B00, 0x1B7F, 27}, // Balinese - { 0x1B80, 0x1BBF, 112}, // Sundanese - { 0x1C00, 0x1C4F, 113}, // Lepcha - { 0x1C50, 0x1C7F, 114}, // Ol Chiki - { 0x1D00, 0x1D7F, 4}, // Phonetic Extensions - { 0x1D80, 0x1DBF, 4}, // Phonetic Extensions Supplement - { 0x1DC0, 0x1DFF, 6}, // Combining Diacritical Marks Supplement - { 0x1E00, 0x1EFF, 29}, // Latin Extended Additional - { 0x1F00, 0x1FFF, 30}, // Greek Extended - { 0x2000, 0x206F, 31}, // General Punctuation - { 0x2070, 0x209F, 32}, // Superscripts And Subscripts - { 0x20A0, 0x20CF, 33}, // Currency Symbols - { 
0x20D0, 0x20FF, 34}, // Combining Diacritical Marks For Symbols - { 0x2100, 0x214F, 35}, // Letterlike Symbols - { 0x2150, 0x218F, 36}, // Number Forms - { 0x2190, 0x21FF, 37}, // Arrows - { 0x2200, 0x22FF, 38}, // Mathematical Operators - { 0x2300, 0x23FF, 39}, // Miscellaneous Technical - { 0x2400, 0x243F, 40}, // Control Pictures - { 0x2440, 0x245F, 41}, // Optical Character Recognition - { 0x2460, 0x24FF, 42}, // Enclosed Alphanumerics - { 0x2500, 0x257F, 43}, // Box Drawing - { 0x2580, 0x259F, 44}, // Block Elements - { 0x25A0, 0x25FF, 45}, // Geometric Shapes - { 0x2600, 0x26FF, 46}, // Miscellaneous Symbols - { 0x2700, 0x27BF, 47}, // Dingbats - { 0x27C0, 0x27EF, 38}, // Miscellaneous Mathematical Symbols-A - { 0x27F0, 0x27FF, 37}, // Supplemental Arrows-A - { 0x2800, 0x28FF, 82}, // Braille Patterns - { 0x2900, 0x297F, 37}, // Supplemental Arrows-B - { 0x2980, 0x29FF, 38}, // Miscellaneous Mathematical Symbols-B - { 0x2A00, 0x2AFF, 38}, // Supplemental Mathematical Operators - { 0x2B00, 0x2BFF, 37}, // Miscellaneous Symbols and Arrows - { 0x2C00, 0x2C5F, 97}, // Glagolitic - { 0x2C60, 0x2C7F, 29}, // Latin Extended-C - { 0x2C80, 0x2CFF, 8}, // Coptic - { 0x2D00, 0x2D2F, 26}, // Georgian Supplement - { 0x2D30, 0x2D7F, 98}, // Tifinagh - { 0x2D80, 0x2DDF, 75}, // Ethiopic Extended - { 0x2DE0, 0x2DFF, 9}, // Cyrillic Extended-A - { 0x2E00, 0x2E7F, 31}, // Supplemental Punctuation - { 0x2E80, 0x2EFF, 59}, // CJK Radicals Supplement - { 0x2F00, 0x2FDF, 59}, // Kangxi Radicals - { 0x2FF0, 0x2FFF, 59}, // Ideographic Description Characters - { 0x3000, 0x303F, 48}, // CJK Symbols And Punctuation - { 0x3040, 0x309F, 49}, // Hiragana - { 0x30A0, 0x30FF, 50}, // Katakana - { 0x3100, 0x312F, 51}, // Bopomofo - { 0x3130, 0x318F, 52}, // Hangul Compatibility Jamo - { 0x3190, 0x319F, 59}, // Kanbun - { 0x31A0, 0x31BF, 51}, // Bopomofo Extended - { 0x31C0, 0x31EF, 61}, // CJK Strokes - { 0x31F0, 0x31FF, 50}, // Katakana Phonetic Extensions - { 0x3200, 0x32FF, 54}, // 
Enclosed CJK Letters And Months - { 0x3300, 0x33FF, 55}, // CJK Compatibility - { 0x3400, 0x4DBF, 59}, // CJK Unified Ideographs Extension A - { 0x4DC0, 0x4DFF, 99}, // Yijing Hexagram Symbols - { 0x4E00, 0x9FFF, 59}, // CJK Unified Ideographs - { 0xA000, 0xA48F, 83}, // Yi Syllables - { 0xA490, 0xA4CF, 83}, // Yi Radicals - { 0xA500, 0xA63F, 12}, // Vai - { 0xA640, 0xA69F, 9}, // Cyrillic Extended-B - { 0xA700, 0xA71F, 5}, // Modifier Tone Letters - { 0xA720, 0xA7FF, 29}, // Latin Extended-D - { 0xA800, 0xA82F, 100}, // Syloti Nagri - { 0xA840, 0xA87F, 53}, // Phags-pa - { 0xA880, 0xA8DF, 115}, // Saurashtra - { 0xA900, 0xA92F, 116}, // Kayah Li - { 0xA930, 0xA95F, 117}, // Rejang - { 0xAA00, 0xAA5F, 118}, // Cham - { 0xAC00, 0xD7AF, 56}, // Hangul Syllables - { 0xD800, 0xDFFF, 57}, // Non-Plane 0 * - { 0xE000, 0xF8FF, 60}, // Private Use Area (plane 0) - { 0xF900, 0xFAFF, 61}, // CJK Compatibility Ideographs - { 0xFB00, 0xFB4F, 62}, // Alphabetic Presentation Forms - { 0xFB50, 0xFDFF, 63}, // Arabic Presentation Forms-A - { 0xFE00, 0xFE0F, 91}, // Variation Selectors - { 0xFE10, 0xFE1F, 65}, // Vertical Forms - { 0xFE20, 0xFE2F, 64}, // Combining Half Marks - { 0xFE30, 0xFE4F, 65}, // CJK Compatibility Forms - { 0xFE50, 0xFE6F, 66}, // Small Form Variants - { 0xFE70, 0xFEFF, 67}, // Arabic Presentation Forms-B - { 0xFF00, 0xFFEF, 68}, // Halfwidth And Fullwidth Forms - { 0xFFF0, 0xFFFF, 69}, // Specials - { 0x10000, 0x1007F, 101}, // Linear B Syllabary - { 0x10080, 0x100FF, 101}, // Linear B Ideograms - { 0x10100, 0x1013F, 101}, // Aegean Numbers - { 0x10140, 0x1018F, 102}, // Ancient Greek Numbers - { 0x10190, 0x101CF, 119}, // Ancient Symbols - { 0x101D0, 0x101FF, 120}, // Phaistos Disc - { 0x10280, 0x1029F, 121}, // Lycian - { 0x102A0, 0x102DF, 121}, // Carian - { 0x10300, 0x1032F, 85}, // Old Italic - { 0x10330, 0x1034F, 86}, // Gothic - { 0x10380, 0x1039F, 103}, // Ugaritic - { 0x103A0, 0x103DF, 104}, // Old Persian - { 0x10400, 0x1044F, 87}, // Deseret - { 
0x10450, 0x1047F, 105}, // Shavian - { 0x10480, 0x104AF, 106}, // Osmanya - { 0x10800, 0x1083F, 107}, // Cypriot Syllabary - { 0x10900, 0x1091F, 58}, // Phoenician - { 0x10920, 0x1093F, 121}, // Lydian - { 0x10A00, 0x10A5F, 108}, // Kharoshthi - { 0x12000, 0x123FF, 110}, // Cuneiform - { 0x12400, 0x1247F, 110}, // Cuneiform Numbers and Punctuation - { 0x1D000, 0x1D0FF, 88}, // Byzantine Musical Symbols - { 0x1D100, 0x1D1FF, 88}, // Musical Symbols - { 0x1D200, 0x1D24F, 88}, // Ancient Greek Musical Notation - { 0x1D300, 0x1D35F, 109}, // Tai Xuan Jing Symbols - { 0x1D360, 0x1D37F, 111}, // Counting Rod Numerals - { 0x1D400, 0x1D7FF, 89}, // Mathematical Alphanumeric Symbols - { 0x1F000, 0x1F02F, 122}, // Mahjong Tiles - { 0x1F030, 0x1F09F, 122}, // Domino Tiles - { 0x20000, 0x2A6DF, 59}, // CJK Unified Ideographs Extension B - { 0x2F800, 0x2FA1F, 61}, // CJK Compatibility Ideographs Supplement - { 0xE0000, 0xE007F, 92}, // Tags - { 0xE0100, 0xE01EF, 91}, // Variation Selectors Supplement - { 0xF0000, 0xFFFFD, 90}, // Private Use (plane 15) - {0x100000, 0x10FFFD, 90}, // Private Use (plane 16) -}; - -static unsigned int hb_get_unicode_range_bit (hb_codepoint_t cp) -{ - -} +HB_INTERNAL int +hb_get_unicode_range_bit (hb_codepoint_t cp); } /* namespace OT */ From 4014555ca083dea3e4f42120aeaf52a2186b8a09 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 26 Feb 2018 15:50:13 -0800 Subject: [PATCH 03/15] [subset] set ulUnicodeRange[] in os2. 
--- src/Makefile.sources | 1 - src/hb-ot-os2-table.hh | 23 +++- src/hb-ot-os2-unicode-ranges.cc | 221 +------------------------------- src/hb-ot-os2-unicode-ranges.hh | 210 +++++++++++++++++++++++++++++- 4 files changed, 236 insertions(+), 219 deletions(-) diff --git a/src/Makefile.sources b/src/Makefile.sources index c20716434..e114a5013 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -29,7 +29,6 @@ HB_BASE_sources = \ hb-ot-name-table.hh \ hb-ot-os2-table.hh \ hb-ot-os2-unicode-ranges.hh \ - hb-ot-os2-unicode-ranges.cc \ hb-ot-post-macroman.hh \ hb-ot-post-table.hh \ hb-ot-tag.cc \ diff --git a/src/hb-ot-os2-table.hh b/src/hb-ot-os2-table.hh index 2d9d21495..63e972646 100644 --- a/src/hb-ot-os2-table.hh +++ b/src/hb-ot-os2-table.hh @@ -28,7 +28,7 @@ #define HB_OT_OS2_TABLE_HH #include "hb-open-type-private.hh" - +#include "hb-ot-os2-unicode-ranges.hh" namespace OT { @@ -67,11 +67,32 @@ struct os2 os2_prime->usFirstCharIndex.set (min_cp); os2_prime->usLastCharIndex.set (max_cp); + _update_unicode_ranges (plan->codepoints, os2_prime->ulUnicodeRange); bool result = hb_subset_plan_add_table(plan, HB_OT_TAG_os2, os2_prime_blob); + hb_blob_destroy (os2_prime_blob); return result; } + inline void _update_unicode_ranges (const hb_prealloced_array_t &codepoints, + HBUINT32 ulUnicodeRange[4]) const + { + for (unsigned int i = 0; i < 4; i++) + ulUnicodeRange[i].set (0); + + for (unsigned int i = 0; i < codepoints.len; i++) + { + hb_codepoint_t cp = codepoints[i]; + int bit = hb_get_unicode_range_bit (cp); + if (bit >= 0 && bit < 128) { + unsigned int block = bit / 32; + unsigned int bit_in_block = bit % 32; + unsigned int mask = 1 << bit_in_block; + ulUnicodeRange[block].set (ulUnicodeRange[block] | mask); + } + } + } + static inline void find_min_and_max_codepoint (const hb_prealloced_array_t &codepoints, uint16_t *min_cp, /* OUT */ uint16_t *max_cp /* OUT */) diff --git a/src/hb-ot-os2-unicode-ranges.cc b/src/hb-ot-os2-unicode-ranges.cc index 
f2c063976..68dfe079b 100644 --- a/src/hb-ot-os2-unicode-ranges.cc +++ b/src/hb-ot-os2-unicode-ranges.cc @@ -26,225 +26,18 @@ #include "hb-private.hh" -#include "hb-dsalgs.hh" - -struct Range { - unsigned int start; - unsigned int end; - unsigned int bit; -}; - -#define NUM_RANGES 169 -static Range os2UnicodeRangesSorted[NUM_RANGES] = { - { 0x0, 0x7F, 0}, // Basic Latin - { 0x80, 0xFF, 1}, // Latin-1 Supplement - { 0x100, 0x17F, 2}, // Latin Extended-A - { 0x180, 0x24F, 3}, // Latin Extended-B - { 0x250, 0x2AF, 4}, // IPA Extensions - { 0x2B0, 0x2FF, 5}, // Spacing Modifier Letters - { 0x300, 0x36F, 6}, // Combining Diacritical Marks - { 0x370, 0x3FF, 7}, // Greek and Coptic - { 0x400, 0x4FF, 9}, // Cyrillic - { 0x500, 0x52F, 9}, // Cyrillic Supplement - { 0x530, 0x58F, 10}, // Armenian - { 0x590, 0x5FF, 11}, // Hebrew - { 0x600, 0x6FF, 13}, // Arabic - { 0x700, 0x74F, 71}, // Syriac - { 0x750, 0x77F, 13}, // Arabic Supplement - { 0x780, 0x7BF, 72}, // Thaana - { 0x7C0, 0x7FF, 14}, // NKo - { 0x900, 0x97F, 15}, // Devanagari - { 0x980, 0x9FF, 16}, // Bengali - { 0xA00, 0xA7F, 17}, // Gurmukhi - { 0xA80, 0xAFF, 18}, // Gujarati - { 0xB00, 0xB7F, 19}, // Oriya - { 0xB80, 0xBFF, 20}, // Tamil - { 0xC00, 0xC7F, 21}, // Telugu - { 0xC80, 0xCFF, 22}, // Kannada - { 0xD00, 0xD7F, 23}, // Malayalam - { 0xD80, 0xDFF, 73}, // Sinhala - { 0xE00, 0xE7F, 24}, // Thai - { 0xE80, 0xEFF, 25}, // Lao - { 0xF00, 0xFFF, 70}, // Tibetan - { 0x1000, 0x109F, 74}, // Myanmar - { 0x10A0, 0x10FF, 26}, // Georgian - { 0x1100, 0x11FF, 28}, // Hangul Jamo - { 0x1200, 0x137F, 75}, // Ethiopic - { 0x1380, 0x139F, 75}, // Ethiopic Supplement - { 0x13A0, 0x13FF, 76}, // Cherokee - { 0x1400, 0x167F, 77}, // Unified Canadian Aboriginal Syllabics - { 0x1680, 0x169F, 78}, // Ogham - { 0x16A0, 0x16FF, 79}, // Runic - { 0x1700, 0x171F, 84}, // Tagalog - { 0x1720, 0x173F, 84}, // Hanunoo - { 0x1740, 0x175F, 84}, // Buhid - { 0x1760, 0x177F, 84}, // Tagbanwa - { 0x1780, 0x17FF, 80}, // Khmer - { 0x1800, 
0x18AF, 81}, // Mongolian - { 0x1900, 0x194F, 93}, // Limbu - { 0x1950, 0x197F, 94}, // Tai Le - { 0x1980, 0x19DF, 95}, // New Tai Lue - { 0x19E0, 0x19FF, 80}, // Khmer Symbols - { 0x1A00, 0x1A1F, 96}, // Buginese - { 0x1B00, 0x1B7F, 27}, // Balinese - { 0x1B80, 0x1BBF, 112}, // Sundanese - { 0x1C00, 0x1C4F, 113}, // Lepcha - { 0x1C50, 0x1C7F, 114}, // Ol Chiki - { 0x1D00, 0x1D7F, 4}, // Phonetic Extensions - { 0x1D80, 0x1DBF, 4}, // Phonetic Extensions Supplement - { 0x1DC0, 0x1DFF, 6}, // Combining Diacritical Marks Supplement - { 0x1E00, 0x1EFF, 29}, // Latin Extended Additional - { 0x1F00, 0x1FFF, 30}, // Greek Extended - { 0x2000, 0x206F, 31}, // General Punctuation - { 0x2070, 0x209F, 32}, // Superscripts And Subscripts - { 0x20A0, 0x20CF, 33}, // Currency Symbols - { 0x20D0, 0x20FF, 34}, // Combining Diacritical Marks For Symbols - { 0x2100, 0x214F, 35}, // Letterlike Symbols - { 0x2150, 0x218F, 36}, // Number Forms - { 0x2190, 0x21FF, 37}, // Arrows - { 0x2200, 0x22FF, 38}, // Mathematical Operators - { 0x2300, 0x23FF, 39}, // Miscellaneous Technical - { 0x2400, 0x243F, 40}, // Control Pictures - { 0x2440, 0x245F, 41}, // Optical Character Recognition - { 0x2460, 0x24FF, 42}, // Enclosed Alphanumerics - { 0x2500, 0x257F, 43}, // Box Drawing - { 0x2580, 0x259F, 44}, // Block Elements - { 0x25A0, 0x25FF, 45}, // Geometric Shapes - { 0x2600, 0x26FF, 46}, // Miscellaneous Symbols - { 0x2700, 0x27BF, 47}, // Dingbats - { 0x27C0, 0x27EF, 38}, // Miscellaneous Mathematical Symbols-A - { 0x27F0, 0x27FF, 37}, // Supplemental Arrows-A - { 0x2800, 0x28FF, 82}, // Braille Patterns - { 0x2900, 0x297F, 37}, // Supplemental Arrows-B - { 0x2980, 0x29FF, 38}, // Miscellaneous Mathematical Symbols-B - { 0x2A00, 0x2AFF, 38}, // Supplemental Mathematical Operators - { 0x2B00, 0x2BFF, 37}, // Miscellaneous Symbols and Arrows - { 0x2C00, 0x2C5F, 97}, // Glagolitic - { 0x2C60, 0x2C7F, 29}, // Latin Extended-C - { 0x2C80, 0x2CFF, 8}, // Coptic - { 0x2D00, 0x2D2F, 26}, // Georgian 
Supplement - { 0x2D30, 0x2D7F, 98}, // Tifinagh - { 0x2D80, 0x2DDF, 75}, // Ethiopic Extended - { 0x2DE0, 0x2DFF, 9}, // Cyrillic Extended-A - { 0x2E00, 0x2E7F, 31}, // Supplemental Punctuation - { 0x2E80, 0x2EFF, 59}, // CJK Radicals Supplement - { 0x2F00, 0x2FDF, 59}, // Kangxi Radicals - { 0x2FF0, 0x2FFF, 59}, // Ideographic Description Characters - { 0x3000, 0x303F, 48}, // CJK Symbols And Punctuation - { 0x3040, 0x309F, 49}, // Hiragana - { 0x30A0, 0x30FF, 50}, // Katakana - { 0x3100, 0x312F, 51}, // Bopomofo - { 0x3130, 0x318F, 52}, // Hangul Compatibility Jamo - { 0x3190, 0x319F, 59}, // Kanbun - { 0x31A0, 0x31BF, 51}, // Bopomofo Extended - { 0x31C0, 0x31EF, 61}, // CJK Strokes - { 0x31F0, 0x31FF, 50}, // Katakana Phonetic Extensions - { 0x3200, 0x32FF, 54}, // Enclosed CJK Letters And Months - { 0x3300, 0x33FF, 55}, // CJK Compatibility - { 0x3400, 0x4DBF, 59}, // CJK Unified Ideographs Extension A - { 0x4DC0, 0x4DFF, 99}, // Yijing Hexagram Symbols - { 0x4E00, 0x9FFF, 59}, // CJK Unified Ideographs - { 0xA000, 0xA48F, 83}, // Yi Syllables - { 0xA490, 0xA4CF, 83}, // Yi Radicals - { 0xA500, 0xA63F, 12}, // Vai - { 0xA640, 0xA69F, 9}, // Cyrillic Extended-B - { 0xA700, 0xA71F, 5}, // Modifier Tone Letters - { 0xA720, 0xA7FF, 29}, // Latin Extended-D - { 0xA800, 0xA82F, 100}, // Syloti Nagri - { 0xA840, 0xA87F, 53}, // Phags-pa - { 0xA880, 0xA8DF, 115}, // Saurashtra - { 0xA900, 0xA92F, 116}, // Kayah Li - { 0xA930, 0xA95F, 117}, // Rejang - { 0xAA00, 0xAA5F, 118}, // Cham - { 0xAC00, 0xD7AF, 56}, // Hangul Syllables - { 0xD800, 0xDFFF, 57}, // Non-Plane 0 * - { 0xE000, 0xF8FF, 60}, // Private Use Area (plane 0) - { 0xF900, 0xFAFF, 61}, // CJK Compatibility Ideographs - { 0xFB00, 0xFB4F, 62}, // Alphabetic Presentation Forms - { 0xFB50, 0xFDFF, 63}, // Arabic Presentation Forms-A - { 0xFE00, 0xFE0F, 91}, // Variation Selectors - { 0xFE10, 0xFE1F, 65}, // Vertical Forms - { 0xFE20, 0xFE2F, 64}, // Combining Half Marks - { 0xFE30, 0xFE4F, 65}, // CJK 
Compatibility Forms - { 0xFE50, 0xFE6F, 66}, // Small Form Variants - { 0xFE70, 0xFEFF, 67}, // Arabic Presentation Forms-B - { 0xFF00, 0xFFEF, 68}, // Halfwidth And Fullwidth Forms - { 0xFFF0, 0xFFFF, 69}, // Specials - { 0x10000, 0x1007F, 101}, // Linear B Syllabary - { 0x10080, 0x100FF, 101}, // Linear B Ideograms - { 0x10100, 0x1013F, 101}, // Aegean Numbers - { 0x10140, 0x1018F, 102}, // Ancient Greek Numbers - { 0x10190, 0x101CF, 119}, // Ancient Symbols - { 0x101D0, 0x101FF, 120}, // Phaistos Disc - { 0x10280, 0x1029F, 121}, // Lycian - { 0x102A0, 0x102DF, 121}, // Carian - { 0x10300, 0x1032F, 85}, // Old Italic - { 0x10330, 0x1034F, 86}, // Gothic - { 0x10380, 0x1039F, 103}, // Ugaritic - { 0x103A0, 0x103DF, 104}, // Old Persian - { 0x10400, 0x1044F, 87}, // Deseret - { 0x10450, 0x1047F, 105}, // Shavian - { 0x10480, 0x104AF, 106}, // Osmanya - { 0x10800, 0x1083F, 107}, // Cypriot Syllabary - { 0x10900, 0x1091F, 58}, // Phoenician - { 0x10920, 0x1093F, 121}, // Lydian - { 0x10A00, 0x10A5F, 108}, // Kharoshthi - { 0x12000, 0x123FF, 110}, // Cuneiform - { 0x12400, 0x1247F, 110}, // Cuneiform Numbers and Punctuation - { 0x1D000, 0x1D0FF, 88}, // Byzantine Musical Symbols - { 0x1D100, 0x1D1FF, 88}, // Musical Symbols - { 0x1D200, 0x1D24F, 88}, // Ancient Greek Musical Notation - { 0x1D300, 0x1D35F, 109}, // Tai Xuan Jing Symbols - { 0x1D360, 0x1D37F, 111}, // Counting Rod Numerals - { 0x1D400, 0x1D7FF, 89}, // Mathematical Alphanumeric Symbols - { 0x1F000, 0x1F02F, 122}, // Mahjong Tiles - { 0x1F030, 0x1F09F, 122}, // Domino Tiles - { 0x20000, 0x2A6DF, 59}, // CJK Unified Ideographs Extension B - { 0x2F800, 0x2FA1F, 61}, // CJK Compatibility Ideographs Supplement - { 0xE0000, 0xE007F, 92}, // Tags - { 0xE0100, 0xE01EF, 91}, // Variation Selectors Supplement - { 0xF0000, 0xFFFFD, 90}, // Private Use (plane 15) - {0x100000, 0x10FFFD, 90}, // Private Use (plane 16) -}; - -static int -_compare_range (const void *_key, const void *_item, void *_arg) -{ - 
hb_codepoint_t *cp = (hb_codepoint_t *) _key; - Range *range = (Range *) _item; - - if (*cp < range->start) - return -1; - else if (*cp <= range->end) - return 0; - else - return 1; -} - -/** - * hb_get_unicode_range_bit: - * Returns the bit to be set in os/2 ulUnicodeRange for a given codepoint. - **/ -static int -hb_get_unicode_range_bit (hb_codepoint_t cp) -{ - Range *range = (Range*) hb_bsearch_r (&cp, os2UnicodeRangesSorted, NUM_RANGES, sizeof(Range), - _compare_range, nullptr); - if (range != NULL) - return range->bit; - return -1; -} - - -#ifdef MAIN +#include "hb-ot-os2-unicode-ranges.hh" void test (hb_codepoint_t cp, int bit) { - if (hb_get_unicode_range_bit (cp) != bit) + // TODO: + // Note: * Setting bit 57 implies that there is at least one codepoint beyond the Basic + // Multilingual Plane that is supported by this font. (See fontTools impl.) + if (OT::hb_get_unicode_range_bit (cp) != bit) { fprintf (stderr, "got incorrect bit (%d) for cp 0x%X. Should have been %d.", - hb_get_unicode_range_bit (cp), + OT::hb_get_unicode_range_bit (cp), cp, bit); abort(); @@ -275,5 +68,3 @@ main (void) test_get_unicode_range_bit (); return 0; } - -#endif diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh index 5f36b20ce..4148342ac 100644 --- a/src/hb-ot-os2-unicode-ranges.hh +++ b/src/hb-ot-os2-unicode-ranges.hh @@ -28,11 +28,217 @@ #define HB_OT_OS2_UNICODE_RANGES_HH #include "hb-private.hh" +#include "hb-dsalgs.hh" namespace OT { -HB_INTERNAL int -hb_get_unicode_range_bit (hb_codepoint_t cp); +struct Range { + unsigned int start; + unsigned int end; + unsigned int bit; +}; + +static Range os2UnicodeRangesSorted[] = { + { 0x0, 0x7F, 0}, // Basic Latin + { 0x80, 0xFF, 1}, // Latin-1 Supplement + { 0x100, 0x17F, 2}, // Latin Extended-A + { 0x180, 0x24F, 3}, // Latin Extended-B + { 0x250, 0x2AF, 4}, // IPA Extensions + { 0x2B0, 0x2FF, 5}, // Spacing Modifier Letters + { 0x300, 0x36F, 6}, // Combining Diacritical Marks + { 0x370, 0x3FF, 7}, // 
Greek and Coptic + { 0x400, 0x4FF, 9}, // Cyrillic + { 0x500, 0x52F, 9}, // Cyrillic Supplement + { 0x530, 0x58F, 10}, // Armenian + { 0x590, 0x5FF, 11}, // Hebrew + { 0x600, 0x6FF, 13}, // Arabic + { 0x700, 0x74F, 71}, // Syriac + { 0x750, 0x77F, 13}, // Arabic Supplement + { 0x780, 0x7BF, 72}, // Thaana + { 0x7C0, 0x7FF, 14}, // NKo + { 0x900, 0x97F, 15}, // Devanagari + { 0x980, 0x9FF, 16}, // Bengali + { 0xA00, 0xA7F, 17}, // Gurmukhi + { 0xA80, 0xAFF, 18}, // Gujarati + { 0xB00, 0xB7F, 19}, // Oriya + { 0xB80, 0xBFF, 20}, // Tamil + { 0xC00, 0xC7F, 21}, // Telugu + { 0xC80, 0xCFF, 22}, // Kannada + { 0xD00, 0xD7F, 23}, // Malayalam + { 0xD80, 0xDFF, 73}, // Sinhala + { 0xE00, 0xE7F, 24}, // Thai + { 0xE80, 0xEFF, 25}, // Lao + { 0xF00, 0xFFF, 70}, // Tibetan + { 0x1000, 0x109F, 74}, // Myanmar + { 0x10A0, 0x10FF, 26}, // Georgian + { 0x1100, 0x11FF, 28}, // Hangul Jamo + { 0x1200, 0x137F, 75}, // Ethiopic + { 0x1380, 0x139F, 75}, // Ethiopic Supplement + { 0x13A0, 0x13FF, 76}, // Cherokee + { 0x1400, 0x167F, 77}, // Unified Canadian Aboriginal Syllabics + { 0x1680, 0x169F, 78}, // Ogham + { 0x16A0, 0x16FF, 79}, // Runic + { 0x1700, 0x171F, 84}, // Tagalog + { 0x1720, 0x173F, 84}, // Hanunoo + { 0x1740, 0x175F, 84}, // Buhid + { 0x1760, 0x177F, 84}, // Tagbanwa + { 0x1780, 0x17FF, 80}, // Khmer + { 0x1800, 0x18AF, 81}, // Mongolian + { 0x1900, 0x194F, 93}, // Limbu + { 0x1950, 0x197F, 94}, // Tai Le + { 0x1980, 0x19DF, 95}, // New Tai Lue + { 0x19E0, 0x19FF, 80}, // Khmer Symbols + { 0x1A00, 0x1A1F, 96}, // Buginese + { 0x1B00, 0x1B7F, 27}, // Balinese + { 0x1B80, 0x1BBF, 112}, // Sundanese + { 0x1C00, 0x1C4F, 113}, // Lepcha + { 0x1C50, 0x1C7F, 114}, // Ol Chiki + { 0x1D00, 0x1D7F, 4}, // Phonetic Extensions + { 0x1D80, 0x1DBF, 4}, // Phonetic Extensions Supplement + { 0x1DC0, 0x1DFF, 6}, // Combining Diacritical Marks Supplement + { 0x1E00, 0x1EFF, 29}, // Latin Extended Additional + { 0x1F00, 0x1FFF, 30}, // Greek Extended + { 0x2000, 0x206F, 31}, // General 
Punctuation + { 0x2070, 0x209F, 32}, // Superscripts And Subscripts + { 0x20A0, 0x20CF, 33}, // Currency Symbols + { 0x20D0, 0x20FF, 34}, // Combining Diacritical Marks For Symbols + { 0x2100, 0x214F, 35}, // Letterlike Symbols + { 0x2150, 0x218F, 36}, // Number Forms + { 0x2190, 0x21FF, 37}, // Arrows + { 0x2200, 0x22FF, 38}, // Mathematical Operators + { 0x2300, 0x23FF, 39}, // Miscellaneous Technical + { 0x2400, 0x243F, 40}, // Control Pictures + { 0x2440, 0x245F, 41}, // Optical Character Recognition + { 0x2460, 0x24FF, 42}, // Enclosed Alphanumerics + { 0x2500, 0x257F, 43}, // Box Drawing + { 0x2580, 0x259F, 44}, // Block Elements + { 0x25A0, 0x25FF, 45}, // Geometric Shapes + { 0x2600, 0x26FF, 46}, // Miscellaneous Symbols + { 0x2700, 0x27BF, 47}, // Dingbats + { 0x27C0, 0x27EF, 38}, // Miscellaneous Mathematical Symbols-A + { 0x27F0, 0x27FF, 37}, // Supplemental Arrows-A + { 0x2800, 0x28FF, 82}, // Braille Patterns + { 0x2900, 0x297F, 37}, // Supplemental Arrows-B + { 0x2980, 0x29FF, 38}, // Miscellaneous Mathematical Symbols-B + { 0x2A00, 0x2AFF, 38}, // Supplemental Mathematical Operators + { 0x2B00, 0x2BFF, 37}, // Miscellaneous Symbols and Arrows + { 0x2C00, 0x2C5F, 97}, // Glagolitic + { 0x2C60, 0x2C7F, 29}, // Latin Extended-C + { 0x2C80, 0x2CFF, 8}, // Coptic + { 0x2D00, 0x2D2F, 26}, // Georgian Supplement + { 0x2D30, 0x2D7F, 98}, // Tifinagh + { 0x2D80, 0x2DDF, 75}, // Ethiopic Extended + { 0x2DE0, 0x2DFF, 9}, // Cyrillic Extended-A + { 0x2E00, 0x2E7F, 31}, // Supplemental Punctuation + { 0x2E80, 0x2EFF, 59}, // CJK Radicals Supplement + { 0x2F00, 0x2FDF, 59}, // Kangxi Radicals + { 0x2FF0, 0x2FFF, 59}, // Ideographic Description Characters + { 0x3000, 0x303F, 48}, // CJK Symbols And Punctuation + { 0x3040, 0x309F, 49}, // Hiragana + { 0x30A0, 0x30FF, 50}, // Katakana + { 0x3100, 0x312F, 51}, // Bopomofo + { 0x3130, 0x318F, 52}, // Hangul Compatibility Jamo + { 0x3190, 0x319F, 59}, // Kanbun + { 0x31A0, 0x31BF, 51}, // Bopomofo Extended + { 0x31C0, 
0x31EF, 61}, // CJK Strokes + { 0x31F0, 0x31FF, 50}, // Katakana Phonetic Extensions + { 0x3200, 0x32FF, 54}, // Enclosed CJK Letters And Months + { 0x3300, 0x33FF, 55}, // CJK Compatibility + { 0x3400, 0x4DBF, 59}, // CJK Unified Ideographs Extension A + { 0x4DC0, 0x4DFF, 99}, // Yijing Hexagram Symbols + { 0x4E00, 0x9FFF, 59}, // CJK Unified Ideographs + { 0xA000, 0xA48F, 83}, // Yi Syllables + { 0xA490, 0xA4CF, 83}, // Yi Radicals + { 0xA500, 0xA63F, 12}, // Vai + { 0xA640, 0xA69F, 9}, // Cyrillic Extended-B + { 0xA700, 0xA71F, 5}, // Modifier Tone Letters + { 0xA720, 0xA7FF, 29}, // Latin Extended-D + { 0xA800, 0xA82F, 100}, // Syloti Nagri + { 0xA840, 0xA87F, 53}, // Phags-pa + { 0xA880, 0xA8DF, 115}, // Saurashtra + { 0xA900, 0xA92F, 116}, // Kayah Li + { 0xA930, 0xA95F, 117}, // Rejang + { 0xAA00, 0xAA5F, 118}, // Cham + { 0xAC00, 0xD7AF, 56}, // Hangul Syllables + { 0xD800, 0xDFFF, 57}, // Non-Plane 0 * + { 0xE000, 0xF8FF, 60}, // Private Use Area (plane 0) + { 0xF900, 0xFAFF, 61}, // CJK Compatibility Ideographs + { 0xFB00, 0xFB4F, 62}, // Alphabetic Presentation Forms + { 0xFB50, 0xFDFF, 63}, // Arabic Presentation Forms-A + { 0xFE00, 0xFE0F, 91}, // Variation Selectors + { 0xFE10, 0xFE1F, 65}, // Vertical Forms + { 0xFE20, 0xFE2F, 64}, // Combining Half Marks + { 0xFE30, 0xFE4F, 65}, // CJK Compatibility Forms + { 0xFE50, 0xFE6F, 66}, // Small Form Variants + { 0xFE70, 0xFEFF, 67}, // Arabic Presentation Forms-B + { 0xFF00, 0xFFEF, 68}, // Halfwidth And Fullwidth Forms + { 0xFFF0, 0xFFFF, 69}, // Specials + { 0x10000, 0x1007F, 101}, // Linear B Syllabary + { 0x10080, 0x100FF, 101}, // Linear B Ideograms + { 0x10100, 0x1013F, 101}, // Aegean Numbers + { 0x10140, 0x1018F, 102}, // Ancient Greek Numbers + { 0x10190, 0x101CF, 119}, // Ancient Symbols + { 0x101D0, 0x101FF, 120}, // Phaistos Disc + { 0x10280, 0x1029F, 121}, // Lycian + { 0x102A0, 0x102DF, 121}, // Carian + { 0x10300, 0x1032F, 85}, // Old Italic + { 0x10330, 0x1034F, 86}, // Gothic + { 0x10380, 
0x1039F, 103}, // Ugaritic + { 0x103A0, 0x103DF, 104}, // Old Persian + { 0x10400, 0x1044F, 87}, // Deseret + { 0x10450, 0x1047F, 105}, // Shavian + { 0x10480, 0x104AF, 106}, // Osmanya + { 0x10800, 0x1083F, 107}, // Cypriot Syllabary + { 0x10900, 0x1091F, 58}, // Phoenician + { 0x10920, 0x1093F, 121}, // Lydian + { 0x10A00, 0x10A5F, 108}, // Kharoshthi + { 0x12000, 0x123FF, 110}, // Cuneiform + { 0x12400, 0x1247F, 110}, // Cuneiform Numbers and Punctuation + { 0x1D000, 0x1D0FF, 88}, // Byzantine Musical Symbols + { 0x1D100, 0x1D1FF, 88}, // Musical Symbols + { 0x1D200, 0x1D24F, 88}, // Ancient Greek Musical Notation + { 0x1D300, 0x1D35F, 109}, // Tai Xuan Jing Symbols + { 0x1D360, 0x1D37F, 111}, // Counting Rod Numerals + { 0x1D400, 0x1D7FF, 89}, // Mathematical Alphanumeric Symbols + { 0x1F000, 0x1F02F, 122}, // Mahjong Tiles + { 0x1F030, 0x1F09F, 122}, // Domino Tiles + { 0x20000, 0x2A6DF, 59}, // CJK Unified Ideographs Extension B + { 0x2F800, 0x2FA1F, 61}, // CJK Compatibility Ideographs Supplement + { 0xE0000, 0xE007F, 92}, // Tags + { 0xE0100, 0xE01EF, 91}, // Variation Selectors Supplement + { 0xF0000, 0xFFFFD, 90}, // Private Use (plane 15) + {0x100000, 0x10FFFD, 90}, // Private Use (plane 16) +}; + +static int +_compare_range (const void *_key, const void *_item, void *_arg) +{ + hb_codepoint_t *cp = (hb_codepoint_t *) _key; + Range *range = (Range *) _item; + + if (*cp < range->start) + return -1; + else if (*cp <= range->end) + return 0; + else + return 1; +} + +/** + * hb_get_unicode_range_bit: + * Returns the bit to be set in os/2 ulUnicodeRange for a given codepoint. 
+ **/ +static int +hb_get_unicode_range_bit (hb_codepoint_t cp) +{ + Range *range = (Range*) hb_bsearch_r (&cp, os2UnicodeRangesSorted, + sizeof (os2UnicodeRangesSorted) / sizeof(Range), + sizeof(Range), + _compare_range, nullptr); + if (range != NULL) + return range->bit; + return -1; +} } /* namespace OT */ From f82f2a3e50805503d93aa1aa1ccb27da4967a14a Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 26 Feb 2018 15:52:01 -0800 Subject: [PATCH 04/15] [subset] Rename hb-os2-unicode-ranges.cc to test-unicode-ranges.cc. --- src/Makefile.am | 2 +- src/{hb-ot-os2-unicode-ranges.cc => test-unicode-ranges.cc} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/{hb-ot-os2-unicode-ranges.cc => test-unicode-ranges.cc} (100%) diff --git a/src/Makefile.am b/src/Makefile.am index 6044366da..73c0c61dd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -388,7 +388,7 @@ test_ot_tag_SOURCES = hb-ot-tag.cc test_ot_tag_CPPFLAGS = $(HBCFLAGS) -DMAIN test_ot_tag_LDADD = libharfbuzz.la $(HBLIBS) -test_unicode_ranges_SOURCES = hb-ot-os2-unicode-ranges.cc +test_unicode_ranges_SOURCES = test-unicode-ranges.cc test_unicode_ranges_CPPFLAGS = $(HBCFLAGS) -DMAIN test_unicode_ranges_LDADD = libharfbuzz.la $(HBLIBS) diff --git a/src/hb-ot-os2-unicode-ranges.cc b/src/test-unicode-ranges.cc similarity index 100% rename from src/hb-ot-os2-unicode-ranges.cc rename to src/test-unicode-ranges.cc From f757757eda5f00a89a156e3427bdf8c4313611ef Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 26 Feb 2018 15:56:36 -0800 Subject: [PATCH 05/15] [subset] Add cmake build def for test-unicode-ranges. 
--- CMakeLists.txt | 2 +- src/Makefile.am | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f7a5d8308..660da5a1f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -809,7 +809,7 @@ endif () ## src/ executables if (NOT HB_DISABLE_TEST_PROGS) - foreach (prog main test test-would-substitute test-size-params test-buffer-serialize hb-ot-tag) + foreach (prog main test test-would-substitute test-size-params test-buffer-serialize hb-ot-tag test-unicode-ranges) set (prog_name ${prog}) if (${prog_name} STREQUAL "test") # test can not be used as a valid executable name on cmake, lets special case it diff --git a/src/Makefile.am b/src/Makefile.am index 73c0c61dd..2871f30f4 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -389,7 +389,6 @@ test_ot_tag_CPPFLAGS = $(HBCFLAGS) -DMAIN test_ot_tag_LDADD = libharfbuzz.la $(HBLIBS) test_unicode_ranges_SOURCES = test-unicode-ranges.cc -test_unicode_ranges_CPPFLAGS = $(HBCFLAGS) -DMAIN test_unicode_ranges_LDADD = libharfbuzz.la $(HBLIBS) TESTS_ENVIRONMENT = \ From ddc4f2b9fc5566e70558a57133289f84d467cc98 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 26 Feb 2018 15:59:32 -0800 Subject: [PATCH 06/15] [subset] Add python util that was used to generate hb-ot-os2-unicode-ranges.hh --- util/generate-unicode-ranges.py | 50 +++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 util/generate-unicode-ranges.py diff --git a/util/generate-unicode-ranges.py b/util/generate-unicode-ranges.py new file mode 100644 index 000000000..e24b262a3 --- /dev/null +++ b/util/generate-unicode-ranges.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- + +# Generates the code for a sorted unicode range array as used in hb-ot-os2-unicode-ranges.hh +# Input is a tab seperated list of unicode ranges from the otspec.
+ +import io +import re +import sys + +reload(sys) +sys.setdefaultencoding('utf-8') + +print (u"""static Range os2UnicodeRangesSorted[] = {""") + +args = sys.argv[1:] +input_file = args[0] + +with io.open(input_file, mode="r", encoding="utf-8") as f: + + all_ranges = []; + current_bit = 0 + while True: + line = f.readline().strip() + if not line: + break + fields = re.split(r'\t+', line) + if len(fields) == 3: + current_bit = fields[0] + fields = fields[1:] + elif len(fields) > 3: + raise ValueError("bad input :(.") + + name = fields[0] + ranges = re.split("-", fields[1]) + if len(ranges) != 2: + raise ValueError("bad input :(.") + + v = tuple((int(ranges[0], 16), int(ranges[1], 16), int(current_bit), name)) + all_ranges.append(v) + +all_ranges = sorted(all_ranges, key=lambda t: t[0]) + +for ranges in all_ranges: + start = ("0x%X" % ranges[0]).rjust(8) + end = ("0x%X" % ranges[1]).rjust(8) + bit = ("%s" % ranges[2]).rjust(3) + + print " {%s, %s, %s}, // %s" % (start, end, bit, ranges[3]) + +print (u"""};"""); From 074b5a29a166d1812abc7229a71af4a3bb311536 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 26 Feb 2018 16:11:24 -0800 Subject: [PATCH 07/15] [subset] Add special case handling of bit 57 in os2 ulUnicodeRange.
--- src/hb-ot-os2-table.hh | 7 +++++++ src/test-unicode-ranges.cc | 3 --- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/hb-ot-os2-table.hh b/src/hb-ot-os2-table.hh index 63e972646..7325e93ef 100644 --- a/src/hb-ot-os2-table.hh +++ b/src/hb-ot-os2-table.hh @@ -90,6 +90,13 @@ struct os2 unsigned int mask = 1 << bit_in_block; ulUnicodeRange[block].set (ulUnicodeRange[block] | mask); } + if (cp >= 0x10000 && cp <= 0x10FFFF) + { + /* the spec says that bit 57 ("Non Plane 0") implies that there's + at least one codepoint beyond the BMP; so I also include all + the non-BMP codepoints here */ + ulUnicodeRange[2].set (ulUnicodeRange[2] | (1 << 25)); + } } } diff --git a/src/test-unicode-ranges.cc b/src/test-unicode-ranges.cc index 68dfe079b..16d01ef20 100644 --- a/src/test-unicode-ranges.cc +++ b/src/test-unicode-ranges.cc @@ -31,9 +31,6 @@ void test (hb_codepoint_t cp, int bit) { - // TODO: - // Note: * Setting bit 57 implies that there is at least one codepoint beyond the Basic - // Multilingual Plane that is supported by this font. (See fontTools impl.) if (OT::hb_get_unicode_range_bit (cp) != bit) { fprintf (stderr, "got incorrect bit (%d) for cp 0x%X. Should have been %d.", From e20ab71d12b032371b830b76462e5e979d963b58 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 26 Feb 2018 16:13:10 -0800 Subject: [PATCH 08/15] [subset] Fix incorrect index.
--- src/hb-ot-os2-table.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb-ot-os2-table.hh b/src/hb-ot-os2-table.hh index 7325e93ef..754537a6a 100644 --- a/src/hb-ot-os2-table.hh +++ b/src/hb-ot-os2-table.hh @@ -95,7 +95,7 @@ struct os2 /* the spec says that bit 57 ("Non Plane 0") implies that there's at least one codepoint beyond the BMP; so I also include all the non-BMP codepoints here */ - ulUnicodeRange[2].set (ulUnicodeRange[2] | (1 << 25)); + ulUnicodeRange[1].set (ulUnicodeRange[1] | (1 << 25)); } } } From 0be9fea0a96701b159a9db190e55b1c3efc38a28 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 26 Feb 2018 16:15:09 -0800 Subject: [PATCH 09/15] [subset] Add comment to os2UnicodeRangesSorted. --- src/hb-ot-os2-unicode-ranges.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh index 4148342ac..9e06c1006 100644 --- a/src/hb-ot-os2-unicode-ranges.hh +++ b/src/hb-ot-os2-unicode-ranges.hh @@ -38,6 +38,7 @@ struct Range { unsigned int bit; }; +/* Note: The contents of this array was generated using util/generate-unicode-ranges.py. */ static Range os2UnicodeRangesSorted[] = { { 0x0, 0x7F, 0}, // Basic Latin { 0x80, 0xFF, 1}, // Latin-1 Supplement From 6368ce4c927b1457cf19945d5957e91d4621dc8b Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 26 Feb 2018 17:44:39 -0800 Subject: [PATCH 10/15] [subset] const in _compare_range. 
--- src/hb-ot-os2-unicode-ranges.hh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh index 9e06c1006..a456ffddb 100644 --- a/src/hb-ot-os2-unicode-ranges.hh +++ b/src/hb-ot-os2-unicode-ranges.hh @@ -214,12 +214,12 @@ static Range os2UnicodeRangesSorted[] = { static int _compare_range (const void *_key, const void *_item, void *_arg) { - hb_codepoint_t *cp = (hb_codepoint_t *) _key; - Range *range = (Range *) _item; + hb_codepoint_t cp = *((hb_codepoint_t *) _key); + const Range *range = (Range *) _item; - if (*cp < range->start) + if (cp < range->start) return -1; - else if (*cp <= range->end) + else if (cp <= range->end) return 0; else return 1; From f630ae5161bfc8420f9ae0127fd8c7f447874fdd Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 26 Feb 2018 17:46:17 -0800 Subject: [PATCH 11/15] [subset] unsigned int -> hb_codepoint_t. --- src/hb-ot-os2-unicode-ranges.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh index a456ffddb..a3ad4d344 100644 --- a/src/hb-ot-os2-unicode-ranges.hh +++ b/src/hb-ot-os2-unicode-ranges.hh @@ -33,8 +33,8 @@ namespace OT { struct Range { - unsigned int start; - unsigned int end; + hb_codepoint_t start; + hb_codepoint_t end; unsigned int bit; }; From 0c0fe2ff8209228f2ddfce464b7b6f1b1ee1654a Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 26 Feb 2018 17:47:10 -0800 Subject: [PATCH 12/15] [subset] Move util/generate-unicode-ranges.py to src/gen-unicode-ranges.py --- util/generate-unicode-ranges.py => src/gen-unicode-ranges.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename util/generate-unicode-ranges.py => src/gen-unicode-ranges.py (100%) diff --git a/util/generate-unicode-ranges.py b/src/gen-unicode-ranges.py similarity index 100% rename from util/generate-unicode-ranges.py rename to src/gen-unicode-ranges.py From
f1c8fc3487d5c5efb8ee1804acb07e6e282d3bc5 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 26 Feb 2018 17:48:51 -0800 Subject: [PATCH 13/15] [subset] small updates to gen-unicode-ranges.py --- src/gen-unicode-ranges.py | 6 ++++-- src/hb-ot-os2-unicode-ranges.hh | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/gen-unicode-ranges.py b/src/gen-unicode-ranges.py index e24b262a3..3b59cd862 100644 --- a/src/gen-unicode-ranges.py +++ b/src/gen-unicode-ranges.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- # Generates the code for a sorted unicode range array as used in hb-ot-os2-unicode-ranges.hh -# Input is a tab seperated list of unicode ranges from the otspec. +# Input is a tab seperated list of unicode ranges from the otspec +# (https://docs.microsoft.com/en-us/typography/opentype/spec/os2#ulunicoderange1). import io import re @@ -10,7 +11,8 @@ import sys reload(sys) sys.setdefaultencoding('utf-8') -print (u"""static Range os2UnicodeRangesSorted[] = {""") +print (u"""static Range os2UnicodeRangesSorted[] = +{""") args = sys.argv[1:] input_file = args[0] diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh index a3ad4d344..f4b339eaf 100644 --- a/src/hb-ot-os2-unicode-ranges.hh +++ b/src/hb-ot-os2-unicode-ranges.hh @@ -39,7 +39,8 @@ struct Range { }; /* Note: The contents of this array was generated using util/generate-unicode-ranges.py. */ -static Range os2UnicodeRangesSorted[] = { +static Range os2UnicodeRangesSorted[] = +{ { 0x0, 0x7F, 0}, // Basic Latin { 0x80, 0xFF, 1}, // Latin-1 Supplement { 0x100, 0x17F, 2}, // Latin Extended-A From ad3f2f77dafdee524e836e732077ee9670602369 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 26 Feb 2018 17:51:27 -0800 Subject: [PATCH 14/15] [subset] small cleanups in hb-ot-os2-table. 
--- src/hb-ot-os2-table.hh | 5 +++-- src/hb-ot-os2-unicode-ranges.hh | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/hb-ot-os2-table.hh b/src/hb-ot-os2-table.hh index 754537a6a..6cb8d4949 100644 --- a/src/hb-ot-os2-table.hh +++ b/src/hb-ot-os2-table.hh @@ -83,8 +83,9 @@ struct os2 for (unsigned int i = 0; i < codepoints.len; i++) { hb_codepoint_t cp = codepoints[i]; - int bit = hb_get_unicode_range_bit (cp); - if (bit >= 0 && bit < 128) { + unsigned int bit = hb_get_unicode_range_bit (cp); + if (bit < 128) + { unsigned int block = bit / 32; unsigned int bit_in_block = bit % 32; unsigned int mask = 1 << bit_in_block; diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh index f4b339eaf..2c05d895e 100644 --- a/src/hb-ot-os2-unicode-ranges.hh +++ b/src/hb-ot-os2-unicode-ranges.hh @@ -230,7 +230,7 @@ _compare_range (const void *_key, const void *_item, void *_arg) * hb_get_unicode_range_bit: * Returns the bit to be set in os/2 ulUnicodeRange for a given codepoint. **/ -static int +static unsigned int hb_get_unicode_range_bit (hb_codepoint_t cp) { Range *range = (Range*) hb_bsearch_r (&cp, os2UnicodeRangesSorted, From 44dc36dd82fc948a15e2ad0d605eb4a466b3553d Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 26 Feb 2018 17:56:23 -0800 Subject: [PATCH 15/15] [subset] update to comment in hb-ot-os2-unicode-ranges.hh --- src/hb-ot-os2-unicode-ranges.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh index 2c05d895e..2cf168f9c 100644 --- a/src/hb-ot-os2-unicode-ranges.hh +++ b/src/hb-ot-os2-unicode-ranges.hh @@ -38,7 +38,7 @@ struct Range { unsigned int bit; }; -/* Note: The contents of this array was generated using util/generate-unicode-ranges.py. */ +/* Note: The contents of this array was generated using src/gen-unicode-ranges.py. */ static Range os2UnicodeRangesSorted[] = { { 0x0, 0x7F, 0}, // Basic Latin