diff --git a/src/Makefile.am b/src/Makefile.am index 9e7fd2995..73631992c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -289,13 +289,15 @@ harfbuzz-gobject.def: $(HB_GOBJECT_headers) GENERATORS = \ gen-arabic-table.py \ - gen-indic-table.py \ - gen-use-table.py \ gen-def.py \ + gen-emoji-table.py \ + gen-indic-table.py \ + gen-os2-unicode-ranges.py \ + gen-use-table.py \ $(NULL) EXTRA_DIST += $(GENERATORS) -unicode-tables: arabic-table indic-table use-table +unicode-tables: arabic-table indic-table use-table emoji-table arabic-table: gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-arabic-table.hh \ @@ -309,9 +311,13 @@ use-table: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.tx $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-use-table.cc \ || ($(RM) $(srcdir)/hb-ot-shape-complex-use-table.cc; false) +emoji-table: gen-emoji-table.py emoji-data.txt + $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-unicode-emoji-table.hh \ + || ($(RM) $(srcdir)/hb-unicode-emoji-table.hh; false) + built-sources: $(BUILT_SOURCES) -.PHONY: unicode-tables arabic-table indic-table use-table built-sources +.PHONY: unicode-tables arabic-table indic-table use-table emoji-table built-sources RAGEL_GENERATED = \ $(patsubst %,$(srcdir)/%,$(HB_BASE_RAGEL_GENERATED_sources)) \ diff --git a/src/gen-emoji-table.py b/src/gen-emoji-table.py new file mode 100755 index 000000000..278e0b2d4 --- /dev/null +++ b/src/gen-emoji-table.py @@ -0,0 +1,64 @@ +#!/usr/bin/python + +from __future__ import print_function, division, absolute_import +import sys +import os.path +from collections import OrderedDict + +if len (sys.argv) != 2: + print("usage: ./gen-emoji-table.py emoji-data.txt", file=sys.stderr) + sys.exit (1) + +f = open(sys.argv[1]) +header = [f.readline () for _ in range(10)] + +sets = OrderedDict() +for line in f.readlines(): + line = line.strip() + if not line or line[0] == '#': + continue + rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]] + + rang = [int(s, 16) for s in rang.split('..')] + if len(rang) > 1: + start, end = rang + else: + start = end = rang[0] + + if typ not in sets: + sets[typ] = set() + sets[typ].add((start, end)) + + + +print ("/* == Start of generated table == */") +print ("/*") +print (" * The following tables are generated by running:") +print (" *") +print (" * ./gen-emoji-table.py emoji-data.txt") +print (" *") +print (" * on file with this header:") +print (" *") +for l in header: + print (" * %s" % (l.strip())) +print (" */") +print () +print ("#ifndef HB_UNICODE_EMOJI_TABLE_HH") +print ("#define HB_UNICODE_EMOJI_TABLE_HH") +print () +print ('#include "hb-unicode.hh"') +print () + +for typ,s in sets.items(): + if typ != "Extended_Pictographic": continue + print() + print("static const struct hb_unicode_range_t _hb_unicode_emoji_%s_table[] =" % typ) + print("{") + for pair in sorted(s): + print(" {0x%04X, 0x%04X}," % pair) + print("};") + +print () +print ("#endif /* HB_UNICODE_EMOJI_TABLE_HH */") +print () +print ("/* == End of generated table == */") diff --git a/src/hb-ot-os2-table.hh b/src/hb-ot-os2-table.hh index f28858c59..71d2bf59d 100644 --- a/src/hb-ot-os2-table.hh +++ b/src/hb-ot-os2-table.hh @@ -81,7 +81,7 @@ struct os2 hb_codepoint_t cp = HB_SET_VALUE_INVALID; while (codepoints->next (&cp)) { - unsigned int bit = hb_get_unicode_range_bit (cp); + unsigned int bit = _hb_ot_os2_get_unicode_range_bit (cp); if (bit < 128) { unsigned int block = bit / 32; diff --git a/src/hb-ot-os2-unicode-ranges.hh b/src/hb-ot-os2-unicode-ranges.hh index ee45844b5..8fa21dfb9 100644 --- a/src/hb-ot-os2-unicode-ranges.hh +++ b/src/hb-ot-os2-unicode-ranges.hh @@ -44,7 +44,7 @@ struct OS2Range else if (cp <= range->end) return 0; else - return 1; + return +1; } hb_codepoint_t start; @@ -227,11 +227,11 @@ static OS2Range _hb_os2_unicode_ranges[] = }; /** - * hb_get_unicode_range_bit: + * _hb_ot_os2_get_unicode_range_bit: * Returns the bit to be set in os/2 ulUnicodeOS2Range for a given codepoint. **/ static unsigned int -hb_get_unicode_range_bit (hb_codepoint_t cp) +_hb_ot_os2_get_unicode_range_bit (hb_codepoint_t cp) { OS2Range *range = (OS2Range*) hb_bsearch_r (&cp, _hb_os2_unicode_ranges, ARRAY_LENGTH (_hb_os2_unicode_ranges), diff --git a/src/hb-unicode-emoji-table.hh b/src/hb-unicode-emoji-table.hh new file mode 100644 index 000000000..41199de53 --- /dev/null +++ b/src/hb-unicode-emoji-table.hh @@ -0,0 +1,269 @@ +/* == Start of generated table == */ +/* + * The following tables are generated by running: + * + * ./gen-emoji-table.py emoji-data.txt + * + * on file with this header: + * + * # emoji-data.txt + * # Date: 2018-02-07, 07:55:18 GMT + * # © 2018 Unicode®, Inc. + * # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. + * # For terms of use, see http://www.unicode.org/terms_of_use.html + * # + * # Emoji Data for UTS #51 + * # Version: 11.0 + * # + * # For documentation and usage, see http://www.unicode.org/reports/tr51 + */ + +#ifndef HB_UNICODE_EMOJI_TABLE_HH +#define HB_UNICODE_EMOJI_TABLE_HH + +#include "hb-unicode.hh" + + +static const struct hb_unicode_range_t _hb_unicode_emoji_Extended_Pictographic_table[] = +{ + {0x00A9, 0x00A9}, + {0x00AE, 0x00AE}, + {0x203C, 0x203C}, + {0x2049, 0x2049}, + {0x2122, 0x2122}, + {0x2139, 0x2139}, + {0x2194, 0x2199}, + {0x21A9, 0x21AA}, + {0x231A, 0x231B}, + {0x2328, 0x2328}, + {0x2388, 0x2388}, + {0x23CF, 0x23CF}, + {0x23E9, 0x23F3}, + {0x23F8, 0x23FA}, + {0x24C2, 0x24C2}, + {0x25AA, 0x25AB}, + {0x25B6, 0x25B6}, + {0x25C0, 0x25C0}, + {0x25FB, 0x25FE}, + {0x2600, 0x2605}, + {0x2607, 0x2612}, + {0x2614, 0x2615}, + {0x2616, 0x2617}, + {0x2618, 0x2618}, + {0x2619, 0x2619}, + {0x261A, 0x266F}, + {0x2670, 0x2671}, + {0x2672, 0x267D}, + {0x267E, 0x267F}, + {0x2680, 0x2685}, + {0x2690, 0x2691}, + {0x2692, 0x269C}, + {0x269D, 0x269D}, + {0x269E, 0x269F}, + {0x26A0, 0x26A1}, + {0x26A2, 0x26B1}, + {0x26B2, 0x26B2}, + {0x26B3, 0x26BC}, + {0x26BD, 0x26BF}, + {0x26C0, 0x26C3}, + {0x26C4, 0x26CD}, + {0x26CE, 0x26CE}, + {0x26CF, 0x26E1}, + {0x26E2, 0x26E2}, + {0x26E3, 0x26E3}, + {0x26E4, 0x26E7}, + {0x26E8, 0x26FF}, + {0x2700, 0x2700}, + {0x2701, 0x2704}, + {0x2705, 0x2705}, + {0x2708, 0x2709}, + {0x270A, 0x270B}, + {0x270C, 0x2712}, + {0x2714, 0x2714}, + {0x2716, 0x2716}, + {0x271D, 0x271D}, + {0x2721, 0x2721}, + {0x2728, 0x2728}, + {0x2733, 0x2734}, + {0x2744, 0x2744}, + {0x2747, 0x2747}, + {0x274C, 0x274C}, + {0x274E, 0x274E}, + {0x2753, 0x2755}, + {0x2757, 0x2757}, + {0x2763, 0x2767}, + {0x2795, 0x2797}, + {0x27A1, 0x27A1}, + {0x27B0, 0x27B0}, + {0x27BF, 0x27BF}, + {0x2934, 0x2935}, + {0x2B05, 0x2B07}, + {0x2B1B, 0x2B1C}, + {0x2B50, 0x2B50}, + {0x2B55, 0x2B55}, + {0x3030, 0x3030}, + {0x303D, 0x303D}, + {0x3297, 0x3297}, + {0x3299, 0x3299}, + {0x1F000, 0x1F02B}, + {0x1F02C, 0x1F02F}, + {0x1F030, 0x1F093}, + {0x1F094, 0x1F09F}, + {0x1F0A0, 0x1F0AE}, + {0x1F0AF, 0x1F0B0}, + {0x1F0B1, 0x1F0BE}, + {0x1F0BF, 0x1F0BF}, + {0x1F0C0, 0x1F0C0}, + {0x1F0C1, 0x1F0CF}, + {0x1F0D0, 0x1F0D0}, + {0x1F0D1, 0x1F0DF}, + {0x1F0E0, 0x1F0F5}, + {0x1F0F6, 0x1F0FF}, + {0x1F10D, 0x1F10F}, + {0x1F12F, 0x1F12F}, + {0x1F16C, 0x1F16F}, + {0x1F170, 0x1F171}, + {0x1F17E, 0x1F17E}, + {0x1F17F, 0x1F17F}, + {0x1F18E, 0x1F18E}, + {0x1F191, 0x1F19A}, + {0x1F1AD, 0x1F1E5}, + {0x1F201, 0x1F202}, + {0x1F203, 0x1F20F}, + {0x1F21A, 0x1F21A}, + {0x1F22F, 0x1F22F}, + {0x1F232, 0x1F23A}, + {0x1F23C, 0x1F23F}, + {0x1F249, 0x1F24F}, + {0x1F250, 0x1F251}, + {0x1F252, 0x1F25F}, + {0x1F260, 0x1F265}, + {0x1F266, 0x1F2FF}, + {0x1F300, 0x1F320}, + {0x1F321, 0x1F32C}, + {0x1F32D, 0x1F32F}, + {0x1F330, 0x1F335}, + {0x1F336, 0x1F336}, + {0x1F337, 0x1F37C}, + {0x1F37D, 0x1F37D}, + {0x1F37E, 0x1F37F}, + {0x1F380, 0x1F393}, + {0x1F394, 0x1F39F}, + {0x1F3A0, 0x1F3C4}, + {0x1F3C5, 0x1F3C5}, + {0x1F3C6, 0x1F3CA}, + {0x1F3CB, 0x1F3CE}, + {0x1F3CF, 0x1F3D3}, + {0x1F3D4, 0x1F3DF}, + {0x1F3E0, 0x1F3F0}, + {0x1F3F1, 0x1F3F7}, + {0x1F3F8, 0x1F3FA}, + {0x1F400, 0x1F43E}, + {0x1F43F, 0x1F43F}, + {0x1F440, 0x1F440}, + {0x1F441, 0x1F441}, + {0x1F442, 0x1F4F7}, + {0x1F4F8, 0x1F4F8}, + {0x1F4F9, 0x1F4FC}, + {0x1F4FD, 0x1F4FE}, + {0x1F4FF, 0x1F4FF}, + {0x1F500, 0x1F53D}, + {0x1F546, 0x1F54A}, + {0x1F54B, 0x1F54F}, + {0x1F550, 0x1F567}, + {0x1F568, 0x1F579}, + {0x1F57A, 0x1F57A}, + {0x1F57B, 0x1F5A3}, + {0x1F5A4, 0x1F5A4}, + {0x1F5A5, 0x1F5FA}, + {0x1F5FB, 0x1F5FF}, + {0x1F600, 0x1F600}, + {0x1F601, 0x1F610}, + {0x1F611, 0x1F611}, + {0x1F612, 0x1F614}, + {0x1F615, 0x1F615}, + {0x1F616, 0x1F616}, + {0x1F617, 0x1F617}, + {0x1F618, 0x1F618}, + {0x1F619, 0x1F619}, + {0x1F61A, 0x1F61A}, + {0x1F61B, 0x1F61B}, + {0x1F61C, 0x1F61E}, + {0x1F61F, 0x1F61F}, + {0x1F620, 0x1F625}, + {0x1F626, 0x1F627}, + {0x1F628, 0x1F62B}, + {0x1F62C, 0x1F62C}, + {0x1F62D, 0x1F62D}, + {0x1F62E, 0x1F62F}, + {0x1F630, 0x1F633}, + {0x1F634, 0x1F634}, + {0x1F635, 0x1F640}, + {0x1F641, 0x1F642}, + {0x1F643, 0x1F644}, + {0x1F645, 0x1F64F}, + {0x1F680, 0x1F6C5}, + {0x1F6C6, 0x1F6CF}, + {0x1F6D0, 0x1F6D0}, + {0x1F6D1, 0x1F6D2}, + {0x1F6D3, 0x1F6D4}, + {0x1F6D5, 0x1F6DF}, + {0x1F6E0, 0x1F6EC}, + {0x1F6ED, 0x1F6EF}, + {0x1F6F0, 0x1F6F3}, + {0x1F6F4, 0x1F6F6}, + {0x1F6F7, 0x1F6F8}, + {0x1F6F9, 0x1F6F9}, + {0x1F6FA, 0x1F6FF}, + {0x1F774, 0x1F77F}, + {0x1F7D5, 0x1F7D8}, + {0x1F7D9, 0x1F7FF}, + {0x1F80C, 0x1F80F}, + {0x1F848, 0x1F84F}, + {0x1F85A, 0x1F85F}, + {0x1F888, 0x1F88F}, + {0x1F8AE, 0x1F8FF}, + {0x1F90C, 0x1F90F}, + {0x1F910, 0x1F918}, + {0x1F919, 0x1F91E}, + {0x1F91F, 0x1F91F}, + {0x1F920, 0x1F927}, + {0x1F928, 0x1F92F}, + {0x1F930, 0x1F930}, + {0x1F931, 0x1F932}, + {0x1F933, 0x1F93A}, + {0x1F93C, 0x1F93E}, + {0x1F93F, 0x1F93F}, + {0x1F940, 0x1F945}, + {0x1F947, 0x1F94B}, + {0x1F94C, 0x1F94C}, + {0x1F94D, 0x1F94F}, + {0x1F950, 0x1F95E}, + {0x1F95F, 0x1F96B}, + {0x1F96C, 0x1F970}, + {0x1F971, 0x1F972}, + {0x1F973, 0x1F976}, + {0x1F977, 0x1F979}, + {0x1F97A, 0x1F97A}, + {0x1F97B, 0x1F97B}, + {0x1F97C, 0x1F97F}, + {0x1F980, 0x1F984}, + {0x1F985, 0x1F991}, + {0x1F992, 0x1F997}, + {0x1F998, 0x1F9A2}, + {0x1F9A3, 0x1F9AF}, + {0x1F9B0, 0x1F9B9}, + {0x1F9BA, 0x1F9BF}, + {0x1F9C0, 0x1F9C0}, + {0x1F9C1, 0x1F9C2}, + {0x1F9C3, 0x1F9CF}, + {0x1F9D0, 0x1F9E6}, + {0x1F9E7, 0x1F9FF}, + {0x1FA00, 0x1FA5F}, + {0x1FA60, 0x1FA6D}, + {0x1FA6E, 0x1FFFD}, +}; + +#endif /* HB_UNICODE_EMOJI_TABLE_HH */ + +/* == End of generated table == */ diff --git a/src/hb-unicode.cc b/src/hb-unicode.cc index 0e8b520d0..7b821b46d 100644 --- a/src/hb-unicode.cc +++ b/src/hb-unicode.cc @@ -564,3 +564,19 @@ _hb_modified_combining_class[256] = 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, /* HB_UNICODE_COMBINING_CLASS_INVALID */ }; + + +/* + * Emoji + */ + +#include "hb-unicode-emoji-table.hh" + +bool +_hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp) +{ + return hb_bsearch_r (&cp, _hb_unicode_emoji_Extended_Pictographic_table, + ARRAY_LENGTH (_hb_unicode_emoji_Extended_Pictographic_table), + sizeof (hb_unicode_range_t), + hb_unicode_range_t::cmp, nullptr); +} diff --git a/src/hb-unicode.hh b/src/hb-unicode.hh index 1f0d97e18..eac52eaa5 100644 --- a/src/hb-unicode.hh +++ b/src/hb-unicode.hh @@ -286,7 +286,9 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE DECLARE_NULL_INSTANCE (hb_unicode_funcs_t); -/* Modified combining marks */ +/* + * Modified combining marks + */ /* Hebrew * @@ -384,4 +386,37 @@ DECLARE_NULL_INSTANCE (hb_unicode_funcs_t); (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \ FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))) + +/* + * Ranges, used for bsearch tables. + */ + +struct hb_unicode_range_t +{ + static int + cmp (const void *_key, const void *_item, void *_arg) + { + hb_codepoint_t cp = *((hb_codepoint_t *) _key); + const hb_unicode_range_t *range = (hb_unicode_range_t *) _item; + + if (cp < range->start) + return -1; + else if (cp <= range->end) + return 0; + else + return +1; + } + + hb_codepoint_t start; + hb_codepoint_t end; +}; + +/* + * Emoji. + */ + +HB_INTERNAL bool +_hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp); + + #endif /* HB_UNICODE_HH */