Switch on the first char of a complex language tag
This results in a tenfold speed-up for the common case of tags that are not complex, in the sense of `hb_ot_tags_from_complex_language`.
This commit is contained in:
parent
a754d44195
commit
3f8877473f
|
@ -288,6 +288,37 @@ class LanguageTag (object):
|
|||
except StopIteration:
|
||||
return None
|
||||
|
||||
def is_complex (self):
|
||||
"""Return whether this tag is too complex to represent as a
|
||||
``LangTag`` in the generated code.
|
||||
|
||||
Complex tags need to be handled in
|
||||
``hb_ot_tags_from_complex_language``.
|
||||
|
||||
Returns:
|
||||
Whether this tag is complex.
|
||||
"""
|
||||
return not (len (self.subtags) == 1
|
||||
or self.grandfathered
|
||||
and len (self.subtags[1]) != 3
|
||||
and ot.from_bcp_47[self.subtags[0]] == ot.from_bcp_47[self.language])
|
||||
|
||||
def get_group (self):
|
||||
"""Return the group into which this tag should be categorized in
|
||||
``hb_ot_tags_from_complex_language``.
|
||||
|
||||
The group is the first letter of the tag, or ``'und'`` if this tag
|
||||
should not be matched in a ``switch`` statement in the generated
|
||||
code.
|
||||
|
||||
Returns:
|
||||
This tag's group.
|
||||
"""
|
||||
return ('und'
|
||||
if (self.language == 'und'
|
||||
or self.variant in bcp_47.prefixes and len (bcp_47.prefixes[self.variant]) == 1)
|
||||
else self.language[0])
|
||||
|
||||
class OpenTypeRegistryParser (HTMLParser):
|
||||
"""A parser for the OpenType language system tag registry.
|
||||
|
||||
|
@ -598,16 +629,15 @@ class BCP47Parser (object):
|
|||
for macrolanguage in macrolanguages:
|
||||
self._add_macrolanguage (biggest_macrolanguage, macrolanguage)
|
||||
|
||||
def get_name (self, tag):
|
||||
def get_name (self, lt):
|
||||
"""Return the names of the subtags in a language tag.
|
||||
|
||||
Args:
|
||||
tag (str): A BCP 47 language tag.
|
||||
lt (LanguageTag): A BCP 47 language tag.
|
||||
|
||||
Returns:
|
||||
The name form of ``tag``.
|
||||
The name form of ``lt``.
|
||||
"""
|
||||
lt = LanguageTag (tag)
|
||||
name = self.names[lt.language].split ('\n')[0]
|
||||
if lt.script:
|
||||
name += '; ' + self.names[lt.script.title ()].split ('\n')[0]
|
||||
|
@ -909,47 +939,42 @@ print ('\t\t\t\t unsigned int *count /* IN/OUT */,')
|
|||
print ('\t\t\t\t hb_tag_t *tags /* OUT */)')
|
||||
print ('{')
|
||||
|
||||
def print_subtag_matches (subtag):
|
||||
def print_subtag_matches (subtag, new_line):
|
||||
if subtag:
|
||||
if new_line:
|
||||
print ()
|
||||
print (' && subtag_matches (lang_str, limit, "-%s")' % subtag, end='')
|
||||
print ('\t&& ', end='')
|
||||
print ('subtag_matches (lang_str, limit, "-%s")' % subtag, end='')
|
||||
|
||||
for language, tags in sorted (ot.from_bcp_47.items (), key=lambda i: (-len (i[0]), i[0])):
|
||||
lt = LanguageTag (language)
|
||||
if len (lt.subtags) == 1 or lt.grandfathered and len (lt.subtags[1]) != 3 and ot.from_bcp_47[lt.subtags[0]] == tags:
|
||||
complex_tags = collections.defaultdict (list)
|
||||
for initial, group in itertools.groupby ((lt_tags for lt_tags in [
|
||||
(LanguageTag (language), tags)
|
||||
for language, tags in sorted (ot.from_bcp_47.items (),
|
||||
key=lambda i: (-len (i[0]), i[0]))
|
||||
] if lt_tags[0].is_complex ()),
|
||||
key=lambda lt_tags: lt_tags[0].get_group ()):
|
||||
complex_tags[initial] += group
|
||||
|
||||
for initial, items in sorted (complex_tags.items ()):
|
||||
if initial != 'und':
|
||||
continue
|
||||
print (' if (', end='')
|
||||
if (lt.language == 'und' or
|
||||
lt.variant in bcp_47.prefixes and
|
||||
len (bcp_47.prefixes[lt.variant]) == 1):
|
||||
for lt, tags in items:
|
||||
if lt.variant in bcp_47.prefixes:
|
||||
expect (next (iter (bcp_47.prefixes[lt.variant])) == lt.language,
|
||||
'%s is not a valid prefix of %s' % (lt.language, lt.variant))
|
||||
print ('1', end='')
|
||||
elif lt.grandfathered:
|
||||
print ('0 == strcmp (lang_str, "%s")' % lt.language, end='')
|
||||
else:
|
||||
print ('lang_matches (lang_str, "%s' % lt.language, end='')
|
||||
if lt.script:
|
||||
print ('-%s' % lt.script, end='')
|
||||
lt.script = None
|
||||
if lt.region:
|
||||
print ('-%s' % lt.region, end='')
|
||||
lt.region = None
|
||||
print ('")', end='')
|
||||
print_subtag_matches (lt.script)
|
||||
print_subtag_matches (lt.region)
|
||||
print_subtag_matches (lt.variant)
|
||||
print (' if (', end='')
|
||||
print_subtag_matches (lt.script, False)
|
||||
print_subtag_matches (lt.region, False)
|
||||
print_subtag_matches (lt.variant, False)
|
||||
print (')')
|
||||
print (' {')
|
||||
write (' /* %s */' % bcp_47.get_name (language))
|
||||
write (' /* %s */' % bcp_47.get_name (lt))
|
||||
print ()
|
||||
if len (tags) == 1:
|
||||
write (' tags[0] = %s; /* %s */' % (hb_tag (tags[0]), ot.names[tags[0]]))
|
||||
print ()
|
||||
print (' *count = 1;')
|
||||
else:
|
||||
print (' unsigned int i;')
|
||||
print (' hb_tag_t possible_tags[] = {')
|
||||
for tag in tags:
|
||||
write (' %s, /* %s */' % (hb_tag (tag), ot.names[tag]))
|
||||
|
@ -961,6 +986,54 @@ for language, tags in sorted (ot.from_bcp_47.items (), key=lambda i: (-len (i[0]
|
|||
print (' return true;')
|
||||
print (' }')
|
||||
|
||||
print (' switch (lang_str[0])')
|
||||
print (' {')
|
||||
for initial, items in sorted (complex_tags.items ()):
|
||||
if initial == 'und':
|
||||
continue
|
||||
print (" case '%s':" % initial)
|
||||
for lt, tags in items:
|
||||
print (' if (', end='')
|
||||
if lt.grandfathered:
|
||||
print ('0 == strcmp (&lang_str[1], "%s")' % lt.language[1:], end='')
|
||||
else:
|
||||
string_literal = lt.language[1:] + '-'
|
||||
if lt.script:
|
||||
string_literal += lt.script
|
||||
lt.script = None
|
||||
if lt.region:
|
||||
string_literal += '-' + lt.region
|
||||
lt.region = None
|
||||
if string_literal[-1] == '-':
|
||||
print ('0 == strncmp (&lang_str[1], "%s", %i)' % (string_literal, len (string_literal)), end='')
|
||||
else:
|
||||
print ('lang_matches (&lang_str[1], "%s")' % string_literal, end='')
|
||||
print_subtag_matches (lt.script, True)
|
||||
print_subtag_matches (lt.region, True)
|
||||
print_subtag_matches (lt.variant, True)
|
||||
print (')')
|
||||
print (' {')
|
||||
write (' /* %s */' % bcp_47.get_name (lt))
|
||||
print ()
|
||||
if len (tags) == 1:
|
||||
write (' tags[0] = %s; /* %s */' % (hb_tag (tags[0]), ot.names[tags[0]]))
|
||||
print ()
|
||||
print (' *count = 1;')
|
||||
else:
|
||||
print (' unsigned int i;')
|
||||
print (' hb_tag_t possible_tags[] = {')
|
||||
for tag in tags:
|
||||
write ('\t%s, /* %s */' % (hb_tag (tag), ot.names[tag]))
|
||||
print ()
|
||||
print (' };')
|
||||
print (' for (i = 0; i < %s && i < *count; i++)' % len (tags))
|
||||
print ('\ttags[i] = possible_tags[i];')
|
||||
print (' *count = i;')
|
||||
print (' return true;')
|
||||
print (' }')
|
||||
print (' break;')
|
||||
|
||||
print (' }')
|
||||
print (' return false;')
|
||||
print ('}')
|
||||
print ()
|
||||
|
@ -1030,7 +1103,7 @@ verify_disambiguation_dict ()
|
|||
for ot_tag, bcp_47_tag in sorted (disambiguation.items ()):
|
||||
write (' case %s: /* %s */' % (hb_tag (ot_tag), ot.names[ot_tag]))
|
||||
print ()
|
||||
write (' return hb_language_from_string (\"%s\", -1); /* %s */' % (bcp_47_tag, bcp_47.get_name (bcp_47_tag)))
|
||||
write (' return hb_language_from_string (\"%s\", -1); /* %s */' % (bcp_47_tag, bcp_47.get_name (LanguageTag (bcp_47_tag))))
|
||||
print ()
|
||||
|
||||
print (' default:')
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue