From 2c29b81e7f36cf56e92f5b5eb406cc46e6394178 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 23 Jan 2023 20:49:19 -0700 Subject: [PATCH] [buffer-deserialize-text] Separate glyphs / unicodes machines --- src/Makefile.sources | 6 +- ...l => hb-buffer-deserialize-text-glyphs.rl} | 31 +---- src/hb-buffer-deserialize-text-unicodes.rl | 108 ++++++++++++++++++ src/hb-buffer-serialize.cc | 15 +-- src/meson.build | 6 +- 5 files changed, 130 insertions(+), 36 deletions(-) rename src/{hb-buffer-deserialize-text.rl => hb-buffer-deserialize-text-glyphs.rl} (81%) create mode 100644 src/hb-buffer-deserialize-text-unicodes.rl diff --git a/src/Makefile.sources b/src/Makefile.sources index 1aa4fe8d0..7a027fa40 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -256,7 +256,8 @@ HB_BASE_sources = \ HB_BASE_RAGEL_GENERATED_sources = \ hb-buffer-deserialize-json.hh \ - hb-buffer-deserialize-text.hh \ + hb-buffer-deserialize-text-glyphs.hh \ + hb-buffer-deserialize-text-unicodes.hh \ hb-number-parser.hh \ hb-ot-shaper-indic-machine.hh \ hb-ot-shaper-khmer-machine.hh \ @@ -265,7 +266,8 @@ HB_BASE_RAGEL_GENERATED_sources = \ $(NULL) HB_BASE_RAGEL_sources = \ hb-buffer-deserialize-json.rl \ - hb-buffer-deserialize-text.rl \ + hb-buffer-deserialize-text-glyphs.rl \ + hb-buffer-deserialize-text-unicodes.rl \ hb-number-parser.rl \ hb-ot-shaper-indic-machine.rl \ hb-ot-shaper-khmer-machine.rl \ diff --git a/src/hb-buffer-deserialize-text.rl b/src/hb-buffer-deserialize-text-glyphs.rl similarity index 81% rename from src/hb-buffer-deserialize-text.rl rename to src/hb-buffer-deserialize-text-glyphs.rl index d7e3b63b1..8eec370e4 100644 --- a/src/hb-buffer-deserialize-text.rl +++ b/src/hb-buffer-deserialize-text-glyphs.rl @@ -24,14 +24,14 @@ * Google Author(s): Behdad Esfahbod */ -#ifndef HB_BUFFER_DESERIALIZE_TEXT_HH -#define HB_BUFFER_DESERIALIZE_TEXT_HH +#ifndef HB_BUFFER_DESERIALIZE_TEXT_GLYPHS_HH +#define HB_BUFFER_DESERIALIZE_TEXT_GLYPHS_HH #include "hb.hh" %%{ -machine 
deserialize_text; +machine deserialize_text_glyphs; alphtype unsigned char; write data; @@ -52,9 +52,6 @@ action tok { tok = p; } -action ensure_glyphs { if (unlikely (!buffer->ensure_glyphs ())) return false; } -action ensure_unicode { if (unlikely (!buffer->ensure_unicode ())) return false; } - action parse_glyph { /* TODO Unescape delimiters. */ if (!hb_font_glyph_from_string (font, @@ -63,8 +60,6 @@ action parse_glyph { return false; } -action parse_hexdigits {if (!parse_hex (tok, p, &info.codepoint )) return false; } - action parse_cluster { if (!parse_uint (tok, p, &info.cluster )) return false; } action parse_x_offset { if (!parse_int (tok, p, &pos.x_offset )) return false; } action parse_y_offset { if (!parse_int (tok, p, &pos.y_offset )) return false; } @@ -93,31 +88,17 @@ glyph_item = glyphflags? ) >clear_item - @ensure_glyphs - %add_item - ; - -unicode = 'U' '+' xdigit+ >tok %parse_hexdigits; - -unicode_item = - ( - unicode - cluster? - ) - >clear_item - @ensure_unicode %add_item ; glyphs = glyph_item (space* '|' space* glyph_item)* space* ('|'|']'); -unicodes = unicode_item (space* '|' space* unicode_item)* space* ('|'|'>'); -main := space* ( ('[' glyphs) | ('<' unicodes) ); +main := space* '[' glyphs; }%% static hb_bool_t -_hb_buffer_deserialize_text (hb_buffer_t *buffer, +_hb_buffer_deserialize_text_glyphs (hb_buffer_t *buffer, const char *buf, unsigned int buf_len, const char **end_ptr, @@ -145,4 +126,4 @@ _hb_buffer_deserialize_text (hb_buffer_t *buffer, return p == pe && *(p-1) != ']'; } -#endif /* HB_BUFFER_DESERIALIZE_TEXT_HH */ +#endif /* HB_BUFFER_DESERIALIZE_TEXT_GLYPHS_HH */ diff --git a/src/hb-buffer-deserialize-text-unicodes.rl b/src/hb-buffer-deserialize-text-unicodes.rl new file mode 100644 index 000000000..4d81aa70b --- /dev/null +++ b/src/hb-buffer-deserialize-text-unicodes.rl @@ -0,0 +1,108 @@ +/* + * Copyright © 2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. 
+ * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_BUFFER_DESERIALIZE_TEXT_UNICODES_HH +#define HB_BUFFER_DESERIALIZE_TEXT_UNICODES_HH + +#include "hb.hh" + +%%{ + +machine deserialize_text_unicodes; +alphtype unsigned char; +write data; + +action clear_item { /* Zero the scratch info/pos before an item is parsed. */ + hb_memset (&info, 0, sizeof (info)); + hb_memset (&pos , 0, sizeof (pos )); +} + +action add_item { /* Append the parsed item to the buffer; on success record progress in *end_ptr. */ + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + if (buffer->have_positions) + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + +action tok { /* Remember where the current token starts. */ + tok = p; +} + +action parse_hexdigits {if (!parse_hex (tok, p, &info.codepoint )) return false; /* [tok, p) spans the xdigit+ run after "U+" */ } + +action parse_cluster { if (!parse_uint (tok, p, &info.cluster )) return false; /* [tok, p) spans the unum after '=' */ } + +unum = '0' | [1-9] digit*; +num = '-'? unum; + +cluster = '=' (unum >tok %parse_cluster); + +unicode = 'U' '+' xdigit+ >tok %parse_hexdigits; + +unicode_item = + ( + unicode + cluster? 
+ ) + >clear_item + %add_item + ; + +unicodes = unicode_item (space* '|' space* unicode_item)* space* ('|'|'>'); + +main := space* '<' unicodes; + +}%% + +static hb_bool_t +_hb_buffer_deserialize_text_unicodes (hb_buffer_t *buffer, + const char *buf, + unsigned int buf_len, + const char **end_ptr, + hb_font_t *font) +{ /* Runs the deserialize_text_unicodes machine over buf[0..buf_len): items of the form U+HEX[=cluster], '|'-separated after a leading '<', are appended to buffer; *end_ptr is set to where parsing stopped. font is not referenced in this body. */ + const char *p = buf, *pe = buf + buf_len; + + while (p < pe && ISSPACE (*p)) + p++; + + const char *tok = nullptr; + int cs; + hb_glyph_info_t info = {0}; + hb_glyph_position_t pos = {0}; + %%{ + write init; + write exec; + }%% + + *end_ptr = p; + + return p == pe && p > buf && *(p-1) != ']'; /* p > buf: never read buf[-1] when buf_len is 0. NOTE(review): ']' is the glyphs terminator, while this grammar ends on '>' -- confirm which character this check should test. */ +} + +#endif /* HB_BUFFER_DESERIALIZE_TEXT_UNICODES_HH */ diff --git a/src/hb-buffer-serialize.cc b/src/hb-buffer-serialize.cc index a458f2318..6e47fb351 100644 --- a/src/hb-buffer-serialize.cc +++ b/src/hb-buffer-serialize.cc @@ -721,7 +721,8 @@ parse_hex (const char *pp, const char *end, uint32_t *pv) } #include "hb-buffer-deserialize-json.hh" -#include "hb-buffer-deserialize-text.hh" +#include "hb-buffer-deserialize-text-glyphs.hh" +#include "hb-buffer-deserialize-text-unicodes.hh" /** * hb_buffer_deserialize_glyphs: @@ -779,9 +780,9 @@ hb_buffer_deserialize_glyphs (hb_buffer_t *buffer, switch (format) { case HB_BUFFER_SERIALIZE_FORMAT_TEXT: - return _hb_buffer_deserialize_text (buffer, - buf, buf_len, end_ptr, - font); + return _hb_buffer_deserialize_text_glyphs (buffer, + buf, buf_len, end_ptr, + font); case HB_BUFFER_SERIALIZE_FORMAT_JSON: return _hb_buffer_deserialize_json (buffer, @@ -849,9 +850,9 @@ hb_buffer_deserialize_unicode (hb_buffer_t *buffer, switch (format) { case HB_BUFFER_SERIALIZE_FORMAT_TEXT: - return _hb_buffer_deserialize_text (buffer, - buf, buf_len, end_ptr, - font); + return _hb_buffer_deserialize_text_unicodes (buffer, + buf, buf_len, end_ptr, + font); case HB_BUFFER_SERIALIZE_FORMAT_JSON: return _hb_buffer_deserialize_json (buffer, diff --git a/src/meson.build b/src/meson.build index a32b21e73..1a6827f1b 100644 --- a/src/meson.build +++ 
b/src/meson.build @@ -256,7 +256,8 @@ hb_base_sources = files( hb_base_ragel_generated_sources = files( 'hb-buffer-deserialize-json.hh', - 'hb-buffer-deserialize-text.hh', + 'hb-buffer-deserialize-text-glyphs.hh', + 'hb-buffer-deserialize-text-unicodes.hh', 'hb-number-parser.hh', 'hb-ot-shaper-indic-machine.hh', 'hb-ot-shaper-khmer-machine.hh', @@ -265,7 +266,8 @@ hb_base_ragel_generated_sources = files( ) hb_base_ragel_sources = [ 'hb-buffer-deserialize-json.rl', - 'hb-buffer-deserialize-text.rl', + 'hb-buffer-deserialize-text-glyphs.rl', + 'hb-buffer-deserialize-text-unicodes.rl', 'hb-number-parser.rl', 'hb-ot-shaper-indic-machine.rl', 'hb-ot-shaper-khmer-machine.rl',