[buffer-deserialize-text] Separate glyphs / unicodes machines

This commit is contained in:
Behdad Esfahbod 2023-01-23 20:49:19 -07:00
parent d0355eb4bd
commit 2c29b81e7f
5 changed files with 130 additions and 36 deletions

View File

@ -256,7 +256,8 @@ HB_BASE_sources = \
HB_BASE_RAGEL_GENERATED_sources = \ HB_BASE_RAGEL_GENERATED_sources = \
hb-buffer-deserialize-json.hh \ hb-buffer-deserialize-json.hh \
hb-buffer-deserialize-text.hh \ hb-buffer-deserialize-text-glyphs.hh \
hb-buffer-deserialize-text-unicodes.hh \
hb-number-parser.hh \ hb-number-parser.hh \
hb-ot-shaper-indic-machine.hh \ hb-ot-shaper-indic-machine.hh \
hb-ot-shaper-khmer-machine.hh \ hb-ot-shaper-khmer-machine.hh \
@ -265,7 +266,8 @@ HB_BASE_RAGEL_GENERATED_sources = \
$(NULL) $(NULL)
HB_BASE_RAGEL_sources = \ HB_BASE_RAGEL_sources = \
hb-buffer-deserialize-json.rl \ hb-buffer-deserialize-json.rl \
hb-buffer-deserialize-text.rl \ hb-buffer-deserialize-text-glyphs.rl \
hb-buffer-deserialize-text-unicodes.rl \
hb-number-parser.rl \ hb-number-parser.rl \
hb-ot-shaper-indic-machine.rl \ hb-ot-shaper-indic-machine.rl \
hb-ot-shaper-khmer-machine.rl \ hb-ot-shaper-khmer-machine.rl \

View File

@ -24,14 +24,14 @@
* Google Author(s): Behdad Esfahbod * Google Author(s): Behdad Esfahbod
*/ */
#ifndef HB_BUFFER_DESERIALIZE_TEXT_HH #ifndef HB_BUFFER_DESERIALIZE_TEXT_GLYPHS_HH
#define HB_BUFFER_DESERIALIZE_TEXT_HH #define HB_BUFFER_DESERIALIZE_TEXT_GLYPHS_HH
#include "hb.hh" #include "hb.hh"
%%{ %%{
machine deserialize_text; machine deserialize_text_glyphs;
alphtype unsigned char; alphtype unsigned char;
write data; write data;
@ -52,9 +52,6 @@ action tok {
tok = p; tok = p;
} }
action ensure_glyphs { if (unlikely (!buffer->ensure_glyphs ())) return false; }
action ensure_unicode { if (unlikely (!buffer->ensure_unicode ())) return false; }
action parse_glyph { action parse_glyph {
/* TODO Unescape delimiters. */ /* TODO Unescape delimiters. */
if (!hb_font_glyph_from_string (font, if (!hb_font_glyph_from_string (font,
@ -63,8 +60,6 @@ action parse_glyph {
return false; return false;
} }
action parse_hexdigits {if (!parse_hex (tok, p, &info.codepoint )) return false; }
action parse_cluster { if (!parse_uint (tok, p, &info.cluster )) return false; } action parse_cluster { if (!parse_uint (tok, p, &info.cluster )) return false; }
action parse_x_offset { if (!parse_int (tok, p, &pos.x_offset )) return false; } action parse_x_offset { if (!parse_int (tok, p, &pos.x_offset )) return false; }
action parse_y_offset { if (!parse_int (tok, p, &pos.y_offset )) return false; } action parse_y_offset { if (!parse_int (tok, p, &pos.y_offset )) return false; }
@ -93,31 +88,17 @@ glyph_item =
glyphflags? glyphflags?
) )
>clear_item >clear_item
@ensure_glyphs
%add_item
;
unicode = 'U' '+' xdigit+ >tok %parse_hexdigits;
unicode_item =
(
unicode
cluster?
)
>clear_item
@ensure_unicode
%add_item %add_item
; ;
glyphs = glyph_item (space* '|' space* glyph_item)* space* ('|'|']'); glyphs = glyph_item (space* '|' space* glyph_item)* space* ('|'|']');
unicodes = unicode_item (space* '|' space* unicode_item)* space* ('|'|'>');
main := space* ( ('[' glyphs) | ('<' unicodes) ); main := space* '[' glyphs;
}%% }%%
static hb_bool_t static hb_bool_t
_hb_buffer_deserialize_text (hb_buffer_t *buffer, _hb_buffer_deserialize_text_glyphs (hb_buffer_t *buffer,
const char *buf, const char *buf,
unsigned int buf_len, unsigned int buf_len,
const char **end_ptr, const char **end_ptr,
@ -145,4 +126,4 @@ _hb_buffer_deserialize_text (hb_buffer_t *buffer,
return p == pe && *(p-1) != ']'; return p == pe && *(p-1) != ']';
} }
#endif /* HB_BUFFER_DESERIALIZE_TEXT_HH */ #endif /* HB_BUFFER_DESERIALIZE_TEXT_GLYPHS_HH */

View File

@ -0,0 +1,108 @@
/*
* Copyright © 2013 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_BUFFER_DESERIALIZE_TEXT_UNICODES_HH
#define HB_BUFFER_DESERIALIZE_TEXT_UNICODES_HH
#include "hb.hh"
/*
 * Ragel machine "deserialize_text_unicodes".
 *
 * Accepted input, as spelled out by the rules below:
 *   optional whitespace, then '<', then one or more items separated by '|'
 *   (whitespace is allowed around each separator), then optional whitespace
 *   and a final '|' or '>'.
 * Each item is "U+" followed by one or more hex digits, optionally followed
 * by "=<unsigned decimal>" giving the cluster value.
 *
 * Per item, clear_item is attached as the entering action and add_item as
 * the leaving action.  The actions use the host-code variables buffer, info,
 * pos, tok, p and end_ptr, which must be in scope where the machine is
 * executed.
 *
 * The "num" rule is defined but is not referenced by any other rule in this
 * machine.
 */
%%{
machine deserialize_text_unicodes;
alphtype unsigned char;
write data;
action clear_item {
/* Start every item from all-zero info and position records. */
hb_memset (&info, 0, sizeof (info));
hb_memset (&pos , 0, sizeof (pos ));
}
action add_item {
/* Append the parsed item; give up if the buffer reports failure. */
buffer->add_info (info);
if (unlikely (!buffer->successful))
return false;
/* Only store the position record when the buffer carries positions. */
if (buffer->have_positions)
buffer->pos[buffer->len - 1] = pos;
/* Report how far the input has been consumed so far. */
*end_ptr = p;
}
action tok {
/* Remember where the current token starts. */
tok = p;
}
action parse_hexdigits {if (!parse_hex (tok, p, &info.codepoint )) return false; }
action parse_cluster { if (!parse_uint (tok, p, &info.cluster )) return false; }
unum = '0' | [1-9] digit*;
num = '-'? unum;
cluster = '=' (unum >tok %parse_cluster);
unicode = 'U' '+' xdigit+ >tok %parse_hexdigits;
unicode_item =
(
unicode
cluster?
)
>clear_item
%add_item
;
unicodes = unicode_item (space* '|' space* unicode_item)* space* ('|'|'>');
main := space* '<' unicodes;
}%%
/*
 * Runs the deserialize_text_unicodes Ragel machine over buf[0..buf_len) and
 * appends every parsed item to @buffer.
 *
 * @buffer:  buffer that receives the parsed items (through the add_item action).
 * @buf:     start of the serialized text.
 * @buf_len: number of bytes available at @buf.
 * @end_ptr: set to the position at which parsing stopped.
 * @font:    not used by this function.
 *
 * Returns true when the machine stopped exactly at the end of the input and
 * the last consumed byte was not the closing '>' of a Unicode list.  Returns
 * false otherwise, or as soon as one of the machine's actions bails out.
 *
 * Two fixes relative to the previous revision:
 *  - the terminator test used ']' (the glyph-list terminator), which this
 *    machine never accepts; a Unicode list is closed by '>';
 *  - the byte before p is no longer read when nothing precedes it
 *    (zero-length input), which was an out-of-bounds read.
 */
static hb_bool_t
_hb_buffer_deserialize_text_unicodes (hb_buffer_t *buffer,
                                      const char *buf,
                                      unsigned int buf_len,
                                      const char **end_ptr,
                                      hb_font_t *font)
{
  /* p, pe, cs, tok, info and pos are referenced by name from the Ragel
   * actions and the generated code, so their names and their declaration
   * before the "write" directives must be preserved. */
  const char *p = buf, *pe = buf + buf_len;

  /* Skip leading whitespace before handing the input to the machine. */
  while (p < pe && ISSPACE (*p))
    p++;

  const char *tok = nullptr;
  int cs;
  hb_glyph_info_t info = {0};
  hb_glyph_position_t pos = {0};
  %%{
    write init;
    write exec;
  }%%

  *end_ptr = p;

  /* Stopping short of the end means the input was not fully consumed. */
  if (p != pe)
    return false;

  /* p == buf can only happen for zero-length input: there is no previous
   * byte to inspect, hence no terminator was seen. */
  return p == buf || *(p-1) != '>';
}
#endif /* HB_BUFFER_DESERIALIZE_TEXT_UNICODES_HH */

View File

@ -721,7 +721,8 @@ parse_hex (const char *pp, const char *end, uint32_t *pv)
} }
#include "hb-buffer-deserialize-json.hh" #include "hb-buffer-deserialize-json.hh"
#include "hb-buffer-deserialize-text.hh" #include "hb-buffer-deserialize-text-glyphs.hh"
#include "hb-buffer-deserialize-text-unicodes.hh"
/** /**
* hb_buffer_deserialize_glyphs: * hb_buffer_deserialize_glyphs:
@ -779,9 +780,9 @@ hb_buffer_deserialize_glyphs (hb_buffer_t *buffer,
switch (format) switch (format)
{ {
case HB_BUFFER_SERIALIZE_FORMAT_TEXT: case HB_BUFFER_SERIALIZE_FORMAT_TEXT:
return _hb_buffer_deserialize_text (buffer, return _hb_buffer_deserialize_text_glyphs (buffer,
buf, buf_len, end_ptr, buf, buf_len, end_ptr,
font); font);
case HB_BUFFER_SERIALIZE_FORMAT_JSON: case HB_BUFFER_SERIALIZE_FORMAT_JSON:
return _hb_buffer_deserialize_json (buffer, return _hb_buffer_deserialize_json (buffer,
@ -849,9 +850,9 @@ hb_buffer_deserialize_unicode (hb_buffer_t *buffer,
switch (format) switch (format)
{ {
case HB_BUFFER_SERIALIZE_FORMAT_TEXT: case HB_BUFFER_SERIALIZE_FORMAT_TEXT:
return _hb_buffer_deserialize_text (buffer, return _hb_buffer_deserialize_text_unicodes (buffer,
buf, buf_len, end_ptr, buf, buf_len, end_ptr,
font); font);
case HB_BUFFER_SERIALIZE_FORMAT_JSON: case HB_BUFFER_SERIALIZE_FORMAT_JSON:
return _hb_buffer_deserialize_json (buffer, return _hb_buffer_deserialize_json (buffer,

View File

@ -256,7 +256,8 @@ hb_base_sources = files(
hb_base_ragel_generated_sources = files( hb_base_ragel_generated_sources = files(
'hb-buffer-deserialize-json.hh', 'hb-buffer-deserialize-json.hh',
'hb-buffer-deserialize-text.hh', 'hb-buffer-deserialize-text-glyphs.hh',
'hb-buffer-deserialize-text-unicodes.hh',
'hb-number-parser.hh', 'hb-number-parser.hh',
'hb-ot-shaper-indic-machine.hh', 'hb-ot-shaper-indic-machine.hh',
'hb-ot-shaper-khmer-machine.hh', 'hb-ot-shaper-khmer-machine.hh',
@ -265,7 +266,8 @@ hb_base_ragel_generated_sources = files(
) )
hb_base_ragel_sources = [ hb_base_ragel_sources = [
'hb-buffer-deserialize-json.rl', 'hb-buffer-deserialize-json.rl',
'hb-buffer-deserialize-text.rl', 'hb-buffer-deserialize-text-glyphs.rl',
'hb-buffer-deserialize-text-unicodes.rl',
'hb-number-parser.rl', 'hb-number-parser.rl',
'hb-ot-shaper-indic-machine.rl', 'hb-ot-shaper-indic-machine.rl',
'hb-ot-shaper-khmer-machine.rl', 'hb-ot-shaper-khmer-machine.rl',