[buffer] Implement buffer deserialization for format=text

Using a ragel machine.
This commit is contained in:
Behdad Esfahbod 2013-02-27 17:59:28 -05:00
parent 4ee803b42a
commit 847794e929
9 changed files with 221 additions and 34 deletions

View File

@ -258,11 +258,13 @@ arabic-table: gen-arabic-table.py ArabicShaping.txt UnicodeData.txt
.PHONY: unicode-tables arabic-table indic-table
BUILT_SOURCES += \
hb-buffer-deserialize-text.hh \
hb-ot-shape-complex-indic-machine.hh \
hb-ot-shape-complex-myanmar-machine.hh \
hb-ot-shape-complex-sea-machine.hh \
$(NULL)
EXTRA_DIST += \
hb-buffer-deserialize-text.rl \
hb-ot-shape-complex-indic-machine.rl \
hb-ot-shape-complex-myanmar-machine.rl \
hb-ot-shape-complex-sea-machine.rl \

View File

@ -0,0 +1,169 @@
/*
* Copyright © 2013 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_BUFFER_DESERIALIZE_TEXT_HH
#define HB_BUFFER_DESERIALIZE_TEXT_HH
#include "hb-private.hh"
%%{
machine deserialize_text;
alphtype unsigned char;
write data;
}%%
/* Parses the decimal unsigned integer spanning [pp, end) into *pv.
 *
 * The span does not have to be NUL-terminated: it is copied into a local,
 * terminated buffer before conversion.
 *
 * Returns true and stores the value in *pv only when the span starts with
 * a digit, is consumed completely, and the value fits in 32 bits.
 * Returns false otherwise and leaves *pv untouched. */
static hb_bool_t
parse_uint (const char *pp, const char *end, uint32_t *pv)
{
  /* An empty (or inverted) span can never hold a number. */
  if (end <= pp)
    return false;

  char buf[32];
  unsigned int len = MIN (ARRAY_LENGTH (buf) - 1, (unsigned int) (end - pp));
  strncpy (buf, pp, len);
  buf[len] = '\0';

  /* strtoul() skips leading whitespace and silently negates "-N"; neither
   * is a valid unsigned value here, so insist on a leading digit. */
  if (buf[0] < '0' || buf[0] > '9')
    return false;

  char *p = buf;
  char *pend = p;
  unsigned long v;

  errno = 0;
  v = strtoul (p, &pend, 10);
  /* The length comparison rejects trailing garbage as well as spans that
   * had to be truncated to fit buf. */
  if (errno || p == pend || pend - p != end - pp)
    return false;

  /* unsigned long may be wider than 32 bits; refuse values that would wrap
   * when narrowed instead of storing a silently truncated result. */
  if (v > 0xFFFFFFFFul)
    return false;

  *pv = (uint32_t) v;
  return true;
}
/* Parses the decimal signed integer spanning [pp, end) into *pv.
 *
 * The span does not have to be NUL-terminated: it is copied into a local,
 * terminated buffer before conversion.
 *
 * Returns true and stores the value in *pv only when the whole span is
 * consumed and the value fits in a signed 32-bit integer.  Returns false
 * otherwise and leaves *pv untouched. */
static hb_bool_t
parse_int (const char *pp, const char *end, int32_t *pv)
{
  char buf[32];
  unsigned int len = MIN (ARRAY_LENGTH (buf) - 1, (unsigned int) (end - pp));
  strncpy (buf, pp, len);
  buf[len] = '\0';

  char *p = buf;
  char *pend = p;
  long v;

  errno = 0;
  v = strtol (p, &pend, 10);
  /* The length comparison rejects trailing garbage as well as spans that
   * had to be truncated to fit buf. */
  if (errno || p == pend || pend - p != end - pp)
    return false;

  /* long may be wider than 32 bits; refuse values that would be truncated
   * when narrowed.  Literal bounds are used so no limit macros are needed. */
  if (v < -2147483647L - 1 || v > 2147483647L)
    return false;

  *pv = (int32_t) v;
  return true;
}
%%{
# Actions and grammar of the deserialize_text machine.
#
# The actions below are host code that runs inside
# _hb_buffer_deserialize_glyphs_text; they use that function's locals
# (info, pos, tok, p, buffer, font, end_ptr) and may return from it.

# Entering action of `item`: zero the scratch glyph info and position.
action clear_item {
memset (&info, 0, sizeof (info));
memset (&pos , 0, sizeof (pos ));
}
# Leaving action of `item`: append the parsed glyph to the buffer, store its
# position in the last slot, and record p in *end_ptr.
action add_item {
buffer->add_info (info);
if (buffer->in_error)
return false;
buffer->pos[buffer->len - 1] = pos;
*end_ptr = p;
}
# Remember where the current token starts.
action tok {
tok = p;
}
# Resolve the token text [tok, p) to a glyph id through the font; the
# enclosing function returns false when the lookup fails.
action parse_glyph {
if (!hb_font_glyph_from_string (font,
tok, p - tok,
&info.codepoint))
return false;
}
# Numeric fields: convert the token [tok, p) and store it in the matching
# member; the enclosing function returns false when conversion fails.
action parse_cluster { if (!parse_uint (tok, p, &info.cluster )) return false; }
action parse_x_offset { if (!parse_int (tok, p, &pos.x_offset )) return false; }
action parse_y_offset { if (!parse_int (tok, p, &pos.y_offset )) return false; }
action parse_x_advance { if (!parse_int (tok, p, &pos.x_advance)) return false; }
action parse_y_advance { if (!parse_int (tok, p, &pos.y_advance)) return false; }
# Unsigned decimal: a lone '0', or digits with a non-zero first digit.
unum = '0' | [1-9] digit*;
# Signed decimal: optional leading '-'.
num = '-'? unum;
# Glyph token: one or more alphanumeric characters.
glyph = alnum+ >tok %parse_glyph;
# "=CLUSTER"
cluster = '=' (unum >tok %parse_cluster);
# "@X_OFFSET,Y_OFFSET"
offsets = '@' (num >tok %parse_x_offset) ',' (num >tok %parse_y_offset );
# "+X_ADVANCE" optionally followed by ",Y_ADVANCE"
advances= '+' (num >tok %parse_x_advance) (',' (num >tok %parse_y_advance))?;
# One glyph record: the glyph followed by optional cluster, offsets and
# advances, in that order.
item =
(
glyph
cluster?
offsets?
advances?
)
>clear_item
%add_item
;
# Items separated by '|' (whitespace allowed around separators), optionally
# terminated by a trailing '|' or ']'.
main := space* item (space* '|' space* item)* space* ('|'|']')?;
}%%
/* Deserializes glyphs in text format from buf[0..buf_len) and appends them
 * to buffer, using the Ragel machine declared in this file.
 *
 * buffer:  destination; parsed glyphs and their positions are appended.
 * buf:     input text; buf_len is its length in bytes.
 * end_ptr: updated to point at how far the input was consumed.
 * font:    passed to hb_font_glyph_from_string to resolve glyph tokens.
 *
 * Returns true only when the entire input was consumed and its last
 * character is not ']'.  Returns false early when a glyph or number fails
 * to parse, or when the buffer enters its error state. */
static hb_bool_t
_hb_buffer_deserialize_glyphs_text (hb_buffer_t *buffer,
const char *buf,
unsigned int buf_len,
const char **end_ptr,
hb_font_t *font)
{
const char *p = buf, *pe = buf + buf_len;
/* Ensure we have positions. */
(void) hb_buffer_get_glyph_positions (buffer, NULL);
/* Skip leading whitespace with a local, fixed character set. */
#define ISSPACE(c) ((c)==' '||(c)=='\f'||(c)=='\n'||(c)=='\r'||(c)=='\t'||(c)=='\v')
while (p < pe && ISSPACE (*p))
p++;
#undef ISSPACE
/* Consume one leading delimiter: '|' when the buffer already holds glyphs,
 * '[' when it is still empty. */
if (p < pe && *p == (buffer->len ? '|' : '['))
{
*end_ptr = ++p;
}
/* State used by the Ragel-generated code and by the machine's actions. */
const char *eof = pe, *tok = NULL;
int cs;
hb_glyph_info_t info;
hb_glyph_position_t pos;
%%{
write init;
write exec;
}%%
*end_ptr = p;
/* NOTE(review): *(p-1) would read before buf if buf_len were 0; this looks
 * like it relies on the caller rejecting empty input -- confirm. */
return p == pe && *(p-1) != ']';
}
#endif /* HB_BUFFER_DESERIALIZE_TEXT_HH */

View File

@ -110,6 +110,7 @@ struct hb_buffer_t {
HB_INTERNAL void add (hb_codepoint_t codepoint,
unsigned int cluster);
HB_INTERNAL void add_info (const hb_glyph_info_t &glyph_info);
HB_INTERNAL void reverse_range (unsigned int start, unsigned int end);
HB_INTERNAL void reverse (void);
@ -128,7 +129,7 @@ struct hb_buffer_t {
HB_INTERNAL void replace_glyph (hb_codepoint_t glyph_index);
/* Makes a copy of the glyph at idx to output and replace glyph_index */
HB_INTERNAL void output_glyph (hb_codepoint_t glyph_index);
HB_INTERNAL void output_info (hb_glyph_info_t &glyph_info);
HB_INTERNAL void output_info (const hb_glyph_info_t &glyph_info);
/* Copies glyph at idx to output but doesn't advance idx */
HB_INTERNAL void copy_glyph (void);
/* Copies glyph at idx to output and advance idx.

View File

@ -143,7 +143,6 @@ _hb_buffer_serialize_glyphs_text (hb_buffer_t *buffer,
{
hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, NULL);
hb_glyph_position_t *pos = hb_buffer_get_glyph_positions (buffer, NULL);
hb_direction_t direction = hb_buffer_get_direction (buffer);
*buf_consumed = 0;
for (unsigned int i = start; i < end; i++)
@ -174,9 +173,8 @@ _hb_buffer_serialize_glyphs_text (hb_buffer_t *buffer,
p += snprintf (p, ARRAY_LENGTH (b) - (p - b), "@%d,%d", pos[i].x_offset, pos[i].y_offset);
*p++ = '+';
if (!HB_DIRECTION_IS_VERTICAL (direction) || pos[i].x_advance)
p += snprintf (p, ARRAY_LENGTH (b) - (p - b), "%d", pos[i].x_advance);
if (HB_DIRECTION_IS_VERTICAL (direction) || pos->y_advance)
p += snprintf (p, ARRAY_LENGTH (b) - (p - b), "%d", pos[i].x_advance);
if (pos->y_advance)
p += snprintf (p, ARRAY_LENGTH (b) - (p - b), ",%d", pos[i].y_advance);
}
@ -247,34 +245,26 @@ static hb_bool_t
_hb_buffer_deserialize_glyphs_json (hb_buffer_t *buffer,
const char *buf,
unsigned int buf_len,
unsigned int *buf_consumed,
const char **end_ptr,
hb_font_t *font)
{
return false;
}
static hb_bool_t
_hb_buffer_deserialize_glyphs_text (hb_buffer_t *buffer,
const char *buf,
unsigned int buf_len,
unsigned int *buf_consumed,
hb_font_t *font)
{
return false;
}
#include "hb-buffer-deserialize-text.hh"
hb_bool_t
hb_buffer_deserialize_glyphs (hb_buffer_t *buffer,
const char *buf,
int buf_len, /* -1 means nul-terminated */
unsigned int *buf_consumed, /* May be NULL */
const char **end_ptr, /* May be NULL */
hb_font_t *font, /* May be NULL */
hb_buffer_serialize_format_t format)
{
unsigned int sconsumed;
if (!buf_consumed)
buf_consumed = &sconsumed;
*buf_consumed = 0;
const char *end;
if (!end_ptr)
end_ptr = &end;
*end_ptr = buf;
assert ((!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID) ||
buffer->content_type == HB_BUFFER_CONTENT_TYPE_GLYPHS);
@ -284,8 +274,8 @@ hb_buffer_deserialize_glyphs (hb_buffer_t *buffer,
if (!buf_len)
{
*buf_consumed = 0;
return true;
*end_ptr = buf;
return false;
}
hb_buffer_set_content_type (buffer, HB_BUFFER_CONTENT_TYPE_GLYPHS);
@ -297,12 +287,12 @@ hb_buffer_deserialize_glyphs (hb_buffer_t *buffer,
{
case HB_BUFFER_SERIALIZE_FORMAT_TEXT:
return _hb_buffer_deserialize_glyphs_text (buffer,
buf, buf_len, buf_consumed,
buf, buf_len, end_ptr,
font);
case HB_BUFFER_SERIALIZE_FORMAT_JSON:
return _hb_buffer_deserialize_glyphs_json (buffer,
buf, buf_len, buf_consumed,
buf, buf_len, end_ptr,
font);
default:

View File

@ -214,6 +214,17 @@ hb_buffer_t::add (hb_codepoint_t codepoint,
len++;
}
/* Appends a copy of glyph_info as the new last item of the buffer.
 * Leaves the buffer contents untouched when ensure (len + 1) fails. */
void
hb_buffer_t::add_info (const hb_glyph_info_t &glyph_info)
{
  if (unlikely (!ensure (len + 1)))
    return;

  /* Store into the first free slot, then count it. */
  info[len++] = glyph_info;
}
void
hb_buffer_t::remove_output (void)
{
@ -315,7 +326,7 @@ hb_buffer_t::output_glyph (hb_codepoint_t glyph_index)
}
void
hb_buffer_t::output_info (hb_glyph_info_t &glyph_info)
hb_buffer_t::output_info (const hb_glyph_info_t &glyph_info)
{
if (unlikely (!make_room_for (0, 1))) return;

View File

@ -313,7 +313,7 @@ hb_bool_t
hb_buffer_deserialize_glyphs (hb_buffer_t *buffer,
const char *buf,
int buf_len, /* -1 means nul-terminated */
unsigned int *buf_consumed, /* May be NULL */
const char **end_ptr, /* May be NULL */
hb_font_t *font, /* May be NULL */
hb_buffer_serialize_format_t format);

View File

@ -845,8 +845,9 @@ hb_codepoint_parse (const char *s, unsigned int len, int base, hb_codepoint_t *o
{
/* Pain because we don't know whether s is nul-terminated. */
char buf[64];
strncpy (buf, s, MIN (ARRAY_LENGTH (buf) - 1, len));
buf[MIN (ARRAY_LENGTH (buf) - 1, len)] = '\0';
len = MIN (ARRAY_LENGTH (buf) - 1, len);
strncpy (buf, s, len);
buf[len] = '\0';
char *end;
errno = 0;

View File

@ -60,8 +60,9 @@ static hb_bool_t
parse_uint (const char **pp, const char *end, unsigned int *pv)
{
char buf[32];
strncpy (buf, *pp, end - *pp);
buf[ARRAY_LENGTH (buf) - 1] = '\0';
unsigned int len = MIN (ARRAY_LENGTH (buf) - 1, (unsigned int) (end - *pp));
strncpy (buf, *pp, len);
buf[len] = '\0';
char *p = buf;
char *pend = p;
@ -69,9 +70,9 @@ parse_uint (const char **pp, const char *end, unsigned int *pv)
/* Intentionally use strtol instead of strtoul, such that
* -1 turns into "big number"... */
errno = 0;
v = strtol (p, &pend, 0);
if (p == pend)
if (errno || p == pend)
return false;
*pv = v;

View File

@ -29,6 +29,9 @@
#endif
#include "hb.h"
#ifdef HAVE_FREETYPE
#include "hb-ft.h"
#endif
#ifdef HAVE_GLIB
#include <glib.h>
@ -84,7 +87,11 @@ main (int argc, char **argv)
unsigned int upem = hb_face_get_upem (face);
hb_font_t *font = hb_font_create (face);
hb_face_destroy (face);
hb_font_set_scale (font, upem, upem);
#ifdef HAVE_FREETYPE
hb_ft_font_set_funcs (font);
#endif
hb_buffer_t *buf;
buf = hb_buffer_create ();
@ -95,8 +102,13 @@ main (int argc, char **argv)
{
hb_buffer_clear_contents (buf);
if (!hb_buffer_deserialize_glyphs (buf, line, -1, NULL,
font, HB_BUFFER_SERIALIZE_FORMAT_TEXT))
const char *p = line;
while (hb_buffer_deserialize_glyphs (buf,
p, -1, &p,
font,
HB_BUFFER_SERIALIZE_FORMAT_TEXT))
;
if (*p && *p != '\n')
ret = false;
hb_buffer_serialize_glyphs (buf, 0, hb_buffer_get_length (buf),