[buffer] Save pre/post textual context

The saved context is meant to be used for a variety of purposes.  We save
up to five characters in each direction.  No public API changes are needed;
everything is taken care of already.  All clients need to do is call
hb_buffer_add_utf* with the full text + segment info (or at least some
context) instead of just passing in the segment.

Various operations (hb_buffer_reset, hb_buffer_set_length,
hb_buffer_add*) automatically reset the relevant contexts.
This commit is contained in:
Behdad Esfahbod 2012-09-25 17:44:53 -04:00
parent 89ac39dbbe
commit 05207a79e0
3 changed files with 53 additions and 6 deletions

View File

@ -1,7 +1,7 @@
/* /*
* Copyright © 1998-2004 David Turner and Werner Lemberg * Copyright © 1998-2004 David Turner and Werner Lemberg
* Copyright © 2004,2007,2009,2010 Red Hat, Inc. * Copyright © 2004,2007,2009,2010 Red Hat, Inc.
* Copyright © 2011 Google, Inc. * Copyright © 2011,2012 Google, Inc.
* *
* This is part of HarfBuzz, a text shaping library. * This is part of HarfBuzz, a text shaping library.
* *
@ -117,9 +117,18 @@ struct hb_buffer_t {
inline hb_glyph_info_t prev (void) const { return info[out_len - 1]; } inline hb_glyph_info_t prev (void) const { return info[out_len - 1]; }
unsigned int serial; unsigned int serial;
/* These reflect current allocations of the bytes in glyph_info_t's var1 and var2. */
uint8_t allocated_var_bytes[8]; uint8_t allocated_var_bytes[8];
const char *allocated_var_owner[8]; const char *allocated_var_owner[8];
/* Text before / after the main buffer contents.
* Always in Unicode, and ordered outward.
* Index 0 is for "pre-context", 1 for "post-context". */
static const unsigned int CONTEXT_LENGTH = 5;
hb_codepoint_t context[2][CONTEXT_LENGTH];
unsigned int context_len[2];
/* Methods */ /* Methods */
@ -206,6 +215,8 @@ struct hb_buffer_t {
HB_INTERNAL bool make_room_for (unsigned int num_in, unsigned int num_out); HB_INTERNAL bool make_room_for (unsigned int num_in, unsigned int num_out);
HB_INTERNAL void *get_scratch_buffer (unsigned int *size); HB_INTERNAL void *get_scratch_buffer (unsigned int *size);
inline void clear_context (unsigned int side) { context_len[side] = 0; }
}; };

View File

@ -1,7 +1,7 @@
/* /*
* Copyright © 1998-2004 David Turner and Werner Lemberg * Copyright © 1998-2004 David Turner and Werner Lemberg
* Copyright © 2004,2007,2009,2010 Red Hat, Inc. * Copyright © 2004,2007,2009,2010 Red Hat, Inc.
* Copyright © 2011 Google, Inc. * Copyright © 2011,2012 Google, Inc.
* *
* This is part of HarfBuzz, a text shaping library. * This is part of HarfBuzz, a text shaping library.
* *
@ -158,6 +158,9 @@ hb_buffer_t::reset (void)
serial = 0; serial = 0;
memset (allocated_var_bytes, 0, sizeof allocated_var_bytes); memset (allocated_var_bytes, 0, sizeof allocated_var_bytes);
memset (allocated_var_owner, 0, sizeof allocated_var_owner); memset (allocated_var_owner, 0, sizeof allocated_var_owner);
memset (context, 0, sizeof context);
memset (context_len, 0, sizeof context_len);
} }
void void
@ -570,6 +573,8 @@ hb_buffer_get_empty (void)
true, /* in_error */ true, /* in_error */
true, /* have_output */ true, /* have_output */
true /* have_positions */ true /* have_positions */
/* Zero is good enough for everything else. */
}; };
return const_cast<hb_buffer_t *> (&_hb_buffer_nil); return const_cast<hb_buffer_t *> (&_hb_buffer_nil);
@ -723,6 +728,7 @@ hb_buffer_add (hb_buffer_t *buffer,
unsigned int cluster) unsigned int cluster)
{ {
buffer->add (codepoint, mask, cluster); buffer->add (codepoint, mask, cluster);
buffer->clear_context (1);
} }
hb_bool_t hb_bool_t
@ -743,6 +749,11 @@ hb_buffer_set_length (hb_buffer_t *buffer,
} }
buffer->len = length; buffer->len = length;
if (!length)
buffer->clear_context (0);
buffer->clear_context (1);
return true; return true;
} }
@ -817,13 +828,38 @@ hb_buffer_add_utf (hb_buffer_t *buffer,
buffer->ensure (buffer->len + item_length * sizeof (T) / 4); buffer->ensure (buffer->len + item_length * sizeof (T) / 4);
const T *next = (const T *) text + item_offset; if (!buffer->len)
{
/* Add pre-context */
buffer->clear_context (0);
const T *prev = text + item_offset;
const T *start = text;
while (start < prev && buffer->context_len[0] < buffer->CONTEXT_LENGTH)
{
hb_codepoint_t u;
prev = hb_utf_prev (prev, start, &u);
buffer->context[0][buffer->context_len[0]++] = u;
}
}
const T *next = text + item_offset;
const T *end = next + item_length; const T *end = next + item_length;
while (next < end) { while (next < end)
{
hb_codepoint_t u; hb_codepoint_t u;
const T *old_next = next; const T *old_next = next;
next = hb_utf_next (next, end, &u); next = hb_utf_next (next, end, &u);
hb_buffer_add (buffer, u, 1, old_next - (const T *) text); buffer->add (u, 1, old_next - (const T *) text);
}
/* Add post-context */
buffer->clear_context (1);
end = text + text_length;
while (next < end && buffer->context_len[1] < buffer->CONTEXT_LENGTH)
{
hb_codepoint_t u;
next = hb_utf_next (next, end, &u);
buffer->context[1][buffer->context_len[1]++] = u;
} }
buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE; buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE;

View File

@ -1,7 +1,7 @@
/* /*
* Copyright © 1998-2004 David Turner and Werner Lemberg * Copyright © 1998-2004 David Turner and Werner Lemberg
* Copyright © 2004,2007,2009 Red Hat, Inc. * Copyright © 2004,2007,2009 Red Hat, Inc.
* Copyright © 2011 Google, Inc. * Copyright © 2011,2012 Google, Inc.
* *
* This is part of HarfBuzz, a text shaping library. * This is part of HarfBuzz, a text shaping library.
* *