[subset] Make cmap4 packing more optimal.
The current CMAP4 implementation uses whatever the current codepoint ranges are and then encodes them as individual glyph ids or as a delta if possible. However, it's often possible to save bytes by splitting up existing ranges and encoding parts of them using deltas where the cost of splitting the range is less than encoding each glyph individually.
This commit is contained in:
parent
8aed5c21a3
commit
d9660fd58a
|
@ -93,120 +93,168 @@ struct CmapSubtableFormat0
|
||||||
struct CmapSubtableFormat4
|
struct CmapSubtableFormat4
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
template<typename Iterator,
|
template<typename Iterator,
|
||||||
|
typename Writer,
|
||||||
hb_requires (hb_is_iterator (Iterator))>
|
hb_requires (hb_is_iterator (Iterator))>
|
||||||
HBUINT16* serialize_endcode_array (hb_serialize_context_t *c,
|
void to_ranges (Iterator it, Writer& range_writer)
|
||||||
Iterator it)
|
|
||||||
{
|
{
|
||||||
HBUINT16 *endCode = c->start_embed<HBUINT16> ();
|
hb_codepoint_t start_cp, run_start_cp, end_cp, last_gid;
|
||||||
hb_codepoint_t prev_endcp = 0xFFFF;
|
int run_length, delta;
|
||||||
|
|
||||||
for (const auto& _ : +it)
|
enum {
|
||||||
{
|
FIRST_RANGE,
|
||||||
if (prev_endcp != 0xFFFF && prev_endcp + 1u != _.first)
|
FOLLOWING_RANGE,
|
||||||
{
|
} mode;
|
||||||
HBUINT16 end_code;
|
|
||||||
end_code = prev_endcp;
|
while (it) {
|
||||||
c->copy<HBUINT16> (end_code);
|
// Start a new range
|
||||||
|
start_cp = (*it).first;
|
||||||
|
run_start_cp = (*it).first;
|
||||||
|
end_cp = (*it).first;
|
||||||
|
last_gid = (*it).second;
|
||||||
|
run_length = 1;
|
||||||
|
delta = (*it).second - (*it).first;
|
||||||
|
mode = FIRST_RANGE;
|
||||||
|
it++;
|
||||||
|
|
||||||
|
while (it) {
|
||||||
|
// Process range
|
||||||
|
hb_codepoint_t next_cp = (*it).first;
|
||||||
|
hb_codepoint_t next_gid = (*it).second;
|
||||||
|
if (next_cp != end_cp + 1) {
|
||||||
|
// Current range is over, stop processing.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (next_gid == last_gid + 1) {
|
||||||
|
// The current run continues.
|
||||||
|
end_cp = next_cp;
|
||||||
|
run_length++;
|
||||||
|
last_gid = next_gid;
|
||||||
|
it++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// A new run is starting, decide if we want to commit the current run.
|
||||||
|
int split_cost = (mode == FIRST_RANGE) ? 8 : 16;
|
||||||
|
int run_cost = run_length * 2;
|
||||||
|
if (run_cost >= split_cost) {
|
||||||
|
commit_current_range(start_cp, run_start_cp, end_cp, delta, split_cost, range_writer);
|
||||||
|
mode = FOLLOWING_RANGE;
|
||||||
|
start_cp = next_cp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start the new run
|
||||||
|
run_start_cp = next_cp;
|
||||||
|
end_cp = next_cp;
|
||||||
|
delta = next_gid - run_start_cp;
|
||||||
|
run_length = 1;
|
||||||
|
last_gid = next_gid;
|
||||||
|
it++;
|
||||||
}
|
}
|
||||||
prev_endcp = _.first;
|
|
||||||
|
// Finalize range
|
||||||
|
commit_current_range (start_cp, run_start_cp, end_cp, delta, 8, range_writer);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
if (likely (end_cp != 0xFFFF)) {
|
||||||
// last endCode
|
range_writer (0xFFFF, 0xFFFF, 1);
|
||||||
HBUINT16 endcode;
|
}
|
||||||
endcode = prev_endcp;
|
}
|
||||||
if (unlikely (!c->copy<HBUINT16> (endcode))) return nullptr;
|
|
||||||
// There must be a final entry with end_code == 0xFFFF.
|
/*
|
||||||
if (prev_endcp != 0xFFFF)
|
* Writes the current range as either one or two ranges depending on what is most efficient.
|
||||||
{
|
*/
|
||||||
HBUINT16 finalcode;
|
template<typename Writer>
|
||||||
finalcode = 0xFFFF;
|
void commit_current_range (hb_codepoint_t start,
|
||||||
if (unlikely (!c->copy<HBUINT16> (finalcode))) return nullptr;
|
hb_codepoint_t run_start,
|
||||||
|
hb_codepoint_t end,
|
||||||
|
int run_delta,
|
||||||
|
int split_cost,
|
||||||
|
Writer& range_writer) {
|
||||||
|
bool should_split = false;
|
||||||
|
if (start < run_start && run_start < end) {
|
||||||
|
int run_cost = (end - run_start + 1) * 2;
|
||||||
|
if (run_cost >= split_cost) {
|
||||||
|
should_split = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return endCode;
|
if (should_split) {
|
||||||
|
range_writer (start, run_start - 1, 0);
|
||||||
|
range_writer (run_start, end, run_delta);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (start == run_start) {
|
||||||
|
// Range is only a run
|
||||||
|
range_writer (start, end, run_delta);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write only a single non-run range.
|
||||||
|
run_delta = (start == end) ? run_delta : 0;
|
||||||
|
range_writer (start, end, run_delta);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Iterator,
|
template<typename Iterator,
|
||||||
hb_requires (hb_is_iterator (Iterator))>
|
hb_requires (hb_is_iterator (Iterator))>
|
||||||
HBUINT16* serialize_startcode_array (hb_serialize_context_t *c,
|
unsigned serialize_find_segcount (Iterator it) {
|
||||||
Iterator it)
|
struct Counter {
|
||||||
{
|
unsigned segcount = 0;
|
||||||
HBUINT16 *startCode = c->start_embed<HBUINT16> ();
|
|
||||||
hb_codepoint_t prev_cp = 0xFFFF;
|
|
||||||
|
|
||||||
for (const auto& _ : +it)
|
void operator() (hb_codepoint_t start,
|
||||||
{
|
hb_codepoint_t end,
|
||||||
if (prev_cp == 0xFFFF || prev_cp + 1u != _.first)
|
int delta) {
|
||||||
{
|
segcount++;
|
||||||
HBUINT16 start_code;
|
|
||||||
start_code = _.first;
|
|
||||||
c->copy<HBUINT16> (start_code);
|
|
||||||
}
|
}
|
||||||
|
} counter;
|
||||||
|
|
||||||
prev_cp = _.first;
|
to_ranges (+it, counter);
|
||||||
}
|
return counter.segcount;
|
||||||
|
|
||||||
// There must be a final entry with end_code == 0xFFFF.
|
|
||||||
if (it.len () == 0 || prev_cp != 0xFFFF)
|
|
||||||
{
|
|
||||||
HBUINT16 finalcode;
|
|
||||||
finalcode = 0xFFFF;
|
|
||||||
if (unlikely (!c->copy<HBUINT16> (finalcode))) return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
return startCode;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<typename Iterator,
|
template<typename Iterator,
|
||||||
hb_requires (hb_is_iterator (Iterator))>
|
hb_requires (hb_is_iterator (Iterator))>
|
||||||
HBINT16* serialize_idDelta_array (hb_serialize_context_t *c,
|
bool serialize_start_end_delta_arrays (hb_serialize_context_t *c,
|
||||||
Iterator it,
|
Iterator it,
|
||||||
HBUINT16 *endCode,
|
int segcount)
|
||||||
HBUINT16 *startCode,
|
|
||||||
unsigned segcount)
|
|
||||||
{
|
{
|
||||||
unsigned i = 0;
|
struct Writer {
|
||||||
hb_codepoint_t last_gid = 0, start_gid = 0, last_cp = 0xFFFF;
|
hb_serialize_context_t *serializer_;
|
||||||
bool use_delta = true;
|
HBUINT16* end_code_;
|
||||||
|
HBUINT16* start_code_;
|
||||||
|
HBINT16* id_delta_;
|
||||||
|
int index_;
|
||||||
|
|
||||||
HBINT16 *idDelta = c->start_embed<HBINT16> ();
|
Writer(hb_serialize_context_t *serializer)
|
||||||
if ((char *)idDelta - (char *)startCode != (int) segcount * (int) HBINT16::static_size)
|
: serializer_(serializer),
|
||||||
return nullptr;
|
end_code_(nullptr),
|
||||||
|
start_code_(nullptr),
|
||||||
for (const auto& _ : +it)
|
id_delta_(nullptr),
|
||||||
{
|
index_ (0) {}
|
||||||
if (_.first == startCode[i])
|
void operator() (hb_codepoint_t start,
|
||||||
{
|
hb_codepoint_t end,
|
||||||
use_delta = true;
|
int delta) {
|
||||||
start_gid = _.second;
|
start_code_[index_] = start;
|
||||||
|
end_code_[index_] = end;
|
||||||
|
id_delta_[index_] = delta;
|
||||||
|
index_++;
|
||||||
}
|
}
|
||||||
else if (_.second != last_gid + 1) use_delta = false;
|
} writer(c);
|
||||||
|
|
||||||
if (_.first == endCode[i])
|
writer.end_code_ = c->allocate_size<HBUINT16> (HBUINT16::static_size * segcount);
|
||||||
{
|
c->allocate_size<HBUINT16> (2); // padding
|
||||||
HBINT16 delta;
|
writer.start_code_ = c->allocate_size<HBUINT16> (HBUINT16::static_size * segcount);
|
||||||
if (use_delta) delta = (int)start_gid - (int)startCode[i];
|
writer.id_delta_ = c->allocate_size<HBINT16> (HBINT16::static_size * segcount);
|
||||||
else delta = 0;
|
|
||||||
c->copy<HBINT16> (delta);
|
|
||||||
|
|
||||||
i++;
|
if (unlikely (!writer.end_code_ || !writer.start_code_ || !writer.id_delta_)) return false;
|
||||||
}
|
|
||||||
|
|
||||||
last_gid = _.second;
|
to_ranges (+it, writer);
|
||||||
last_cp = _.first;
|
return true;
|
||||||
}
|
|
||||||
|
|
||||||
if (it.len () == 0 || last_cp != 0xFFFF)
|
|
||||||
{
|
|
||||||
HBINT16 delta;
|
|
||||||
delta = 1;
|
|
||||||
if (unlikely (!c->copy<HBINT16> (delta))) return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
return idDelta;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Iterator,
|
template<typename Iterator,
|
||||||
|
@ -257,22 +305,14 @@ struct CmapSubtableFormat4
|
||||||
if (unlikely (!c->extend_min (this))) return;
|
if (unlikely (!c->extend_min (this))) return;
|
||||||
this->format = 4;
|
this->format = 4;
|
||||||
|
|
||||||
//serialize endCode[]
|
//serialize endCode[], startCode[], idDelta[]
|
||||||
HBUINT16 *endCode = serialize_endcode_array (c, format4_iter);
|
HBUINT16* endCode = c->start_embed<HBUINT16> ();
|
||||||
if (unlikely (!endCode)) return;
|
unsigned segcount = serialize_find_segcount (format4_iter);
|
||||||
|
if (unlikely (!serialize_start_end_delta_arrays (c, format4_iter, segcount)))
|
||||||
|
return;
|
||||||
|
|
||||||
unsigned segcount = (c->length () - min_size) / HBUINT16::static_size;
|
HBUINT16 *startCode = endCode + segcount + 1;
|
||||||
|
HBINT16 *idDelta = ((HBINT16*)startCode) + segcount;
|
||||||
// 2 bytes of padding.
|
|
||||||
if (unlikely (!c->allocate_size<HBUINT16> (HBUINT16::static_size))) return; // 2 bytes of padding.
|
|
||||||
|
|
||||||
// serialize startCode[]
|
|
||||||
HBUINT16 *startCode = serialize_startcode_array (c, format4_iter);
|
|
||||||
if (unlikely (!startCode)) return;
|
|
||||||
|
|
||||||
//serialize idDelta[]
|
|
||||||
HBINT16 *idDelta = serialize_idDelta_array (c, format4_iter, endCode, startCode, segcount);
|
|
||||||
if (unlikely (!idDelta)) return;
|
|
||||||
|
|
||||||
HBUINT16 *idRangeOffset = serialize_rangeoffset_glyid (c, format4_iter, endCode, startCode, idDelta, segcount);
|
HBUINT16 *idRangeOffset = serialize_rangeoffset_glyid (c, format4_iter, endCode, startCode, idDelta, segcount);
|
||||||
if (unlikely (!c->check_success (idRangeOffset))) return;
|
if (unlikely (!c->check_success (idRangeOffset))) return;
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue