Merge pull request #3910 from googlefonts/repacker_fuzz

[repacker] Add a fuzzer for the hb-subset-repacker api.
Behdad Esfahbod 2022-12-02 12:43:13 -07:00 committed by GitHub
commit 16cbe41bca
10 changed files with 375 additions and 2 deletions
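
For context, the hb-subset-repacker API that this fuzzer drives takes a table tag plus a flat array of hb_object_t records and returns a repacked blob, or nullptr on failure. Below is a minimal editorial sketch of a well-formed two-object graph fed to hb_subset_repack_or_fail; it assumes the conventions the new fuzzer uses (a build with HB_EXPERIMENTAL_API, objidx values 1-based because of the implicit nil object, root object placed last), and the blob contents are arbitrary placeholders.

#include "hb-subset-repacker.h"   /* experimental API; needs HB_EXPERIMENTAL_API */

int main ()
{
  /* Child table: 4 payload bytes, no outgoing links. */
  static char child_data[4] = {1, 2, 3, 4};
  hb_object_t child = {};
  child.head = child_data;
  child.tail = child_data + 4;

  /* Parent (root) table: one 2-byte offset field at byte 0 pointing at the child. */
  static char parent_data[2] = {0, 0};
  hb_link_t link = {};
  link.width = 2;       /* 2-byte offset field */
  link.position = 0;    /* offset field starts at byte 0 of the parent */
  link.objidx = 1;      /* 1-based: refers to objects[0], i.e. the child */

  hb_object_t parent = {};
  parent.head = parent_data;
  parent.tail = parent_data + 2;
  parent.num_real_links = 1;
  parent.real_links = &link;

  hb_object_t objects[2] = {child, parent};   /* root goes last */
  hb_blob_t *result = hb_subset_repack_or_fail (HB_TAG ('G','S','U','B'), objects, 2);
  if (!result)
    return 1;   /* invalid graph or unresolvable overflows */
  hb_blob_destroy (result);
  return 0;
}

The fuzzer added in this PR builds exactly this kind of array, only with every field taken from the mutated seed input.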

View File

@ -49,6 +49,48 @@ struct graph_t
unsigned end = 0;
unsigned priority = 0;
bool link_positions_valid (unsigned num_objects)
{
hb_set_t assigned_bytes;
for (const auto& l : obj.real_links)
{
if (l.objidx >= num_objects)
{
DEBUG_MSG (SUBSET_REPACK, nullptr,
"Invalid graph. Invalid object index.");
return false;
}
unsigned start = l.position;
unsigned end = start + l.width - 1;
if (unlikely (l.width < 2 || l.width > 4))
{
DEBUG_MSG (SUBSET_REPACK, nullptr,
"Invalid graph. Invalid link width.");
return false;
}
if (unlikely (end >= table_size ()))
{
DEBUG_MSG (SUBSET_REPACK, nullptr,
"Invalid graph. Link position is out of bounds.");
return false;
}
if (unlikely (assigned_bytes.intersects (start, end)))
{
DEBUG_MSG (SUBSET_REPACK, nullptr,
"Invalid graph. Found offsets whose positions overlap.");
return false;
}
assigned_bytes.add_range (start, end);
}
return true;
}
void normalize ()
{
obj.real_links.qsort ();
@ -286,8 +328,6 @@ struct graph_t
vertices_scratch_.alloc (objects.length);
for (unsigned i = 0; i < objects.length; i++)
{
// TODO(grieger): check all links point to valid objects.
// If this graph came from a serialization buffer object 0 is the
// nil object. We don't need it for our purposes here so drop it.
if (i == 0 && !objects[i])
@ -299,6 +339,9 @@ struct graph_t
vertex_t* v = vertices_.push ();
if (check_success (!vertices_.in_error ()))
v->obj = *objects[i];
check_success (v->link_positions_valid (objects.length));
if (!removed_nil) continue;
// Fix indices to account for removed nil object.
for (auto& l : v->obj.all_links_writer ()) {
@ -418,6 +461,13 @@ struct graph_t
hb_swap (sorted_graph[new_id], vertices_[next_id]);
const vertex_t& next = sorted_graph[new_id];
if (unlikely (!check_success(new_id >= 0))) {
// We are out of ids, which means we've visited a node more than once.
// This graph contains a cycle, which is not allowed.
DEBUG_MSG (SUBSET_REPACK, nullptr, "Invalid graph. Contains cycle.");
return;
}
id_map[next_id] = new_id--;
for (const auto& link : next.obj.all_links ()) {
@ -941,6 +991,72 @@ struct graph_t
return made_change;
}
bool is_fully_connected ()
{
update_parents();
if (root().parents)
// Root cannot have parents.
return false;
for (unsigned i = 0; i < root_idx (); i++)
{
if (!vertices_[i].parents)
return false;
}
return true;
}
#if 0
/*
* Saves the current graph to a packed binary format which the repacker fuzzer takes
* as a seed.
*/
void save_fuzzer_seed (hb_tag_t tag) const
{
FILE* f = fopen ("./repacker_fuzzer_seed", "w");
fwrite ((void*) &tag, sizeof (tag), 1, f);
uint16_t num_objects = vertices_.length;
fwrite ((void*) &num_objects, sizeof (num_objects), 1, f);
for (const auto& v : vertices_)
{
uint16_t blob_size = v.table_size ();
fwrite ((void*) &blob_size, sizeof (blob_size), 1, f);
fwrite ((const void*) v.obj.head, blob_size, 1, f);
}
uint16_t link_count = 0;
for (const auto& v : vertices_)
link_count += v.obj.real_links.length;
fwrite ((void*) &link_count, sizeof (link_count), 1, f);
typedef struct
{
uint16_t parent;
uint16_t child;
uint16_t position;
uint8_t width;
} link_t;
for (unsigned i = 0; i < vertices_.length; i++)
{
for (const auto& l : vertices_[i].obj.real_links)
{
link_t link {
(uint16_t) i, (uint16_t) l.objidx,
(uint16_t) l.position, (uint8_t) l.width
};
fwrite ((void*) &link, sizeof (link), 1, f);
}
}
fclose (f);
}
#endif
void print_orphaned_nodes ()
{
if (!DEBUG_ENABLED(SUBSET_REPACK)) return;
@ -949,6 +1065,10 @@ struct graph_t
parents_invalid = true;
update_parents();
if (root().parents) {
DEBUG_MSG (SUBSET_REPACK, nullptr, "Root node has incoming edges.");
}
for (unsigned i = 0; i < root_idx (); i++)
{
const auto& v = vertices_[i];
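
The new link_positions_valid () check above enforces that every real link has a 2-4 byte width, stays inside its object's bytes, and does not overlap another offset field. As a hedged sketch of the kind of input it is meant to reject, phrased through the same public hb_object_t surface the fuzzer uses (names and sizes here are illustrative):

#include <cassert>
#include "hb-subset-repacker.h"

int main ()
{
  static char child_data[4] = {};
  hb_object_t child = {};
  child.head = child_data;
  child.tail = child_data + 4;

  /* Two 2-byte offset fields that both claim byte 1 of the parent: the byte
   * ranges [0,1] and [1,2] intersect, which link_positions_valid () now
   * reports as an invalid graph. */
  static char parent_data[4] = {};
  hb_link_t links[2] = {};
  links[0].width = 2; links[0].position = 0; links[0].objidx = 1;
  links[1].width = 2; links[1].position = 1; links[1].objidx = 1;   /* overlaps the first field */

  hb_object_t parent = {};
  parent.head = parent_data;
  parent.tail = parent_data + 4;
  parent.num_real_links = 2;
  parent.real_links = links;

  hb_object_t objects[2] = {child, parent};
  /* With the check_success () call added to the graph constructor, this is
   * expected to fail cleanly (nullptr) rather than pack overlapping offsets. */
  hb_blob_t *result = hb_subset_repack_or_fail (HB_TAG ('G','P','O','S'), objects, 2);
  assert (!result);
  return 0;
}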

View File

@ -283,6 +283,11 @@ hb_resolve_graph_overflows (hb_tag_t table_tag,
graph_t& sorted_graph /* IN/OUT */)
{
sorted_graph.sort_shortest_distance ();
if (sorted_graph.in_error ())
{
DEBUG_MSG (SUBSET_REPACK, nullptr, "Sorted graph in error state after initial sort.");
return false;
}
bool will_overflow = graph::will_overflow (sorted_graph);
if (!will_overflow)
@ -376,6 +381,18 @@ hb_resolve_overflows (const T& packed,
unsigned max_rounds = 20,
bool recalculate_extensions = false) {
graph_t sorted_graph (packed);
if (sorted_graph.in_error ())
{
// Invalid graph definition.
return nullptr;
}
if (!sorted_graph.is_fully_connected ())
{
sorted_graph.print_orphaned_nodes ();
return nullptr;
}
if (!hb_resolve_graph_overflows (table_tag, max_rounds, recalculate_extensions, sorted_graph))
return nullptr;
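
The is_fully_connected () guard added here rejects graphs containing objects that nothing links to, before any packing work is done. A hedged sketch of such an input, under the same assumptions as the earlier sketches (the orphaned object is deliberate):

#include <cassert>
#include "hb-subset-repacker.h"

int main ()
{
  /* objects[0] is never referenced by any link, so it is an orphan. */
  static char orphan_data[2] = {};
  hb_object_t orphan = {};
  orphan.head = orphan_data;
  orphan.tail = orphan_data + 2;

  /* Root object with no links: nothing reaches the orphan. */
  static char root_data[2] = {};
  hb_object_t root = {};
  root.head = root_data;
  root.tail = root_data + 2;

  hb_object_t objects[2] = {orphan, root};
  /* is_fully_connected () should fail, the orphan gets reported, and nullptr is returned. */
  hb_blob_t *result = hb_subset_repack_or_fail (HB_TAG ('G','S','U','B'), objects, 2);
  assert (!result);
  return 0;
}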

View File

@ -1476,6 +1476,7 @@ static void test_sort_shortest ()
graph_t graph (c.object_graph ());
graph.sort_shortest_distance ();
assert (!graph.in_error ());
assert(strncmp (graph.object (4).head, "abc", 3) == 0);
assert(graph.object (4).real_links.length == 3);

Binary file not shown.

View File

@ -0,0 +1,145 @@
#include "hb-fuzzer.hh"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "hb-subset-repacker.h"
typedef struct
{
uint16_t parent;
uint16_t child;
uint16_t position;
uint8_t width;
} link_t;
/* The fuzzer seed contains a serialized representation of an object graph which forms
* the input graph to the repacker call. The binary format is:
*
* table tag: 4 bytes
* number of objects: 2 bytes
* objects[number of objects]:
* blob size: 2 bytes
* blob: blob size bytes
* num of real links: 2 bytes
* links[number of real links]: link_t struct
*
* TODO(garretrieger): add optional virtual links
*/
template <typename T>
bool read(const uint8_t** data, size_t* size, T* out)
{
if (*size < sizeof (T)) return false;
*out = * ((T*) *data);
*data += sizeof (T);
*size -= sizeof (T);
return true;
}
void cleanup (hb_object_t* objects, uint16_t num_objects)
{
for (uint32_t i = 0; i < num_objects; i++)
{
free (objects[i].head);
free (objects[i].real_links);
}
}
void add_links_to_objects (hb_object_t* objects, uint16_t num_objects,
link_t* links, uint16_t num_links)
{
unsigned* link_count = (unsigned*) calloc (num_objects, sizeof (unsigned));
for (uint32_t i = 0; i < num_links; i++)
{
uint16_t parent_idx = links[i].parent;
link_count[parent_idx]++;
}
for (uint32_t i = 0; i < num_objects; i++)
{
objects[i].num_real_links = link_count[i];
objects[i].real_links = (hb_link_t*) calloc (link_count[i], sizeof (hb_link_t));
objects[i].num_virtual_links = 0;
objects[i].virtual_links = nullptr;
}
for (uint32_t i = 0; i < num_links; i++)
{
uint16_t parent_idx = links[i].parent;
uint16_t child_idx = links[i].child + 1; // All indices are shifted by 1 to account for the null object.
hb_link_t* link = &(objects[parent_idx].real_links[link_count[parent_idx] - 1]);
link->width = links[i].width;
link->position = links[i].position;
link->objidx = child_idx;
link_count[parent_idx]--;
}
free (link_count);
}
extern "C" int LLVMFuzzerTestOneInput (const uint8_t *data, size_t size)
{
// TODO(garretrieger): move graph validity checks into repacker graph creation.
alloc_state = _fuzzing_alloc_state (data, size);
uint16_t num_objects = 0;
hb_object_t* objects = nullptr;
uint16_t num_real_links = 0;
link_t* links = nullptr;
hb_tag_t table_tag;
if (!read<hb_tag_t> (&data, &size, &table_tag)) goto end;
if (!read<uint16_t> (&data, &size, &num_objects)) goto end;
objects = (hb_object_t*) calloc (num_objects, sizeof (hb_object_t));
for (uint32_t i = 0; i < num_objects; i++)
{
uint16_t blob_size;
if (!read<uint16_t> (&data, &size, &blob_size)) goto end;
if (size < blob_size) goto end;
char* copy = (char*) calloc (1, blob_size);
memcpy (copy, data, blob_size);
objects[i].head = (char*) copy;
objects[i].tail = (char*) (copy + blob_size);
size -= blob_size;
data += blob_size;
}
if (!read<uint16_t> (&data, &size, &num_real_links)) goto end;
links = (link_t*) calloc (num_real_links, sizeof (link_t));
for (uint32_t i = 0; i < num_real_links; i++)
{
if (!read<link_t> (&data, &size, &links[i])) goto end;
if (links[i].parent >= num_objects)
goto end;
}
add_links_to_objects (objects, num_objects,
links, num_real_links);
hb_blob_destroy (hb_subset_repack_or_fail (table_tag,
objects,
num_objects));
end:
if (objects)
{
cleanup (objects, num_objects);
free (objects);
}
free (links);
return 0;
}
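
For reference, here is a hedged sketch of a small standalone program that emits a seed in the format documented at the top of this fuzzer: one child blob, one parent blob, and a single 2-byte link from the parent to the child. The output file name and the 'GSUB' tag are arbitrary choices, and the local link_t must mirror the fuzzer's struct, since read<link_t> consumes sizeof (link_t) bytes at a time.

#include <cstdint>
#include <cstdio>

/* Must mirror the link_t the fuzzer reads with read<link_t> (). */
typedef struct
{
  uint16_t parent;
  uint16_t child;
  uint16_t position;
  uint8_t width;
} link_t;

int main ()
{
  FILE *f = fopen ("repacker_seed_minimal", "wb");
  if (!f) return 1;

  uint32_t tag = 0x47535542u;                     /* 'GSUB', consumed as hb_tag_t */
  fwrite (&tag, sizeof (tag), 1, f);

  uint16_t num_objects = 2;
  fwrite (&num_objects, sizeof (num_objects), 1, f);

  /* objects[0]: child blob, 4 bytes */
  uint16_t blob_size = 4;
  const char child[4] = {0, 0, 0, 0};
  fwrite (&blob_size, sizeof (blob_size), 1, f);
  fwrite (child, blob_size, 1, f);

  /* objects[1]: parent blob, 2 bytes (room for one 2-byte offset at position 0) */
  blob_size = 2;
  const char parent[2] = {0, 0};
  fwrite (&blob_size, sizeof (blob_size), 1, f);
  fwrite (parent, blob_size, 1, f);

  /* one real link: objects[1] -> objects[0], 2-byte offset at position 0 */
  uint16_t num_links = 1;
  fwrite (&num_links, sizeof (num_links), 1, f);
  link_t link = {1, 0, 0, 2};                     /* parent, child, position, width */
  fwrite (&link, sizeof (link), 1, f);

  fclose (f);
  return 0;
}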

View File

@ -5,6 +5,10 @@ tests = [
'hb-draw-fuzzer.cc',
]
if get_option('experimental_api')
tests += 'hb-repacker-fuzzer.cc'
endif
foreach file_name : tests
test_name = file_name.split('.')[0]
@ -19,6 +23,10 @@ foreach file_name : tests
extra_cpp_args += '-DHB_IS_IN_FUZZER'
endif
if get_option('experimental_api')
extra_cpp_args += '-DHB_EXPERIMENTAL_API'
endif
exe = executable(test_name, sources,
cpp_args: cpp_args + extra_cpp_args,
include_directories: [incconfig, incsrc],
@ -55,6 +63,20 @@ test('subset_fuzzer', find_program('run-subset-fuzzer-tests.py'),
suite: ['fuzzing', 'slow'],
)
if get_option('experimental_api')
test('repacker_fuzzer', find_program('run-repacker-fuzzer-tests.py'),
args: [
hb_repacker_fuzzer_exe,
],
# as the tests are run concurrently let's raise the acceptable time here
# ideally better to break them up and let meson handle them in parallel
timeout: 300,
workdir: meson.current_build_dir() / '..' / '..',
env: env,
suite: ['fuzzing', 'slow'],
)
endif
test('draw_fuzzer', find_program('run-draw-fuzzer-tests.py'),
args: [
hb_draw_fuzzer_exe,

View File

@ -0,0 +1,68 @@
#!/usr/bin/env python3
import sys, os, subprocess, tempfile, shutil
def cmd (command):
# https://stackoverflow.com/a/4408409 as we might have huge output sometimes
with tempfile.TemporaryFile () as tempf:
p = subprocess.Popen (command, stderr=tempf)
try:
p.wait ()
tempf.seek (0)
text = tempf.read ()
#TODO: Detect debug mode in a better way
is_debug_mode = b"SANITIZE" in text
return ("" if is_debug_mode else text.decode ("utf-8").strip ()), p.returncode
except subprocess.TimeoutExpired:
return 'error: timeout, ' + ' '.join (command), 1
srcdir = os.getenv ("srcdir", ".")
EXEEXT = os.getenv ("EXEEXT", "")
top_builddir = os.getenv ("top_builddir", ".")
hb_repacker_fuzzer = os.path.join (top_builddir, "hb-repacker-fuzzer" + EXEEXT)
if not os.path.exists (hb_repacker_fuzzer):
if len (sys.argv) < 2 or not os.path.exists (sys.argv[1]):
sys.exit ("""Failed to find hb-repacker-fuzzer binary automatically,
please provide it as the first argument to the tool""")
hb_repacker_fuzzer = sys.argv[1]
print ('hb_repacker_fuzzer:', hb_repacker_fuzzer)
fails = 0
valgrind = None
if os.getenv ('RUN_VALGRIND', ''):
valgrind = shutil.which ('valgrind')
if valgrind is None:
sys.exit ("""Valgrind requested but not found.""")
def run_dir (parent_path):
global fails
for file in os.listdir (parent_path):
path = os.path.join(parent_path, file)
print ("running repacker fuzzer against %s" % path)
if valgrind:
text, returncode = cmd ([valgrind, '--leak-check=full', '--error-exitcode=1', hb_repacker_fuzzer, path])
else:
text, returncode = cmd ([hb_repacker_fuzzer, path])
if 'error' in text:
returncode = 1
if (not valgrind or returncode) and text.strip ():
print (text)
if returncode != 0:
print ("failed for %s" % path)
fails = fails + 1
run_dir (os.path.join (srcdir, "graphs"))
if fails:
sys.exit ("%d repacker fuzzer related tests failed." % fails)