diff --git a/src/graph/graph.hh b/src/graph/graph.hh index c6f088499..79119c626 100644 --- a/src/graph/graph.hh +++ b/src/graph/graph.hh @@ -49,6 +49,48 @@ struct graph_t unsigned end = 0; unsigned priority = 0; + + bool link_positions_valid (unsigned num_objects) + { + hb_set_t assigned_bytes; + for (const auto& l : obj.real_links) + { + if (l.objidx >= num_objects) + { + DEBUG_MSG (SUBSET_REPACK, nullptr, + "Invalid graph. Invalid object index."); + return false; + } + + unsigned start = l.position; + unsigned end = start + l.width - 1; + + if (unlikely (l.width < 2 || l.width > 4)) + { + DEBUG_MSG (SUBSET_REPACK, nullptr, + "Invalid graph. Invalid link width."); + return false; + } + + if (unlikely (end >= table_size ())) + { + DEBUG_MSG (SUBSET_REPACK, nullptr, + "Invalid graph. Link position is out of bounds."); + return false; + } + + if (unlikely (assigned_bytes.intersects (start, end))) + { + DEBUG_MSG (SUBSET_REPACK, nullptr, + "Invalid graph. Found offsets whose positions overlap."); + return false; + } + + assigned_bytes.add_range (start, end); + } + return true; + } + void normalize () { obj.real_links.qsort (); @@ -286,8 +328,6 @@ struct graph_t vertices_scratch_.alloc (objects.length); for (unsigned i = 0; i < objects.length; i++) { - // TODO(grieger): check all links point to valid objects. - // If this graph came from a serialization buffer object 0 is the // nil object. We don't need it for our purposes here so drop it. if (i == 0 && !objects[i]) @@ -299,6 +339,9 @@ struct graph_t vertex_t* v = vertices_.push (); if (check_success (!vertices_.in_error ())) v->obj = *objects[i]; + + check_success (v->link_positions_valid (objects.length)); + if (!removed_nil) continue; // Fix indices to account for removed nil object. for (auto& l : v->obj.all_links_writer ()) { @@ -418,6 +461,13 @@ struct graph_t hb_swap (sorted_graph[new_id], vertices_[next_id]); const vertex_t& next = sorted_graph[new_id]; + if (unlikely (!check_success(new_id >= 0))) { + // We are out of ids. Which means we've visited a node more than once. + // This graph contains a cycle which is not allowed. + DEBUG_MSG (SUBSET_REPACK, nullptr, "Invalid graph. Contains cycle."); + return; + } + id_map[next_id] = new_id--; for (const auto& link : next.obj.all_links ()) { @@ -941,6 +991,72 @@ struct graph_t return made_change; } + bool is_fully_connected () + { + update_parents(); + + if (root().parents) + // Root cannot have parents. + return false; + + for (unsigned i = 0; i < root_idx (); i++) + { + if (!vertices_[i].parents) + return false; + } + return true; + } + +#if 0 + /* + * Saves the current graph to a packed binary format which the repacker fuzzer takes + * as a seed. + */ + void save_fuzzer_seed (hb_tag_t tag) const + { + FILE* f = fopen ("./repacker_fuzzer_seed", "w"); + fwrite ((void*) &tag, sizeof (tag), 1, f); + + uint16_t num_objects = vertices_.length; + fwrite ((void*) &num_objects, sizeof (num_objects), 1, f); + + for (const auto& v : vertices_) + { + uint16_t blob_size = v.table_size (); + fwrite ((void*) &blob_size, sizeof (blob_size), 1, f); + fwrite ((const void*) v.obj.head, blob_size, 1, f); + } + + uint16_t link_count = 0; + for (const auto& v : vertices_) + link_count += v.obj.real_links.length; + + fwrite ((void*) &link_count, sizeof (link_count), 1, f); + + typedef struct + { + uint16_t parent; + uint16_t child; + uint16_t position; + uint8_t width; + } link_t; + + for (unsigned i = 0; i < vertices_.length; i++) + { + for (const auto& l : vertices_[i].obj.real_links) + { + link_t link { + (uint16_t) i, (uint16_t) l.objidx, + (uint16_t) l.position, (uint8_t) l.width + }; + fwrite ((void*) &link, sizeof (link), 1, f); + } + } + + fclose (f); + } +#endif + void print_orphaned_nodes () { if (!DEBUG_ENABLED(SUBSET_REPACK)) return; @@ -949,6 +1065,10 @@ struct graph_t parents_invalid = true; update_parents(); + if (root().parents) { + DEBUG_MSG (SUBSET_REPACK, nullptr, "Root node has incoming edges."); + } + for (unsigned i = 0; i < root_idx (); i++) { const auto& v = vertices_[i]; diff --git a/src/hb-repacker.hh b/src/hb-repacker.hh index 70f1d0e08..6817ffae4 100644 --- a/src/hb-repacker.hh +++ b/src/hb-repacker.hh @@ -283,6 +283,11 @@ hb_resolve_graph_overflows (hb_tag_t table_tag, graph_t& sorted_graph /* IN/OUT */) { sorted_graph.sort_shortest_distance (); + if (sorted_graph.in_error ()) + { + DEBUG_MSG (SUBSET_REPACK, nullptr, "Sorted graph in error state after initial sort."); + return false; + } bool will_overflow = graph::will_overflow (sorted_graph); if (!will_overflow) @@ -376,6 +381,18 @@ hb_resolve_overflows (const T& packed, unsigned max_rounds = 20, bool recalculate_extensions = false) { graph_t sorted_graph (packed); + if (sorted_graph.in_error ()) + { + // Invalid graph definition. + return nullptr; + } + + if (!sorted_graph.is_fully_connected ()) + { + sorted_graph.print_orphaned_nodes (); + return nullptr; + } + if (!hb_resolve_graph_overflows (table_tag, max_rounds, recalculate_extensions, sorted_graph)) return nullptr; diff --git a/src/test-repacker.cc b/src/test-repacker.cc index 7a20ba52c..94ff0849a 100644 --- a/src/test-repacker.cc +++ b/src/test-repacker.cc @@ -1476,6 +1476,7 @@ static void test_sort_shortest () graph_t graph (c.object_graph ()); graph.sort_shortest_distance (); + assert (!graph.in_error ()); assert(strncmp (graph.object (4).head, "abc", 3) == 0); assert(graph.object (4).real_links.length == 3); diff --git a/test/fuzzing/graphs/crash-3bf72494aa4c9f8cbbcbf887fdc2a2858c87feb4 b/test/fuzzing/graphs/crash-3bf72494aa4c9f8cbbcbf887fdc2a2858c87feb4 new file mode 100644 index 000000000..ff8da7b29 Binary files /dev/null and b/test/fuzzing/graphs/crash-3bf72494aa4c9f8cbbcbf887fdc2a2858c87feb4 differ diff --git a/test/fuzzing/graphs/crash-442bfac994a3d9929cf06262ae9fb00f6ee1f774 b/test/fuzzing/graphs/crash-442bfac994a3d9929cf06262ae9fb00f6ee1f774 new file mode 100644 index 000000000..0160ce59b Binary files /dev/null and b/test/fuzzing/graphs/crash-442bfac994a3d9929cf06262ae9fb00f6ee1f774 differ diff --git a/test/fuzzing/graphs/leak-a77f29b25edb873729f3ab120148fdb213cfa527 b/test/fuzzing/graphs/leak-a77f29b25edb873729f3ab120148fdb213cfa527 new file mode 100644 index 000000000..58b4075ad Binary files /dev/null and b/test/fuzzing/graphs/leak-a77f29b25edb873729f3ab120148fdb213cfa527 differ diff --git a/test/fuzzing/graphs/noto_nastaliq_urdu b/test/fuzzing/graphs/noto_nastaliq_urdu new file mode 100644 index 000000000..2e3cab886 Binary files /dev/null and b/test/fuzzing/graphs/noto_nastaliq_urdu differ diff --git a/test/fuzzing/hb-repacker-fuzzer.cc b/test/fuzzing/hb-repacker-fuzzer.cc new file mode 100644 index 000000000..536b0e8d0 --- /dev/null +++ b/test/fuzzing/hb-repacker-fuzzer.cc @@ -0,0 +1,145 @@ +#include "hb-fuzzer.hh" + +#include +#include +#include +#include + +#include "hb-subset-repacker.h" + +typedef struct +{ + uint16_t parent; + uint16_t child; + uint16_t position; + uint8_t width; +} link_t; + +/* The fuzzer seed contains a serialized representation of a object graph which forms + * the input graph to the repacker call. The binary format is: + * + * table tag: 4 bytes + * number of objects: 2 bytes + * objects[number of objects]: + * blob size: 2 bytes + * blob: blob size bytes + * num of real links: 2 bytes + * links[number of real links]: link_t struct + * + * TODO(garretrieger): add optional virtual links + */ + +template +bool read(const uint8_t** data, size_t* size, T* out) +{ + if (*size < sizeof (T)) return false; + + *out = * ((T*) *data); + + *data += sizeof (T); + *size -= sizeof (T); + + return true; +} + +void cleanup (hb_object_t* objects, uint16_t num_objects) +{ + for (uint32_t i = 0; i < num_objects; i++) + { + free (objects[i].head); + free (objects[i].real_links); + } +} + +void add_links_to_objects (hb_object_t* objects, uint16_t num_objects, + link_t* links, uint16_t num_links) +{ + unsigned* link_count = (unsigned*) calloc (num_objects, sizeof (unsigned)); + + for (uint32_t i = 0; i < num_links; i++) + { + uint16_t parent_idx = links[i].parent; + link_count[parent_idx]++; + } + + for (uint32_t i = 0; i < num_objects; i++) + { + objects[i].num_real_links = link_count[i]; + objects[i].real_links = (hb_link_t*) calloc (link_count[i], sizeof (hb_link_t)); + objects[i].num_virtual_links = 0; + objects[i].virtual_links = nullptr; + } + + for (uint32_t i = 0; i < num_links; i++) + { + uint16_t parent_idx = links[i].parent; + uint16_t child_idx = links[i].child + 1; // All indices are shifted by 1 by the null object. + hb_link_t* link = &(objects[parent_idx].real_links[link_count[parent_idx] - 1]); + + link->width = links[i].width; + link->position = links[i].position; + link->objidx = child_idx; + link_count[parent_idx]--; + } + + free (link_count); +} + +extern "C" int LLVMFuzzerTestOneInput (const uint8_t *data, size_t size) +{ + // TODO(garretrieger): move graph validity checks into repacker graph creation. + alloc_state = _fuzzing_alloc_state (data, size); + + uint16_t num_objects = 0; + hb_object_t* objects = nullptr; + + uint16_t num_real_links = 0; + link_t* links = nullptr; + + hb_tag_t table_tag; + if (!read (&data, &size, &table_tag)) goto end; + if (!read (&data, &size, &num_objects)) goto end; + + objects = (hb_object_t*) calloc (num_objects, sizeof (hb_object_t)); + for (uint32_t i = 0; i < num_objects; i++) + { + uint16_t blob_size; + if (!read (&data, &size, &blob_size)) goto end; + if (size < blob_size) goto end; + + char* copy = (char*) calloc (1, blob_size); + memcpy (copy, data, blob_size); + objects[i].head = (char*) copy; + objects[i].tail = (char*) (copy + blob_size); + + size -= blob_size; + data += blob_size; + } + + if (!read (&data, &size, &num_real_links)) goto end; + links = (link_t*) calloc (num_real_links, sizeof (link_t)); + for (uint32_t i = 0; i < num_real_links; i++) + { + if (!read (&data, &size, &links[i])) goto end; + + if (links[i].parent >= num_objects) + goto end; + } + + add_links_to_objects (objects, num_objects, + links, num_real_links); + + hb_blob_destroy (hb_subset_repack_or_fail (table_tag, + objects, + num_objects)); + +end: + if (objects) + { + cleanup (objects, num_objects); + free (objects); + } + free (links); + + return 0; +} diff --git a/test/fuzzing/meson.build b/test/fuzzing/meson.build index 3aba9eb67..d38ca8f9f 100644 --- a/test/fuzzing/meson.build +++ b/test/fuzzing/meson.build @@ -5,6 +5,10 @@ tests = [ 'hb-draw-fuzzer.cc', ] +if get_option('experimental_api') + tests += 'hb-repacker-fuzzer.cc' +endif + foreach file_name : tests test_name = file_name.split('.')[0] @@ -19,6 +23,10 @@ foreach file_name : tests extra_cpp_args += '-DHB_IS_IN_FUZZER' endif + if get_option('experimental_api') + extra_cpp_args += '-DHB_EXPERIMENTAL_API' + endif + exe = executable(test_name, sources, cpp_args: cpp_args + extra_cpp_args, include_directories: [incconfig, incsrc], @@ -55,6 +63,20 @@ test('subset_fuzzer', find_program('run-subset-fuzzer-tests.py'), suite: ['fuzzing', 'slow'], ) +if get_option('experimental_api') + test('repacker_fuzzer', find_program('run-repacker-fuzzer-tests.py'), + args: [ + hb_repacker_fuzzer_exe, + ], + # as the tests are ran concurrently let's raise acceptable time here + # ideally better to break and let meson handles them in parallel + timeout: 300, + workdir: meson.current_build_dir() / '..' / '..', + env: env, + suite: ['fuzzing', 'slow'], + ) +endif + test('draw_fuzzer', find_program('run-draw-fuzzer-tests.py'), args: [ hb_draw_fuzzer_exe, diff --git a/test/fuzzing/run-repacker-fuzzer-tests.py b/test/fuzzing/run-repacker-fuzzer-tests.py new file mode 100644 index 000000000..85a23e13e --- /dev/null +++ b/test/fuzzing/run-repacker-fuzzer-tests.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 + +import sys, os, subprocess, tempfile, shutil + + +def cmd (command): + # https://stackoverflow.com/a/4408409 as we might have huge output sometimes + with tempfile.TemporaryFile () as tempf: + p = subprocess.Popen (command, stderr=tempf) + + try: + p.wait () + tempf.seek (0) + text = tempf.read () + + #TODO: Detect debug mode with a better way + is_debug_mode = b"SANITIZE" in text + + return ("" if is_debug_mode else text.decode ("utf-8").strip ()), p.returncode + except subprocess.TimeoutExpired: + return 'error: timeout, ' + ' '.join (command), 1 + + +srcdir = os.getenv ("srcdir", ".") +EXEEXT = os.getenv ("EXEEXT", "") +top_builddir = os.getenv ("top_builddir", ".") +hb_repacker_fuzzer = os.path.join (top_builddir, "hb-repacker-fuzzer" + EXEEXT) + +if not os.path.exists (hb_repacker_fuzzer): + if len (sys.argv) < 2 or not os.path.exists (sys.argv[1]): + sys.exit ("""Failed to find hb-repacker-fuzzer binary automatically, +please provide it as the first argument to the tool""") + + hb_repacker_fuzzer = sys.argv[1] + +print ('hb_repacker_fuzzer:', hb_repacker_fuzzer) +fails = 0 + +valgrind = None +if os.getenv ('RUN_VALGRIND', ''): + valgrind = shutil.which ('valgrind') + if valgrind is None: + sys.exit ("""Valgrind requested but not found.""") + +def run_dir (parent_path): + global fails + for file in os.listdir (parent_path): + path = os.path.join(parent_path, file) + print ("running repacker fuzzer against %s" % path) + if valgrind: + text, returncode = cmd ([valgrind, '--leak-check=full', '--error-exitcode=1', hb_repacker_fuzzer, path]) + else: + text, returncode = cmd ([hb_repacker_fuzzer, path]) + if 'error' in text: + returncode = 1 + + if (not valgrind or returncode) and text.strip (): + print (text) + + if returncode != 0: + print ("failed for %s" % path) + fails = fails + 1 + + +run_dir (os.path.join (srcdir, "graphs")) + +if fails: + sys.exit ("%d repacker fuzzer related tests failed." % fails)