diff --git a/src/psl-make-dafsa b/src/psl-make-dafsa index 2bfd879..a7480bb 100755 --- a/src/psl-make-dafsa +++ b/src/psl-make-dafsa @@ -9,9 +9,9 @@ representation of an unordered word list (dictionary). https://en.wikipedia.org/wiki/Deterministic_acyclic_finite_state_automaton -This python program converts a list of strings to a byte array in C++. +This python program converts a list of strings to a byte array in C/C++. This python program fetches strings and return values from a gperf file -and generates a C++ file with a byte array representing graph that can be +and generates a C/C++ file with a byte array representing a graph that can be used as a memory efficient replacement for the perfect hash table. The input strings must consist of printable 7-bit ASCII characters or UTF-8 @@ -159,7 +159,7 @@ node2 = ("\x02", [sink]) node3 = ("a\x01", [sink]) sink = None -A C++ representation of the compressed graph is generated: +A C/C++ representation of the compressed graph is generated: const unsigned char dafsa[7] = { 0x81, 0xE1, 0x02, 0x81, 0x82, 0x61, 0x81, @@ -196,7 +196,7 @@ node2 = ("aa\x01", [sink]) node3 = ("bb\x02", [sink]) sink = None -A C++ representation of the compressed graph is generated: +A C/C++ representation of the compressed graph is generated: const unsigned char dafsa[11] = { 0x02, 0x83, 0xE2, 0x02, 0x83, 0x61, 0x61, 0x81, 0x62, 0x62, 0x82, @@ -485,7 +485,7 @@ def encode(dafsa, utf_mode): def to_cxx(data, codecs): - """Generates C++ code from a list of encoded bytes.""" + """Generates C/C++ code from a list of encoded bytes.""" text = b'/* This file has been generated by psl-make-dafsa. DO NOT EDIT!\n\n' text += b'The byte array encodes effective tld names. See psl-make-dafsa source for' text += b' documentation.' 
@@ -511,7 +511,7 @@ def sha1_file(name): return sha1.hexdigest() def to_cxx_plus(data, codecs): - """Generates C++ code from a word list plus some variable assignments as needed by libpsl""" + """Generates C/C++ code from a word list plus some variable assignments as needed by libpsl""" text = to_cxx(data, codecs) text += b'static time_t _psl_file_time = %d;\n' % os.stat(psl_input_file).st_mtime text += b'static int _psl_nsuffixes = %d;\n' % psl_nsuffixes @@ -522,7 +522,7 @@ def to_cxx_plus(data, codecs): return text def words_to_whatever(words, converter, utf_mode, codecs): - """Generates C++ code from a word list""" + """Generates C/C++ code from a word list""" dafsa = to_dafsa(words, utf_mode) for fun in (reverse, join_suffixes, reverse, join_suffixes, join_labels): dafsa = fun(dafsa) @@ -530,15 +530,15 @@ def words_to_whatever(words, converter, utf_mode, codecs): def words_to_cxx(words, utf_mode, codecs): - """Generates C++ code from a word list""" + """Generates C/C++ code from a word list""" return words_to_whatever(words, to_cxx, utf_mode, codecs) def words_to_cxx_plus(words, utf_mode, codecs): - """Generates C++ code from a word list plus some variable assignments as needed by libpsl""" + """Generates C/C++ code from a word list plus some variable assignments as needed by libpsl""" return words_to_whatever(words, to_cxx_plus, utf_mode, codecs) def words_to_binary(words, utf_mode, codecs): - """Generates C++ code from a word list""" + """Generates binary output (a '.DAFSA@PSL_0' header followed by the raw byte array) from a word list""" return b'.DAFSA@PSL_0 \n' + words_to_whatever(words, lambda x, _: bytearray(x), utf_mode, codecs)