Fix comments from C++ to C/C++ in psl-make-dafsa
This commit is contained in:
parent
664f3dc852
commit
6a82bcbc2a
|
@ -9,9 +9,9 @@ representation of an unordered word list (dictionary).
|
||||||
|
|
||||||
https://en.wikipedia.org/wiki/Deterministic_acyclic_finite_state_automaton
|
https://en.wikipedia.org/wiki/Deterministic_acyclic_finite_state_automaton
|
||||||
|
|
||||||
This python program converts a list of strings to a byte array in C++.
|
This Python program converts a list of strings to a byte array in C/C++.
|
||||||
This python program fetches strings and return values from a gperf file
|
This python program fetches strings and return values from a gperf file
|
||||||
and generates a C++ file with a byte array representing graph that can be
|
and generates a C/C++ file with a byte array representing a graph that can be
|
||||||
used as a memory efficient replacement for the perfect hash table.
|
used as a memory efficient replacement for the perfect hash table.
|
||||||
|
|
||||||
The input strings must consist of printable 7-bit ASCII characters or UTF-8
|
The input strings must consist of printable 7-bit ASCII characters or UTF-8
|
||||||
|
@ -159,7 +159,7 @@ node2 = ("\x02", [sink])
|
||||||
node3 = ("a\x01", [sink])
|
node3 = ("a\x01", [sink])
|
||||||
sink = None
|
sink = None
|
||||||
|
|
||||||
A C++ representation of the compressed graph is generated:
|
A C/C++ representation of the compressed graph is generated:
|
||||||
|
|
||||||
const unsigned char dafsa[7] = {
|
const unsigned char dafsa[7] = {
|
||||||
0x81, 0xE1, 0x02, 0x81, 0x82, 0x61, 0x81,
|
0x81, 0xE1, 0x02, 0x81, 0x82, 0x61, 0x81,
|
||||||
|
@ -196,7 +196,7 @@ node2 = ("aa\x01", [sink])
|
||||||
node3 = ("bb\x02", [sink])
|
node3 = ("bb\x02", [sink])
|
||||||
sink = None
|
sink = None
|
||||||
|
|
||||||
A C++ representation of the compressed graph is generated:
|
A C/C++ representation of the compressed graph is generated:
|
||||||
|
|
||||||
const unsigned char dafsa[11] = {
|
const unsigned char dafsa[11] = {
|
||||||
0x02, 0x83, 0xE2, 0x02, 0x83, 0x61, 0x61, 0x81, 0x62, 0x62, 0x82,
|
0x02, 0x83, 0xE2, 0x02, 0x83, 0x61, 0x61, 0x81, 0x62, 0x62, 0x82,
|
||||||
|
@ -485,7 +485,7 @@ def encode(dafsa, utf_mode):
|
||||||
|
|
||||||
|
|
||||||
def to_cxx(data, codecs):
|
def to_cxx(data, codecs):
|
||||||
"""Generates C++ code from a list of encoded bytes."""
|
"""Generates C/C++ code from a list of encoded bytes."""
|
||||||
text = b'/* This file has been generated by psl-make-dafsa. DO NOT EDIT!\n\n'
|
text = b'/* This file has been generated by psl-make-dafsa. DO NOT EDIT!\n\n'
|
||||||
text += b'The byte array encodes effective tld names. See psl-make-dafsa source for'
|
text += b'The byte array encodes effective tld names. See psl-make-dafsa source for'
|
||||||
text += b' documentation.'
|
text += b' documentation.'
|
||||||
|
@ -511,7 +511,7 @@ def sha1_file(name):
|
||||||
return sha1.hexdigest()
|
return sha1.hexdigest()
|
||||||
|
|
||||||
def to_cxx_plus(data, codecs):
|
def to_cxx_plus(data, codecs):
|
||||||
"""Generates C++ code from a word list plus some variable assignments as needed by libpsl"""
|
"""Generates C/C++ code from a word list plus some variable assignments as needed by libpsl"""
|
||||||
text = to_cxx(data, codecs)
|
text = to_cxx(data, codecs)
|
||||||
text += b'static time_t _psl_file_time = %d;\n' % os.stat(psl_input_file).st_mtime
|
text += b'static time_t _psl_file_time = %d;\n' % os.stat(psl_input_file).st_mtime
|
||||||
text += b'static int _psl_nsuffixes = %d;\n' % psl_nsuffixes
|
text += b'static int _psl_nsuffixes = %d;\n' % psl_nsuffixes
|
||||||
|
@ -522,7 +522,7 @@ def to_cxx_plus(data, codecs):
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def words_to_whatever(words, converter, utf_mode, codecs):
|
def words_to_whatever(words, converter, utf_mode, codecs):
|
||||||
"""Generates C++ code from a word list"""
|
"""Generates C/C++ code from a word list"""
|
||||||
dafsa = to_dafsa(words, utf_mode)
|
dafsa = to_dafsa(words, utf_mode)
|
||||||
for fun in (reverse, join_suffixes, reverse, join_suffixes, join_labels):
|
for fun in (reverse, join_suffixes, reverse, join_suffixes, join_labels):
|
||||||
dafsa = fun(dafsa)
|
dafsa = fun(dafsa)
|
||||||
|
@ -530,15 +530,15 @@ def words_to_whatever(words, converter, utf_mode, codecs):
|
||||||
|
|
||||||
|
|
||||||
def words_to_cxx(words, utf_mode, codecs):
|
def words_to_cxx(words, utf_mode, codecs):
|
||||||
"""Generates C++ code from a word list"""
|
"""Generates C/C++ code from a word list"""
|
||||||
return words_to_whatever(words, to_cxx, utf_mode, codecs)
|
return words_to_whatever(words, to_cxx, utf_mode, codecs)
|
||||||
|
|
||||||
def words_to_cxx_plus(words, utf_mode, codecs):
|
def words_to_cxx_plus(words, utf_mode, codecs):
|
||||||
"""Generates C++ code from a word list plus some variable assignments as needed by libpsl"""
|
"""Generates C/C++ code from a word list plus some variable assignments as needed by libpsl"""
|
||||||
return words_to_whatever(words, to_cxx_plus, utf_mode, codecs)
|
return words_to_whatever(words, to_cxx_plus, utf_mode, codecs)
|
||||||
|
|
||||||
def words_to_binary(words, utf_mode, codecs):
|
def words_to_binary(words, utf_mode, codecs):
|
||||||
"""Generates C++ code from a word list"""
|
"""Generates binary output (a '.DAFSA@PSL_0' header line followed by the encoded bytes) from a word list"""
|
||||||
return b'.DAFSA@PSL_0 \n' + words_to_whatever(words, lambda x, _: bytearray(x), utf_mode, codecs)
|
return b'.DAFSA@PSL_0 \n' + words_to_whatever(words, lambda x, _: bytearray(x), utf_mode, codecs)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue