Merge 673a9e0132
into e126a67354
This commit is contained in:
commit
2844125fa8
1
AUTHORS
1
AUTHORS
|
@ -16,3 +16,4 @@ Christopher Meng (Fedora building)
|
|||
Jakub Čajka
|
||||
Giuseppe Scrivano
|
||||
Ryan Sleevi (Discussion, Requested DAFSA format and ICANN/PRIVATE support)
|
||||
Olle Liljenzin (Original DAFSA implementation and UTF-8 patch)
|
||||
|
|
|
@ -21,6 +21,48 @@
|
|||
|
||||
/*
 * Bail out of the enclosing function with 0 unless a < b.
 * Wrapped in do/while(0) so the macro behaves as a single statement (it no
 * longer captures a following `else`), and both arguments are parenthesized
 * so expression arguments are compared as written.
 */
#define CHECK_LT(a, b) do { if ((a) >= (b)) return 0; } while (0)
|
||||
|
||||
/* Sequence length indexed by the high nibble of the leading byte. */
static const char multibyte_length_table[16] = {
  0, 0, 0, 0,  /* 0x00-0x3F */
  0, 0, 0, 0,  /* 0x40-0x7F */
  0, 0, 0, 0,  /* 0x80-0xBF */
  2, 2, 3, 4,  /* 0xC0-0xFF */
};

/**
 * Get length of multibyte character sequence starting at a given byte.
 * Returns zero if the byte is not a valid leading byte in UTF-8
 * (single byte characters and continuation bytes also yield zero).
 */
static int GetMultibyteLength(char c) {
  const unsigned char byte = (unsigned char)c;

  /* 0xF8-0xFF share a table slot with the 4-byte leaders 0xF0-0xF7 but can
   * never start a sequence; the DAFSA generator rejects them as well. */
  if (byte >= 0xF8)
    return 0;

  return multibyte_length_table[byte >> 4];
}
|
||||
|
||||
/**
 * Step past the byte that was just matched.
 *
 * The graph cursor *pos always advances by one byte. The key cursor *key
 * advances by one byte as well, except when it points at the leading byte of
 * a multibyte sequence and multibyte mode is not yet active: in that case
 * only the mode-switch byte in the dafsa was consumed, so *key stays put and
 * *multibyte_start is set to remember where the sequence begins.
 * *multibyte_start is cleared again once the whole sequence was consumed.
 */
static void NextPos(const unsigned char** pos,
                    const char** key,
                    const char** multibyte_start)
{
  ++*pos;

  if (!*multibyte_start) {
    if (GetMultibyteLength(**key)) {
      /* Multibyte prefix was matched in the dafsa; the sequence content is
       * matched from the next round on. */
      *multibyte_start = *key;
    } else {
      /* A single byte character was matched. */
      ++*key;
    }
    return;
  }

  /* Inside a multibyte sequence: consume one byte of it. */
  ++*key;

  /* Leave multibyte mode when the last byte of the sequence was consumed. */
  if (*key - *multibyte_start == GetMultibyteLength(**multibyte_start))
    *multibyte_start = 0;
}
|
||||
|
||||
/*
|
||||
* Read next offset from pos.
|
||||
* Returns true if an offset could be read, false otherwise.
|
||||
|
@ -71,6 +113,35 @@ static int IsEOL(const unsigned char* offset, const unsigned char* end)
|
|||
return(*offset & 0x80) != 0;
|
||||
}
|
||||
|
||||
/*
 * Check if the graph byte `matcher` matches the first character in key.
 * The caller must already have verified that the graph byte is in range.
 *
 * multibyte_start is non-NULL while a multibyte sequence is being matched
 * and then points at the leading byte of that sequence in the key.
 */
static int IsMatchUnchecked(const unsigned char matcher,
                            const char* key,
                            const char* multibyte_start)
{
  const unsigned char key_byte = (const unsigned char)*key;

  if (multibyte_start) {
    /* Multibyte matching mode. The leading byte is stored with bit 7
     * flipped (which also pins the sequence length); following bytes are
     * stored with bits 7 and 6 flipped. */
    const unsigned char flip = (multibyte_start == key) ? 0x80 : 0xC0;
    return (matcher ^ flip) == key_byte;
  }

  /* Key points at a leading byte of a multibyte sequence but multibyte mode
   * is not active yet: the dafsa must hold the special mode-switch byte. */
  if (GetMultibyteLength(*key))
    return matcher == 0x1F;

  /* Normal matching of a single byte character. */
  return matcher == key_byte;
}
|
||||
|
||||
/*
|
||||
* Check if byte at offset matches first character in key.
|
||||
* This version matches characters not last in label.
|
||||
|
@ -78,10 +149,11 @@ static int IsEOL(const unsigned char* offset, const unsigned char* end)
|
|||
|
||||
static int IsMatch(const unsigned char* offset,
|
||||
const unsigned char* end,
|
||||
const char* key)
|
||||
const char* key,
|
||||
const char* multibyte_start)
|
||||
{
|
||||
CHECK_LT(offset, end);
|
||||
return *offset == *key;
|
||||
return IsMatchUnchecked(*offset, key, multibyte_start);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -91,10 +163,11 @@ static int IsMatch(const unsigned char* offset,
|
|||
|
||||
static int IsEndCharMatch(const unsigned char* offset,
|
||||
const unsigned char* end,
|
||||
const char* key)
|
||||
const char* key,
|
||||
const char* multibyte_start)
|
||||
{
|
||||
CHECK_LT(offset, end);
|
||||
return *offset == (*key | 0x80);
|
||||
return IsMatchUnchecked(*offset ^ 0x80, key, multibyte_start);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -104,10 +177,11 @@ static int IsEndCharMatch(const unsigned char* offset,
|
|||
|
||||
static int GetReturnValue(const unsigned char* offset,
|
||||
const unsigned char* end,
|
||||
const char* multibyte_start,
|
||||
int* return_value)
|
||||
{
|
||||
CHECK_LT(offset, end);
|
||||
if ((*offset & 0xE0) == 0x80) {
|
||||
if (!multibyte_start && (*offset & 0xE0) == 0x80) {
|
||||
*return_value = *offset & 0x0F;
|
||||
return 1;
|
||||
}
|
||||
|
@ -140,6 +214,7 @@ int _HIDDEN LookupStringInFixedSet(const unsigned char* graph,
|
|||
const unsigned char* end = graph + length;
|
||||
const unsigned char* offset = pos;
|
||||
const char* key_end = key + key_length;
|
||||
const char* multibyte_start = 0;
|
||||
|
||||
while (GetNextOffset(&pos, end, &offset)) {
|
||||
/*char <char>+ end_char offsets
|
||||
|
@ -153,11 +228,10 @@ int _HIDDEN LookupStringInFixedSet(const unsigned char* graph,
|
|||
|
||||
if (key != key_end && !IsEOL(offset, end)) {
|
||||
/* Leading <char> is not a match. Don't dive into this child */
|
||||
if (!IsMatch(offset, end, key))
|
||||
if (!IsMatch(offset, end, key, multibyte_start))
|
||||
continue;
|
||||
did_consume = 1;
|
||||
++offset;
|
||||
++key;
|
||||
NextPos(&offset, &key, &multibyte_start);
|
||||
/* Possible matches at this point:
|
||||
* <char>+ end_char offsets
|
||||
* <char>+ return value
|
||||
|
@ -167,10 +241,9 @@ int _HIDDEN LookupStringInFixedSet(const unsigned char* graph,
|
|||
|
||||
/* Remove all remaining <char> nodes possible */
|
||||
while (!IsEOL(offset, end) && key != key_end) {
|
||||
if (!IsMatch(offset, end, key))
|
||||
if (!IsMatch(offset, end, key, multibyte_start))
|
||||
return -1;
|
||||
++key;
|
||||
++offset;
|
||||
NextPos(&offset, &key, &multibyte_start);
|
||||
}
|
||||
}
|
||||
/* Possible matches at this point:
|
||||
|
@ -182,7 +255,7 @@ int _HIDDEN LookupStringInFixedSet(const unsigned char* graph,
|
|||
if (key == key_end) {
|
||||
int return_value;
|
||||
|
||||
if (GetReturnValue(offset, end, &return_value))
|
||||
if (GetReturnValue(offset, end, multibyte_start, &return_value))
|
||||
return return_value;
|
||||
/* The DAFSA guarantees that if the first char is a match, all
|
||||
* remaining char elements MUST match if the key is truly present.
|
||||
|
@ -191,14 +264,23 @@ int _HIDDEN LookupStringInFixedSet(const unsigned char* graph,
|
|||
return -1;
|
||||
continue;
|
||||
}
|
||||
if (!IsEndCharMatch(offset, end, key)) {
|
||||
if (!IsEndCharMatch(offset, end, key, multibyte_start)) {
|
||||
if (did_consume)
|
||||
return -1; /* Unexpected */
|
||||
continue;
|
||||
}
|
||||
++key;
|
||||
pos = ++offset; /* Dive into child */
|
||||
NextPos(&offset, &key, &multibyte_start);
|
||||
pos = offset; /* Dive into child */
|
||||
}
|
||||
|
||||
return -1; /* No match */
|
||||
}
|
||||
|
||||
/* prototype to skip warning with -Wmissing-prototypes */
|
||||
int _HIDDEN GetUtfMode(const unsigned char*, size_t);
|
||||
|
||||
int _HIDDEN GetUtfMode(const unsigned char* graph,
|
||||
size_t length)
|
||||
{
|
||||
return length > 0 && graph[length - 1] < 0x80;
|
||||
}
|
||||
|
|
|
@ -14,8 +14,9 @@ This python program fetches strings and return values from a gperf file
|
|||
and generates a C++ file with a byte array representing graph that can be
|
||||
used as a memory efficient replacement for the perfect hash table.
|
||||
|
||||
The input strings are assumed to consist of printable 7-bit ASCII characters
|
||||
and the return values are assumed to be one digit integers.
|
||||
The input strings must consist of printable 7-bit ASCII characters or UTF-8
|
||||
multibyte sequences. Control characters in the range [0x00-0x1F] are not
|
||||
allowed. The return values must be one digit integers.
|
||||
|
||||
In this program a DAFSA is a diamond shaped graph starting at a common
|
||||
source node and ending at a common sink node. All internal nodes contain
|
||||
|
@ -47,8 +48,8 @@ The generated byte array can described by the following BNF:
|
|||
|
||||
<byte> ::= < 8-bit value in range [0x00-0xFF] >
|
||||
|
||||
<char> ::= < printable 7-bit ASCII character, byte in range [0x20-0x7F] >
|
||||
<end_char> ::= < char + 0x80, byte in range [0xA0-0xFF] >
|
||||
<char> ::= < byte in range [0x1F-0x7F] >
|
||||
<end_char> ::= < char + 0x80, byte in range [0x9F-0xFF] >
|
||||
<return value> ::= < value + 0x80, byte in range [0x80-0x8F] >
|
||||
|
||||
<offset1> ::= < byte in range [0x00-0x3F] >
|
||||
|
@ -84,13 +85,18 @@ The generated byte array can described by the following BNF:
|
|||
| <prefix> <node>
|
||||
| <end_label>
|
||||
|
||||
<dafsa> ::= <source>
|
||||
| <dafsa> <node>
|
||||
<graph> ::= <source>
|
||||
| <graph> <node>
|
||||
|
||||
<version> ::= <empty> # The DAFSA was generated in ASCII mode.
|
||||
| < byte value 0x01 > # The DAFSA was generated in UTF-8 mode.
|
||||
|
||||
<dafsa> ::= <graph> <version>
|
||||
|
||||
Decoding:
|
||||
|
||||
<char> -> printable 7-bit ASCII character
|
||||
<end_char> & 0x7F -> printable 7-bit ASCII character
|
||||
<char> -> character
|
||||
<end_char> & 0x7F -> character
|
||||
<return value> & 0x0F -> integer
|
||||
<offset1 & 0x3F> -> integer
|
||||
((<offset2> & 0x1F>) << 8) + <byte> -> integer
|
||||
|
@ -105,6 +111,28 @@ between previous child node and next child node. Thus each offset links a node
|
|||
to a child node. The distance is always counted between start addresses, i.e.
|
||||
first byte in decoded offset or first byte in child node.
|
||||
|
||||
Transcoding of UTF-8 multibyte sequences:
|
||||
|
||||
The original DAFSA format was limited to 7-bit printable ASCII characters in
|
||||
range [0x20-0x7F], but has been extended to allow UTF-8 multibyte sequences.
|
||||
By transcoding of such characters the new format preserves compatibility with
|
||||
old parsers, so that a DAFSA in the extended format can be used by an old
|
||||
parser without false positives, although strings containing transcoded
|
||||
characters will never match. Since the format is extended rather than being
|
||||
changed, a parser supporting the new format will automatically support data
|
||||
generated in the old format.
|
||||
|
||||
Transcoding is performed by insertion of a start byte with the special value
|
||||
0x1F, followed by 2-4 bytes shifted into the range [0x40-0x7F], thus inside
|
||||
the range of printable ASCII.
|
||||
|
||||
2-byte: 110nnnnn, 10nnnnnn -> 00011111, 010nnnnn, 01nnnnnn
|
||||
|
||||
3-byte: 1110nnnn, 10nnnnnn, 10nnnnnn -> 00011111, 0110nnnn, 01nnnnnn, 01nnnnnn
|
||||
|
||||
4-byte: 11110nnn, 10nnnnnn, 10nnnnnn, 10nnnnnn ->
|
||||
00011111, 01110nnn, 01nnnnnn, 01nnnnnn, 01nnnnnn
|
||||
|
||||
Example 1:
|
||||
|
||||
%%
|
||||
|
@ -197,8 +225,25 @@ import sys
|
|||
class InputError(Exception):
|
||||
"""Exception raised for errors in the input file."""
|
||||
|
||||
# Length of a character starting at a given byte, indexed by byte value.
# Zero marks a byte that cannot start a character.
char_length_table = (
    (0,) * 0x20    # 0x00-0x1F
    + (1,) * 0x60  # 0x20-0x7F
    + (0,) * 0x40  # 0x80-0xBF
    + (2,) * 0x20  # 0xC0-0xDF
    + (3,) * 0x10  # 0xE0-0xEF
    + (4,) * 0x08  # 0xF0-0xF7
    + (0,) * 0x08  # 0xF8-0xFF
)
|
||||
|
||||
def to_dafsa(words):
|
||||
def to_dafsa(words, utf_mode):
|
||||
"""Generates a DAFSA from a word list and returns the source node.
|
||||
|
||||
Each word is split into characters so that each character is represented by
|
||||
|
@ -206,14 +251,31 @@ def to_dafsa(words):
|
|||
"""
|
||||
if not words:
|
||||
raise InputError('The domain list must not be empty')
|
||||
def to_nodes(word):
|
||||
def to_nodes(word, multibyte_length):
|
||||
"""Split words into characters"""
|
||||
if not 0x1F < ord(word[0]) < 0x80:
|
||||
raise InputError('Domain names must be printable 7-bit ASCII')
|
||||
if len(word) == 1:
|
||||
return chr(int(word[0], 16) & 0x0F), [None]
|
||||
return word[0], [to_nodes(word[1:])]
|
||||
return [to_nodes(word) for word in words]
|
||||
byte = ord(word[0])
|
||||
if multibyte_length:
|
||||
# Consume next byte in multibyte sequence.
|
||||
if byte & 0xC0 != 0x80:
|
||||
raise InputError('Invalid UTF-8 multibyte sequence')
|
||||
return chr(byte ^ 0xC0), [to_nodes(word[1:], multibyte_length - 1)]
|
||||
char_length = char_length_table[byte]
|
||||
if char_length == 1:
|
||||
# 7-bit printable ASCII.
|
||||
if len(word) == 1:
|
||||
return chr(int(word[0], 16) & 0x0F), [None]
|
||||
return word[0], [to_nodes(word[1:], 0)]
|
||||
elif char_length > 1:
|
||||
# Leading byte in multibyte sequence.
|
||||
if not utf_mode:
|
||||
raise InputError('UTF-8 encoded characters are not allowed in ASCII mode')
|
||||
if len(word) <= char_length:
|
||||
raise InputError('Unterminated UTF-8 multibyte sequence')
|
||||
return chr(0x1F), [(chr(byte ^ 0x80), [to_nodes(word[1:], char_length - 1)])]
|
||||
# Unexpected character.
|
||||
raise InputError('Domain names must be printable ASCII or UTF-8')
|
||||
|
||||
return [to_nodes(word, 0) for word in words]
|
||||
|
||||
|
||||
def to_words(node):
|
||||
|
@ -396,7 +458,7 @@ def encode_label(label):
|
|||
return buf
|
||||
|
||||
|
||||
def encode(dafsa):
|
||||
def encode(dafsa, utf_mode):
|
||||
"""Encodes a DAFSA to a list of bytes"""
|
||||
output = []
|
||||
offsets = {}
|
||||
|
@ -412,6 +474,8 @@ def encode(dafsa):
|
|||
|
||||
output.extend(encode_links(dafsa, offsets, len(output)))
|
||||
output.reverse()
|
||||
if utf_mode:
|
||||
output.append(0x01)
|
||||
return output
|
||||
|
||||
|
||||
|
@ -430,22 +494,22 @@ def to_cxx(data):
|
|||
return text
|
||||
|
||||
|
||||
def words_to_whatever(words, converter):
|
||||
def words_to_whatever(words, converter, utf_mode):
|
||||
"""Generates C++ code from a word list"""
|
||||
dafsa = to_dafsa(words)
|
||||
dafsa = to_dafsa(words, utf_mode)
|
||||
for fun in (reverse, join_suffixes, reverse, join_suffixes, join_labels):
|
||||
dafsa = fun(dafsa)
|
||||
return converter(encode(dafsa))
|
||||
return converter(encode(dafsa, utf_mode))
|
||||
|
||||
|
||||
def words_to_cxx(words):
|
||||
def words_to_cxx(words, utf_mode):
|
||||
"""Generates C++ code from a word list"""
|
||||
return words_to_whatever(words, to_cxx)
|
||||
return words_to_whatever(words, to_cxx, utf_mode)
|
||||
|
||||
|
||||
def words_to_binary(words):
|
||||
def words_to_binary(words, utf_mode):
|
||||
"""Generates C++ code from a word list"""
|
||||
return b'.DAFSA@PSL_0 \n' + words_to_whatever(words, bytearray)
|
||||
return b'.DAFSA@PSL_0 \n' + words_to_whatever(words, bytearray, utf_mode)
|
||||
|
||||
|
||||
def parse_psl2c(infile):
|
||||
|
@ -455,10 +519,10 @@ def parse_psl2c(infile):
|
|||
for line in lines:
|
||||
if line[-3:-1] != ', ':
|
||||
raise InputError('Expected "domainname, <digit>", found "%s"' % line)
|
||||
# Technically the DAFSA format could support return values in range [0-31],
|
||||
# Technically the DAFSA format could support return values in range [0x00-0x1E],
|
||||
# but the values below are the only with a defined meaning.
|
||||
if line[-1] not in '0123456789ABCDEF':
|
||||
raise InputError('Expected value to be one of {0,1,2,4,5}, found "%s"' % line[-1])
|
||||
raise InputError('Expected value to be in range [0-9] or [A-F], found "%s"' % line[-1])
|
||||
|
||||
# with open("gperf.out", 'w') as outfile:
|
||||
# for line in sorted(lines):
|
||||
|
@ -540,6 +604,8 @@ def usage():
|
|||
print(' --input-format=psl infile is a Public Suffix List file')
|
||||
print(' --output-format=cxx Write DAFSA as C/C++ code')
|
||||
print(' --output-format=binary Write DAFSA binary data')
|
||||
print(' --encoding=ascii 7-bit ASCII mode (default)')
|
||||
print(' --encoding=utf-8 UTF-8 mode')
|
||||
exit(1)
|
||||
|
||||
|
||||
|
@ -550,6 +616,7 @@ def main():
|
|||
|
||||
converter = words_to_cxx
|
||||
parser = parse_psl2c
|
||||
utf_mode = False
|
||||
|
||||
for arg in sys.argv[1:-2]:
|
||||
if arg.startswith('--input-format='):
|
||||
|
@ -567,18 +634,24 @@ def main():
|
|||
converter = words_to_binary
|
||||
elif value == 'cxx':
|
||||
converter = words_to_cxx
|
||||
elif arg.startswith('--encoding='):
|
||||
value = arg[11:].lower()
|
||||
if value == 'ascii':
|
||||
utf_mode = False
|
||||
elif value == 'utf-8':
|
||||
utf_mode = True
|
||||
else:
|
||||
print("Unknown output format '%s'" % value)
|
||||
print("Unknown encoding '%s'" % value)
|
||||
return 1
|
||||
else:
|
||||
usage()
|
||||
|
||||
if sys.argv[-2] == '-':
|
||||
with open(sys.argv[-1], 'w') as outfile:
|
||||
outfile.write(converter(parser(sys.stdin)))
|
||||
outfile.write(converter(parser(sys.stdin), utf_mode))
|
||||
else:
|
||||
with open(sys.argv[-2], 'r') as infile, open(sys.argv[-1], 'w') as outfile:
|
||||
outfile.write(converter(parser(infile)))
|
||||
outfile.write(converter(parser(infile), utf_mode))
|
||||
|
||||
return 0
|
||||
|
||||
|
|
16
src/psl.c
16
src/psl.c
|
@ -784,6 +784,7 @@ static void _add_punycode_if_needed(_psl_idna_t *idna, _psl_vector_t *v, _psl_en
|
|||
|
||||
/* prototype */
|
||||
int LookupStringInFixedSet(const unsigned char* graph, size_t length, const char* key, size_t key_length);
|
||||
int GetUtfMode(const unsigned char*, size_t);
|
||||
|
||||
static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int type)
|
||||
{
|
||||
|
@ -791,6 +792,15 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int t
|
|||
const char *p;
|
||||
char *punycode = NULL;
|
||||
int need_conversion = 0;
|
||||
size_t dafsa_size = 0;
|
||||
const unsigned char *dafsa = NULL;
|
||||
int utf_mode = 0;
|
||||
|
||||
if (psl == &_builtin_psl || psl->dafsa) {
|
||||
dafsa_size = psl == &_builtin_psl ? sizeof(kDafsa) : psl->dafsa_size;
|
||||
dafsa = psl == &_builtin_psl ? kDafsa : psl->dafsa;
|
||||
utf_mode = GetUtfMode(dafsa, dafsa_size);
|
||||
}
|
||||
|
||||
/* this function should be called without leading dots, just make sure */
|
||||
if (*domain == '.')
|
||||
|
@ -801,7 +811,7 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int t
|
|||
for (p = domain; *p; p++) {
|
||||
if (*p == '.')
|
||||
suffix.nlabels++;
|
||||
else if (*((unsigned char *)p) >= 128)
|
||||
else if (!utf_mode && *((unsigned char *)p) >= 128)
|
||||
need_conversion = 1; /* in case domain is non-ascii we need a toASCII conversion */
|
||||
}
|
||||
|
||||
|
@ -831,9 +841,7 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int t
|
|||
suffix.length = p - suffix.label;
|
||||
}
|
||||
|
||||
if (psl == &_builtin_psl || psl->dafsa) {
|
||||
size_t dafsa_size = psl == &_builtin_psl ? sizeof(kDafsa) : psl->dafsa_size;
|
||||
const unsigned char *dafsa = psl == &_builtin_psl ? kDafsa : psl->dafsa;
|
||||
if (dafsa) {
|
||||
int rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
|
||||
if (rc != -1) {
|
||||
/* check for correct rule type */
|
||||
|
|
Loading…
Reference in New Issue