Skip punycode conversion for _psl_is_public_suffix() if data contains UTF-8 rules

This commit is contained in:
Tim Rühsen 2016-11-05 10:34:09 +01:00
parent 86034ac7c9
commit 4b42762cbf
1 changed file with 7 additions and 3 deletions

View File

@ -174,10 +174,11 @@ struct _psl_ctx_st {
size_t size_t
dafsa_size; dafsa_size;
int int
mode,
nsuffixes, nsuffixes,
nexceptions, nexceptions,
nwildcards; nwildcards;
unsigned char
utf8 : 1; /* 1: data contains UTF-8 + punycode encoded rules */
}; };
/* include the PSL data compiled by 'psl2c' */ /* include the PSL data compiled by 'psl2c' */
@ -782,8 +783,9 @@ static void _add_punycode_if_needed(_psl_idna_t *idna, _psl_vector_t *v, _psl_en
} }
} }
/* prototype */ /* prototypes */
int LookupStringInFixedSet(const unsigned char* graph, size_t length, const char* key, size_t key_length); int LookupStringInFixedSet(const unsigned char* graph, size_t length, const char* key, size_t key_length);
int GetUtfMode(const unsigned char *graph, size_t length);
static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int type) static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int type)
{ {
@ -801,7 +803,7 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int t
for (p = domain; *p; p++) { for (p = domain; *p; p++) {
if (*p == '.') if (*p == '.')
suffix.nlabels++; suffix.nlabels++;
else if (*((unsigned char *)p) >= 128) else if (!psl->utf8 && *((unsigned char *)p) >= 128)
need_conversion = 1; /* in case domain is non-ascii we need a toASCII conversion */ need_conversion = 1; /* in case domain is non-ascii we need a toASCII conversion */
} }
@ -1150,6 +1152,7 @@ psl_ctx_t *psl_load_fp(FILE *fp)
psl->dafsa = m; psl->dafsa = m;
psl->dafsa_size = len; psl->dafsa_size = len;
psl->utf8 = !!GetUtfMode(psl->dafsa, len);
return psl; return psl;
} }
@ -1161,6 +1164,7 @@ psl_ctx_t *psl_load_fp(FILE *fp)
* as of 19.02.2014, the list at https://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions. * as of 19.02.2014, the list at https://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
*/ */
psl->suffixes = _vector_alloc(8*1024, _suffix_compare_array); psl->suffixes = _vector_alloc(8*1024, _suffix_compare_array);
psl->utf8 = 1; /* we put UTF-8 and punycode rules in the lookup vector */
do { do {
while (_isspace_ascii(*linep)) linep++; /* ignore leading whitespace */ while (_isspace_ascii(*linep)) linep++; /* ignore leading whitespace */