New function psl_is_public_suffix2()
The current PSL has two sections, ICANN and PRIVATE. This new function allows to limit the check for one or both of these sections.
This commit is contained in:
parent
519b8c9d17
commit
9d2e93f0b8
|
@ -13,6 +13,7 @@ psl_load_fp
|
|||
psl_builtin
|
||||
psl_free
|
||||
psl_is_public_suffix
|
||||
psl_is_public_suffix2
|
||||
psl_unregistrable_domain
|
||||
psl_registrable_domain
|
||||
psl_suffix_count
|
||||
|
|
|
@ -44,6 +44,11 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* types for psl_is_publix_suffix2() */
|
||||
#define PSL_TYPE_ICANN (1<<0)
|
||||
#define PSL_TYPE_PRIVATE (1<<1)
|
||||
#define PSL_TYPE_ANY (PSL_TYPE_ICANN | PSL_TYPE_PRIVATE)
|
||||
|
||||
/**
|
||||
* psl_error_t:
|
||||
* @PSL_SUCCESS: Successful return.
|
||||
|
@ -71,57 +76,79 @@ typedef struct _psl_ctx_st psl_ctx_t;
|
|||
/* frees PSL context */
|
||||
void
|
||||
psl_free(psl_ctx_t *psl);
|
||||
|
||||
/* loads PSL data from file */
|
||||
psl_ctx_t *
|
||||
psl_load_file(const char *fname);
|
||||
|
||||
/* loads PSL data from FILE pointer */
|
||||
psl_ctx_t *
|
||||
psl_load_fp(FILE *fp);
|
||||
|
||||
/* retrieves builtin PSL data */
|
||||
const psl_ctx_t *
|
||||
psl_builtin(void);
|
||||
|
||||
/* checks whether domain is a public suffix or not */
|
||||
int
|
||||
psl_is_public_suffix(const psl_ctx_t *psl, const char *domain);
|
||||
|
||||
/* checks whether domain is a public suffix regarding the type or not */
|
||||
int
|
||||
psl_is_public_suffix2(const psl_ctx_t *psl, const char *domain, int type);
|
||||
|
||||
/* checks whether cookie_domain is acceptable for domain or not */
|
||||
int
|
||||
psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain);
|
||||
|
||||
/* returns the longest not registrable domain within 'domain' or NULL if none found */
|
||||
const char *
|
||||
psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain);
|
||||
|
||||
/* returns the shortest possible registrable domain part or NULL if domain is not registrable at all */
|
||||
const char *
|
||||
psl_registrable_domain(const psl_ctx_t *psl, const char *domain);
|
||||
|
||||
/* convert a string into lowercase UTF-8 */
|
||||
psl_error_t
|
||||
psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower);
|
||||
|
||||
/* does not include exceptions */
|
||||
int
|
||||
psl_suffix_count(const psl_ctx_t *psl);
|
||||
|
||||
/* just counts exceptions */
|
||||
int
|
||||
psl_suffix_exception_count(const psl_ctx_t *psl);
|
||||
|
||||
/* just counts wildcards */
|
||||
int
|
||||
psl_suffix_wildcard_count(const psl_ctx_t *psl);
|
||||
|
||||
/* returns compilation time */
|
||||
time_t
|
||||
psl_builtin_compile_time(void);
|
||||
|
||||
/* returns mtime of PSL source file */
|
||||
time_t
|
||||
psl_builtin_file_time(void);
|
||||
|
||||
/* returns SHA1 checksum (hex-encoded, lowercase) of PSL source file */
|
||||
const char *
|
||||
psl_builtin_sha1sum(void);
|
||||
|
||||
/* returns file name of PSL source file */
|
||||
const char *
|
||||
psl_builtin_filename(void);
|
||||
|
||||
/* returns library version string */
|
||||
const char *
|
||||
psl_get_version(void);
|
||||
|
||||
/* checks library version number */
|
||||
int
|
||||
psl_check_version_number(int version);
|
||||
|
||||
/* returns wether the built-in data is outdated or not */
|
||||
int
|
||||
psl_builtin_outdated(void);
|
||||
|
|
2
list
2
list
|
@ -1 +1 @@
|
|||
Subproject commit e801df4a56ac8c7519d349ad5125433206930d6e
|
||||
Subproject commit 1f3ad51171235aafe423435606e869f0161582e4
|
93
src/psl.c
93
src/psl.c
|
@ -126,9 +126,11 @@ static char *strndup(const char *s, size_t n)
|
|||
|
||||
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
||||
|
||||
#define _PSL_FLAG_PLAIN (1<<0)
|
||||
#define _PSL_FLAG_EXCEPTION (1<<1)
|
||||
#define _PSL_FLAG_WILDCARD (1<<2)
|
||||
#define _PSL_FLAG_EXCEPTION (1<<0)
|
||||
#define _PSL_FLAG_WILDCARD (1<<1)
|
||||
#define _PSL_FLAG_ICANN (1<<2) /* entry of ICANN section */
|
||||
#define _PSL_FLAG_PRIVATE (1<<3) /* entry of PRIVATE section */
|
||||
#define _PSL_FLAG_PLAIN (1<<4) /* just used for PSL syntax checking */
|
||||
|
||||
typedef struct {
|
||||
char
|
||||
|
@ -157,6 +159,7 @@ struct _psl_ctx_st {
|
|||
_psl_vector_t
|
||||
*suffixes;
|
||||
int
|
||||
mode,
|
||||
nsuffixes,
|
||||
nexceptions,
|
||||
nwildcards;
|
||||
|
@ -177,7 +180,7 @@ struct _psl_ctx_st {
|
|||
static const char _psl_filename[] = "";
|
||||
#endif
|
||||
|
||||
/* references to this PSL will result in lookups to built-in data */
|
||||
/* references to these PSLs will result in lookups to built-in data */
|
||||
static const psl_ctx_t
|
||||
_builtin_psl;
|
||||
|
||||
|
@ -310,10 +313,11 @@ static int _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
||||
static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int type)
|
||||
{
|
||||
_psl_entry_t suffix, *rule;
|
||||
const char *p;
|
||||
int builtin;
|
||||
|
||||
/* this function should be called without leading dots, just make sure */
|
||||
suffix.label = domain + (*domain == '.');
|
||||
|
@ -332,7 +336,9 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
|||
}
|
||||
|
||||
/* if domain has enough labels, it is public */
|
||||
if (psl == &_builtin_psl)
|
||||
builtin = (psl == &_builtin_psl);
|
||||
|
||||
if (builtin)
|
||||
rule = &suffixes[0];
|
||||
else
|
||||
rule = _vector_get(psl->suffixes, 0);
|
||||
|
@ -340,12 +346,18 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
|||
if (!rule || rule->nlabels < suffix.nlabels - 1)
|
||||
return 0;
|
||||
|
||||
if (psl == &_builtin_psl)
|
||||
if (rule == &suffixes[0])
|
||||
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
|
||||
else
|
||||
rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));
|
||||
|
||||
if (rule) {
|
||||
/* check for correct rule type */
|
||||
if (type == PSL_TYPE_ICANN && !(rule->flags & _PSL_FLAG_ICANN))
|
||||
return 0;
|
||||
else if (type == PSL_TYPE_PRIVATE && !(rule->flags & _PSL_FLAG_PRIVATE))
|
||||
return 0;
|
||||
|
||||
/* definitely a match, no matter if the found rule is a wildcard or not */
|
||||
if (rule->flags & _PSL_FLAG_EXCEPTION)
|
||||
return 0;
|
||||
|
@ -360,12 +372,18 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
|||
suffix.length = strlen(suffix.label);
|
||||
suffix.nlabels--;
|
||||
|
||||
if (psl == &_builtin_psl)
|
||||
if (builtin)
|
||||
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
|
||||
else
|
||||
rule = _vector_get(psl->suffixes, (pos = _vector_find(psl->suffixes, &suffix)));
|
||||
|
||||
if (rule) {
|
||||
/* check for correct rule type */
|
||||
if (type == PSL_TYPE_ICANN && !(rule->flags & _PSL_FLAG_ICANN))
|
||||
return 0;
|
||||
else if (type == PSL_TYPE_PRIVATE && !(rule->flags & _PSL_FLAG_PRIVATE))
|
||||
return 0;
|
||||
|
||||
if ((rule->flags & _PSL_FLAG_WILDCARD))
|
||||
return 1;
|
||||
}
|
||||
|
@ -399,7 +417,37 @@ int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
|||
if (!psl || !domain)
|
||||
return 1;
|
||||
|
||||
return _psl_is_public_suffix(psl, domain);
|
||||
return _psl_is_public_suffix(psl, domain, PSL_TYPE_ANY);
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_is_public_suffix2:
|
||||
* @psl: PSL context
|
||||
* @domain: Domain string
|
||||
* @type: Domain type
|
||||
*
|
||||
* This function checks if @domain is a public suffix by the means of the
|
||||
* [Mozilla Public Suffix List](http://publicsuffix.org).
|
||||
*
|
||||
* @type specifies the PSL section where to perform the lookup. Valid values are
|
||||
* %PSL_TYPE_PRIVATE, %PSL_TYPE_ICANN and %PSL_TYPE_ANY.
|
||||
*
|
||||
* International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode).
|
||||
* Other encodings result in unexpected behavior.
|
||||
*
|
||||
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
|
||||
* psl_builtin().
|
||||
*
|
||||
* Returns: 1 if domain is a public suffix, 0 if not.
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
int psl_is_public_suffix2(const psl_ctx_t *psl, const char *domain, int type)
|
||||
{
|
||||
if (!psl || !domain)
|
||||
return 1;
|
||||
|
||||
return _psl_is_public_suffix(psl, domain, type);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -431,7 +479,7 @@ const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
|
|||
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
|
||||
*/
|
||||
|
||||
while (!_psl_is_public_suffix(psl, domain)) {
|
||||
while (!_psl_is_public_suffix(psl, domain, 0)) {
|
||||
if ((domain = strchr(domain, '.')))
|
||||
domain++;
|
||||
else
|
||||
|
@ -472,7 +520,7 @@ const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
|
|||
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
|
||||
*/
|
||||
|
||||
while (!_psl_is_public_suffix(psl, domain)) {
|
||||
while (!_psl_is_public_suffix(psl, domain, 0)) {
|
||||
if ((p = strchr(domain, '.'))) {
|
||||
regdom = domain;
|
||||
domain = p + 1;
|
||||
|
@ -691,6 +739,7 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
|||
psl_ctx_t *psl;
|
||||
_psl_entry_t suffix, *suffixp;
|
||||
char buf[256], *linep, *p;
|
||||
int type = 0;
|
||||
#ifdef WITH_LIBICU
|
||||
UIDNA *idna;
|
||||
UErrorCode status = 0;
|
||||
|
@ -716,8 +765,20 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
|||
while (_isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
|
||||
if (!*linep) continue; /* skip empty lines */
|
||||
|
||||
if (*linep == '/' && linep[1] == '/')
|
||||
if (*linep == '/' && linep[1] == '/') {
|
||||
if (!type) {
|
||||
if (strstr(linep + 2, "===BEGIN ICANN DOMAINS==="))
|
||||
type = _PSL_FLAG_ICANN;
|
||||
else if (!type && strstr(linep + 2, "===BEGIN PRIVATE DOMAINS==="))
|
||||
type = _PSL_FLAG_PRIVATE;
|
||||
}
|
||||
else if (type == _PSL_FLAG_ICANN && strstr(linep + 2, "===END ICANN DOMAINS==="))
|
||||
type = 0;
|
||||
else if (type == _PSL_FLAG_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS==="))
|
||||
type = 0;
|
||||
|
||||
continue; /* skip comments */
|
||||
}
|
||||
|
||||
/* parse suffix rule */
|
||||
for (p = linep; *linep && !_isspace_ascii(*linep);) linep++;
|
||||
|
@ -725,7 +786,7 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
|||
|
||||
if (*p == '!') {
|
||||
p++;
|
||||
suffix.flags = _PSL_FLAG_EXCEPTION;
|
||||
suffix.flags = _PSL_FLAG_EXCEPTION | type;
|
||||
psl->nexceptions++;
|
||||
} else if (*p == '*') {
|
||||
if (*++p != '.') {
|
||||
|
@ -734,13 +795,13 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
|||
}
|
||||
p++;
|
||||
/* wildcard *.foo.bar implicitely make foo.bar a public suffix */
|
||||
suffix.flags = _PSL_FLAG_WILDCARD | _PSL_FLAG_PLAIN;
|
||||
suffix.flags = _PSL_FLAG_WILDCARD | _PSL_FLAG_PLAIN | type;
|
||||
psl->nwildcards++;
|
||||
psl->nsuffixes++;
|
||||
} else {
|
||||
if (!strchr(p, '.'))
|
||||
continue; /* we do not need an explicit plain TLD rule, already covered by implicit '*' rule */
|
||||
suffix.flags = _PSL_FLAG_PLAIN;
|
||||
suffix.flags = _PSL_FLAG_PLAIN | type;
|
||||
psl->nsuffixes++;
|
||||
}
|
||||
|
||||
|
@ -812,7 +873,7 @@ void psl_free(psl_ctx_t *psl)
|
|||
* The builtin data also contains punycode entries, one for each international domain name.
|
||||
*
|
||||
* If the generation of built-in data has been disabled during compilation, %NULL will be returned.
|
||||
* So if using the builtin psl context, you can provide UTF-8 or punycode representations of domains to
|
||||
* When using the builtin psl context, you can provide UTF-8 or punycode representations of domains to
|
||||
* functions like psl_is_public_suffix().
|
||||
*
|
||||
* Returns: Pointer to the built in PSL data or NULL if this data is not available.
|
||||
|
|
|
@ -58,7 +58,7 @@ static void test_psl(void)
|
|||
{
|
||||
FILE *fp;
|
||||
psl_ctx_t *psl;
|
||||
int result;
|
||||
int result, type = 0;
|
||||
char buf[256], *linep, *p;
|
||||
|
||||
psl = psl_load_file(PSL_FILE); /* PSL_FILE can be set by ./configure --with-psl-file=[PATH] */
|
||||
|
@ -74,8 +74,20 @@ static void test_psl(void)
|
|||
while (_isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
|
||||
if (!*linep) continue; /* skip empty lines */
|
||||
|
||||
if (*linep == '/' && linep[1] == '/')
|
||||
if (*linep == '/' && linep[1] == '/') {
|
||||
if (!type) {
|
||||
if (strstr(linep + 2, "===BEGIN ICANN DOMAINS==="))
|
||||
type = PSL_TYPE_ICANN;
|
||||
else if (!type && strstr(linep + 2, "===BEGIN PRIVATE DOMAINS==="))
|
||||
type = PSL_TYPE_PRIVATE;
|
||||
}
|
||||
else if (type == PSL_TYPE_ICANN && strstr(linep + 2, "===END ICANN DOMAINS==="))
|
||||
type = 0;
|
||||
else if (type == PSL_TYPE_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS==="))
|
||||
type = 0;
|
||||
|
||||
continue; /* skip comments */
|
||||
}
|
||||
|
||||
/* parse suffix rule */
|
||||
for (p = linep; *linep && !_isspace_ascii(*linep);) linep++;
|
||||
|
@ -111,6 +123,39 @@ static void test_psl(void)
|
|||
failed++;
|
||||
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", p, result);
|
||||
} else ok++;
|
||||
|
||||
if (!(strchr(p, '.'))) {
|
||||
/* TLDs are always expected to be Publix Suffixes */
|
||||
if (!(result = psl_is_public_suffix2(psl, p, PSL_TYPE_PRIVATE))) {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix2(%s, PSL_TYPE_PRIVATE)=%d (expected 1)\n", p, result);
|
||||
} else ok++;
|
||||
|
||||
if (!(result = psl_is_public_suffix2(psl, p, PSL_TYPE_ICANN))) {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix2(%s, PSL_TYPE_ICANN)=%d (expected 0)\n", p, result);
|
||||
} else ok++;
|
||||
} else if (type == PSL_TYPE_PRIVATE) {
|
||||
if (!(result = psl_is_public_suffix2(psl, p, PSL_TYPE_PRIVATE))) {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix2(%s, PSL_TYPE_PRIVATE)=%d (expected 1)\n", p, result);
|
||||
} else ok++;
|
||||
|
||||
if ((result = psl_is_public_suffix2(psl, p, PSL_TYPE_ICANN))) {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix2(%s, PSL_TYPE_ICANN)=%d (expected 0)\n", p, result);
|
||||
} else ok++;
|
||||
} else if (type == PSL_TYPE_ICANN) {
|
||||
if (!(result = psl_is_public_suffix2(psl, p, PSL_TYPE_ICANN))) {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix2(%s, PSL_TYPE_ICANN)=%d (expected 1)\n", p, result);
|
||||
} else ok++;
|
||||
|
||||
if ((result = psl_is_public_suffix2(psl, p, PSL_TYPE_PRIVATE))) {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix2(%s, PSL_TYPE_PRIVATE)=%d (expected 0)\n", p, result);
|
||||
} else ok++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue