reverted meaning of psl_is_public (again), started function docs

This commit is contained in:
Tim Ruehsen 2014-04-06 22:30:50 +02:00
parent 4a33b309a3
commit c37830f6fc
4 changed files with 131 additions and 72 deletions

107
src/psl.c
View File

@ -247,6 +247,22 @@ static int _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
return 0; return 0;
} }
/**
* psl_is_public:
* @psl: PSL context
* @domain: Domain string
*
* This function checks if @domain is a public suffix by means of the
* [Mozilla Public Suffix List](http://publicsuffix.org).
*
* This can be used for e.g. cookie domain verification.
* You should never accept a cookie whose domain is a public suffix.
*
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
* psl_builtin().
*
* Returns: 1 if domain is a public suffix, 0 if not.
*/
int psl_is_public(const psl_ctx_t *psl, const char *domain) int psl_is_public(const psl_ctx_t *psl, const char *domain)
{ {
_psl_entry_t suffix, *rule; _psl_entry_t suffix, *rule;
@ -254,7 +270,7 @@ int psl_is_public(const psl_ctx_t *psl, const char *domain)
unsigned short length_bak; unsigned short length_bak;
if (!psl || !domain) if (!psl || !domain)
return 0; return 1;
// this function should be called without leading dots, just make sure // this function should be called without leading dots, just make sure
suffix.label = domain + (*domain == '.'); suffix.label = domain + (*domain == '.');
@ -273,7 +289,7 @@ int psl_is_public(const psl_ctx_t *psl, const char *domain)
rule = _vector_get(psl->suffixes, 0); rule = _vector_get(psl->suffixes, 0);
if (!rule || rule->nlabels < suffix.nlabels - 1) if (!rule || rule->nlabels < suffix.nlabels - 1)
return 1; return 0;
if (psl == &_builtin_psl) if (psl == &_builtin_psl)
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare); rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
@ -282,10 +298,10 @@ int psl_is_public(const psl_ctx_t *psl, const char *domain)
if (rule) { if (rule) {
// definitely a match, no matter if the found rule is a wildcard or not // definitely a match, no matter if the found rule is a wildcard or not
return 0; return 1;
} else if (suffix.nlabels == 1) { } else if (suffix.nlabels == 1) {
// unknown TLD, this is the prevailing '*' match // unknown TLD, this is the prevailing '*' match
return 0; return 1;
} }
label_bak = suffix.label; label_bak = suffix.label;
@ -310,22 +326,34 @@ int psl_is_public(const psl_ctx_t *psl, const char *domain)
if (psl == &_builtin_psl) { if (psl == &_builtin_psl) {
if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare)) if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare))
return 1; // found an exception, so 'domain' is public return 0; // found an exception, so 'domain' is not a public suffix
} else { } else {
if (_vector_get(psl->suffix_exceptions, _vector_find(psl->suffix_exceptions, &suffix)) != 0) if (_vector_get(psl->suffix_exceptions, _vector_find(psl->suffix_exceptions, &suffix)) != 0)
return 1; // found an exception, so 'domain' is public return 0; // found an exception, so 'domain' is not a public suffix
}
return 0;
}
}
} }
return 1; return 1;
}
}
}
return 0;
} }
// return NULL, if string domain does not contain a registered domain /**
// else return a pointer to the longest registered domain within 'domain' * psl_unregistrable_domain:
* @psl: PSL context
* @domain: Domain string
*
* This function finds the longest public suffix part of @domain by means
* of the [Mozilla Public Suffix List](http://publicsuffix.org).
*
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
* psl_builtin().
*
* Returns: Pointer to longest public suffix part of @domain or NULL if @domain
* does not contain a public suffix (or if @psl is NULL).
*/
const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain) const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
{ {
const char *p, *ret_domain; const char *p, *ret_domain;
@ -337,10 +365,10 @@ const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
// for being a registered domain. // for being a registered domain.
if (!(p = strrchr(domain, '.'))) if (!(p = strrchr(domain, '.')))
return psl_is_public(psl, domain) ? NULL : domain; return psl_is_public(psl, domain) ? domain : NULL;
for (ret_domain = NULL; ;) { for (ret_domain = NULL; ;) {
if (psl_is_public(psl, p)) if (!psl_is_public(psl, p))
return ret_domain; return ret_domain;
else if (p == domain) else if (p == domain)
return domain; return domain;
@ -353,7 +381,20 @@ const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
} }
} }
// returns the shortest possible registrable domain part or NULL if domain is not registrable at all /**
* psl_registrable_domain:
* @psl: PSL context
* @domain: Domain string
*
* This function finds the shortest private suffix part of @domain by means
* of the [Mozilla Public Suffix List](http://publicsuffix.org).
*
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
* psl_builtin().
*
* Returns: Pointer to shortest private suffix part of @domain or NULL if @domain
* does not contain a private suffix (or if @psl is NULL).
*/
const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain) const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
{ {
const char *p; const char *p;
@ -368,15 +409,24 @@ const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
if (!(p = strrchr(domain, '.'))) if (!(p = strrchr(domain, '.')))
p = domain; p = domain;
while (!(ispublic = psl_is_public(psl, p)) && p > domain) { while ((ispublic = psl_is_public(psl, p)) && p > domain) {
// go left to next dot // go left to next dot
while (p > domain && *--p != '.') while (p > domain && *--p != '.')
; ;
} }
return ispublic ? (*p == '.' ? p + 1 : p) : NULL; return ispublic ? NULL : (*p == '.' ? p + 1 : p);
} }
/**
* psl_load_file:
* @fname: Name of PSL file
*
* This function loads the public suffixes file named @fname.
* To free the allocated resources, call psl_free().
*
* Returns: Pointer to a private PSL context, or NULL on failure.
*/
psl_ctx_t *psl_load_file(const char *fname) psl_ctx_t *psl_load_file(const char *fname)
{ {
FILE *fp; FILE *fp;
@ -393,6 +443,15 @@ psl_ctx_t *psl_load_file(const char *fname)
return psl; return psl;
} }
/**
* psl_load_fp:
* @fp: FILE pointer
*
* This function loads the public suffixes from a FILE pointer.
* To free the allocated resources, call psl_free().
*
* Returns: Pointer to a private PSL context, or NULL on failure.
*/
psl_ctx_t *psl_load_fp(FILE *fp) psl_ctx_t *psl_load_fp(FILE *fp)
{ {
psl_ctx_t *psl; psl_ctx_t *psl;
@ -447,12 +506,6 @@ psl_ctx_t *psl_load_fp(FILE *fp)
return psl; return psl;
} }
// return built-in PSL structure
const psl_ctx_t *psl_builtin(void)
{
return &_builtin_psl;
}
void psl_free(psl_ctx_t *psl) void psl_free(psl_ctx_t *psl)
{ {
if (psl && psl != &_builtin_psl) { if (psl && psl != &_builtin_psl) {
@ -462,6 +515,12 @@ void psl_free(psl_ctx_t *psl)
} }
} }
/**
* psl_builtin:
*
* This function returns the built-in PSL context, i.e. the address of the
* file-level _builtin_psl object. It takes no arguments and does no work
* beyond returning that address.
*
* The result is const-qualified; callers pass it to the lookup functions
* (e.g. psl_is_public()) in place of a context loaded at runtime.
*
* Returns: Pointer to the built-in PSL context.
*/
const psl_ctx_t *psl_builtin(void)
{
return &_builtin_psl;
}
/* does not include exceptions */ /* does not include exceptions */
int psl_suffix_count(const psl_ctx_t *psl) int psl_suffix_count(const psl_ctx_t *psl)
{ {

View File

@ -67,38 +67,38 @@ static void test_psl(void)
*linep = 0; *linep = 0;
if (*p == '!') { // an exception to a wildcard, e.g. !www.ck (wildcard is *.ck) if (*p == '!') { // an exception to a wildcard, e.g. !www.ck (wildcard is *.ck)
if ((result = psl_is_public(psl, p + 1))) {
failed++;
printf("psl_is_public(%s)=%d (expected 0)\n", p, result);
} else ok++;
if (!(result = psl_is_public(psl, strchr(p, '.') + 1))) {
failed++;
printf("psl_is_public(%s)=%d (expected 1)\n", strchr(p, '.') + 1, result);
} else ok++;
}
else if (*p == '*') { // a wildcard, e.g. *.ck
if (!(result = psl_is_public(psl, p + 1))) { if (!(result = psl_is_public(psl, p + 1))) {
failed++;
printf("psl_is_public(%s)=%d (expected 1)\n", p + 1, result);
} else ok++;
*p = 'x';
if (!(result = psl_is_public(psl, p))) {
failed++;
printf("psl_is_public(%s)=%d (expected 1)\n", p, result);
} else ok++;
}
else {
if (!(result = psl_is_public(psl, p))) {
failed++; failed++;
printf("psl_is_public(%s)=%d (expected 1)\n", p, result); printf("psl_is_public(%s)=%d (expected 1)\n", p, result);
} else ok++; } else ok++;
if ((result = psl_is_public(psl, strchr(p, '.') + 1))) {
failed++;
printf("psl_is_public(%s)=%d (expected 0)\n", strchr(p, '.') + 1, result);
} else ok++;
}
else if (*p == '*') { // a wildcard, e.g. *.ck
if ((result = psl_is_public(psl, p + 1))) {
failed++;
printf("psl_is_public(%s)=%d (expected 0)\n", p + 1, result);
} else ok++;
*p = 'x';
if ((result = psl_is_public(psl, p))) {
failed++;
printf("psl_is_public(%s)=%d (expected 0)\n", p, result);
} else ok++;
}
else {
if ((result = psl_is_public(psl, p))) {
failed++;
printf("psl_is_public(%s)=%d (expected 0)\n", p, result);
} else ok++;
snprintf(domain, sizeof(domain), "xxxx.%s", p); snprintf(domain, sizeof(domain), "xxxx.%s", p);
if (!(result = psl_is_public(psl, domain))) { if ((result = psl_is_public(psl, domain))) {
failed++; failed++;
printf("psl_is_public(%s)=%d (expected 1)\n", domain, result); printf("psl_is_public(%s)=%d (expected 0)\n", domain, result);
} else ok++; } else ok++;
} }
} }

View File

@ -54,20 +54,20 @@ static void test_psl(void)
int int
result; result;
} test_data[] = { } test_data[] = {
{ "www.example.com", 1 }, { "www.example.com", 0 },
{ "com.ar", 0 }, { "com.ar", 1 },
{ "www.com.ar", 1 }, { "www.com.ar", 0 },
{ "cc.ar.us", 0 }, { "cc.ar.us", 1 },
{ ".cc.ar.us", 0 }, { ".cc.ar.us", 1 },
{ "www.cc.ar.us", 1 }, { "www.cc.ar.us", 0 },
{ "www.ck", 1 }, // exception from *.ck { "www.ck", 0 }, // exception from *.ck
{ "abc.www.ck", 1 }, { "abc.www.ck", 0 },
{ "xxx.ck", 0 }, { "xxx.ck", 1 },
{ "www.xxx.ck", 1 }, { "www.xxx.ck", 0 },
{ "\345\225\206\346\240\207", 0 }, // xn--czr694b oder 商标 { "\345\225\206\346\240\207", 1 }, // xn--czr694b oder 商标
{ "www.\345\225\206\346\240\207", 1 }, { "www.\345\225\206\346\240\207", 0 },
{ "xn--czr694b", 0 }, { "xn--czr694b", 1 },
{ "www.xn--czr694b", 1 }, { "www.xn--czr694b", 0 },
}; };
unsigned it; unsigned it;
const psl_ctx_t *psl; const psl_ctx_t *psl;

View File

@ -54,18 +54,18 @@ static void test_psl(void)
int int
result; result;
} test_data[] = { } test_data[] = {
{ "www.example.com", 1 }, { "www.example.com", 0 },
{ "com.ar", 0 }, { "com.ar", 1 },
{ "www.com.ar", 1 }, { "www.com.ar", 0 },
{ "cc.ar.us", 0 }, { "cc.ar.us", 1 },
{ ".cc.ar.us", 0 }, { ".cc.ar.us", 1 },
{ "www.cc.ar.us", 1 }, { "www.cc.ar.us", 0 },
{ "www.ck", 1 }, // exception from *.ck { "www.ck", 0 }, // exception from *.ck
{ "abc.www.ck", 1 }, { "abc.www.ck", 0 },
{ "xxx.ck", 0 }, { "xxx.ck", 1 },
{ "www.xxx.ck", 1 }, { "www.xxx.ck", 0 },
{ "\345\225\206\346\240\207", 0 }, // xn--czr694b oder 商标 { "\345\225\206\346\240\207", 1 }, // xn--czr694b oder 商标
{ "www.\345\225\206\346\240\207", 1 }, { "www.\345\225\206\346\240\207", 0 },
}; };
unsigned it; unsigned it;
psl_ctx_t *psl; psl_ctx_t *psl;