added psl_registrable_domain(), renamed psl_registered_domain -> psl_unregistrable_domain
This commit is contained in:
parent
96574a795c
commit
fd0ff2023b
|
@ -0,0 +1,98 @@
|
|||
// Any copyright is dedicated to the Public Domain.
|
||||
// http://creativecommons.org/publicdomain/zero/1.0/
|
||||
|
||||
// null input.
|
||||
checkPublicSuffix(null, null);
|
||||
// Mixed case.
|
||||
checkPublicSuffix('COM', null);
|
||||
checkPublicSuffix('example.COM', 'example.com');
|
||||
checkPublicSuffix('WwW.example.COM', 'example.com');
|
||||
// Leading dot.
|
||||
checkPublicSuffix('.com', null);
|
||||
checkPublicSuffix('.example', null);
|
||||
checkPublicSuffix('.example.com', null);
|
||||
checkPublicSuffix('.example.example', null);
|
||||
// Unlisted TLD.
|
||||
checkPublicSuffix('example', null);
|
||||
checkPublicSuffix('example.example', 'example.example');
|
||||
checkPublicSuffix('b.example.example', 'example.example');
|
||||
checkPublicSuffix('a.b.example.example', 'example.example');
|
||||
// Listed, but non-Internet, TLD.
|
||||
//checkPublicSuffix('local', null);
|
||||
//checkPublicSuffix('example.local', null);
|
||||
//checkPublicSuffix('b.example.local', null);
|
||||
//checkPublicSuffix('a.b.example.local', null);
|
||||
// TLD with only 1 rule.
|
||||
checkPublicSuffix('biz', null);
|
||||
checkPublicSuffix('domain.biz', 'domain.biz');
|
||||
checkPublicSuffix('b.domain.biz', 'domain.biz');
|
||||
checkPublicSuffix('a.b.domain.biz', 'domain.biz');
|
||||
// TLD with some 2-level rules.
|
||||
checkPublicSuffix('com', null);
|
||||
checkPublicSuffix('example.com', 'example.com');
|
||||
checkPublicSuffix('b.example.com', 'example.com');
|
||||
checkPublicSuffix('a.b.example.com', 'example.com');
|
||||
checkPublicSuffix('uk.com', null);
|
||||
checkPublicSuffix('example.uk.com', 'example.uk.com');
|
||||
checkPublicSuffix('b.example.uk.com', 'example.uk.com');
|
||||
checkPublicSuffix('a.b.example.uk.com', 'example.uk.com');
|
||||
checkPublicSuffix('test.ac', 'test.ac');
|
||||
// TLD with only 1 (wildcard) rule.
|
||||
checkPublicSuffix('cy', null);
|
||||
checkPublicSuffix('c.cy', null);
|
||||
checkPublicSuffix('b.c.cy', 'b.c.cy');
|
||||
checkPublicSuffix('a.b.c.cy', 'b.c.cy');
|
||||
// More complex TLD.
|
||||
checkPublicSuffix('jp', null);
|
||||
checkPublicSuffix('test.jp', 'test.jp');
|
||||
checkPublicSuffix('www.test.jp', 'test.jp');
|
||||
checkPublicSuffix('ac.jp', null);
|
||||
checkPublicSuffix('test.ac.jp', 'test.ac.jp');
|
||||
checkPublicSuffix('www.test.ac.jp', 'test.ac.jp');
|
||||
checkPublicSuffix('kyoto.jp', null);
|
||||
checkPublicSuffix('test.kyoto.jp', 'test.kyoto.jp');
|
||||
checkPublicSuffix('ide.kyoto.jp', null);
|
||||
checkPublicSuffix('b.ide.kyoto.jp', 'b.ide.kyoto.jp');
|
||||
checkPublicSuffix('a.b.ide.kyoto.jp', 'b.ide.kyoto.jp');
|
||||
checkPublicSuffix('c.kobe.jp', null);
|
||||
checkPublicSuffix('b.c.kobe.jp', 'b.c.kobe.jp');
|
||||
checkPublicSuffix('a.b.c.kobe.jp', 'b.c.kobe.jp');
|
||||
checkPublicSuffix('city.kobe.jp', 'city.kobe.jp');
|
||||
checkPublicSuffix('www.city.kobe.jp', 'city.kobe.jp');
|
||||
// TLD with a wildcard rule and exceptions.
|
||||
checkPublicSuffix('ck', null);
|
||||
checkPublicSuffix('test.ck', null);
|
||||
checkPublicSuffix('b.test.ck', 'b.test.ck');
|
||||
checkPublicSuffix('a.b.test.ck', 'b.test.ck');
|
||||
checkPublicSuffix('www.ck', 'www.ck');
|
||||
checkPublicSuffix('www.www.ck', 'www.ck');
|
||||
// US K12.
|
||||
checkPublicSuffix('us', null);
|
||||
checkPublicSuffix('test.us', 'test.us');
|
||||
checkPublicSuffix('www.test.us', 'test.us');
|
||||
checkPublicSuffix('ak.us', null);
|
||||
checkPublicSuffix('test.ak.us', 'test.ak.us');
|
||||
checkPublicSuffix('www.test.ak.us', 'test.ak.us');
|
||||
checkPublicSuffix('k12.ak.us', null);
|
||||
checkPublicSuffix('test.k12.ak.us', 'test.k12.ak.us');
|
||||
checkPublicSuffix('www.test.k12.ak.us', 'test.k12.ak.us');
|
||||
// IDN labels.
|
||||
checkPublicSuffix('食狮.com.cn', '食狮.com.cn');
|
||||
checkPublicSuffix('食狮.公司.cn', '食狮.公司.cn');
|
||||
checkPublicSuffix('www.食狮.公司.cn', '食狮.公司.cn');
|
||||
checkPublicSuffix('shishi.公司.cn', 'shishi.公司.cn');
|
||||
checkPublicSuffix('公司.cn', null);
|
||||
checkPublicSuffix('食狮.中国', '食狮.中国');
|
||||
checkPublicSuffix('www.食狮.中国', '食狮.中国');
|
||||
checkPublicSuffix('shishi.中国', 'shishi.中国');
|
||||
checkPublicSuffix('中国', null);
|
||||
// Same as above, but punycoded.
|
||||
checkPublicSuffix('xn--85x722f.com.cn', 'xn--85x722f.com.cn');
|
||||
checkPublicSuffix('xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn');
|
||||
checkPublicSuffix('www.xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn');
|
||||
checkPublicSuffix('shishi.xn--55qx5d.cn', 'shishi.xn--55qx5d.cn');
|
||||
checkPublicSuffix('xn--55qx5d.cn', null);
|
||||
checkPublicSuffix('xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s');
|
||||
checkPublicSuffix('www.xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s');
|
||||
checkPublicSuffix('shishi.xn--fiqs8s', 'shishi.xn--fiqs8s');
|
||||
checkPublicSuffix('xn--fiqs8s', null);
|
|
@ -64,9 +64,12 @@ const psl_ctx_t *
|
|||
psl_builtin(void);
|
||||
int
|
||||
psl_is_public(const psl_ctx_t *psl, const char *domain);
|
||||
// return pointer to longest registered domain within 'domain' or NULL if none found
|
||||
// returns the longest unregistrable domain within 'domain' or NULL if none found
|
||||
const char *
|
||||
psl_registered_domain(const psl_ctx_t *psl, const char *domain);
|
||||
psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain);
|
||||
// returns the shortest possible registrable domain part or NULL if domain is not registrable at all
|
||||
const char *
|
||||
psl_registrable_domain(const psl_ctx_t *psl, const char *domain);
|
||||
// does not include exceptions
|
||||
int
|
||||
psl_suffix_count(const psl_ctx_t *psl);
|
||||
|
|
29
src/psl.c
29
src/psl.c
|
@ -301,10 +301,13 @@ int psl_is_public(const psl_ctx_t *psl, const char *domain)
|
|||
|
||||
// return NULL, if string domain does not contain an unregistrable domain
|
||||
// else return a pointer to the longest unregistrable domain within 'domain'
|
||||
const char *psl_registered_domain(const psl_ctx_t *psl, const char *domain)
|
||||
const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
|
||||
{
|
||||
const char *p, *ret_domain;
|
||||
|
||||
if (!psl || !domain)
|
||||
return NULL;
|
||||
|
||||
// We check from right to left, e.g. in www.xxx.org we check org, xxx.org, www.xxx.org in this order
|
||||
// for being a registered domain.
|
||||
|
||||
|
@ -325,6 +328,30 @@ const char *psl_registered_domain(const psl_ctx_t *psl, const char *domain)
|
|||
}
|
||||
}
|
||||
|
||||
// returns the shortest possible registrable domain part or NULL if domain is not registrable at all
|
||||
const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
|
||||
{
|
||||
const char *p;
|
||||
int ispublic;
|
||||
|
||||
if (!psl || !domain || *domain == '.')
|
||||
return NULL;
|
||||
|
||||
// We check from right to left, e.g. in www.xxx.org we check org, xxx.org, www.xxx.org in this order
|
||||
// for being a registrable domain.
|
||||
|
||||
if (!(p = strrchr(domain, '.')))
|
||||
p = domain;
|
||||
|
||||
while (!(ispublic = psl_is_public(psl, p)) && p > domain) {
|
||||
// go left to next dot
|
||||
while (p > domain && *--p != '.')
|
||||
;
|
||||
}
|
||||
|
||||
return ispublic ? (*p == '.' ? p + 1 : p) : NULL;
|
||||
}
|
||||
|
||||
psl_ctx_t *psl_load_file(const char *fname)
|
||||
{
|
||||
FILE *fp;
|
||||
|
|
|
@ -2,7 +2,7 @@ DEFS = @DEFS@ -DDATADIR=\"$(top_srcdir)/data\" -DSRCDIR=\"$(srcdir)\"
|
|||
AM_CPPFLAGS = -I$(top_srcdir)/include
|
||||
LDADD = ../src/libpsl-@LIBPSL_API_VERSION@.la
|
||||
|
||||
PSL_TESTS = test-is-public test-is-public-builtin test-is-public-all
|
||||
PSL_TESTS = test-is-public test-is-public-builtin test-is-public-all test-registrable-domain
|
||||
|
||||
check_PROGRAMS = $(PSL_TESTS)
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
* Test psl_is_public() for all entries in effective_tld_names.dat
|
||||
*
|
||||
* Changelog
|
||||
* 19.03.2014 Tim Ruehsen created from libmget/cookie.c
|
||||
* 19.03.2014 Tim Ruehsen created
|
||||
*
|
||||
*/
|
||||
|
||||
|
|
|
@ -0,0 +1,116 @@
|
|||
/*
|
||||
* Copyright(c) 2014 Tim Ruehsen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* This file is part of the test suite of libpsl.
|
||||
*
|
||||
* Test psl_registrable_domain() for all entries in test_psl.txt
|
||||
*
|
||||
* Changelog
|
||||
* 26.03.2014 Tim Ruehsen created
|
||||
*
|
||||
*/
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include <libpsl.h>
|
||||
|
||||
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
||||
#define TESTDATA DATADIR"/test_psl.txt"
|
||||
static int
|
||||
ok,
|
||||
failed;
|
||||
|
||||
static void test_psl(void)
|
||||
{
|
||||
FILE *fp;
|
||||
const psl_ctx_t *psl;
|
||||
const char *result;
|
||||
char buf[256], domain[128], expected_regdom[128], *p;
|
||||
|
||||
psl = psl_builtin();
|
||||
|
||||
printf("have %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));
|
||||
|
||||
if ((fp = fopen(TESTDATA, "r"))) {
|
||||
while ((fgets(buf, sizeof(buf), fp))) {
|
||||
if (sscanf(buf, " checkPublicSuffix('%127[^']' , '%127[^']", domain, expected_regdom) != 2) {
|
||||
if (sscanf(buf, " checkPublicSuffix('%127[^']' , %127[nul]", domain, expected_regdom) != 2)
|
||||
continue;
|
||||
}
|
||||
|
||||
// we have to lowercase the domain - the PSL API just takes lowercase
|
||||
for (p = domain; *p; p++)
|
||||
if (isupper(*p))
|
||||
*p = tolower(*p);
|
||||
|
||||
result = psl_registrable_domain(psl, domain);
|
||||
|
||||
if (result == NULL) {
|
||||
if (strcmp(expected_regdom, "null")) {
|
||||
failed++;
|
||||
printf("psl_registrable_domain(%s)=NULL (expected %s)\n", domain, expected_regdom);
|
||||
} else ok++;
|
||||
} else {
|
||||
if (strcmp(expected_regdom, result)) {
|
||||
failed++;
|
||||
printf("psl_registrable_domain(%s)=%s (expected %s)\n", domain, result, expected_regdom);
|
||||
} else ok++;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
} else {
|
||||
printf("Failed to open %s\n", TESTDATA);
|
||||
failed++;
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, const char * const *argv)
|
||||
{
|
||||
// if VALGRIND testing is enabled, we have to call ourselves with valgrind checking
|
||||
if (argc == 1) {
|
||||
const char *valgrind = getenv("TESTS_VALGRIND");
|
||||
|
||||
if (valgrind && *valgrind) {
|
||||
char cmd[strlen(valgrind)+strlen(argv[0])+32];
|
||||
|
||||
snprintf(cmd, sizeof(cmd), "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
return system(cmd) != 0;
|
||||
}
|
||||
}
|
||||
|
||||
test_psl();
|
||||
|
||||
if (failed) {
|
||||
printf("Summary: %d out of %d tests failed\n", failed, ok + failed);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("Summary: All %d tests passed\n", ok + failed);
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue