From f6a3b96f91c6995d9bf1fce1f9c2dd405a666a1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Tue, 15 Sep 2015 11:46:21 +0200 Subject: [PATCH 01/18] Check PSL entries before generating built-in data --- src/psl2c.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/src/psl2c.c b/src/psl2c.c index 7ee1fc9..14bc4b9 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -54,6 +54,109 @@ # include "psl.c" #undef _LIBPSL_INCLUDED_BY_PSL2C +static int _check_psl_entries(const _psl_vector_t *v) +{ + int it, doublet = 0, err = 0; + + for (it = 0; it < v->cur - 1; it++) { + _psl_entry_t *cur = _vector_get(v, it); + _psl_entry_t *next = _vector_get(v, it + 1); + + if (_suffix_compare(cur, next) == 0) { + /* we allow '*.foo' and 'foo' */ + if (cur->wildcard == next->wildcard) { + fprintf(stderr, "Double entry '%s' detected\n", cur->label); + err = 1; + } + else if (++doublet > 1) { + fprintf(stderr, "Double entry '%s' detected\n", cur->label); + err = 1; + } + } else + doublet = 0; + } + + return err; +} + +static int _check_psl(const psl_ctx_t *psl) +{ + int it, pos, err = 0; + + if (_check_psl_entries(psl->suffixes)) + err = 1; + + if (_check_psl_entries(psl->suffix_exceptions)) + err = 1; + + /* check if plain suffix also appears in exceptions */ + for (it = 0; it < psl->suffixes->cur; it++) { + _psl_entry_t *e = _vector_get(psl->suffixes, it); + + if (!e->wildcard && _vector_find(psl->suffix_exceptions, e) >= 0) { + fprintf(stderr, "Found entry '%s' also in exceptions\n", e->label); + err = 1; + } + } + + /* check if exception also appears in suffix list as plain entry */ + for (it = 0; it < psl->suffix_exceptions->cur; it++) { + _psl_entry_t *e2, *e = _vector_get(psl->suffix_exceptions, it); + + if ((e2 = _vector_get(psl->suffixes, pos = _vector_find(psl->suffixes, e)))) { + if (!e2->wildcard) { + fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label); + err = 1; + } + /* Two same domains in a 
row are allowed: wildcard and non-wildcard. + * Binary search find either of them, so also check previous and next entry. */ + else if (pos > 0 && _suffix_compare(e, e2 = _vector_get(psl->suffixes, pos - 1)) == 0 && !e2->wildcard) { + fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label); + err = 1; + } + else if (pos < psl->suffixes->cur - 1 && _suffix_compare(e, e2 = _vector_get(psl->suffixes, pos + 1)) == 0 && !e2->wildcard) { + fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label); + err = 1; + } + } + } + + /* check if non-wildcard entry is already covered by wildcard entry */ + for (it = 0; it < psl->suffixes->cur; it++) { + const char *p; + _psl_entry_t *e = _vector_get(psl->suffixes, it); + + if (e->nlabels > 1 && !e->wildcard && (p = strchr(e->label, '.'))) { + _psl_entry_t *e2, *e3, suffix; + + suffix.label = p + 1; + suffix.length = strlen(p + 1); + suffix.nlabels = e->nlabels - 1; + + e2 = _vector_get(psl->suffixes, pos = _vector_find(psl->suffixes, &suffix)); + + if (e2) { + if (e2->wildcard) { + fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label); + err = 1; + } + /* Two same domains in a row are allowed: wildcard and non-wildcard. + * Binary search find either of them, so also check previous and next entry. 
*/ + else if (pos > 0 && _suffix_compare(e2, e3 = _vector_get(psl->suffixes, pos - 1)) == 0 && e3->wildcard) { + fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label); + err = 1; + } + else if (pos < psl->suffixes->cur - 1 && _suffix_compare(e2, e3 = _vector_get(psl->suffixes, pos + 1)) == 0 && e3->wildcard) { + fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label); + err = 1; + } + } + } + } + + return err; +} + static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *varname) { int it; @@ -152,6 +255,12 @@ int main(int argc, const char **argv) if (!(psl = psl_load_file(argv[1]))) return 2; + /* look for ambigious or double entries */ + if (_check_psl(psl)) { + psl_free(psl); + return 5; + } + if ((fpout = fopen(argv[2], "w"))) { FILE *pp; struct stat st; From 0652af910e21896daeaa2c86ea2f64c6f2148541 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Tue, 15 Sep 2015 12:19:18 +0200 Subject: [PATCH 02/18] Fix SIGSEGV in test-is-public-all.c for TLD exceptions --- tests/test-is-public-all.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test-is-public-all.c b/tests/test-is-public-all.c index 22e308a..0eec25d 100644 --- a/tests/test-is-public-all.c +++ b/tests/test-is-public-all.c @@ -80,10 +80,12 @@ static void test_psl(void) printf("psl_is_public_suffix(%s)=%d (expected 0)\n", p, result); } else ok++; - if (!(result = psl_is_public_suffix(psl, strchr(p, '.') + 1))) { - failed++; - printf("psl_is_public_suffix(%s)=%d (expected 1)\n", strchr(p, '.') + 1, result); - } else ok++; + if ((p = strchr(p, '.'))) { + if (!(result = psl_is_public_suffix(psl, p + 1))) { + failed++; + printf("psl_is_public_suffix(%s)=%d (expected 1)\n", p + 1, result); + } else ok++; + } } else if (*p == '*') { /* a wildcard, e.g. 
*.ck */ if (!(result = psl_is_public_suffix(psl, p + 1))) { From fa55bb75c4ff951ea6f5be289d2f45d23e4240e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Tue, 15 Sep 2015 14:46:07 +0200 Subject: [PATCH 03/18] Removed test with wrong assumptions --- tests/test-is-public-all.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/test-is-public-all.c b/tests/test-is-public-all.c index 0eec25d..0fea0ce 100644 --- a/tests/test-is-public-all.c +++ b/tests/test-is-public-all.c @@ -104,12 +104,6 @@ static void test_psl(void) failed++; printf("psl_is_public_suffix(%s)=%d (expected 1)\n", p, result); } else ok++; - - snprintf(domain, sizeof(domain), "xxxx.%s", p); - if ((result = psl_is_public_suffix(psl, domain))) { - failed++; - printf("psl_is_public_suffix(%s)=%d (expected 0)\n", domain, result); - } else ok++; } } From 597709cb11b9a125457035b5113c32aba78c72a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Tue, 15 Sep 2015 14:49:53 +0200 Subject: [PATCH 04/18] Support combination of foo.bar and *.foo.bar --- src/psl.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/src/psl.c b/src/psl.c index 538252b..224eb88 100644 --- a/src/psl.c +++ b/src/psl.c @@ -7,10 +7,10 @@ * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -18,7 +18,7 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. - * + * * This file is part of libpsl. * * Public Suffix List routines @@ -172,7 +172,7 @@ static const psl_ctx_t static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t **, const _psl_entry_t **)) { _psl_vector_t *v; - + if (!(v = calloc(1, sizeof(_psl_vector_t)))) return NULL; @@ -267,7 +267,7 @@ static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2) if ((n = s1->length - s2->length)) return n; /* shorter rules first */ - return strcmp(s1->label, s2->label ? s2->label : s2->label_buf); + return strcmp(s1->label ? s1->label : s1->label_buf, s2->label ? s2->label : s2->label_buf); } /* needed to sort array of pointers, given to qsort() */ @@ -357,6 +357,8 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) length_bak = suffix.length; if ((suffix.label = strchr(suffix.label, '.'))) { + int pos = rule - suffixes; + suffix.label++; suffix.length = strlen(suffix.label); suffix.nlabels--; @@ -364,9 +366,30 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) if (psl == &_builtin_psl) rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare); else - rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix)); + rule = _vector_get(psl->suffixes, (pos = _vector_find(psl->suffixes, &suffix))); if (rule) { + if (!rule->wildcard) { + /* Due to binary search ambiguity we need the following check of neighbour entries. + * TODO: The data structures needs a revision: wildcard and non-wildcard entries must be separated. 
*/ + if (psl == &_builtin_psl) { + pos = rule - suffixes; + + if (pos > 0 && _suffix_compare(rule, &suffixes[pos - 1]) == 0 && suffixes[pos -1].wildcard) + rule = &suffixes[pos - 1]; + else if (pos < (int) (countof(suffixes) - 1) && _suffix_compare(rule, &suffixes[pos + 1]) == 0 && suffixes[pos + 1].wildcard) + rule = &suffixes[pos + 1]; + } else { + _psl_entry_t *e; + + if (pos > 0 && _suffix_compare(rule, e = _vector_get(psl->suffixes, pos - 1)) == 0 && e->wildcard) { + rule = e; + } + else if (pos < psl->suffixes->cur - 1 && _suffix_compare(rule, e = _vector_get(psl->suffixes, pos + 1)) == 0 && e->wildcard) { + rule = e; + } + } + } if (rule->wildcard) { /* now that we matched a wildcard, we have to check for an exception */ suffix.label = label_bak; From a9c71d6de518cb2e3bd5ce5545bb713ba51dc9ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Tue, 15 Sep 2015 14:50:58 +0200 Subject: [PATCH 05/18] Update submodule list --- list | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/list b/list index f1c4849..2930bb4 160000 --- a/list +++ b/list @@ -1 +1 @@ -Subproject commit f1c4849aefa4c61257aba5b595be5d552c99144d +Subproject commit 2930bb4a5256279e0f7ba44cf9d174fc93ecb732 From 4c8d3c4d73bc90168d9bf53660c7c2319fdf663c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Tue, 15 Sep 2015 15:16:04 +0200 Subject: [PATCH 06/18] Remove unused variable from tests/test-is-public-all.c --- tests/test-is-public-all.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-is-public-all.c b/tests/test-is-public-all.c index 0fea0ce..9bc7ea0 100644 --- a/tests/test-is-public-all.c +++ b/tests/test-is-public-all.c @@ -56,7 +56,7 @@ static void test_psl(void) FILE *fp; psl_ctx_t *psl; int result; - char buf[256], domain[64], *linep, *p; + char buf[256], *linep, *p; psl = psl_load_file(PSL_FILE); /* PSL_FILE can be set by ./configure --with-psl-file=[PATH] */ From fb942952ec0552e0df157b76c1f80316a7ba97d6 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Tue, 15 Sep 2015 15:17:20 +0200 Subject: [PATCH 07/18] Release v0.9.0 --- NEWS | 6 ++++++ configure.ac | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 12b430c..a2f131c 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,11 @@ Copyright (C) 2014 Tim Rühsen +15.09.2015 Release V0.9.0 + * Added semantic checks to PSL entries when generating built-in data + * Fix test suite for TLD exceptions (not used yet in reality) + * Removed wrong assumption from test suite + * Support explicit combination of 'foo.bar' and '*.foo.bar' + 14.08.2015 Release V0.8.1 * Fix documentation * Add syntax checking of tests_psl.txt diff --git a/configure.ac b/configure.ac index fdf14e8..399565d 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ -AC_INIT([libpsl], [0.8.1], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) +AC_INIT([libpsl], [0.9.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) AC_PREREQ([2.59]) AM_INIT_AUTOMAKE([1.10 -Wall no-define foreign]) @@ -76,7 +76,7 @@ PKG_PROG_PKG_CONFIG # 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0. # 5. If any interfaces have been added since the last public release, then increment age. # 6. If any existing interfaces have been removed or changed since the last public release, then set age to 0. 
-AC_SUBST([LIBPSL_SO_VERSION], [2:5:2]) +AC_SUBST([LIBPSL_SO_VERSION], [2:6:2]) AC_SUBST([LIBPSL_VERSION], $VERSION) # Check for enable/disable builtin PSL data From e443d21b611e27e8c220a96605bfaebcb8182a8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Sat, 19 Sep 2015 10:50:00 +0200 Subject: [PATCH 08/18] Code cleanup, faster lookups --- Makefile.am | 2 +- src/psl.c | 171 +++++++++++++++++++++++----------------------------- src/psl2c.c | 46 +++----------- 3 files changed, 86 insertions(+), 133 deletions(-) diff --git a/Makefile.am b/Makefile.am index d27fa22..d488ce4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -14,4 +14,4 @@ ACLOCAL_AMFLAGS = -I m4 ${ACLOCAL_FLAGS} pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = libpsl.pc -EXTRA_DIST = config.rpath LICENSE list/public_suffix_list.dat list/tests/test_psl.txt +EXTRA_DIST = config.rpath LICENSE $(PSL_FILE) list/tests/test_psl.txt diff --git a/src/psl.c b/src/psl.c index 224eb88..40003fa 100644 --- a/src/psl.c +++ b/src/psl.c @@ -123,6 +123,10 @@ static char *strndup(const char *s, size_t n) #define countof(a) (sizeof(a)/sizeof(*(a))) +#define _PSL_FLAG_PLAIN (1<<0) +#define _PSL_FLAG_EXCEPTION (1<<1) +#define _PSL_FLAG_WILDCARD (1<<2) + typedef struct { char label_buf[48]; @@ -132,7 +136,7 @@ typedef struct { length; unsigned char nlabels, /* number of labels */ - wildcard; /* this is a wildcard rule (e.g. 
*.sapporo.jp) */ + flags; } _psl_entry_t; /* stripped down version libmget vector routines */ @@ -148,8 +152,10 @@ typedef struct { struct _psl_ctx_st { _psl_vector_t - *suffixes, - *suffix_exceptions; + *suffixes; + int + nsuffixes, + nexceptions; }; /* include the PSL data compiled by 'psl2c' */ @@ -158,9 +164,10 @@ struct _psl_ctx_st { #else /* if this source file is included by psl2c.c, provide empty builtin data */ static _psl_entry_t suffixes[1]; - static _psl_entry_t suffix_exceptions[1]; static time_t _psl_file_time; static time_t _psl_compile_time; + static int _psl_nsuffixes; + static int _psl_nexceptions; static const char _psl_sha1_checksum[] = ""; static const char _psl_filename[] = ""; #endif @@ -251,11 +258,6 @@ static void _vector_sort(_psl_vector_t *v) qsort(v->entry, v->cur, sizeof(_psl_vector_t **), (int(*)(const void *, const void *))v->cmp); } -static int _vector_size(_psl_vector_t *v) -{ - return v ? v->cur : 0; -} - /* by this kind of sorting, we can easily see if a domain matches or not */ static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2) { @@ -289,19 +291,7 @@ static int _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length) return -1; } - if (*rule == '*') { - if (*++rule != '.') { - suffix->nlabels = 0; - /* fprintf(stderr, _("Unsupported kind of rule (ignored): %s\n"), rule); */ - return -2; - } - rule++; - suffix->wildcard = 1; - suffix->length = (unsigned char)length - 2; - } else { - suffix->wildcard = 0; - suffix->length = (unsigned char)length; - } + suffix->length = (unsigned char)length; suffix->nlabels = 1; @@ -318,19 +308,24 @@ static int _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length) static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) { _psl_entry_t suffix, *rule; - const char *p, *label_bak; - unsigned short length_bak; + const char *p; /* this function should be called without leading dots, just make sure */ suffix.label = domain + (*domain == 
'.'); suffix.length = strlen(suffix.label); - suffix.wildcard = 0; suffix.nlabels = 1; for (p = suffix.label; *p; p++) if (*p == '.') suffix.nlabels++; + if (suffix.nlabels == 1) { + /* TLD, this is the prevailing '*' match. + * We don't currently support exception TLDs (TLDs that are not a public suffix) + */ + return 1; + } + /* if domain has enough labels, it is public */ if (psl == &_builtin_psl) rule = &suffixes[0]; @@ -347,15 +342,12 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) if (rule) { /* definitely a match, no matter if the found rule is a wildcard or not */ - return 1; - } else if (suffix.nlabels == 1) { - /* unknown TLD, this is the prevailing '*' match */ - return 1; + if (rule->flags & _PSL_FLAG_EXCEPTION) + return 0; + if (rule->flags & _PSL_FLAG_PLAIN) + return 1; } - label_bak = suffix.label; - length_bak = suffix.length; - if ((suffix.label = strchr(suffix.label, '.'))) { int pos = rule - suffixes; @@ -369,43 +361,8 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) rule = _vector_get(psl->suffixes, (pos = _vector_find(psl->suffixes, &suffix))); if (rule) { - if (!rule->wildcard) { - /* Due to binary search ambiguity we need the following check of neighbour entries. - * TODO: The data structures needs a revision: wildcard and non-wildcard entries must be separated. 
*/ - if (psl == &_builtin_psl) { - pos = rule - suffixes; - - if (pos > 0 && _suffix_compare(rule, &suffixes[pos - 1]) == 0 && suffixes[pos -1].wildcard) - rule = &suffixes[pos - 1]; - else if (pos < (int) (countof(suffixes) - 1) && _suffix_compare(rule, &suffixes[pos + 1]) == 0 && suffixes[pos + 1].wildcard) - rule = &suffixes[pos + 1]; - } else { - _psl_entry_t *e; - - if (pos > 0 && _suffix_compare(rule, e = _vector_get(psl->suffixes, pos - 1)) == 0 && e->wildcard) { - rule = e; - } - else if (pos < psl->suffixes->cur - 1 && _suffix_compare(rule, e = _vector_get(psl->suffixes, pos + 1)) == 0 && e->wildcard) { - rule = e; - } - } - } - if (rule->wildcard) { - /* now that we matched a wildcard, we have to check for an exception */ - suffix.label = label_bak; - suffix.length = length_bak; - suffix.nlabels++; - - if (psl == &_builtin_psl) { - if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare)) - return 0; /* found an exception, so 'domain' is not a public suffix */ - } else { - if (_vector_get(psl->suffix_exceptions, _vector_find(psl->suffix_exceptions, &suffix)) != 0) - return 0; /* found an exception, so 'domain' is not a public suffix */ - } - + if ((rule->flags & _PSL_FLAG_WILDCARD)) return 1; - } } } @@ -596,7 +553,7 @@ static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t /* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); - suffix.wildcard = e->wildcard; + suffix.flags = e->flags; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ } /* else ignore */ @@ -641,7 +598,7 @@ static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e) /* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); - suffix.wildcard = 
e->wildcard; + suffix.flags = e->flags; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ } /* else ignore */ @@ -673,7 +630,7 @@ static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e) /* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); - suffix.wildcard = e->wildcard; + suffix.flags = e->flags; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ } /* else ignore */ @@ -749,7 +706,6 @@ psl_ctx_t *psl_load_fp(FILE *fp) * as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions. */ psl->suffixes = _vector_alloc(8*1024, _suffix_compare_array); - psl->suffix_exceptions = _vector_alloc(64, _suffix_compare_array); while ((linep = fgets(buf, sizeof(buf), fp))) { while (_isspace_ascii(*linep)) linep++; /* ignore leading whitespace */ @@ -763,31 +719,57 @@ psl_ctx_t *psl_load_fp(FILE *fp) *linep = 0; if (*p == '!') { - /* add to exceptions */ - if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0) { - suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix)); - suffixp->label = suffixp->label_buf; /* set label to changed address */ -#ifdef WITH_LIBICU - _add_punycode_if_needed(idna, psl->suffix_exceptions, suffixp); -#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) - _add_punycode_if_needed(psl->suffix_exceptions, suffixp); -#endif + p++; + suffix.flags = _PSL_FLAG_EXCEPTION; + psl->nexceptions++; + } else if (*p == '*') { + if (*++p != '.') { + /* fprintf(stderr, _("Unsupported kind of rule (ignored): %s\n"), p - 1); */ + continue; } + p++; + /* wildcard *.foo.bar implicitely make foo.bar a public suffix */ + suffix.flags = _PSL_FLAG_WILDCARD | _PSL_FLAG_PLAIN; + psl->nsuffixes++; } else { - /* add to suffixes */ - if (_suffix_init(&suffix, p, linep - p) == 0) { 
+ if (!strchr(p, '.')) + continue; /* we do not need an explicit plain TLD rule, already covered by implicit '*' rule */ + suffix.flags = _PSL_FLAG_PLAIN; + psl->nsuffixes++; + } + + if (_suffix_init(&suffix, p, linep - p) == 0) { + int index; + + if ((index = _vector_find(psl->suffixes, &suffix)) >= 0) { + /* Found existing entry: + * Combination of exception and plain rule is ambigous + * !foo.bar + * foo.bar + * + * Allowed: + * !foo.bar + *.foo.bar + * foo.bar + *.foo.bar + * + * We do not check here, let's do it later. + */ + + suffixp = _vector_get(psl->suffixes, index); + suffixp->flags |= suffix.flags; + } else { + /* New entry */ suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix)); - suffixp->label = suffixp->label_buf; /* set label to changed address */ -#ifdef WITH_LIBICU - _add_punycode_if_needed(idna, psl->suffixes, suffixp); -#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) - _add_punycode_if_needed(psl->suffixes, suffixp); -#endif } + + suffixp->label = suffixp->label_buf; /* set label to changed address */ +#ifdef WITH_LIBICU + _add_punycode_if_needed(idna, psl->suffixes, suffixp); +#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) + _add_punycode_if_needed(psl->suffixes, suffixp); +#endif } } - _vector_sort(psl->suffix_exceptions); _vector_sort(psl->suffixes); #ifdef WITH_LIBICU @@ -811,7 +793,6 @@ void psl_free(psl_ctx_t *psl) { if (psl && psl != &_builtin_psl) { _vector_free(&psl->suffixes); - _vector_free(&psl->suffix_exceptions); free(psl); } } @@ -855,9 +836,9 @@ const psl_ctx_t *psl_builtin(void) int psl_suffix_count(const psl_ctx_t *psl) { if (psl == &_builtin_psl) - return countof(suffixes); + return _psl_nsuffixes; else if (psl) - return _vector_size(psl->suffixes); + return psl->nsuffixes; else return 0; } @@ -875,9 +856,9 @@ int psl_suffix_count(const psl_ctx_t *psl) int psl_suffix_exception_count(const psl_ctx_t *psl) { if (psl == &_builtin_psl) - return countof(suffix_exceptions); + return _psl_nexceptions; 
else if (psl) - return _vector_size(psl->suffix_exceptions); + return psl->nexceptions; else return 0; } diff --git a/src/psl2c.c b/src/psl2c.c index 14bc4b9..6daa722 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -54,41 +54,11 @@ # include "psl.c" #undef _LIBPSL_INCLUDED_BY_PSL2C -static int _check_psl_entries(const _psl_vector_t *v) -{ - int it, doublet = 0, err = 0; - - for (it = 0; it < v->cur - 1; it++) { - _psl_entry_t *cur = _vector_get(v, it); - _psl_entry_t *next = _vector_get(v, it + 1); - - if (_suffix_compare(cur, next) == 0) { - /* we allow '*.foo' and 'foo' */ - if (cur->wildcard == next->wildcard) { - fprintf(stderr, "Double entry '%s' detected\n", cur->label); - err = 1; - } - else if (++doublet > 1) { - fprintf(stderr, "Double entry '%s' detected\n", cur->label); - err = 1; - } - } else - doublet = 0; - } - - return err; -} - +#if 0 static int _check_psl(const psl_ctx_t *psl) { int it, pos, err = 0; - if (_check_psl_entries(psl->suffixes)) - err = 1; - - if (_check_psl_entries(psl->suffix_exceptions)) - err = 1; - /* check if plain suffix also appears in exceptions */ for (it = 0; it < psl->suffixes->cur; it++) { _psl_entry_t *e = _vector_get(psl->suffixes, it); @@ -156,6 +126,7 @@ static int _check_psl(const psl_ctx_t *psl) return err; } +#endif static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *varname) { @@ -184,7 +155,7 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char * _psl_entry_t *e = _vector_get(v, it); fprintf(fpout, "\t{ \"%s\", NULL, %hd, %d, %d },\n", - e->label_buf, e->length, (int) e->nlabels, (int) e->wildcard); + e->label_buf, e->length, (int) e->nlabels, (int) e->flags); } fprintf(fpout, "};\n"); @@ -256,11 +227,11 @@ int main(int argc, const char **argv) return 2; /* look for ambigious or double entries */ - if (_check_psl(psl)) { +/* if (_check_psl(psl)) { psl_free(psl); return 5; } - +*/ if ((fpout = fopen(argv[2], "w"))) { FILE *pp; struct stat st; @@ -271,11 +242,9 
@@ int main(int argc, const char **argv) #if 0 /* include library code did not generate punycode, so let's do it for the builtin data */ _add_punycode_if_needed(psl->suffixes); - _add_punycode_if_needed(psl->suffix_exceptions); #endif _print_psl_entries(fpout, psl->suffixes, "suffixes"); - _print_psl_entries(fpout, psl->suffix_exceptions, "suffix_exceptions"); snprintf(cmd, cmdsize, "sha1sum %s", argv[1]); if ((pp = popen(cmd, "r"))) { @@ -291,6 +260,8 @@ int main(int argc, const char **argv) fprintf(fpout, "static time_t _psl_compile_time = %lu;\n", atol(source_date_epoch)); else fprintf(fpout, "static time_t _psl_compile_time = %lu;\n", time(NULL)); + fprintf(fpout, "static int _psl_nsuffixes = %d;\n", psl->nsuffixes); + fprintf(fpout, "static int _psl_nexceptions = %d;\n", psl->nexceptions); fprintf(fpout, "static const char _psl_sha1_checksum[] = \"%s\";\n", checksum); fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[1]); @@ -305,9 +276,10 @@ int main(int argc, const char **argv) #else if ((fpout = fopen(argv[2], "w"))) { fprintf(fpout, "static _psl_entry_t suffixes[1];\n"); - fprintf(fpout, "static _psl_entry_t suffix_exceptions[1];\n"); fprintf(fpout, "static time_t _psl_file_time;\n"); fprintf(fpout, "static time_t _psl_compile_time;\n"); + fprintf(fpout, "static int _psl_nsuffixes = 0;\n"); + fprintf(fpout, "static int _psl_nexceptions = 0;\n"); fprintf(fpout, "static const char _psl_sha1_checksum[] = \"\";\n"); fprintf(fpout, "static const char _psl_filename[] = \"\";\n"); From 34289fa59b6e7e8ac89d0dce303f2603cf70ca9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Sat, 19 Sep 2015 10:55:09 +0200 Subject: [PATCH 09/18] Add function psl_suffix_wildcard_count() --- include/libpsl.h | 3 +++ src/psl.c | 25 ++++++++++++++++++++++++- src/psl2c.c | 2 ++ tools/psl.c | 2 ++ 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/include/libpsl.h b/include/libpsl.h index 39a5017..c059198 100644 --- a/include/libpsl.h +++ 
b/include/libpsl.h @@ -95,6 +95,9 @@ int /* just counts exceptions */ int psl_suffix_exception_count(const psl_ctx_t *psl); +/* just counts wildcards */ +int + psl_suffix_wildcard_count(const psl_ctx_t *psl); /* returns compilation time */ time_t psl_builtin_compile_time(void); diff --git a/src/psl.c b/src/psl.c index 40003fa..d311778 100644 --- a/src/psl.c +++ b/src/psl.c @@ -155,7 +155,8 @@ struct _psl_ctx_st { *suffixes; int nsuffixes, - nexceptions; + nexceptions, + nwildcards; }; /* include the PSL data compiled by 'psl2c' */ @@ -168,6 +169,7 @@ struct _psl_ctx_st { static time_t _psl_compile_time; static int _psl_nsuffixes; static int _psl_nexceptions; + static int _psl_nwildcards; static const char _psl_sha1_checksum[] = ""; static const char _psl_filename[] = ""; #endif @@ -730,6 +732,7 @@ psl_ctx_t *psl_load_fp(FILE *fp) p++; /* wildcard *.foo.bar implicitely make foo.bar a public suffix */ suffix.flags = _PSL_FLAG_WILDCARD | _PSL_FLAG_PLAIN; + psl->nwildcards++; psl->nsuffixes++; } else { if (!strchr(p, '.')) @@ -863,6 +866,26 @@ int psl_suffix_exception_count(const psl_ctx_t *psl) return 0; } +/** + * psl_suffix_wildcard_count: + * @psl: PSL context pointer + * + * This function returns number of public suffix wildcards maintained by @psl. + * + * Returns: Number of public suffix wildcards in PSL context. 
+ * + * Since: 0.10.0 + */ +int psl_suffix_wildcard_count(const psl_ctx_t *psl) +{ + if (psl == &_builtin_psl) + return _psl_nwildcards; + else if (psl) + return psl->nwildcards; + else + return 0; +} + /** * psl_builtin_compile_time: * diff --git a/src/psl2c.c b/src/psl2c.c index 6daa722..86b3c6e 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -262,6 +262,7 @@ int main(int argc, const char **argv) fprintf(fpout, "static time_t _psl_compile_time = %lu;\n", time(NULL)); fprintf(fpout, "static int _psl_nsuffixes = %d;\n", psl->nsuffixes); fprintf(fpout, "static int _psl_nexceptions = %d;\n", psl->nexceptions); + fprintf(fpout, "static int _psl_nwildcards = %d;\n", psl->nwildcards); fprintf(fpout, "static const char _psl_sha1_checksum[] = \"%s\";\n", checksum); fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[1]); @@ -280,6 +281,7 @@ int main(int argc, const char **argv) fprintf(fpout, "static time_t _psl_compile_time;\n"); fprintf(fpout, "static int _psl_nsuffixes = 0;\n"); fprintf(fpout, "static int _psl_nexceptions = 0;\n"); + fprintf(fpout, "static int _psl_nwildcards = 0;\n"); fprintf(fpout, "static const char _psl_sha1_checksum[] = \"\";\n"); fprintf(fpout, "static const char _psl_filename[] = \"\";\n"); diff --git a/tools/psl.c b/tools/psl.c index e1969c2..2756ca2 100644 --- a/tools/psl.c +++ b/tools/psl.c @@ -200,6 +200,7 @@ int main(int argc, const char *const *argv) if (psl && psl != psl_builtin()) { printf("suffixes: %d\n", psl_suffix_count(psl)); printf("exceptions: %d\n", psl_suffix_exception_count(psl)); + printf("wildcards: %d\n", psl_suffix_wildcard_count(psl)); } psl_free(psl); @@ -208,6 +209,7 @@ int main(int argc, const char *const *argv) if (psl) { printf("builtin suffixes: %d\n", psl_suffix_count(psl)); printf("builtin exceptions: %d\n", psl_suffix_exception_count(psl)); + printf("builtin wildcards: %d\n", psl_suffix_wildcard_count(psl)); printf("builtin filename: %s\n", psl_builtin_filename()); printf("builtin compile time: %ld 
(%s)\n", psl_builtin_compile_time(), time2str(psl_builtin_compile_time())); printf("builtin file time: %ld (%s)\n", psl_builtin_file_time(), time2str(psl_builtin_file_time())); From 6a8f33ee39d452e999cfde5fa473298696a88672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Sat, 19 Sep 2015 14:00:49 +0200 Subject: [PATCH 10/18] Add new function psl_builtin_outdated() --- docs/libpsl/libpsl-sections.txt | 1 + include/libpsl.h | 4 +++- src/psl.c | 27 +++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/docs/libpsl/libpsl-sections.txt b/docs/libpsl/libpsl-sections.txt index 4c73e47..2c43358 100644 --- a/docs/libpsl/libpsl-sections.txt +++ b/docs/libpsl/libpsl-sections.txt @@ -16,6 +16,7 @@ psl_builtin_compile_time psl_builtin_file_time psl_builtin_sha1sum psl_builtin_filename +psl_builtin_outdated psl_is_cookie_domain_acceptable psl_get_version psl_str_to_utf8lower diff --git a/include/libpsl.h b/include/libpsl.h index c059198..aa223e4 100644 --- a/include/libpsl.h +++ b/include/libpsl.h @@ -113,7 +113,9 @@ const char * /* returns library version */ const char * psl_get_version(void); - +/* returns wether the built-in data is outdated or not */ +int + psl_builtin_outdated(void); #ifdef __cplusplus } diff --git a/src/psl.c b/src/psl.c index d311778..6108aef 100644 --- a/src/psl.c +++ b/src/psl.c @@ -54,6 +54,9 @@ # define ngettext(STRING1,STRING2,N) STRING2 #endif +#include +#include +#include #include #include #include @@ -84,6 +87,7 @@ #endif #include +#include /* number of elements within an array */ #define countof(a) (sizeof(a)/sizeof(*(a))) @@ -951,6 +955,29 @@ const char *psl_builtin_filename(void) return _psl_filename; } +/** + * psl_builtin_outdated: + * + * This function checks if the built-in data is older than the file it has been created from. + * If it is, it might be a good idea for the application to reload the PSL. + * The mtime is taken as reference. 
+ * + * If the PSL file does not exist, it is assumed that the built-in data is not outdated. + * + * Returns: 1 if the built-in is outdated, 0 otherwise. + * + * Since: 0.10.0 + */ +int psl_builtin_outdated(void) +{ + struct stat st; + + if (stat(_psl_filename, &st) == 0 && st.st_mtime > _psl_file_time) + return 1; + + return 0; +} + /** * psl_get_version: * From 3bf7275a1775ecd74a1da9b010997866e252ab7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Sat, 19 Sep 2015 14:02:49 +0200 Subject: [PATCH 11/18] Add psl_suffix_wildcard_count() to docs --- docs/libpsl/libpsl-sections.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/libpsl/libpsl-sections.txt b/docs/libpsl/libpsl-sections.txt index 2c43358..b0b66d7 100644 --- a/docs/libpsl/libpsl-sections.txt +++ b/docs/libpsl/libpsl-sections.txt @@ -12,6 +12,7 @@ psl_unregistrable_domain psl_registrable_domain psl_suffix_count psl_suffix_exception_count +psl_suffix_wildcard_count psl_builtin_compile_time psl_builtin_file_time psl_builtin_sha1sum From 789d7ad13244e0f2e606a79b2c8c161f41e24b41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Sat, 19 Sep 2015 14:12:49 +0200 Subject: [PATCH 12/18] Release v0.10.0 --- NEWS | 6 ++++++ configure.ac | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index a2f131c..6e7c53b 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,11 @@ Copyright (C) 2014 Tim Rühsen +19.09.2025 Release V0.10.0 + * Code simplified + * Less data entries, faster lookups + * Add new function psl_suffix_wildcard_count() + * Add new helper function psl_builtin_outdated() + 15.09.2015 Release V0.9.0 * Added semantic checks to PSL entries when generating built-in data * Fix test suite for TLD exceptions (not used yet in reality) diff --git a/configure.ac b/configure.ac index 399565d..9e7fa06 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ -AC_INIT([libpsl], [0.9.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl])
+AC_INIT([libpsl], [0.10.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) AC_PREREQ([2.59]) AM_INIT_AUTOMAKE([1.10 -Wall no-define foreign]) @@ -76,7 +76,7 @@ PKG_PROG_PKG_CONFIG # 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0. # 5. If any interfaces have been added since the last public release, then increment age. # 6. If any existing interfaces have been removed or changed since the last public release, then set age to 0. -AC_SUBST([LIBPSL_SO_VERSION], [2:6:2]) +AC_SUBST([LIBPSL_SO_VERSION], [3:0:3]) AC_SUBST([LIBPSL_VERSION], $VERSION) # Check for enable/disable builtin PSL data From eabf39c174b37928fb65bb8974cfd386022ad462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Wed, 23 Sep 2015 12:38:34 +0200 Subject: [PATCH 13/18] Add version numbers to libpsl.h --- configure.ac | 9 +++ include/libpsl.h.in | 130 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 include/libpsl.h.in diff --git a/configure.ac b/configure.ac index 9e7fa06..cf16859 100644 --- a/configure.ac +++ b/configure.ac @@ -17,6 +17,15 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) dnl Check that compiler understands inline AC_C_INLINE +# +# Generate version defines for include file +# +AC_SUBST([LIBPSL_VERSION_MAJOR], [`echo -n $VERSION|cut -d'.' -f1`]) +AC_SUBST([LIBPSL_VERSION_MINOR], [`echo -n $VERSION|cut -d'.' -f2`]) +AC_SUBST([LIBPSL_VERSION_PATCH], [`echo -n $VERSION|cut -d'.' 
-f3`]) +AC_SUBST([LIBPSL_VERSION_NUMBER], [`printf '0x%02x%02x%02x' $LIBPSL_VERSION_MAJOR $LIBPSL_VERSION_MINOR $LIBPSL_VERSION_PATCH`]) +AC_CONFIG_FILES([include/libpsl.h]) + # # Gettext # diff --git a/include/libpsl.h.in b/include/libpsl.h.in new file mode 100644 index 0000000..87f4594 --- /dev/null +++ b/include/libpsl.h.in @@ -0,0 +1,130 @@ +/* + * Copyright(c) 2014 Tim Ruehsen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * This file is part of libpsl. 
+ * + * Header file for libpsl library routines + * + * Changelog + * 20.03.2014 Tim Ruehsen created + * + */ + +#ifndef _LIBPSL_LIBPSL_H +#define _LIBPSL_LIBPSL_H + +#include +#include + +#define PSL_VERSION "@LIBPSL_VERSION@" +#define PSL_VERSION_MAJOR @LIBPSL_VERSION_MAJOR@ +#define PSL_VERSION_MINOR @LIBPSL_VERSION_MINOR@ +#define PSL_VERSION_PATCH @LIBPSL_VERSION_PATCH@ +#define PSL_VERSION_NUMBER @LIBPSL_VERSION_NUMBER@ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * psl_error_t: + * @PSL_SUCCESS: Successful return. + * @PSL_ERR_INVALID_ARG: Invalid argument. + * @PSL_ERR_CONVERTER: Failed to open libicu utf-16 converter + * @PSL_ERR_TO_UTF16: Failed to convert to utf-16. + * @PSL_ERR_TO_LOWER: Failed to convert utf-16 to lowercase. + * @PSL_ERR_TO_UTF8: Failed to convert utf-16 to utf-8. + * + * Return codes for PSL functions. + * Negative return codes mean failure. + * Positive values are reserved for non-error return codes. + */ +typedef enum { + PSL_SUCCESS = 0, + PSL_ERR_INVALID_ARG = -1, + PSL_ERR_CONVERTER = -2, /* failed to open libicu utf-16 converter */ + PSL_ERR_TO_UTF16 = -3, /* failed to convert to utf-16 */ + PSL_ERR_TO_LOWER = -4, /* failed to convert utf-16 to lowercase */ + PSL_ERR_TO_UTF8 = -5 /* failed to convert utf-16 to utf-8 */ +} psl_error_t; + +typedef struct _psl_ctx_st psl_ctx_t; + +/* frees PSL context */ +void + psl_free(psl_ctx_t *psl); +/* loads PSL data from file */ +psl_ctx_t * + psl_load_file(const char *fname); +/* loads PSL data from FILE pointer */ +psl_ctx_t * + psl_load_fp(FILE *fp); +/* retrieves builtin PSL data */ +const psl_ctx_t * + psl_builtin(void); +/* checks whether domain is a public suffix or not */ +int + psl_is_public_suffix(const psl_ctx_t *psl, const char *domain); +/* checks whether cookie_domain is acceptable for domain or not */ +int + psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain); +/* returns the longest not registrable domain within 
'domain' or NULL if none found */ +const char * + psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain); +/* returns the shortest possible registrable domain part or NULL if domain is not registrable at all */ +const char * + psl_registrable_domain(const psl_ctx_t *psl, const char *domain); +/* convert a string into lowercase UTF-8 */ +psl_error_t + psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower); +/* does not include exceptions */ +int + psl_suffix_count(const psl_ctx_t *psl); +/* just counts exceptions */ +int + psl_suffix_exception_count(const psl_ctx_t *psl); +/* just counts wildcards */ +int + psl_suffix_wildcard_count(const psl_ctx_t *psl); +/* returns compilation time */ +time_t + psl_builtin_compile_time(void); +/* returns mtime of PSL source file */ +time_t + psl_builtin_file_time(void); +/* returns SHA1 checksum (hex-encoded, lowercase) of PSL source file */ +const char * + psl_builtin_sha1sum(void); +/* returns file name of PSL source file */ +const char * + psl_builtin_filename(void); +/* returns library version */ +const char * + psl_get_version(void); +/* returns wether the built-in data is outdated or not */ +int + psl_builtin_outdated(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBPSL_LIBPSL_H */ From 00b9cfb119c0edb7d86a3cb43406faa0ff68d4a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Wed, 23 Sep 2015 14:04:17 +0200 Subject: [PATCH 14/18] Add function psl_check_version_number() --- include/libpsl.h | 124 -------------------------------------------- include/libpsl.h.in | 5 +- src/psl.c | 31 +++++++++++ 3 files changed, 35 insertions(+), 125 deletions(-) delete mode 100644 include/libpsl.h diff --git a/include/libpsl.h b/include/libpsl.h deleted file mode 100644 index aa223e4..0000000 --- a/include/libpsl.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright(c) 2014 Tim Ruehsen - * - * Permission is hereby granted, free of charge, to any person obtaining a - * 
copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * This file is part of libpsl. - * - * Header file for libpsl library routines - * - * Changelog - * 20.03.2014 Tim Ruehsen created - * - */ - -#ifndef _LIBPSL_LIBPSL_H -#define _LIBPSL_LIBPSL_H - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * psl_error_t: - * @PSL_SUCCESS: Successful return. - * @PSL_ERR_INVALID_ARG: Invalid argument. - * @PSL_ERR_CONVERTER: Failed to open libicu utf-16 converter - * @PSL_ERR_TO_UTF16: Failed to convert to utf-16. - * @PSL_ERR_TO_LOWER: Failed to convert utf-16 to lowercase. - * @PSL_ERR_TO_UTF8: Failed to convert utf-16 to utf-8. - * - * Return codes for PSL functions. - * Negative return codes mean failure. - * Positive values are reserved for non-error return codes. 
- */ -typedef enum { - PSL_SUCCESS = 0, - PSL_ERR_INVALID_ARG = -1, - PSL_ERR_CONVERTER = -2, /* failed to open libicu utf-16 converter */ - PSL_ERR_TO_UTF16 = -3, /* failed to convert to utf-16 */ - PSL_ERR_TO_LOWER = -4, /* failed to convert utf-16 to lowercase */ - PSL_ERR_TO_UTF8 = -5 /* failed to convert utf-16 to utf-8 */ -} psl_error_t; - -typedef struct _psl_ctx_st psl_ctx_t; - -/* frees PSL context */ -void - psl_free(psl_ctx_t *psl); -/* loads PSL data from file */ -psl_ctx_t * - psl_load_file(const char *fname); -/* loads PSL data from FILE pointer */ -psl_ctx_t * - psl_load_fp(FILE *fp); -/* retrieves builtin PSL data */ -const psl_ctx_t * - psl_builtin(void); -/* checks whether domain is a public suffix or not */ -int - psl_is_public_suffix(const psl_ctx_t *psl, const char *domain); -/* checks whether cookie_domain is acceptable for domain or not */ -int - psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain); -/* returns the longest not registrable domain within 'domain' or NULL if none found */ -const char * - psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain); -/* returns the shortest possible registrable domain part or NULL if domain is not registrable at all */ -const char * - psl_registrable_domain(const psl_ctx_t *psl, const char *domain); -/* convert a string into lowercase UTF-8 */ -psl_error_t - psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower); -/* does not include exceptions */ -int - psl_suffix_count(const psl_ctx_t *psl); -/* just counts exceptions */ -int - psl_suffix_exception_count(const psl_ctx_t *psl); -/* just counts wildcards */ -int - psl_suffix_wildcard_count(const psl_ctx_t *psl); -/* returns compilation time */ -time_t - psl_builtin_compile_time(void); -/* returns mtime of PSL source file */ -time_t - psl_builtin_file_time(void); -/* returns SHA1 checksum (hex-encoded, lowercase) of PSL source file */ -const char * - 
psl_builtin_sha1sum(void); -/* returns file name of PSL source file */ -const char * - psl_builtin_filename(void); -/* returns library version */ -const char * - psl_get_version(void); -/* returns wether the built-in data is outdated or not */ -int - psl_builtin_outdated(void); - -#ifdef __cplusplus -} -#endif - -#endif /* _LIBPSL_LIBPSL_H */ diff --git a/include/libpsl.h.in b/include/libpsl.h.in index 87f4594..33e6bfe 100644 --- a/include/libpsl.h.in +++ b/include/libpsl.h.in @@ -116,9 +116,12 @@ const char * /* returns file name of PSL source file */ const char * psl_builtin_filename(void); -/* returns library version */ +/* returns library version string */ const char * psl_get_version(void); +/* checks library version number */ +int + psl_check_version_number(int version); /* returns wether the built-in data is outdated or not */ int psl_builtin_outdated(void); diff --git a/src/psl.c b/src/psl.c index 6108aef..130b497 100644 --- a/src/psl.c +++ b/src/psl.c @@ -1000,6 +1000,37 @@ const char *psl_get_version(void) #endif } +/** + * psl_check_version_number: + * @version: Version number (hex) to check against. + * + * Check the given version number is at minimum the current library version number. + * The version number must be a hexadecimal number like 0x000a01 (V0.10.1). + * + * Returns: Returns the library version number if the given version number is at least + * the version of the library, else return 0; If the argument is 0, the function returns + * the library version number without performing a check. 
+ * + * Since: 0.11.0 + **/ +int psl_check_version_number(int version) +{ + if (version) { + int major = version >> 16; + int minor = (version >> 8) & 0xFF; + int patch = version & 0xFF; + + if (major < PSL_VERSION_MAJOR + || (major == PSL_VERSION_MAJOR && minor < PSL_VERSION_MINOR) + || (major == PSL_VERSION_MAJOR && minor == PSL_VERSION_MINOR && patch < PSL_VERSION_PATCH)) + { + return 0; + } + } + + return PSL_VERSION_NUMBER; +} + /* return whether hostname is an IP address or not */ static int _isip(const char *hostname) { From a9524b55700da8a3a1cdd74700ca157131a1940f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Wed, 23 Sep 2015 14:15:23 +0200 Subject: [PATCH 15/18] Add PSL_VERSION defines and psl_check_version_number() to docs --- docs/libpsl/libpsl-sections.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/libpsl/libpsl-sections.txt b/docs/libpsl/libpsl-sections.txt index b0b66d7..a27a9da 100644 --- a/docs/libpsl/libpsl-sections.txt +++ b/docs/libpsl/libpsl-sections.txt @@ -1,6 +1,11 @@
libpsl Public Suffix List functions +PSL_VERSION +PSL_VERSION_MAJOR +PSL_VERSION_MINOR +PSL_VERSION_NUMBER +PSL_VERSION_PATCH psl_error_t psl_ctx_t psl_load_file @@ -20,5 +25,6 @@ psl_builtin_filename psl_builtin_outdated psl_is_cookie_domain_acceptable psl_get_version +psl_check_version_number psl_str_to_utf8lower
From f1954e5311587176f61be64277c4ca4c0c97615c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Wed, 23 Sep 2015 14:16:52 +0200 Subject: [PATCH 16/18] Add gtk-doc.m4 and include/libpsl.h to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index d96dc66..d49817a 100644 --- a/.gitignore +++ b/.gitignore @@ -38,7 +38,9 @@ docs/libpsl/libpsl.prerequisites docs/libpsl/libpsl.signals docs/libpsl/version.xml docs/libpsl/xml/ +gtk-doc.m4 gtk-doc.make +include/libpsl.h install-sh libpsl.pc libtool From 53c2fe31a8fe236a3bac1f6bdd8bf87a75585bee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Wed, 23 Sep 2015 14:50:01 +0200 Subject: [PATCH 17/18] Update copyright years --- COPYING | 2 +- LICENSE | 2 +- NEWS | 2 +- include/libpsl.h.in | 2 +- src/psl.c | 2 +- src/psl2c.c | 2 +- tests/test-is-cookie-domain-acceptable.c | 2 +- tests/test-is-public-all.c | 2 +- tests/test-is-public-builtin.c | 2 +- tests/test-is-public.c | 2 +- tests/test-registrable-domain.c | 2 +- tools/psl.c | 4 ++-- 12 files changed, 13 insertions(+), 13 deletions(-) diff --git a/COPYING b/COPYING index 90c5c79..2047187 100644 --- a/COPYING +++ b/COPYING @@ -1,4 +1,4 @@ -Copyright (C) 2014 Tim Rühsen +Copyright (C) 2014-2015 Tim Rühsen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), diff --git a/LICENSE b/LICENSE index 90c5c79..2047187 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2014 Tim Rühsen +Copyright (C) 2014-2015 Tim Rühsen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), diff --git a/NEWS b/NEWS index 6e7c53b..267603c 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Copyright (C) 2014 Tim Rühsen +Copyright (C) 2014-2015 Tim Rühsen 19.09.2025 Release V0.10.0 * Code simplified diff --git 
a/include/libpsl.h.in b/include/libpsl.h.in index 33e6bfe..4f86a50 100644 --- a/include/libpsl.h.in +++ b/include/libpsl.h.in @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/src/psl.c b/src/psl.c index 130b497..e9bcae2 100644 --- a/src/psl.c +++ b/src/psl.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/src/psl2c.c b/src/psl2c.c index 86b3c6e..e050194 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/tests/test-is-cookie-domain-acceptable.c b/tests/test-is-cookie-domain-acceptable.c index f5c9ae5..c50342d 100644 --- a/tests/test-is-cookie-domain-acceptable.c +++ b/tests/test-is-cookie-domain-acceptable.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/tests/test-is-public-all.c b/tests/test-is-public-all.c index 9bc7ea0..705e79f 100644 --- a/tests/test-is-public-all.c +++ b/tests/test-is-public-all.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/tests/test-is-public-builtin.c 
b/tests/test-is-public-builtin.c index d3c2d44..0aa5762 100644 --- a/tests/test-is-public-builtin.c +++ b/tests/test-is-public-builtin.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/tests/test-is-public.c b/tests/test-is-public.c index 828c946..5d32425 100644 --- a/tests/test-is-public.c +++ b/tests/test-is-public.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/tests/test-registrable-domain.c b/tests/test-registrable-domain.c index b263075..c9c3604 100644 --- a/tests/test-registrable-domain.c +++ b/tests/test-registrable-domain.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/tools/psl.c b/tools/psl.c index 2756ca2..6790efe 100644 --- a/tools/psl.c +++ b/tools/psl.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -120,7 +120,7 @@ int main(int argc, const char *const *argv) printf("psl %s\n", PACKAGE_VERSION); printf("libpsl %s\n", psl_get_version()); printf("\n"); - printf("Copyright (C) 2014 Tim Ruehsen\n"); + printf("Copyright (C) 2014-2015 Tim Ruehsen\n"); printf("License: MIT\n"); exit(0); } From d1dec3d16ee5c2254962fe60834b54805e772526 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Wed, 23 Sep 2015 14:52:04 +0200 
Subject: [PATCH 18/18] Release v0.11.0 --- NEWS | 4 ++++ configure.ac | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 267603c..b17e17e 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,9 @@ Copyright (C) 2014-2015 Tim Rühsen +23.09.2015 Release V0.11.0 + * Add new function psl_check_version_number() + * Add version defines to include file + 19.09.2025 Release V0.10.0 * Code simplified * Less data entries, faster lookups diff --git a/configure.ac b/configure.ac index cf16859..c3cbd2b 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ -AC_INIT([libpsl], [0.10.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) +AC_INIT([libpsl], [0.11.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) AC_PREREQ([2.59]) AM_INIT_AUTOMAKE([1.10 -Wall no-define foreign]) @@ -85,7 +85,7 @@ PKG_PROG_PKG_CONFIG # 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0. # 5. If any interfaces have been added since the last public release, then increment age. # 6. If any existing interfaces have been removed or changed since the last public release, then set age to 0. -AC_SUBST([LIBPSL_SO_VERSION], [3:0:3]) +AC_SUBST([LIBPSL_SO_VERSION], [4:0:4]) AC_SUBST([LIBPSL_VERSION], $VERSION) # Check for enable/disable builtin PSL data