diff --git a/.gitignore b/.gitignore index d96dc66..d49817a 100644 --- a/.gitignore +++ b/.gitignore @@ -38,7 +38,9 @@ docs/libpsl/libpsl.prerequisites docs/libpsl/libpsl.signals docs/libpsl/version.xml docs/libpsl/xml/ +gtk-doc.m4 gtk-doc.make +include/libpsl.h install-sh libpsl.pc libtool diff --git a/COPYING b/COPYING index 90c5c79..2047187 100644 --- a/COPYING +++ b/COPYING @@ -1,4 +1,4 @@ -Copyright (C) 2014 Tim Rühsen +Copyright (C) 2014-2015 Tim Rühsen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), diff --git a/LICENSE b/LICENSE index 90c5c79..2047187 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2014 Tim Rühsen +Copyright (C) 2014-2015 Tim Rühsen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), diff --git a/Makefile.am b/Makefile.am index d27fa22..d488ce4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -14,4 +14,4 @@ ACLOCAL_AMFLAGS = -I m4 ${ACLOCAL_FLAGS} pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = libpsl.pc -EXTRA_DIST = config.rpath LICENSE list/public_suffix_list.dat list/tests/test_psl.txt +EXTRA_DIST = config.rpath LICENSE $(PSL_FILE) list/tests/test_psl.txt diff --git a/NEWS b/NEWS index 12b430c..b17e17e 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,20 @@ -Copyright (C) 2014 Tim Rühsen +Copyright (C) 2014-2015 Tim Rühsen + +23.09.2015 Release V0.11.0 + * Add new function psl_check_version_number() + * Add version defines to include file + +19.09.2015 Release V0.10.0 + * Code simplified + * Less data entries, faster lookups + * Add new function psl_suffix_wildcard_count() + * Add new helper function psl_builtin_outdated() + +15.09.2015 Release V0.9.0 + * Added semantic checks to PSL entries when generating built-in data + * Fix test suite for TLD exceptions (not used yet in reality) + * Removed wrong assumption from test suite + * 
Support explicit combination of 'foo.bar' and '*.foo.bar' 14.08.2015 Release V0.8.1 * Fix documentation diff --git a/configure.ac b/configure.ac index fdf14e8..c3cbd2b 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ -AC_INIT([libpsl], [0.8.1], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) +AC_INIT([libpsl], [0.11.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) AC_PREREQ([2.59]) AM_INIT_AUTOMAKE([1.10 -Wall no-define foreign]) @@ -17,6 +17,15 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) dnl Check that compiler understands inline AC_C_INLINE +# +# Generate version defines for include file +# +AC_SUBST([LIBPSL_VERSION_MAJOR], [`echo -n $VERSION|cut -d'.' -f1`]) +AC_SUBST([LIBPSL_VERSION_MINOR], [`echo -n $VERSION|cut -d'.' -f2`]) +AC_SUBST([LIBPSL_VERSION_PATCH], [`echo -n $VERSION|cut -d'.' -f3`]) +AC_SUBST([LIBPSL_VERSION_NUMBER], [`printf '0x%02x%02x%02x' $LIBPSL_VERSION_MAJOR $LIBPSL_VERSION_MINOR $LIBPSL_VERSION_PATCH`]) +AC_CONFIG_FILES([include/libpsl.h]) + # # Gettext # @@ -76,7 +85,7 @@ PKG_PROG_PKG_CONFIG # 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0. # 5. If any interfaces have been added since the last public release, then increment age. # 6. If any existing interfaces have been removed or changed since the last public release, then set age to 0. -AC_SUBST([LIBPSL_SO_VERSION], [2:5:2]) +AC_SUBST([LIBPSL_SO_VERSION], [4:0:4]) AC_SUBST([LIBPSL_VERSION], $VERSION) # Check for enable/disable builtin PSL data diff --git a/docs/libpsl/libpsl-sections.txt b/docs/libpsl/libpsl-sections.txt index 4c73e47..a27a9da 100644 --- a/docs/libpsl/libpsl-sections.txt +++ b/docs/libpsl/libpsl-sections.txt @@ -1,6 +1,11 @@
libpsl Public Suffix List functions +PSL_VERSION +PSL_VERSION_MAJOR +PSL_VERSION_MINOR +PSL_VERSION_NUMBER +PSL_VERSION_PATCH psl_error_t psl_ctx_t psl_load_file @@ -12,11 +17,14 @@ psl_unregistrable_domain psl_registrable_domain psl_suffix_count psl_suffix_exception_count +psl_suffix_wildcard_count psl_builtin_compile_time psl_builtin_file_time psl_builtin_sha1sum psl_builtin_filename +psl_builtin_outdated psl_is_cookie_domain_acceptable psl_get_version +psl_check_version_number psl_str_to_utf8lower
diff --git a/include/libpsl.h b/include/libpsl.h.in similarity index 87% rename from include/libpsl.h rename to include/libpsl.h.in index 39a5017..4f86a50 100644 --- a/include/libpsl.h +++ b/include/libpsl.h.in @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -34,6 +34,12 @@ #include #include +#define PSL_VERSION "@LIBPSL_VERSION@" +#define PSL_VERSION_MAJOR @LIBPSL_VERSION_MAJOR@ +#define PSL_VERSION_MINOR @LIBPSL_VERSION_MINOR@ +#define PSL_VERSION_PATCH @LIBPSL_VERSION_PATCH@ +#define PSL_VERSION_NUMBER @LIBPSL_VERSION_NUMBER@ + #ifdef __cplusplus extern "C" { #endif @@ -95,6 +101,9 @@ int /* just counts exceptions */ int psl_suffix_exception_count(const psl_ctx_t *psl); +/* just counts wildcards */ +int + psl_suffix_wildcard_count(const psl_ctx_t *psl); /* returns compilation time */ time_t psl_builtin_compile_time(void); @@ -107,10 +116,15 @@ const char * /* returns file name of PSL source file */ const char * psl_builtin_filename(void); -/* returns library version */ +/* returns library version string */ const char * psl_get_version(void); - +/* checks library version number */ +int + psl_check_version_number(int version); +/* returns whether the built-in data is outdated or not */ +int + psl_builtin_outdated(void); #ifdef __cplusplus } diff --git a/list b/list index f1c4849..2930bb4 160000 --- a/list +++ b/list @@ -1 +1 @@ -Subproject commit f1c4849aefa4c61257aba5b595be5d552c99144d +Subproject commit 2930bb4a5256279e0f7ba44cf9d174fc93ecb732 diff --git a/src/psl.c b/src/psl.c index 538252b..e9bcae2 100644 --- a/src/psl.c +++ b/src/psl.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the 
"Software"), @@ -7,10 +7,10 @@ * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -18,7 +18,7 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. - * + * * This file is part of libpsl. * * Public Suffix List routines @@ -54,6 +54,9 @@ # define ngettext(STRING1,STRING2,N) STRING2 #endif +#include +#include +#include #include #include #include @@ -84,6 +87,7 @@ #endif #include +#include /* number of elements within an array */ #define countof(a) (sizeof(a)/sizeof(*(a))) @@ -123,6 +127,10 @@ static char *strndup(const char *s, size_t n) #define countof(a) (sizeof(a)/sizeof(*(a))) +#define _PSL_FLAG_PLAIN (1<<0) +#define _PSL_FLAG_EXCEPTION (1<<1) +#define _PSL_FLAG_WILDCARD (1<<2) + typedef struct { char label_buf[48]; @@ -132,7 +140,7 @@ typedef struct { length; unsigned char nlabels, /* number of labels */ - wildcard; /* this is a wildcard rule (e.g. 
*.sapporo.jp) */ + flags; } _psl_entry_t; /* stripped down version libmget vector routines */ @@ -148,8 +156,11 @@ typedef struct { struct _psl_ctx_st { _psl_vector_t - *suffixes, - *suffix_exceptions; + *suffixes; + int + nsuffixes, + nexceptions, + nwildcards; }; /* include the PSL data compiled by 'psl2c' */ @@ -158,9 +169,11 @@ struct _psl_ctx_st { #else /* if this source file is included by psl2c.c, provide empty builtin data */ static _psl_entry_t suffixes[1]; - static _psl_entry_t suffix_exceptions[1]; static time_t _psl_file_time; static time_t _psl_compile_time; + static int _psl_nsuffixes; + static int _psl_nexceptions; + static int _psl_nwildcards; static const char _psl_sha1_checksum[] = ""; static const char _psl_filename[] = ""; #endif @@ -172,7 +185,7 @@ static const psl_ctx_t static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t **, const _psl_entry_t **)) { _psl_vector_t *v; - + if (!(v = calloc(1, sizeof(_psl_vector_t)))) return NULL; @@ -251,11 +264,6 @@ static void _vector_sort(_psl_vector_t *v) qsort(v->entry, v->cur, sizeof(_psl_vector_t **), (int(*)(const void *, const void *))v->cmp); } -static int _vector_size(_psl_vector_t *v) -{ - return v ? v->cur : 0; -} - /* by this kind of sorting, we can easily see if a domain matches or not */ static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2) { @@ -267,7 +275,7 @@ static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2) if ((n = s1->length - s2->length)) return n; /* shorter rules first */ - return strcmp(s1->label, s2->label ? s2->label : s2->label_buf); + return strcmp(s1->label ? s1->label : s1->label_buf, s2->label ? 
s2->label : s2->label_buf); } /* needed to sort array of pointers, given to qsort() */ @@ -289,19 +297,7 @@ static int _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length) return -1; } - if (*rule == '*') { - if (*++rule != '.') { - suffix->nlabels = 0; - /* fprintf(stderr, _("Unsupported kind of rule (ignored): %s\n"), rule); */ - return -2; - } - rule++; - suffix->wildcard = 1; - suffix->length = (unsigned char)length - 2; - } else { - suffix->wildcard = 0; - suffix->length = (unsigned char)length; - } + suffix->length = (unsigned char)length; suffix->nlabels = 1; @@ -318,19 +314,24 @@ static int _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length) static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) { _psl_entry_t suffix, *rule; - const char *p, *label_bak; - unsigned short length_bak; + const char *p; /* this function should be called without leading dots, just make sure */ suffix.label = domain + (*domain == '.'); suffix.length = strlen(suffix.label); - suffix.wildcard = 0; suffix.nlabels = 1; for (p = suffix.label; *p; p++) if (*p == '.') suffix.nlabels++; + if (suffix.nlabels == 1) { + /* TLD, this is the prevailing '*' match. 
+ * We don't currently support exception TLDs (TLDs that are not a public suffix) + */ + return 1; + } + /* if domain has enough labels, it is public */ if (psl == &_builtin_psl) rule = &suffixes[0]; @@ -347,16 +348,15 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) if (rule) { /* definitely a match, no matter if the found rule is a wildcard or not */ - return 1; - } else if (suffix.nlabels == 1) { - /* unknown TLD, this is the prevailing '*' match */ - return 1; + if (rule->flags & _PSL_FLAG_EXCEPTION) + return 0; + if (rule->flags & _PSL_FLAG_PLAIN) + return 1; } - label_bak = suffix.label; - length_bak = suffix.length; - if ((suffix.label = strchr(suffix.label, '.'))) { + int pos = rule - suffixes; + suffix.label++; suffix.length = strlen(suffix.label); suffix.nlabels--; @@ -364,25 +364,11 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) if (psl == &_builtin_psl) rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare); else - rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix)); + rule = _vector_get(psl->suffixes, (pos = _vector_find(psl->suffixes, &suffix))); if (rule) { - if (rule->wildcard) { - /* now that we matched a wildcard, we have to check for an exception */ - suffix.label = label_bak; - suffix.length = length_bak; - suffix.nlabels++; - - if (psl == &_builtin_psl) { - if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare)) - return 0; /* found an exception, so 'domain' is not a public suffix */ - } else { - if (_vector_get(psl->suffix_exceptions, _vector_find(psl->suffix_exceptions, &suffix)) != 0) - return 0; /* found an exception, so 'domain' is not a public suffix */ - } - + if ((rule->flags & _PSL_FLAG_WILDCARD)) return 1; - } } } @@ -573,7 +559,7 @@ static void _add_punycode_if_needed(UIDNA *idna, 
_psl_vector_t *v, _psl_entry_t /* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); - suffix.wildcard = e->wildcard; + suffix.flags = e->flags; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ } /* else ignore */ @@ -618,7 +604,7 @@ static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e) /* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); - suffix.wildcard = e->wildcard; + suffix.flags = e->flags; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ } /* else ignore */ @@ -650,7 +636,7 @@ static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e) /* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); - suffix.wildcard = e->wildcard; + suffix.flags = e->flags; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ } /* else ignore */ @@ -726,7 +712,6 @@ psl_ctx_t *psl_load_fp(FILE *fp) * as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions. 
*/ psl->suffixes = _vector_alloc(8*1024, _suffix_compare_array); - psl->suffix_exceptions = _vector_alloc(64, _suffix_compare_array); while ((linep = fgets(buf, sizeof(buf), fp))) { while (_isspace_ascii(*linep)) linep++; /* ignore leading whitespace */ @@ -740,31 +725,58 @@ psl_ctx_t *psl_load_fp(FILE *fp) *linep = 0; if (*p == '!') { - /* add to exceptions */ - if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0) { - suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix)); - suffixp->label = suffixp->label_buf; /* set label to changed address */ -#ifdef WITH_LIBICU - _add_punycode_if_needed(idna, psl->suffix_exceptions, suffixp); -#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) - _add_punycode_if_needed(psl->suffix_exceptions, suffixp); -#endif + p++; + suffix.flags = _PSL_FLAG_EXCEPTION; + psl->nexceptions++; + } else if (*p == '*') { + if (*++p != '.') { + /* fprintf(stderr, _("Unsupported kind of rule (ignored): %s\n"), p - 1); */ + continue; } + p++; + /* wildcard *.foo.bar implicitly makes foo.bar a public suffix */ + suffix.flags = _PSL_FLAG_WILDCARD | _PSL_FLAG_PLAIN; + psl->nwildcards++; + psl->nsuffixes++; } else { - /* add to suffixes */ - if (_suffix_init(&suffix, p, linep - p) == 0) { + if (!strchr(p, '.')) + continue; /* we do not need an explicit plain TLD rule, already covered by implicit '*' rule */ + suffix.flags = _PSL_FLAG_PLAIN; + psl->nsuffixes++; + } + + if (_suffix_init(&suffix, p, linep - p) == 0) { + int index; + + if ((index = _vector_find(psl->suffixes, &suffix)) >= 0) { + /* Found existing entry: + * Combination of exception and plain rule is ambiguous + * !foo.bar + * foo.bar + * + * Allowed: + * !foo.bar + *.foo.bar + * foo.bar + *.foo.bar + * + * We do not check here, let's do it later. 
+ */ + + suffixp = _vector_get(psl->suffixes, index); + suffixp->flags |= suffix.flags; + } else { + /* New entry */ suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix)); - suffixp->label = suffixp->label_buf; /* set label to changed address */ -#ifdef WITH_LIBICU - _add_punycode_if_needed(idna, psl->suffixes, suffixp); -#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) - _add_punycode_if_needed(psl->suffixes, suffixp); -#endif } + + suffixp->label = suffixp->label_buf; /* set label to changed address */ +#ifdef WITH_LIBICU + _add_punycode_if_needed(idna, psl->suffixes, suffixp); +#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) + _add_punycode_if_needed(psl->suffixes, suffixp); +#endif } } - _vector_sort(psl->suffix_exceptions); _vector_sort(psl->suffixes); #ifdef WITH_LIBICU @@ -788,7 +800,6 @@ void psl_free(psl_ctx_t *psl) { if (psl && psl != &_builtin_psl) { _vector_free(&psl->suffixes); - _vector_free(&psl->suffix_exceptions); free(psl); } } @@ -832,9 +843,9 @@ const psl_ctx_t *psl_builtin(void) int psl_suffix_count(const psl_ctx_t *psl) { if (psl == &_builtin_psl) - return countof(suffixes); + return _psl_nsuffixes; else if (psl) - return _vector_size(psl->suffixes); + return psl->nsuffixes; else return 0; } @@ -852,9 +863,29 @@ int psl_suffix_count(const psl_ctx_t *psl) int psl_suffix_exception_count(const psl_ctx_t *psl) { if (psl == &_builtin_psl) - return countof(suffix_exceptions); + return _psl_nexceptions; else if (psl) - return _vector_size(psl->suffix_exceptions); + return psl->nexceptions; + else + return 0; +} + +/** + * psl_suffix_wildcard_count: + * @psl: PSL context pointer + * + * This function returns number of public suffix wildcards maintained by @psl. + * + * Returns: Number of public suffix wildcards in PSL context. 
+ * + * Since: 0.10.0 + */ +int psl_suffix_wildcard_count(const psl_ctx_t *psl) +{ + if (psl == &_builtin_psl) + return _psl_nwildcards; + else if (psl) + return psl->nwildcards; else return 0; } @@ -924,6 +955,29 @@ const char *psl_builtin_filename(void) return _psl_filename; } +/** + * psl_builtin_outdated: + * + * This function checks if the built-in data is older than the file it has been created from. + * If it is, it might be a good idea for the application to reload the PSL. + * The mtime is taken as reference. + * + * If the PSL file does not exist, it is assumed that the built-in data is not outdated. + * + * Returns: 1 if the built-in is outdated, 0 otherwise. + * + * Since: 0.10.0 + */ +int psl_builtin_outdated(void) +{ + struct stat st; + + if (stat(_psl_filename, &st) == 0 && st.st_mtime > _psl_file_time) + return 1; + + return 0; +} + /** * psl_get_version: * @@ -946,6 +1000,37 @@ const char *psl_get_version(void) #endif } +/** + * psl_check_version_number: + * @version: Version number (hex) to check against. + * + * Check the given version number is at minimum the current library version number. + * The version number must be a hexadecimal number like 0x000a01 (V0.10.1). + * + * Returns: Returns the library version number if the given version number is at least + * the version of the library, else return 0; If the argument is 0, the function returns + * the library version number without performing a check. 
+ * + * Since: 0.11.0 + **/ +int psl_check_version_number(int version) +{ + if (version) { + int major = version >> 16; + int minor = (version >> 8) & 0xFF; + int patch = version & 0xFF; + + if (major < PSL_VERSION_MAJOR + || (major == PSL_VERSION_MAJOR && minor < PSL_VERSION_MINOR) + || (major == PSL_VERSION_MAJOR && minor == PSL_VERSION_MINOR && patch < PSL_VERSION_PATCH)) + { + return 0; + } + } + + return PSL_VERSION_NUMBER; +} + /* return whether hostname is an IP address or not */ static int _isip(const char *hostname) { diff --git a/src/psl2c.c b/src/psl2c.c index 7ee1fc9..e050194 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -54,6 +54,80 @@ # include "psl.c" #undef _LIBPSL_INCLUDED_BY_PSL2C +#if 0 +static int _check_psl(const psl_ctx_t *psl) +{ + int it, pos, err = 0; + + /* check if plain suffix also appears in exceptions */ + for (it = 0; it < psl->suffixes->cur; it++) { + _psl_entry_t *e = _vector_get(psl->suffixes, it); + + if (!e->wildcard && _vector_find(psl->suffix_exceptions, e) >= 0) { + fprintf(stderr, "Found entry '%s' also in exceptions\n", e->label); + err = 1; + } + } + + /* check if exception also appears in suffix list as plain entry */ + for (it = 0; it < psl->suffix_exceptions->cur; it++) { + _psl_entry_t *e2, *e = _vector_get(psl->suffix_exceptions, it); + + if ((e2 = _vector_get(psl->suffixes, pos = _vector_find(psl->suffixes, e)))) { + if (!e2->wildcard) { + fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label); + err = 1; + } + /* Two same domains in a row are allowed: wildcard and non-wildcard. + * Binary search find either of them, so also check previous and next entry. 
*/ + else if (pos > 0 && _suffix_compare(e, e2 = _vector_get(psl->suffixes, pos - 1)) == 0 && !e2->wildcard) { + fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label); + err = 1; + } + else if (pos < psl->suffixes->cur - 1 && _suffix_compare(e, e2 = _vector_get(psl->suffixes, pos + 1)) == 0 && !e2->wildcard) { + fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label); + err = 1; + } + } + } + + /* check if non-wildcard entry is already covered by wildcard entry */ + for (it = 0; it < psl->suffixes->cur; it++) { + const char *p; + _psl_entry_t *e = _vector_get(psl->suffixes, it); + + if (e->nlabels > 1 && !e->wildcard && (p = strchr(e->label, '.'))) { + _psl_entry_t *e2, *e3, suffix; + + suffix.label = p + 1; + suffix.length = strlen(p + 1); + suffix.nlabels = e->nlabels - 1; + + e2 = _vector_get(psl->suffixes, pos = _vector_find(psl->suffixes, &suffix)); + + if (e2) { + if (e2->wildcard) { + fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label); + err = 1; + } + /* Two same domains in a row are allowed: wildcard and non-wildcard. + * Binary search find either of them, so also check previous and next entry. 
*/ + else if (pos > 0 && _suffix_compare(e2, e3 = _vector_get(psl->suffixes, pos - 1)) == 0 && e3->wildcard) { + fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label); + err = 1; + } + else if (pos < psl->suffixes->cur - 1 && _suffix_compare(e2, e3 = _vector_get(psl->suffixes, pos + 1)) == 0 && e3->wildcard) { + fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label); + err = 1; + } + } + } + } + + return err; +} +#endif + static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *varname) { int it; @@ -81,7 +155,7 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char * _psl_entry_t *e = _vector_get(v, it); fprintf(fpout, "\t{ \"%s\", NULL, %hd, %d, %d },\n", - e->label_buf, e->length, (int) e->nlabels, (int) e->wildcard); + e->label_buf, e->length, (int) e->nlabels, (int) e->flags); } fprintf(fpout, "};\n"); @@ -152,6 +226,12 @@ int main(int argc, const char **argv) if (!(psl = psl_load_file(argv[1]))) return 2; + /* look for ambigious or double entries */ +/* if (_check_psl(psl)) { + psl_free(psl); + return 5; + } +*/ if ((fpout = fopen(argv[2], "w"))) { FILE *pp; struct stat st; @@ -162,11 +242,9 @@ int main(int argc, const char **argv) #if 0 /* include library code did not generate punycode, so let's do it for the builtin data */ _add_punycode_if_needed(psl->suffixes); - _add_punycode_if_needed(psl->suffix_exceptions); #endif _print_psl_entries(fpout, psl->suffixes, "suffixes"); - _print_psl_entries(fpout, psl->suffix_exceptions, "suffix_exceptions"); snprintf(cmd, cmdsize, "sha1sum %s", argv[1]); if ((pp = popen(cmd, "r"))) { @@ -182,6 +260,9 @@ int main(int argc, const char **argv) fprintf(fpout, "static time_t _psl_compile_time = %lu;\n", atol(source_date_epoch)); else fprintf(fpout, "static time_t _psl_compile_time = %lu;\n", time(NULL)); + fprintf(fpout, "static int _psl_nsuffixes = %d;\n", psl->nsuffixes); + fprintf(fpout, "static int 
_psl_nexceptions = %d;\n", psl->nexceptions); + fprintf(fpout, "static int _psl_nwildcards = %d;\n", psl->nwildcards); fprintf(fpout, "static const char _psl_sha1_checksum[] = \"%s\";\n", checksum); fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[1]); @@ -196,9 +277,11 @@ int main(int argc, const char **argv) #else if ((fpout = fopen(argv[2], "w"))) { fprintf(fpout, "static _psl_entry_t suffixes[1];\n"); - fprintf(fpout, "static _psl_entry_t suffix_exceptions[1];\n"); fprintf(fpout, "static time_t _psl_file_time;\n"); fprintf(fpout, "static time_t _psl_compile_time;\n"); + fprintf(fpout, "static int _psl_nsuffixes = 0;\n"); + fprintf(fpout, "static int _psl_nexceptions = 0;\n"); + fprintf(fpout, "static int _psl_nwildcards = 0;\n"); fprintf(fpout, "static const char _psl_sha1_checksum[] = \"\";\n"); fprintf(fpout, "static const char _psl_filename[] = \"\";\n"); diff --git a/tests/test-is-cookie-domain-acceptable.c b/tests/test-is-cookie-domain-acceptable.c index f5c9ae5..c50342d 100644 --- a/tests/test-is-cookie-domain-acceptable.c +++ b/tests/test-is-cookie-domain-acceptable.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/tests/test-is-public-all.c b/tests/test-is-public-all.c index 22e308a..705e79f 100644 --- a/tests/test-is-public-all.c +++ b/tests/test-is-public-all.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -56,7 +56,7 @@ static void test_psl(void) FILE *fp; psl_ctx_t *psl; int result; - char buf[256], domain[64], *linep, *p; + char buf[256], *linep, *p; psl = psl_load_file(PSL_FILE); /* PSL_FILE can be set by ./configure 
--with-psl-file=[PATH] */ @@ -80,10 +80,12 @@ static void test_psl(void) printf("psl_is_public_suffix(%s)=%d (expected 0)\n", p, result); } else ok++; - if (!(result = psl_is_public_suffix(psl, strchr(p, '.') + 1))) { - failed++; - printf("psl_is_public_suffix(%s)=%d (expected 1)\n", strchr(p, '.') + 1, result); - } else ok++; + if ((p = strchr(p, '.'))) { + if (!(result = psl_is_public_suffix(psl, p + 1))) { + failed++; + printf("psl_is_public_suffix(%s)=%d (expected 1)\n", p + 1, result); + } else ok++; + } } else if (*p == '*') { /* a wildcard, e.g. *.ck */ if (!(result = psl_is_public_suffix(psl, p + 1))) { @@ -102,12 +104,6 @@ static void test_psl(void) failed++; printf("psl_is_public_suffix(%s)=%d (expected 1)\n", p, result); } else ok++; - - snprintf(domain, sizeof(domain), "xxxx.%s", p); - if ((result = psl_is_public_suffix(psl, domain))) { - failed++; - printf("psl_is_public_suffix(%s)=%d (expected 0)\n", domain, result); - } else ok++; } } diff --git a/tests/test-is-public-builtin.c b/tests/test-is-public-builtin.c index d3c2d44..0aa5762 100644 --- a/tests/test-is-public-builtin.c +++ b/tests/test-is-public-builtin.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/tests/test-is-public.c b/tests/test-is-public.c index 828c946..5d32425 100644 --- a/tests/test-is-public.c +++ b/tests/test-is-public.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/tests/test-registrable-domain.c b/tests/test-registrable-domain.c index b263075..c9c3604 100644 --- a/tests/test-registrable-domain.c +++ b/tests/test-registrable-domain.c @@ -1,5 +1,5 @@ /* - * 
Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/tools/psl.c b/tools/psl.c index e1969c2..6790efe 100644 --- a/tools/psl.c +++ b/tools/psl.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2014 Tim Ruehsen + * Copyright(c) 2014-2015 Tim Ruehsen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -120,7 +120,7 @@ int main(int argc, const char *const *argv) printf("psl %s\n", PACKAGE_VERSION); printf("libpsl %s\n", psl_get_version()); printf("\n"); - printf("Copyright (C) 2014 Tim Ruehsen\n"); + printf("Copyright (C) 2014-2015 Tim Ruehsen\n"); printf("License: MIT\n"); exit(0); } @@ -200,6 +200,7 @@ int main(int argc, const char *const *argv) if (psl && psl != psl_builtin()) { printf("suffixes: %d\n", psl_suffix_count(psl)); printf("exceptions: %d\n", psl_suffix_exception_count(psl)); + printf("wildcards: %d\n", psl_suffix_wildcard_count(psl)); } psl_free(psl); @@ -208,6 +209,7 @@ int main(int argc, const char *const *argv) if (psl) { printf("builtin suffixes: %d\n", psl_suffix_count(psl)); printf("builtin exceptions: %d\n", psl_suffix_exception_count(psl)); + printf("builtin wildcards: %d\n", psl_suffix_wildcard_count(psl)); printf("builtin filename: %s\n", psl_builtin_filename()); printf("builtin compile time: %ld (%s)\n", psl_builtin_compile_time(), time2str(psl_builtin_compile_time())); printf("builtin file time: %ld (%s)\n", psl_builtin_file_time(), time2str(psl_builtin_file_time()));