From abc56bc6ddb30320c8193a225ebae76238a77f63 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Sun, 15 Jun 2014 22:31:18 +0200 Subject: [PATCH 01/31] always search for libicu --- configure.ac | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index 95f2df7..2d1ad11 100644 --- a/configure.ac +++ b/configure.ac @@ -66,6 +66,16 @@ AS_IF([ test "$enable_man" != no ], [ AC_SUBST([LIBPSL_SO_VERSION], [1:1:1]) AC_SUBST([LIBPSL_VERSION], $VERSION) +# Check for libicu +HAVE_LIBICU=no +AC_ARG_WITH(libicu, + AC_HELP_STRING([--without-libicu], [build libpsl without IDNA/Punycode support]), + [], + [ + PKG_CHECK_MODULES(LIBICU, [icu-uc], + [HAVE_LIBICU=yes; AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])]) + ]) + # Check for enable/disable builtin PSL data AC_ARG_ENABLE(builtin, AS_HELP_STRING([--disable-builtin], [do not compile PSL data into library]), @@ -74,10 +84,6 @@ AC_ARG_ENABLE(builtin, ], [ enable_builtin=yes AC_DEFINE([WITH_BUILTIN], [1], [compile PSL data into library]) - - PKG_CHECK_MODULES(LIBICU, [icu-uc], - [AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])], - [AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2'.))]) ]) AM_CONDITIONAL([WITH_BUILTIN], [test $enable_builtin = yes]) @@ -102,16 +108,14 @@ fi # Check for custom PSL file AC_ARG_WITH(psl-file, - AC_HELP_STRING([--with-psl-file=[PATH]], - [path to PSL file]), + AC_HELP_STRING([--with-psl-file=[PATH]], [path to PSL file]), PSL_FILE=$withval, PSL_FILE="\$(top_srcdir)/data/effective_tld_names.dat") AC_SUBST(PSL_FILE) # Check for custom PSL test file AC_ARG_WITH(psl-testfile, - AC_HELP_STRING([--with-psl-testfile=[PATH]], - [path to PSL test file]), + AC_HELP_STRING([--with-psl-testfile=[PATH]], [path to PSL test file]), PSL_TESTFILE=$withval, PSL_TESTFILE="\$(top_srcdir)/data/test_psl.txt") AC_SUBST(PSL_TESTFILE) @@ -138,6 +142,7 @@ AC_MSG_NOTICE([Summary of build options: Compiler: ${CC} CFlags: ${CFLAGS} ${CPPFLAGS} LDFlags: ${LDFLAGS} + ICU: ${HAVE_LIBICU} Builtin PSL: ${enable_builtin} PSL File: ${PSL_FILE} PSL Test File: ${PSL_TESTFILE} From 7cc3eed46bc948abb62f2909312899b6f234fdf6 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Sun, 15 Jun 2014 22:32:18 +0200 Subject: [PATCH 02/31] read from stdin if no domain argument --- tools/psl.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/tools/psl.c b/tools/psl.c index 3764599..ff28285 100644 --- a/tools/psl.c +++ b/tools/psl.c @@ -32,8 +32,10 @@ # include #endif +#include #include #include +#include #include static void usage(int err, FILE* f) @@ -135,8 +137,33 @@ int main(int argc, const char *const *argv) exit(2); } if (arg >= argv + argc) { - fprintf(stderr, "No domains given - aborting\n"); - exit(3); + if (isatty(STDIN_FILENO)) { + char buf[256], *domain; + size_t len; + + // read URLs from STDIN + while (fgets(buf, sizeof(buf), stdin)) { + for (domain = buf; isspace(*domain); domain++); // skip leading spaces + if (*domain == '#' || !*domain) continue; // skip empty lines and comments + for (len = strlen(domain); len && isspace(domain[len - 1]); len--); // skip trailing spaces + domain[len] = 0; + + if (mode == 1) + printf("%s: %d\n", domain, psl_is_public_suffix(psl, domain)); + else if (mode == 2) + printf("%s: %s\n", domain, psl_unregistrable_domain(psl, domain)); + else if (mode == 3) + printf("%s: %s\n", domain, psl_registrable_domain(psl, domain)); + else if (mode == 4) + printf("%s: %d\n", domain, psl_is_cookie_domain_acceptable(psl, domain, cookie_domain)); + } + + psl_free(psl); + exit(0); + } else { + fprintf(stderr, "No domains given - aborting\n"); + exit(3); + } } } From a3c2195d05ded70a4215ad8b246cf4e45312ba22 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Mon, 16 Jun 2014 13:01:27 +0200 Subject: [PATCH 03/31] added libicu48 to .travis.yml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 27bcd14..81538a2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,4 +7,4 @@ script: ./autogen.sh && ./configure --enable-gtk-doc && make -j4 && make check - before_install: - apt-cache search libicu | grep icu - sudo apt-get -qq update - - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu-dev + - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu48 libicu-dev From eaf3d1ee17d130470a39b530c1261a82f3438e8a Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Mon, 16 Jun 2014 15:17:19 +0200 Subject: [PATCH 04/31] print info of travis ci build environment --- .travis.yml | 2 ++ configure.ac | 1 + 2 files changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 81538a2..f26b687 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,3 +8,5 @@ before_install: - apt-cache search libicu | grep icu - sudo apt-get -qq update - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu48 libicu-dev + - ls -la /usr/lib/*/pkgconfig/icu-uc.pc + - updatedb && locate icu-cu diff --git a/configure.ac b/configure.ac index 2d1ad11..f23978d 100644 --- a/configure.ac +++ b/configure.ac @@ -8,6 +8,7 @@ AM_INIT_AUTOMAKE([1.10 -Wall no-define]) # the library. AC_CONFIG_HEADERS([config.h]) AC_PROG_CXX +AM_PROG_CC_C_O m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) #LT_INIT([disable-static]) LT_INIT From 9d2fb8f8c5a9be9b155f58bba120d71427a3b7d0 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Mon, 16 Jun 2014 17:04:31 +0200 Subject: [PATCH 05/31] fix search for older libicu versions without pkg-config support --- .travis.yml | 1 - configure.ac | 19 ++++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index f26b687..bd9e34d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,5 +8,4 @@ before_install: - apt-cache search libicu | grep icu - sudo apt-get -qq update - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu48 libicu-dev - - ls -la /usr/lib/*/pkgconfig/icu-uc.pc - updatedb && locate icu-cu diff --git a/configure.ac b/configure.ac index f23978d..e00e599 100644 --- a/configure.ac +++ b/configure.ac @@ -8,7 +8,6 @@ AM_INIT_AUTOMAKE([1.10 -Wall no-define]) # the library. AC_CONFIG_HEADERS([config.h]) AC_PROG_CXX -AM_PROG_CC_C_O m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) #LT_INIT([disable-static]) LT_INIT @@ -73,8 +72,22 @@ AC_ARG_WITH(libicu, AC_HELP_STRING([--without-libicu], [build libpsl without IDNA/Punycode support]), [], [ - PKG_CHECK_MODULES(LIBICU, [icu-uc], - [HAVE_LIBICU=yes; AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])]) + # using pkg-config won't work on older systems like Ubuntu 12.04 LTS Server Edition 64bit + LIBS="-licuuc $LIBS" + AC_MSG_CHECKING([for ICU unicode library]) + AC_LINK_IFELSE( + AC_LANG_PROGRAM( + [[#include ]], + [[u_strToUTF8(NULL, 0, NULL, NULL, 0, NULL);]]), + [HAVE_LIBICU=yes; AC_MSG_RESULT([yes]) AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])], + [AC_MSG_FAILURE([no working ICU unicode library was found])]) + +# AC_SEARCH_LIBS(uidna_close, icuuc, +# [HAVE_LIBICU=yes; AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])], +# [AC_MSG_ERROR(*** libicu was not found. Aborting.)], +# -licudata ) +# PKG_CHECK_MODULES(LIBICU, [icu-uc], +# [HAVE_LIBICU=yes; AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])]) ]) # Check for enable/disable builtin PSL data From 0c3dbf14b6b27a5d1b1c8856afbdbaea9f4b1167 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Mon, 16 Jun 2014 21:21:06 +0200 Subject: [PATCH 06/31] added mlocate to .travis.yml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index bd9e34d..f5c0ac4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,5 +7,5 @@ script: ./autogen.sh && ./configure --enable-gtk-doc && make -j4 && make check - before_install: - apt-cache search libicu | grep icu - sudo apt-get -qq update - - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu48 libicu-dev + - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu48 libicu-dev mlocate - updatedb && locate icu-cu From b77aaf780995bc05bdb20057bf52d34974e3aefd Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Mon, 16 Jun 2014 21:30:16 +0200 Subject: [PATCH 07/31] removed mlocate / updatedb from .travis.yml --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index f5c0ac4..81538a2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,5 +7,4 @@ script: ./autogen.sh && ./configure --enable-gtk-doc && make -j4 && make check - before_install: - apt-cache search libicu | grep icu - sudo apt-get -qq update - - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu48 libicu-dev mlocate - - updatedb && locate icu-cu + - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu48 libicu-dev From 657a1d4b88b784d0d7b8bdada7ac071743b81e0b Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Mon, 16 Jun 2014 21:59:23 +0200 Subject: [PATCH 08/31] use libicu for UTF-8 lowercase conversion --- tests/test-registrable-domain.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/test-registrable-domain.c b/tests/test-registrable-domain.c index c075500..715498c 100644 --- a/tests/test-registrable-domain.c +++ b/tests/test-registrable-domain.c @@ -38,6 +38,11 @@ #include #include +#ifdef WITH_LIBICU +# include +# include +#endif + #include static int @@ -61,6 +66,26 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_ /* if we found utf-8, make sure to convert domain correctly to lowercase */ /* does it work, if we are not in a utf-8 env ? */ if (utf8) { +#ifdef WITH_LIBICU + UErrorCode status = 0; + UChar utf16_dst[64], utf16_src[64]; + int32_t utf16_src_length; + + /* UTF-8 to lowercase conversion */ + u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, domain, (int32_t) strlen(domain), &status); + if (U_SUCCESS(status)) { + int32_t dst_length = u_strToLower(utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), utf16_src, -1, "en", &status); + if (U_SUCCESS(status)) { + u_strToUTF8(lookupname, (int32_t) sizeof(lookupname), NULL, utf16_dst, dst_length, &status); + if (U_SUCCESS(status)) { + domain = lookupname; + } else + fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); + } else + fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status); + } else + fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); +#else FILE *pp; size_t cmdsize = 48 + strlen(domain); char *cmd = alloca(cmdsize); @@ -71,6 +96,7 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_ domain = lookupname; pclose(pp); } +#endif } } From 9fb63f7a49002957ae6da92f9d5c5077957ab72b Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Tue, 17 Jun 2014 12:31:18 +0200 Subject: [PATCH 09/31] fixed autoreconf warnings --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index e00e599..60c2917 100644 --- a/configure.ac +++ b/configure.ac @@ -76,9 +76,9 @@ AC_ARG_WITH(libicu, LIBS="-licuuc $LIBS" AC_MSG_CHECKING([for ICU unicode library]) AC_LINK_IFELSE( - AC_LANG_PROGRAM( + [AC_LANG_PROGRAM( [[#include ]], - [[u_strToUTF8(NULL, 0, NULL, NULL, 0, NULL);]]), + [[u_strToUTF8(NULL, 0, NULL, NULL, 0, NULL);]])], [HAVE_LIBICU=yes; AC_MSG_RESULT([yes]) AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])], [AC_MSG_FAILURE([no working ICU unicode library was found])]) From 57394eb1f8f3afc7c49461926935d52a500faa14 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Tue, 17 Jun 2014 17:14:02 +0200 Subject: [PATCH 10/31] added psl_str_to_utf8lower() --- docs/libpsl/libpsl-sections.txt | 1 + include/libpsl.h | 3 + src/psl.c | 235 ++++++++++++++++++++++++++------ tests/test-is-public.c | 2 +- tests/test-registrable-domain.c | 56 +------- 5 files changed, 200 insertions(+), 97 deletions(-) diff --git a/docs/libpsl/libpsl-sections.txt b/docs/libpsl/libpsl-sections.txt index b2b1617..5e758e7 100644 --- a/docs/libpsl/libpsl-sections.txt +++ b/docs/libpsl/libpsl-sections.txt @@ -17,4 +17,5 @@ psl_builtin_sha1sum psl_builtin_filename psl_is_cookie_domain_acceptable psl_get_version +psl_str_to_utf8lower diff --git a/include/libpsl.h b/include/libpsl.h index 265bdf6..dff8974 100644 --- a/include/libpsl.h +++ b/include/libpsl.h @@ -65,6 +65,9 @@ const char * /* returns the shortest possible registrable domain part or NULL if domain is not registrable at all */ const char * psl_registrable_domain(const psl_ctx_t *psl, const char *domain); +/* convert a string into lowercase UTF-8 */ +int + psl_str_to_utf8lower(const char *s, const char *encoding, const char *locale, char **lower); /* does not include exceptions */ int psl_suffix_count(const psl_ctx_t *psl); diff --git a/src/psl.c b/src/psl.c index 881052e..133b66f 100644 --- a/src/psl.c +++ b/src/psl.c @@ -49,9 +49,20 @@ #include #include #include +#include + +#ifdef WITH_LIBICU +# include +# include +# include +# include +#endif #include +/* number of elements within an array */ +#define countof(a) (sizeof(a)/sizeof(*(a))) + /** * SECTION:libpsl * @short_description: Public Suffix List library functions @@ -239,39 +250,19 @@ static int _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length) for (dst = suffix->label_buf, src = rule; *src;) { if (*src == '.') suffix->nlabels++; - *dst++ = tolower(*src++); + *dst++ = *src++; } *dst = 0; return 0; } -/** - * psl_is_public_suffix: - * @psl: PSL context - * @domain: Domain string - * - * This function checks if @domain is a public suffix by the means of the - * [Mozilla Public Suffix List](http://publicsuffix.org). - * - * For cookie domain checking see psl_is_cookie_domain_acceptable(). - * - * @psl is a context returned by either psl_load_file(), psl_load_fp() or - * psl_builtin(). - * - * Returns: 1 if domain is a public suffix, 0 if not. - * - * Since: 0.1 - */ -int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) +static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) { _psl_entry_t suffix, *rule; const char *p, *label_bak; unsigned short length_bak; - if (!psl || !domain) - return 1; - /* this function should be called without leading dots, just make sure */ suffix.label = domain + (*domain == '.'); suffix.length = strlen(suffix.label); @@ -340,6 +331,31 @@ int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) return 0; } +/** + * psl_is_public_suffix: + * @psl: PSL context + * @domain: Domain string + * + * This function checks if @domain is a public suffix by the means of the + * [Mozilla Public Suffix List](http://publicsuffix.org). + * + * For cookie domain checking see psl_is_cookie_domain_acceptable(). + * + * @psl is a context returned by either psl_load_file(), psl_load_fp() or + * psl_builtin(). + * + * Returns: 1 if domain is a public suffix, 0 if not. + * + * Since: 0.1 + */ +int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) +{ + if (!psl || !domain) + return 1; + + return _psl_is_public_suffix(psl, domain); +} + /** * psl_unregistrable_domain: * @psl: PSL context @@ -366,7 +382,7 @@ const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain) * 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not. */ - while (!psl_is_public_suffix(psl, domain)) { + while (!_psl_is_public_suffix(psl, domain)) { if ((domain = strchr(domain, '.'))) domain++; else @@ -404,7 +420,7 @@ const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain) * 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not. */ - while (!psl_is_public_suffix(psl, domain)) { + while (!_psl_is_public_suffix(psl, domain)) { if ((p = strchr(domain, '.'))) { regdom = domain; domain = p + 1; @@ -415,6 +431,51 @@ const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain) return regdom; } +static int _str_is_ascii(const char *s) +{ + while (*s > 0) s++; + + return !*s; +} + +static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t *e) +{ + if (_str_is_ascii(e->label_buf)) + return; + +#ifdef WITH_LIBICU + /* IDNA2008 UTS#46 punycode conversion */ + if (idna) { + _psl_entry_t suffix, *suffixp; + char lookupname[128] = ""; + UErrorCode status = 0; + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + UChar utf16_dst[128], utf16_src[128]; + int32_t utf16_src_length; + + u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, -1, &status); + if (U_SUCCESS(status)) { + int32_t dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status); + if (U_SUCCESS(status)) { + u_strToUTF8(lookupname, sizeof(lookupname), NULL, utf16_dst, dst_length, &status); + if (U_SUCCESS(status)) { + if (strcmp(e->label_buf, lookupname)) { + /* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */ + _suffix_init(&suffix, lookupname, strlen(lookupname)); + suffix.wildcard = e->wildcard; + suffixp = _vector_get(v, _vector_add(v, &suffix)); + suffixp->label = suffixp->label_buf; /* set label to changed address */ + } /* else ignore */ + } /* else + fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */ + } /* else + fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status); */ + } /* else + fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); */ + } +#endif +} + /** * psl_load_file: * @fname: Name of PSL file @@ -422,13 +483,7 @@ const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain) * This function loads the public suffixes file named @fname. * To free the allocated resources, call psl_free(). * - * If you want to use punycode representations for functions like psl_is_public_suffix(), - * these have to exist as entries within @fname. This is a design decision to not pull in - * dependencies for UTF-8 case-handling and IDNA libraries. - * - * On the contrary, the builtin data already contains punycode entries. - * - * Have a look into psl2c.c for example code on how to convert UTF-8 to lowercase and to punycode. + * The suffixes are expected to be lowercase UTF-8 encoded. * * Returns: Pointer to a PSL context or %NULL on failure. * @@ -457,7 +512,7 @@ psl_ctx_t *psl_load_file(const char *fname) * This function loads the public suffixes from a FILE pointer. * To free the allocated resources, call psl_free(). * - * Have a look at psl_load_fp() for punycode considerations. + * The suffixes are expected to be lowercase UTF-8 encoded. * * Returns: Pointer to a PSL context or %NULL on failure. * @@ -467,8 +522,11 @@ psl_ctx_t *psl_load_fp(FILE *fp) { psl_ctx_t *psl; _psl_entry_t suffix, *suffixp; - int nsuffixes = 0; char buf[256], *linep, *p; +#ifdef WITH_LIBICU + UIDNA *idna; + UErrorCode status = 0; +#endif if (!fp) return NULL; @@ -476,6 +534,10 @@ psl_ctx_t *psl_load_fp(FILE *fp) if (!(psl = calloc(1, sizeof(psl_ctx_t)))) return NULL; +#ifdef WITH_LIBICU + idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status); +#endif + /* * as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions. * as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions. @@ -496,26 +558,29 @@ psl_ctx_t *psl_load_fp(FILE *fp) if (*p == '!') { /* add to exceptions */ - if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0) + if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0) { suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix)); - else - suffixp = NULL; + suffixp->label = suffixp->label_buf; /* set label to changed address */ + _add_punycode_if_needed(idna, psl->suffix_exceptions, suffixp); + } } else { - if (_suffix_init(&suffix, p, linep - p) == 0) + /* add to suffixes */ + if (_suffix_init(&suffix, p, linep - p) == 0) { suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix)); - else - suffixp = NULL; + suffixp->label = suffixp->label_buf; /* set label to changed address */ + _add_punycode_if_needed(idna, psl->suffixes, suffixp); + } } - - if (suffixp) - suffixp->label = suffixp->label_buf; /* set label to changed address */ - - nsuffixes++;; } _vector_sort(psl->suffix_exceptions); _vector_sort(psl->suffixes); +#ifdef WITH_LIBICU + if (idna) + uidna_close(idna); +#endif + return psl; } @@ -685,7 +750,11 @@ const char *psl_builtin_filename(void) **/ const char *psl_get_version (void) { +#ifdef WITH_LIBICU + return PACKAGE_VERSION " +libicu/" U_ICU_VERSION; +#else return PACKAGE_VERSION; +#endif } /** @@ -741,3 +810,81 @@ int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, return 0; } + +/** + * psl_str_to_utf8lower: + * @str: string to convert + * @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL + * @locale: locale of @str for to lowercase conversion, e.g. 'de' or %NULL + * @lower: return value containing the converted string + * + * This helper function converts a string to lowercase UTF-8 representation. + * Lowercase UTF-8 is needed as input to the domain checking functions. + * + * The return value 'lower' must be freed after usage. + * + * Returns: 0 on success, negative value on error. + * -2 failed to open converter with name @encoding + * -3 failed to convert @str to unicode + * -4 failed to convert unicode to lowercase + * -5 failed to convert unicode to UTF-8 + * + * Since: 0.4 + */ +int psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower) +{ + int ret = -1; + + if (lower) + *lower = NULL; + + if (!str) + return 0; + +#ifdef WITH_LIBICU + size_t str_length = strlen(str); + UErrorCode status = 0; + UChar *utf16_dst, *utf16_lower; + int32_t utf16_dst_length; + char *utf8_lower; + UConverter *uconv; + + /* C89 allocation */ + utf16_dst = alloca(sizeof(UChar) * (str_length * 2 + 1)); + utf16_lower = alloca(sizeof(UChar) * (str_length * 2 + 1)); + utf8_lower = alloca(str_length * 2 + 1); + + uconv = ucnv_open(encoding, &status); + if (U_SUCCESS(status)) { + utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, str_length * 2 + 1, str, str_length, &status); + ucnv_close(uconv); + + if (U_SUCCESS(status)) { + int32_t utf16_lower_length = u_strToLower(utf16_lower, str_length * 2 + 1, utf16_dst, utf16_dst_length, locale, &status); + if (U_SUCCESS(status)) { + u_strToUTF8(utf8_lower, str_length * 8 + 1, NULL, utf16_lower, utf16_lower_length, &status); + if (U_SUCCESS(status)) { + if (lower) + *lower = strdup(utf8_lower); + ret = 0; + } else { + ret = -5; + fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); + } + } else { + ret = -4; + fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); + } + } else { + ret = -3; + fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); + } + } else { + ret = -2; + fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); + } + +#endif + + return ret; +} diff --git a/tests/test-is-public.c b/tests/test-is-public.c index 1f3b8b3..fdc60b9 100644 --- a/tests/test-is-public.c +++ b/tests/test-is-public.c @@ -65,7 +65,7 @@ static void test_psl(void) { "abc.www.ck", 0 }, { "xxx.ck", 1 }, { "www.xxx.ck", 0 }, - { "\345\225\206\346\240\207", 1 }, /* xn--czr694b oder 商标 */ + { "\345\225\206\346\240\207", 1 }, /* xn--czr694b or 商标 */ { "www.\345\225\206\346\240\207", 0 }, /* some special test follow ('name' and 'forgot.his.name' are public, but e.g. his.name is not) */ { "name", 1 }, diff --git a/tests/test-registrable-domain.c b/tests/test-registrable-domain.c index 715498c..8c492fb 100644 --- a/tests/test-registrable-domain.c +++ b/tests/test-registrable-domain.c @@ -52,53 +52,10 @@ static int static void test(const psl_ctx_t *psl, const char *domain, const char *expected_result) { const char *result; - char lookupname[128]; + char *lower; - /* check if there might be some utf-8 characters */ - if (domain) { - int utf8; - const char *p; - - for (p = domain, utf8 = 0; *p && !utf8; p++) - if (*p < 0) - utf8 = 1; - - /* if we found utf-8, make sure to convert domain correctly to lowercase */ - /* does it work, if we are not in a utf-8 env ? */ - if (utf8) { -#ifdef WITH_LIBICU - UErrorCode status = 0; - UChar utf16_dst[64], utf16_src[64]; - int32_t utf16_src_length; - - /* UTF-8 to lowercase conversion */ - u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, domain, (int32_t) strlen(domain), &status); - if (U_SUCCESS(status)) { - int32_t dst_length = u_strToLower(utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), utf16_src, -1, "en", &status); - if (U_SUCCESS(status)) { - u_strToUTF8(lookupname, (int32_t) sizeof(lookupname), NULL, utf16_dst, dst_length, &status); - if (U_SUCCESS(status)) { - domain = lookupname; - } else - fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); - } else - fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status); - } else - fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); -#else - FILE *pp; - size_t cmdsize = 48 + strlen(domain); - char *cmd = alloca(cmdsize); - - snprintf(cmd, cmdsize, "echo -n '%s' | sed -e 's/./\\L\\0/g'", domain); - if ((pp = popen(cmd, "r"))) { - if (fscanf(pp, "%127s", lookupname) >= 1) - domain = lookupname; - pclose(pp); - } -#endif - } - } + if (psl_str_to_utf8lower(domain, NULL, NULL, &lower) == 0) + domain = lower; result = psl_registrable_domain(psl, domain); @@ -115,7 +72,7 @@ static void test_psl(void) { FILE *fp; const psl_ctx_t *psl; - char buf[256], domain[128], expected_regdom[128], *p; + char buf[256], domain[128], expected_regdom[128]; psl = psl_builtin(); @@ -152,11 +109,6 @@ static void test_psl(void) continue; } - /* we have to lowercase the domain - the PSL API just takes lowercase */ - for (p = domain; *p; p++) - if (*p > 0 && isupper(*p)) - *p = tolower(*p); - if (!strcmp(expected_regdom, "null")) test(psl, domain, NULL); else From 29d1300c864ce1289fa09b8117c59ddc91a25ea0 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 18 Jun 2014 12:25:06 +0200 Subject: [PATCH 11/31] free allocated variable --- tests/test-registrable-domain.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test-registrable-domain.c b/tests/test-registrable-domain.c index 8c492fb..c48ccae 100644 --- a/tests/test-registrable-domain.c +++ b/tests/test-registrable-domain.c @@ -66,6 +66,8 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_ printf("psl_registrable_domain(%s)=%s (expected %s)\n", domain, result ? result : "NULL", expected_result ? expected_result : "NULL"); } + + free(lower); } static void test_psl(void) From 935b44b3ea641f35bb9b4a28e65af15b68f8c4fb Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 18 Jun 2014 12:26:45 +0200 Subject: [PATCH 12/31] updated docs, removed printing to stderr --- src/psl.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/src/psl.c b/src/psl.c index 133b66f..87b7d7b 100644 --- a/src/psl.c +++ b/src/psl.c @@ -341,6 +341,9 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) * * For cookie domain checking see psl_is_cookie_domain_acceptable(). * + * International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode). + * Other encodings result in unexpected behavior. + * * @psl is a context returned by either psl_load_file(), psl_load_fp() or * psl_builtin(). * @@ -364,6 +367,9 @@ int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) * This function finds the longest publix suffix part of @domain by the means * of the [Mozilla Public Suffix List](http://publicsuffix.org). * + * International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode). + * Other encodings result in unexpected behavior. + * * @psl is a context returned by either psl_load_file(), psl_load_fp() or * psl_builtin(). * @@ -400,6 +406,9 @@ const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain) * This function finds the shortest private suffix part of @domain by the means * of the [Mozilla Public Suffix List](http://publicsuffix.org). * + * International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode). + * Other encodings result in unexpected behavior. + * * @psl is a context returned by either psl_load_file(), psl_load_fp() or * psl_builtin(). * @@ -483,7 +492,7 @@ static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t * This function loads the public suffixes file named @fname. * To free the allocated resources, call psl_free(). * - * The suffixes are expected to be lowercase UTF-8 encoded. + * The suffixes are expected to be lowercase UTF-8 encoded if they are international. * * Returns: Pointer to a PSL context or %NULL on failure. * @@ -512,7 +521,7 @@ psl_ctx_t *psl_load_file(const char *fname) * This function loads the public suffixes from a FILE pointer. * To free the allocated resources, call psl_free(). * - * The suffixes are expected to be lowercase UTF-8 encoded. + * The suffixes are expected to be lowercase UTF-8 encoded if they are international. * * Returns: Pointer to a PSL context or %NULL on failure. * @@ -766,6 +775,9 @@ const char *psl_get_version (void) * This helper function checks whether @cookie_domain is an acceptable cookie domain value for the request * @hostname. * + * For international domain names both, @hostname and @cookie_domain, have to be either in lowercase UTF-8 + * or in ASCII form (punycode). Other encodings or mixing UTF-8 and punycode result in unexpected behavior. + * * Examples: * 1. Cookie domain 'example.com' would be acceptable for hostname 'www.example.com', * but '.com' or 'com' would NOT be acceptable since 'com' is a public suffix. @@ -821,9 +833,11 @@ int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, * This helper function converts a string to lowercase UTF-8 representation. * Lowercase UTF-8 is needed as input to the domain checking functions. * + * @lower is %NULL on error. * The return value 'lower' must be freed after usage. * * Returns: 0 on success, negative value on error. + * -1 @str is a %NULL value * -2 failed to open converter with name @encoding * -3 failed to convert @str to unicode * -4 failed to convert unicode to lowercase @@ -839,7 +853,14 @@ int psl_str_to_utf8lower(const char *str, const char *encoding, const char *loca *lower = NULL; if (!str) + return -1; + + /* shortcut to avoid costly conversion */ + if (_str_is_ascii(str)) { + if (lower) + *lower = strdup(str); return 0; + } #ifdef WITH_LIBICU size_t str_length = strlen(str); @@ -869,21 +890,20 @@ int psl_str_to_utf8lower(const char *str, const char *encoding, const char *loca ret = 0; } else { ret = -5; - fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); + /* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */ } } else { ret = -4; - fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); + /* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */ } } else { ret = -3; - fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); + /* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */ } } else { ret = -2; - fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); + /* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */ } - #endif return ret; From a0a9e762613fc45838388685aff3fd74ceffba55 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 18 Jun 2014 12:28:22 +0200 Subject: [PATCH 13/31] fixed locale/charset support --- tools/psl.c | 63 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/tools/psl.c b/tools/psl.c index ff28285..7d5db0c 100644 --- a/tools/psl.c +++ b/tools/psl.c @@ -73,6 +73,10 @@ int main(int argc, const char *const *argv) const char *const *arg, *psl_file = NULL, *cookie_domain = NULL; psl_ctx_t *psl = (psl_ctx_t *) psl_builtin(); + /* set current locale according to the environment variables */ + #include + setlocale(LC_ALL, ""); + for (arg = argv + 1; arg < argv + argc; arg++) { if (!strncmp(*arg, "--", 2)) { if (!strcmp(*arg, "--is-public-suffix")) @@ -137,33 +141,41 @@ int main(int argc, const char *const *argv) exit(2); } if (arg >= argv + argc) { - if (isatty(STDIN_FILENO)) { - char buf[256], *domain; - size_t len; + char buf[256], *domain, *lower; + size_t len; + int rc; - // read URLs from STDIN - while (fgets(buf, sizeof(buf), stdin)) { - for (domain = buf; isspace(*domain); domain++); // skip leading spaces - if (*domain == '#' || !*domain) continue; // skip empty lines and comments - for (len = strlen(domain); len && isspace(domain[len - 1]); len--); // skip trailing spaces - domain[len] = 0; + // read URLs from STDIN + while (fgets(buf, sizeof(buf), stdin)) { + for (domain = buf; isspace(*domain); domain++); // skip leading spaces + if (*domain == '#' || !*domain) continue; // skip empty lines and comments + for (len = strlen(domain); len && isspace(domain[len - 1]); len--); // skip trailing spaces + domain[len] = 0; - if (mode == 1) - printf("%s: %d\n", domain, psl_is_public_suffix(psl, domain)); - else if (mode == 2) - printf("%s: %s\n", domain, psl_unregistrable_domain(psl, domain)); - else if (mode == 3) - printf("%s: %s\n", domain, psl_registrable_domain(psl, domain)); - else if (mode == 4) - printf("%s: %d\n", domain, psl_is_cookie_domain_acceptable(psl, domain, cookie_domain)); + if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &lower)) != 0) + fprintf(stderr, "%s: Failed to convert to lowercase UTF-8 (%d)\n", domain, rc); + else if (mode == 1) + printf("%s: %d (%s)\n", domain, psl_is_public_suffix(psl, lower), lower); + else if (mode == 2) + printf("%s: %s\n", domain, psl_unregistrable_domain(psl, lower)); + else if (mode == 3) + printf("%s: %s\n", domain, psl_registrable_domain(psl, lower)); + else if (mode == 4) { + char *cookie_domain_lower; + + if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &cookie_domain_lower)) != 0) + fprintf(stderr, "%s: Failed to convert cookie domain '%s' to lowercase UTF-8 (%d)\n", domain, cookie_domain, rc); + else + printf("%s: %d\n", domain, psl_is_cookie_domain_acceptable(psl, lower, cookie_domain)); + + free(cookie_domain_lower); } - psl_free(psl); - exit(0); - } else { - fprintf(stderr, "No domains given - aborting\n"); - exit(3); + free(lower); } + + psl_free(psl); + exit(0); } } @@ -199,6 +211,13 @@ int main(int argc, const char *const *argv) printf("builtin compile time: %ld (%s)\n", psl_builtin_compile_time(), time2str(psl_builtin_compile_time())); printf("builtin file time: %ld (%s)\n", psl_builtin_file_time(), time2str(psl_builtin_file_time())); printf("builtin SHA1 file hash: %s\n", psl_builtin_sha1sum()); + +#ifdef WITH_LIBICU + #include + #include + printf("uloc_getDefault=%s\n", uloc_getDefault()); + printf("ucnv_getDefaultName=%s\n", ucnv_getDefaultName()); +#endif } else printf("No builtin PSL data available\n"); } From e6e0f7759f069af9fb1cf278d84772b5b6e946f4 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 18 Jun 2014 12:39:55 +0200 Subject: [PATCH 14/31] added lowercase conversion to ASCII strings --- src/psl.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/psl.c b/src/psl.c index 87b7d7b..62cb20c 100644 --- a/src/psl.c +++ b/src/psl.c @@ -857,8 +857,16 @@ int psl_str_to_utf8lower(const char *str, const char *encoding, const char *loca /* shortcut to avoid costly conversion */ if (_str_is_ascii(str)) { - if (lower) + if (lower) { + char *p; + *lower = strdup(str); + + /* convert ASCII string to lowercase */ + for (p = *lower; *p; p++) + if (isupper(*p)) + *p = tolower(*p); + } return 0; } From 9aea73fb646d11bb9dfcb6014d6bc8dc04809f3b Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 18 Jun 2014 12:41:23 +0200 Subject: [PATCH 15/31] respect test data being always UTF-8 --- tests/test-registrable-domain.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test-registrable-domain.c b/tests/test-registrable-domain.c index c48ccae..e10e6ff 100644 --- a/tests/test-registrable-domain.c +++ b/tests/test-registrable-domain.c @@ -54,7 +54,8 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_ const char *result; char *lower; - if (psl_str_to_utf8lower(domain, NULL, NULL, &lower) == 0) + /* our test data is fixed to UTF-8 (english), so provide it here */ + if (psl_str_to_utf8lower(domain, "utf-8", "en", &lower) == 0) domain = lower; result = psl_registrable_domain(psl, domain); From 8c0e8a9283d9a471135c8a3c9e934cd04aaf4a8c Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 18 Jun 2014 14:50:26 +0200 Subject: [PATCH 16/31] fixed psl_str_to_utf8lower prototype --- include/libpsl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/libpsl.h b/include/libpsl.h index dff8974..3b5dd74 100644 --- a/include/libpsl.h +++ b/include/libpsl.h @@ -67,7 +67,7 @@ const char * psl_registrable_domain(const psl_ctx_t *psl, const char *domain); /* convert a string into lowercase UTF-8 */ int - psl_str_to_utf8lower(const char *s, const char *encoding, const char *locale, char **lower); + psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower); /* does not include exceptions */ int psl_suffix_count(const psl_ctx_t *psl); From 3cb370417609c2e235bd6caf1c4ca9aea663d70d Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 18 Jun 2014 14:51:33 +0200 Subject: [PATCH 17/31] fixed compile issue with older libicu versions --- tools/psl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/psl.c b/tools/psl.c index 7d5db0c..5a6a9cc 100644 --- a/tools/psl.c +++ b/tools/psl.c @@ -36,6 +36,12 @@ #include #include #include + +#ifdef WITH_LIBICU +# include +# include +#endif + #include static void usage(int err, FILE* f) @@ -213,8 +219,6 @@ int main(int argc, const char *const *argv) printf("builtin SHA1 file hash: %s\n", psl_builtin_sha1sum()); #ifdef WITH_LIBICU - #include - #include printf("uloc_getDefault=%s\n", uloc_getDefault()); printf("ucnv_getDefaultName=%s\n", ucnv_getDefaultName()); #endif From 370fd3639d6d1be3799a29470217ba9765a44d69 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 18 Jun 2014 15:20:02 +0200 Subject: [PATCH 18/31] don't fail configure run if libicu not detected --- configure.ac | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 60c2917..0b0527d 100644 --- a/configure.ac +++ b/configure.ac @@ -73,6 +73,7 @@ AC_ARG_WITH(libicu, [], [ # using pkg-config won't work on older systems like Ubuntu 12.04 LTS Server Edition 64bit + OLDLIBS=$LIBS LIBS="-licuuc $LIBS" AC_MSG_CHECKING([for ICU unicode library]) AC_LINK_IFELSE( @@ -80,7 +81,7 @@ AC_ARG_WITH(libicu, [[#include ]], [[u_strToUTF8(NULL, 0, NULL, NULL, 0, NULL);]])], [HAVE_LIBICU=yes; AC_MSG_RESULT([yes]) AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])], - [AC_MSG_FAILURE([no working ICU unicode library was found])]) + [LIBS=$OLDLIBS; AC_MSG_WARN([no working ICU unicode library was found])]) # AC_SEARCH_LIBS(uidna_close, icuuc, # [HAVE_LIBICU=yes; AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])], From 28adbe1f1b247c85afc2ef46dc8becb01a84ab7a Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 18 Jun 2014 15:20:48 +0200 Subject: [PATCH 19/31] removed IDNA case test if libicu is not configured --- tests/test-registrable-domain.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test-registrable-domain.c b/tests/test-registrable-domain.c index e10e6ff..d0dda62 100644 --- a/tests/test-registrable-domain.c +++ b/tests/test-registrable-domain.c @@ -91,7 +91,9 @@ static void test_psl(void) test(NULL, "com", NULL); /* Norwegian with uppercase oe */ +#ifdef WITH_LIBICU test(psl, "www.\303\230yer.no", "www.\303\270yer.no"); +#endif /* Norwegian with lowercase oe */ test(psl, "www.\303\270yer.no", "www.\303\270yer.no"); From 4ae0fecc64dc49306a9ce9cfb33385bda64e6441 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 18 Jun 2014 15:21:22 +0200 Subject: [PATCH 20/31] some libicu cleanups --- src/psl.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/psl.c b/src/psl.c index 62cb20c..7b5a7f6 100644 --- a/src/psl.c +++ b/src/psl.c @@ -447,12 +447,12 @@ static int _str_is_ascii(const char *s) return !*s; } +#ifdef WITH_LIBICU static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t *e) { if (_str_is_ascii(e->label_buf)) return; -#ifdef WITH_LIBICU /* IDNA2008 UTS#46 punycode conversion */ if (idna) { _psl_entry_t suffix, *suffixp; @@ -482,8 +482,8 @@ static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t } /* else fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); */ } -#endif } +#endif /** * psl_load_file: @@ -570,14 +570,18 @@ psl_ctx_t *psl_load_fp(FILE *fp) if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0) { suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ +#ifdef WITH_LIBICU _add_punycode_if_needed(idna, psl->suffix_exceptions, suffixp); +#endif } } else { /* add to suffixes */ if (_suffix_init(&suffix, p, linep - p) == 0) { suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ +#ifdef WITH_LIBICU _add_punycode_if_needed(idna, psl->suffixes, suffixp); +#endif } } } @@ -759,11 +763,13 @@ const char *psl_builtin_filename(void) **/ const char *psl_get_version (void) { + return PACKAGE_VERSION #ifdef WITH_LIBICU - return PACKAGE_VERSION " +libicu/" U_ICU_VERSION; + " (+libicu/" U_ICU_VERSION ")" #else - return PACKAGE_VERSION; + " (limited IDNA support)" #endif + ; } /** From 58bdb225460edf754744333de6c289e496a1d744 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 18 Jun 2014 15:36:26 +0200 Subject: [PATCH 21/31] revoked UTF-8 IDNA comments --- tests/test-is-public-builtin.c | 6 +++--- tests/test-is-public.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test-is-public-builtin.c b/tests/test-is-public-builtin.c index ffcac12..2ccde36 100644 --- a/tests/test-is-public-builtin.c +++ b/tests/test-is-public-builtin.c @@ -47,8 +47,8 @@ static int static void test_psl(void) { - /* punycode generation: idn 商标 */ - /* octal code generation: echo -n "商标" | od -b */ + /* punycode generation: idn ?? */ + /* octal code generation: echo -n "??" | od -b */ static const struct test_data { const char *domain; @@ -65,7 +65,7 @@ static void test_psl(void) { "abc.www.ck", 0 }, { "xxx.ck", 1 }, { "www.xxx.ck", 0 }, - { "\345\225\206\346\240\207", 1 }, /* xn--czr694b oder 商标 */ + { "\345\225\206\346\240\207", 1 }, /* xn--czr694b oder ?? */ { "www.\345\225\206\346\240\207", 0 }, { "xn--czr694b", 1 }, { "www.xn--czr694b", 0 }, diff --git a/tests/test-is-public.c b/tests/test-is-public.c index fdc60b9..0afbdd7 100644 --- a/tests/test-is-public.c +++ b/tests/test-is-public.c @@ -47,8 +47,8 @@ static int static void test_psl(void) { - /* punycode generation: idn 商标 */ - /* octal code generation: echo -n "商标" | od -b */ + /* punycode generation: idn ?? */ + /* octal code generation: echo -n "??" | od -b */ static const struct test_data { const char *domain; @@ -65,7 +65,7 @@ static void test_psl(void) { "abc.www.ck", 0 }, { "xxx.ck", 1 }, { "www.xxx.ck", 0 }, - { "\345\225\206\346\240\207", 1 }, /* xn--czr694b or 商标 */ + { "\345\225\206\346\240\207", 1 }, /* xn--czr694b or ?? */ { "www.\345\225\206\346\240\207", 0 }, /* some special test follow ('name' and 'forgot.his.name' are public, but e.g. his.name is not) */ { "name", 1 }, From a1a5b5e5d7e269aafd00319ebfbb933a43c4ce8d Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 18 Jun 2014 16:27:29 +0200 Subject: [PATCH 22/31] fixed c89 compatibility --- src/psl.c | 2 ++ tools/psl.c | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/psl.c b/src/psl.c index 7b5a7f6..a1a7d3f 100644 --- a/src/psl.c +++ b/src/psl.c @@ -877,6 +877,7 @@ int psl_str_to_utf8lower(const char *str, const char *encoding, const char *loca } #ifdef WITH_LIBICU + do { size_t str_length = strlen(str); UErrorCode status = 0; UChar *utf16_dst, *utf16_lower; @@ -918,6 +919,7 @@ int psl_str_to_utf8lower(const char *str, const char *encoding, const char *loca ret = -2; /* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */ } + } while (0); #endif return ret; diff --git a/tools/psl.c b/tools/psl.c index 5a6a9cc..80f0933 100644 --- a/tools/psl.c +++ b/tools/psl.c @@ -151,11 +151,11 @@ int main(int argc, const char *const *argv) size_t len; int rc; - // read URLs from STDIN + /* read URLs from STDIN */ while (fgets(buf, sizeof(buf), stdin)) { - for (domain = buf; isspace(*domain); domain++); // skip leading spaces - if (*domain == '#' || !*domain) continue; // skip empty lines and comments - for (len = strlen(domain); len && isspace(domain[len - 1]); len--); // skip trailing spaces + for (domain = buf; isspace(*domain); domain++); /* skip leading spaces */ + if (*domain == '#' || !*domain) continue; /* skip empty lines and comments */ + for (len = strlen(domain); len && isspace(domain[len - 1]); len--); /* skip trailing spaces */ domain[len] = 0; if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &lower)) != 0) From c8171c9ac872473f74f5d8a32db604f5d5f29f59 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 18 Jun 2014 16:48:15 +0200 Subject: [PATCH 23/31] updated PSL data --- data/effective_tld_names.dat | 398 ++++++++++++++++++++++++++++++++--- 1 file changed, 373 insertions(+), 25 deletions(-) diff --git a/data/effective_tld_names.dat b/data/effective_tld_names.dat index 989ec21..fd84dc6 100644 --- a/data/effective_tld_names.dat +++ b/data/effective_tld_names.dat @@ -180,6 +180,7 @@ ar com.ar edu.ar gob.ar +gov.ar int.ar mil.ar net.ar @@ -222,7 +223,6 @@ edu.au gov.au asn.au id.au -csiro.au // Historic 2LDs (closed to new registration, but sites still exist) info.au conf.au @@ -245,7 +245,7 @@ sa.edu.au tas.edu.au vic.edu.au wa.edu.au -act.gov.au +// act.gov.au Bug 984824 - Removed at request of Greg Tankard // nsw.gov.au Bug 547985 - Removed at request of // nt.gov.au Bug 940478 - Removed at request of Greg Connors qld.gov.au @@ -292,6 +292,7 @@ rs.ba // bb : http://en.wikipedia.org/wiki/.bb bb biz.bb +co.bb com.bb edu.bb gov.bb @@ -299,6 +300,7 @@ info.bb net.bb org.bb store.bb +tv.bb // bd : http://en.wikipedia.org/wiki/.bd *.bd @@ -596,9 +598,12 @@ gob.cl co.cl mil.cl -// cm : http://en.wikipedia.org/wiki/.cm +// cm : http://en.wikipedia.org/wiki/.cm plus bug 981927 cm +co.cm +com.cm gov.cm +net.cm // cn : http://en.wikipedia.org/wiki/.cn // Submitted by registry 2008-06-11 @@ -5146,7 +5151,24 @@ com.nr nu // nz : http://en.wikipedia.org/wiki/.nz -*.nz +// Confirmed by registry 2014-05-19 +nz +ac.nz +co.nz +cri.nz +geek.nz +gen.nz +govt.nz +health.nz +iwi.nz +kiwi.nz +maori.nz +mil.nz +māori.nz +net.nz +org.nz +parliament.nz +school.nz // om : http://en.wikipedia.org/wiki/.om om @@ -5613,7 +5635,6 @@ oryol.ru palana.ru penza.ru perm.ru -pskov.ru ptz.ru rnd.ru ryazan.ru @@ -6150,19 +6171,19 @@ com.ug org.ug // uk : http://en.wikipedia.org/wiki/.uk -// Submitted by registry 2012-10-02 -// and tweaked by us pending further consultation. -*.uk +// Submitted by registry +uk +ac.uk +co.uk +gov.uk +ltd.uk +me.uk +net.uk +nhs.uk +org.uk +plc.uk +police.uk *.sch.uk -!bl.uk -!british-library.uk -!jet.uk -!mod.uk -!national-library-scotland.uk -!nel.uk -!nic.uk -!nls.uk -!parliament.uk // us : http://en.wikipedia.org/wiki/.us us @@ -6440,16 +6461,24 @@ edu.vc // ve : https://registro.nic.ve/ // Confirmed by registry 2012-10-04 +// Updated 2014-05-20 - Bug 940478 ve +arts.ve co.ve com.ve e12.ve edu.ve +firm.ve +gob.ve gov.ve info.ve +int.ve mil.ve net.ve org.ve +rec.ve +store.ve +tec.ve web.ve // vg : http://en.wikipedia.org/wiki/.vg @@ -6482,8 +6511,12 @@ pro.vn health.vn // vu : http://en.wikipedia.org/wiki/.vu -// list of 2nd level tlds ? +// http://www.vunic.vu/ vu +com.vu +edu.vu +net.vu +org.vu // wf : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf wf @@ -6609,7 +6642,14 @@ yt فلسطين // xn--90a3ac ("srb" Cyrillic) : RS +// http://www.rnids.rs/en/the-.срб-domain срб +пр.срб +орг.срб +обр.срб +од.срб +упр.срб +ак.срб // xn--p1ai ("rf" Russian-Cyrillic) : RU // http://www.cctld.ru/en/docs/rulesrf.php @@ -7651,9 +7691,302 @@ hiv // sca : 2014-03-13 SVENSKA CELLULOSA AKTIEBOLAGET SCA (publ) sca -// reise : 2014-03-13 dotreise GmbH +// reise : 2014-03-13 dotreise GmbH reise +// accountants : 2014-03-20 Knob Town, LLC +accountants + +// clinic : 2014-03-20 Goose Park, LLC +clinic + +// versicherung : 2014-03-20 dotversicherung-registry GmbH +versicherung + +// top : 2014-03-20 Jiangsu Bangning Science & Technology Co.,Ltd. +top + +// furniture : 2014-03-20 Lone Fields, LLC +furniture + +// dental : 2014-03-20 Tin Birch, LLC +dental + +// fund : 2014-03-20 John Castle, LLC +fund + +// creditcard : 2014-03-20 Binky Frostbite, LLC +creditcard + +// insure : 2014-03-20 Pioneer Willow, LLC +insure + +// audio : 2014-03-20 Uniregistry, Corp. +audio + +// claims : 2014-03-20 Black Corner, LLC +claims + +// loans : 2014-03-20 June Woods, LLC +loans + +// auction : 2014-03-20 Sand Galley, LLC +auction + +// attorney : 2014-03-20 Victor North, LLC +attorney + +// finance : 2014-03-20 Cotton Cypress, LLC +finance + +// investments : 2014-03-20 Holly Glen, LLC +investments + +// juegos : 2014-03-20 Uniregistry, Corp. +juegos + +// dentist : 2014-03-20 Outer Lake, LLC +dentist + +// lds : 2014-03-20 IRI Domain Management, LLC +lds + +// lawyer : 2014-03-20 Atomic Station, LLC +lawyer + +// surgery : 2014-03-20 Tin Avenue, LLC +surgery + +// gratis : 2014-03-20 Pioneer Tigers, LLC +gratis + +// software : 2014-03-20 Over Birch, LLC +software + +// mortgage : 2014-03-20 Outer Gardens, LLC +mortgage + +// republican : 2014-03-20 United TLD Holdco Ltd. +republican + +// credit : 2014-03-20 Snow Shadow, LLC +credit + +// tax : 2014-03-20 Storm Orchard, LLC +tax + +// africa : 2014-03-24 ZA Central Registry NPC trading as Registry.Africa +africa + +// joburg : 2014-03-24 ZA Central Registry NPC trading as ZA Central Registry +joburg + +// durban : 2014-03-24 ZA Central Registry NPC trading as ZA Central Registry +durban + +// capetown : 2014-03-24 ZA Central Registry NPC trading as ZA Central Registry +capetown + +// sap : 2014-03-27 SAP AG +sap + +// datsun : 2014-03-27 NISSAN MOTOR CO., LTD. +datsun + +// infiniti : 2014-03-27 NISSAN MOTOR CO., LTD. +infiniti + +// firmdale : 2014-03-27 Firmdale Holdings Limited +firmdale + +// organic : 2014-03-27 Afilias Limited +organic + +// nissan : 2014-03-27 NISSAN MOTOR CO., LTD. +nissan + +// website : 2014-04-03 DotWebsite Inc. +website + +// space : 2014-04-03 DotSpace Inc. +space + +// schmidt : 2014-04-03 SALM S.A.S. +schmidt + +// cuisinella : 2014-04-03 SALM S.A.S. +cuisinella + +// samsung : 2014-04-03 SAMSUNG SDS CO., LTD +samsung + +// crs : 2014-04-03 Federated Co operatives Limited +crs + +// doosan : 2014-04-03 Doosan Corporation +doosan + +// press : 2014-04-03 DotPress Inc. +press + +// emerck : 2014-04-03 Merck KGaA +emerck + +// erni : 2014-04-03 ERNI Group Holding AG +erni + +// direct : 2014-04-10 Half Trail, LLC +direct + +// yandex : 2014-04-10 YANDEX, LLC +yandex + +// lotto : 2014-04-10 Afilias Limited +lotto + +// toshiba : 2014-04-10 TOSHIBA Corporation +toshiba + +// bauhaus : 2014-04-17 Werkhaus GmbH +bauhaus + +// host : 2014-04-17 DotHost Inc. +host + +// ltda : 2014-04-17 DOMAIN ROBOT SERVICOS DE HOSPEDAGEM NA INTERNET LTDA +ltda + +// global : 2014-04-17 Dot GLOBAL AS +global + +// abogado : 2014-04-24 Top Level Domain Holdings Limited +abogado + +// place : 2014-04-24 Snow Galley, LLC +place + +// tirol : 2014-04-24 punkt Tirol GmbH +tirol + +// gmx : 2014-04-24 1&1 Mail & Media GmbH +gmx + +// tatar : 2014-04-24 Limited Liability Company "Coordination Center of Regional Domain of Tatarstan Republic" +tatar + +// scholarships : 2014-04-24 Scholarships.com, LLC +scholarships + +// eurovision : 2014-04-24 European Broadcasting Union (EBU) +eurovision + +// wedding : 2014-04-24 Top Level Domain Holdings Limited +wedding + +// active : 2014-05-01 The Active Network, Inc +active + +// madrid : 2014-05-01 Comunidad de Madrid +madrid + +// youtube : 2014-05-01 Charleston Road Registry Inc. +youtube + +// sharp : 2014-05-01 Sharp Corporation +sharp + +// uol : 2014-05-01 UBN INTERNET LTDA. +uol + +// physio : 2014-05-01 PhysBiz Pty Ltd +physio + +// gmail : 2014-05-01 Charleston Road Registry Inc. +gmail + +// channel : 2014-05-08 Charleston Road Registry Inc. +channel + +// fly : 2014-05-08 Charleston Road Registry Inc. +fly + +// zip : 2014-05-08 Charleston Road Registry Inc. +zip + +// esq : 2014-05-08 Charleston Road Registry Inc. +esq + +// rsvp : 2014-05-08 Charleston Road Registry Inc. +rsvp + +// wales : 2014-05-08 Nominet UK +wales + +// cymru : 2014-05-08 Nominet UK +cymru + +// green : 2014-05-08 Afilias Limited +green + +// lgbt : 2014-05-08 Afilias Limited +lgbt + +// xn--hxt814e : 2014-05-15 Zodiac Libra Limited +网店 + +// cancerresearch : 2014-05-15 Australian Cancer Research Foundation +cancerresearch + +// everbank : 2014-05-15 EverBank +everbank + +// frl : 2014-05-15 FRLregistry B.V. +frl + +// property : 2014-05-22 Uniregistry, Corp. +property + +// forsale : 2014-05-22 Sea Oaks, LLC +forsale + +// seat : 2014-05-22 SEAT, S.A. (Sociedad Unipersonal) +seat + +// deals : 2014-05-22 Sand Sunset, LLC +deals + +// nra : 2014-05-22 NRA Holdings Company, INC. +nra + +// xn--fjq720a : 2014-05-22 Will Bloom, LLC +娱乐 + +// realtor : 2014-05-29 Real Estate Domains LLC +realtor + +// bnpparibas : 2014-05-29 BNP Paribas +bnpparibas + +// melbourne : 2014-05-29 The Crown in right of the State of Victoria, represented by its Department of State Development, Business and Innovation +melbourne + +// hosting : 2014-05-29 Uniregistry, Corp. +hosting + +// yoga : 2014-05-29 Top Level Domain Holdings Limited +yoga + +// city : 2014-05-29 Snow Sky, LLC +city + +// bond : 2014-06-05 Bond University Limited +bond + +// click : 2014-06-05 Uniregistry, Corp. +click + +// cern : 2014-06-05 European Organization for Nuclear Research ("CERN") +cern // ===END ICANN DOMAINS=== // ===BEGIN PRIVATE DOMAINS=== @@ -7663,20 +7996,22 @@ reise cloudfront.net // Amazon Elastic Compute Cloud: https://aws.amazon.com/ec2/ -// Submitted by Osman Surkatty 2013-04-02 -compute.amazonaws.com -us-east-1.amazonaws.com -compute-1.amazonaws.com -z-1.compute-1.amazonaws.com -z-2.compute-1.amazonaws.com +// Submitted by Osman Surkatty 2014-05-20 ap-northeast-1.compute.amazonaws.com ap-southeast-1.compute.amazonaws.com ap-southeast-2.compute.amazonaws.com +cn-north-1.compute.amazonaws.cn +compute.amazonaws.cn +compute.amazonaws.com +compute-1.amazonaws.com eu-west-1.compute.amazonaws.com sa-east-1.compute.amazonaws.com +us-east-1.amazonaws.com us-gov-west-1.compute.amazonaws.com us-west-1.compute.amazonaws.com us-west-2.compute.amazonaws.com +z-1.compute-1.amazonaws.com +z-2.compute-1.amazonaws.com // Amazon Elastic Beanstalk : https://aws.amazon.com/elasticbeanstalk/ // Submitted by Adam Stein 2013-04-02 @@ -7719,6 +8054,7 @@ ar.com br.com cn.com com.de +com.se de.com eu.com gb.com @@ -8074,6 +8410,10 @@ global.ssl.fastly.net a.prod.fastly.net global.prod.fastly.net +// Firebase, Inc. +// Submitted by Chris Raynor 2014-01-21 +firebaseapp.com + // GitHub, Inc. // Submitted by Ben Toews 2014-02-06 github.io @@ -8153,10 +8493,18 @@ azurewebsites.net azure-mobile.net cloudapp.net +// NFSN, Inc. : https://www.NearlyFreeSpeech.NET/ +// Submitted by Jeff Wheelhouse 2014-02-02 +nfshost.com + // NYC.mn : http://www.information.nyc.mn // Submitted by Matthew Brown 2013-03-11 nyc.mn +// One Fold Media : http://www.onefoldmedia.com/ +// Submitted by Eddie Jones 2014-06-10 +nid.io + // Opera Software, A.S.A. // Submitted by Yngve Pettersen 2009-11-26 operaunite.com From 1d13ab1d18969eda46ba89ddb400d0caab258f0a Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Thu, 19 Jun 2014 12:06:54 +0200 Subject: [PATCH 24/31] removed redundant code from psl2c.c --- src/psl.c | 12 +- src/psl2c.c | 341 ++-------------------------------------------------- 2 files changed, 19 insertions(+), 334 deletions(-) diff --git a/src/psl.c b/src/psl.c index a1a7d3f..6f5947b 100644 --- a/src/psl.c +++ b/src/psl.c @@ -106,7 +106,17 @@ struct _psl_ctx_st { }; /* include the PSL data compiled by 'psl2c' */ -#include "suffixes.c" +#ifndef _LIBPSL_INCLUDED_BY_PSL2C +# include "suffixes.c" +#else + /* if this source file is included by psl2c.c, provide empty builtin data */ + static _psl_entry_t suffixes[0]; + static _psl_entry_t suffix_exceptions[0]; + static time_t _psl_file_time; + static time_t _psl_compile_time; + static const char _psl_sha1_checksum[] = ""; + static const char _psl_filename[] = ""; +#endif /* references to this PSL will result in lookups to built-in data */ static const psl_ctx_t diff --git a/src/psl2c.c b/src/psl2c.c index 8b1ec95..a270307 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -28,6 +28,8 @@ * */ + + #if HAVE_CONFIG_H # include #endif @@ -39,235 +41,14 @@ #include #include -/* -#ifdef WITH_LIBIDN2 -# include -#endif -*/ - -#ifdef WITH_LIBICU -# include -# include -# include -#endif - #ifdef WITH_BUILTIN #include -typedef struct { - char - label_buf[48]; - const char * - label; - unsigned short - length; - unsigned char - nlabels, /* number of labels */ - wildcard; /* this is a wildcard rule (e.g. *.sapporo.jp) */ -} _psl_entry_t; - -/* stripped down version libmget vector routines */ -typedef struct { - int - (*cmp)(const _psl_entry_t *, const _psl_entry_t *); /* comparison function */ - _psl_entry_t - **entry; /* pointer to array of pointers to elements */ - int - max, /* allocated elements */ - cur; /* number of elements in use */ -} _psl_vector_t; - -struct _psl_ctx_st { - _psl_vector_t - *suffixes, - *suffix_exceptions; -}; - -static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t *, const _psl_entry_t *)) -{ - _psl_vector_t *v; - - if (!(v = calloc(1, sizeof(_psl_vector_t)))) - return NULL; - - if (!(v->entry = malloc(max * sizeof(_psl_entry_t *)))) { - free(v); - return NULL; - } - - v->max = max; - v->cmp = cmp; - return v; -} - -static void _vector_free(_psl_vector_t **v) -{ - if (v && *v) { - if ((*v)->entry) { - int it; - - for (it = 0; it < (*v)->cur; it++) - free((*v)->entry[it]); - - free((*v)->entry); - } - free(*v); - } -} - -static _psl_entry_t *_vector_get(const _psl_vector_t *v, int pos) -{ - if (pos < 0 || !v || pos >= v->cur) return NULL; - - return v->entry[pos]; -} - -static int _vector_add(_psl_vector_t *v, const _psl_entry_t *elem) -{ - if (v) { - void *elemp; - - elemp = malloc(sizeof(_psl_entry_t)); - memcpy(elemp, elem, sizeof(_psl_entry_t)); - - if (v->max == v->cur) - v->entry = realloc(v->entry, (v->max *= 2) * sizeof(_psl_entry_t *)); - - v->entry[v->cur++] = elemp; - return v->cur - 1; - } - - return -1; -} - -static int _compare(const void *p1, const void *p2, void *v) -{ - return ((_psl_vector_t *)v)->cmp(*((_psl_entry_t **)p1), *((_psl_entry_t **)p2)); -} - -static void _vector_sort(_psl_vector_t *v) -{ - if (v && v->cmp) - qsort_r(v->entry, v->cur, sizeof(_psl_vector_t *), _compare, v); -} - -/* by this kind of sorting, we can easily see if a domain matches or not (match = supercookie !) */ - -static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2) -{ - int n; - - if ((n = s2->nlabels - s1->nlabels)) - return n; /* most labels first */ - - if ((n = s1->length - s2->length)) - return n; /* shorter rules first */ - - return strcmp(s1->label, s2->label); -} - -static void _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length) -{ - const char *src; - char *dst; - - suffix->label = suffix->label_buf; - - if (length >= sizeof(suffix->label_buf) - 1) { - suffix->nlabels = 0; - fprintf(stderr, "Suffix rule too long (%d, ignored): %s\n", (int) length, rule); - return; - } - - if (*rule == '*') { - if (*++rule != '.') { - suffix->nlabels = 0; - fprintf(stderr, "Unsupported kind of rule (ignored): %s\n", rule); - return; - } - rule++; - suffix->wildcard = 1; - suffix->length = (unsigned char)length - 2; - } else { - suffix->wildcard = 0; - suffix->length = (unsigned char)length; - } - - suffix->nlabels = 1; - - for (dst = suffix->label_buf, src = rule; *src;) { - if (*src == '.') - suffix->nlabels++; - *dst++ = tolower(*src++); - } - *dst = 0; -} - -psl_ctx_t *psl_load_file(const char *fname) -{ - FILE *fp; - psl_ctx_t *psl = NULL; - - if ((fp = fopen(fname, "r"))) { - psl = psl_load_fp(fp); - fclose(fp); - } - - return psl; -} - -psl_ctx_t *psl_load_fp(FILE *fp) -{ - psl_ctx_t *psl; - _psl_entry_t suffix, *suffixp; - int nsuffixes = 0; - char buf[256], *linep, *p; - - if (!fp) - return NULL; - - if (!(psl = calloc(1, sizeof(psl_ctx_t)))) - return NULL; - - /* - * as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions. - * as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions. - */ - psl->suffixes = _vector_alloc(8*1024, _suffix_compare); - psl->suffix_exceptions = _vector_alloc(64, _suffix_compare); - - while ((linep = fgets(buf, sizeof(buf), fp))) { - while (isspace(*linep)) linep++; /* ignore leading whitespace */ - if (!*linep) continue; /* skip empty lines */ - - if (*linep == '/' && linep[1] == '/') - continue; /* skip comments */ - - /* parse suffix rule */ - for (p = linep; *linep && !isspace(*linep);) linep++; - *linep = 0; - - if (*p == '!') { - /* add to exceptions */ - _suffix_init(&suffix, p + 1, linep - p - 1); - suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix)); - } else { - _suffix_init(&suffix, p, linep - p); - suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix)); - } - - if (suffixp) - suffixp->label = suffixp->label_buf; /* set label to changed address */ - - nsuffixes++;; - } - - _vector_sort(psl->suffix_exceptions); - _vector_sort(psl->suffixes); - - return psl; -} +/* here we include the library source code to have access to internal functions and data structures */ +#define _LIBPSL_INCLUDED_BY_PSL2C +# include "psl.c" +#undef _LIBPSL_INCLUDED_BY_PSL2C static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *varname) { @@ -283,7 +64,7 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char * fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version); } while (0); #else - fprintf(fpout, "/* automatically generated by psl2c (punycode generated with idn2) */\n"); + fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n"); #endif fprintf(fpout, "static _psl_entry_t %s[] = {\n", varname); @@ -298,109 +79,6 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char * fprintf(fpout, "};\n"); } -void psl_free(psl_ctx_t *psl) -{ - if (psl) { - _vector_free(&psl->suffixes); - _vector_free(&psl->suffix_exceptions); - free(psl); - } -} - -static int _str_needs_encoding(const char *s) -{ - while (*s > 0) s++; - - return !!*s; -} - -static void _add_punycode_if_needed(_psl_vector_t *v) -{ - int it, n; - - /* do not use 'it < v->cur' since v->cur is changed by _vector_add() ! */ - for (it = 0, n = v->cur; it < n; it++) { - _psl_entry_t *e = _vector_get(v, it); - - if (_str_needs_encoding(e->label_buf)) { - _psl_entry_t suffix, *suffixp; - char lookupname[64] = ""; - - /* the following lines will have GPL3+ license issues */ -/* char *asc = NULL; - int rc; - - if ((rc = idn2_lookup_u8((uint8_t *)e->label_buf, (uint8_t **)&asc, 0)) == IDN2_OK) { - // fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, asc); - _suffix_init(&suffix, asc, strlen(asc)); - suffix.wildcard = e->wildcard; - suffixp = _vector_get(v, _vector_add(v, &suffix)); - suffixp->label = suffixp->e_label_buf; // set label to changed address - } else - fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc)); -*/ - -#ifdef WITH_LIBICU - UIDNA *idna; - UErrorCode status = 0; - - /* IDNA2003 punycode conversion */ - /* destLen = uidna_toASCII(e->label_buf, (int32_t) strlen(e->label_buf), lookupname, (int32_t) sizeof(lookupname), - UIDNA_DEFAULT, NULL, &status); - */ - - /* IDNA2008 UTS#46 punycode conversion */ - if ((idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status))) { - UChar utf16_dst[64], utf16_src[64]; - int32_t utf16_src_length; - UIDNAInfo info = UIDNA_INFO_INITIALIZER; - - u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, (int32_t) strlen(e->label_buf), &status); - if (U_SUCCESS(status)) { - int32_t dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status); - if (U_SUCCESS(status)) { - u_strToUTF8(lookupname, (int32_t) sizeof(lookupname), NULL, utf16_dst, dst_length, &status); - if (U_SUCCESS(status)) { - if (strcmp(e->label_buf, lookupname)) { - /* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */ - _suffix_init(&suffix, lookupname, strlen(lookupname)); - suffix.wildcard = e->wildcard; - suffixp = _vector_get(v, _vector_add(v, &suffix)); - suffixp->label = suffixp->label_buf; /* set label to changed address */ - } /* else ignore */ - } else - fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); - } else - fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status); - } else - fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); - - uidna_close(idna); - } else - fprintf(stderr, "Failed to get UTS46 IDNA handle\n"); - -#else - /* this is much slower than the libidn2 API but should have no license issues */ - FILE *pp; - char cmd[16 + sizeof(e->label_buf)]; - snprintf(cmd, sizeof(cmd), "idn2 '%s'", e->label_buf); - if ((pp = popen(cmd, "r"))) { - if (fscanf(pp, "%63s", lookupname) >= 1 && strcmp(e->label_buf, lookupname)) { - /* fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, lookupname); */ - _suffix_init(&suffix, lookupname, strlen(lookupname)); - suffix.wildcard = e->wildcard; - suffixp = _vector_get(v, _vector_add(v, &suffix)); - suffixp->label = suffixp->label_buf; /* set label to changed address */ - } - pclose(pp); - } else - fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd); -#endif - } - } - - _vector_sort(v); -} #endif /* WITH_BUILTIN */ int main(int argc, const char **argv) @@ -413,7 +91,7 @@ int main(int argc, const char **argv) if (argc != 3) { fprintf(stderr, "Usage: psl2c \n"); - fprintf(stderr, " is the 'effective_tld_names.dat' (aka Public Suffix List)\n"); + fprintf(stderr, " is the 'effective_tld_names.dat' (aka Public Suffix List), lowercase UTF-8 encoded\n"); fprintf(stderr, " is the the C filename to be generated from \n"); return 1; } @@ -428,9 +106,6 @@ int main(int argc, const char **argv) size_t cmdsize = 16 + strlen(argv[1]); char *cmd = alloca(cmdsize), checksum[64] = ""; - _add_punycode_if_needed(psl->suffixes); - _add_punycode_if_needed(psl->suffix_exceptions); - _print_psl_entries(fpout, psl->suffixes, "suffixes"); _print_psl_entries(fpout, psl->suffix_exceptions, "suffix_exceptions"); From 9f5d6b1e9dd9c275bd0ff4adb49ad2821a141c15 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Thu, 19 Jun 2014 13:15:31 +0200 Subject: [PATCH 25/31] added idn2 punycode generation as fallback for missing libicu --- configure.ac | 7 ++++++- src/psl2c.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 0b0527d..ca49b20 100644 --- a/configure.ac +++ b/configure.ac @@ -81,7 +81,7 @@ AC_ARG_WITH(libicu, [[#include ]], [[u_strToUTF8(NULL, 0, NULL, NULL, 0, NULL);]])], [HAVE_LIBICU=yes; AC_MSG_RESULT([yes]) AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])], - [LIBS=$OLDLIBS; AC_MSG_WARN([no working ICU unicode library was found])]) + [LIBS=$OLDLIBS; AC_MSG_ERROR([no working ICU unicode library was found])]) # AC_SEARCH_LIBS(uidna_close, icuuc, # [HAVE_LIBICU=yes; AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])], @@ -99,6 +99,11 @@ AC_ARG_ENABLE(builtin, ], [ enable_builtin=yes AC_DEFINE([WITH_BUILTIN], [1], [compile PSL data into library]) + AS_IF([test $HAVE_LIBICU != yes], + [ + # Check for idn2 fallback to generate punycode + AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2' as fallback.)) + ]) ]) AM_CONDITIONAL([WITH_BUILTIN], [test $enable_builtin = yes]) diff --git a/src/psl2c.c b/src/psl2c.c index a270307..8ec4a3c 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -65,7 +65,7 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char * } while (0); #else fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n"); -#endif +#endif /* WITH_LIBICU */ fprintf(fpout, "static _psl_entry_t %s[] = {\n", varname); @@ -79,6 +79,48 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char * fprintf(fpout, "};\n"); } +#ifndef WITH_LIBICU +static int _str_needs_encoding(const char *s) +{ + while (*s > 0) s++; + + return !!*s; +} + +static void _add_punycode_if_needed(_psl_vector_t *v) +{ + int it, n; + + /* do not use 'it < v->cur' since v->cur is changed by _vector_add() ! */ + for (it = 0, n = v->cur; it < n; it++) { + _psl_entry_t *e = _vector_get(v, it); + + if (_str_needs_encoding(e->label_buf)) { + _psl_entry_t suffix, *suffixp; + char lookupname[64] = ""; + + /* this is much slower than the libidn2 API but should have no license issues */ + FILE *pp; + char cmd[16 + sizeof(e->label_buf)]; + snprintf(cmd, sizeof(cmd), "idn2 '%s'", e->label_buf); + if ((pp = popen(cmd, "r"))) { + if (fscanf(pp, "%63s", lookupname) >= 1 && strcmp(e->label_buf, lookupname)) { + /* fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, lookupname); */ + _suffix_init(&suffix, lookupname, strlen(lookupname)); + suffix.wildcard = e->wildcard; + suffixp = _vector_get(v, _vector_add(v, &suffix)); + suffixp->label = suffixp->label_buf; /* set label to changed address */ + } + pclose(pp); + } else + fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd); + } + } + + _vector_sort(v); +} +#endif /* ! WITH_LIBICU */ + #endif /* WITH_BUILTIN */ int main(int argc, const char **argv) @@ -106,6 +148,13 @@ int main(int argc, const char **argv) size_t cmdsize = 16 + strlen(argv[1]); char *cmd = alloca(cmdsize), checksum[64] = ""; +#ifndef WITH_LIBICU + /* If libicu is not configured, we still need to have punycode in our built-in data. */ + /* Else the test suite fails. */ + _add_punycode_if_needed(psl->suffixes); + _add_punycode_if_needed(psl->suffix_exceptions); +#endif + _print_psl_entries(fpout, psl->suffixes, "suffixes"); _print_psl_entries(fpout, psl->suffix_exceptions, "suffix_exceptions"); From 1a22cb91fb047eeb98fe8909407830a9f0daef7a Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Thu, 19 Jun 2014 13:17:28 +0200 Subject: [PATCH 26/31] added more build checks --- .travis.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 81538a2..04ac2ab 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,13 @@ compiler: - gcc - clang # Change this to your needs -script: ./autogen.sh && ./configure --enable-gtk-doc && make -j4 && make check -j4 && make distcheck +script: + - ./autogen.sh + - ./configure --enable-gtk-doc && make -j4 && make check -j4 + - ./configure --without-libicu && make clean && make -j4 && make check -j4 + - ./configure --disable-builtin && make clean && make -j4 && make check -j4 + - ./configure --disable-builtin --without-libicu && make clean && make -j4 && make check -j4 + - make distcheck before_install: - apt-cache search libicu | grep icu - sudo apt-get -qq update From d34938703aff9d018e57d6155f2e06985cd4aa34 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Thu, 19 Jun 2014 15:26:00 +0200 Subject: [PATCH 27/31] adjusted .travis.yml --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 04ac2ab..6976291 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,10 +5,11 @@ compiler: # Change this to your needs script: - ./autogen.sh - - ./configure --enable-gtk-doc && make -j4 && make check -j4 + - ./configure && make -j4 && make check -j4 - ./configure --without-libicu && make clean && make -j4 && make check -j4 - ./configure --disable-builtin && make clean && make -j4 && make check -j4 - ./configure --disable-builtin --without-libicu && make clean && make -j4 && make check -j4 + - ./configure --enable-gtk-doc && make -j4 && make check -j4 - make distcheck before_install: - apt-cache search libicu | grep icu From 1c209318964c86ce53b27215cf01358dfffeaa27 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Fri, 20 Jun 2014 12:36:51 +0200 Subject: [PATCH 28/31] introduced defines for error codes --- include/libpsl.h | 8 ++++++++ src/psl.c | 31 ++++++++++++++++--------------- tests/test-registrable-domain.c | 2 +- tools/psl.c | 6 +++--- 4 files changed, 28 insertions(+), 19 deletions(-) diff --git a/include/libpsl.h b/include/libpsl.h index 3b5dd74..716c711 100644 --- a/include/libpsl.h +++ b/include/libpsl.h @@ -38,6 +38,14 @@ extern "C" { #endif +typedef enum { + PSL_SUCCESS = 0, + PSL_ERR_INVALID_ARG = -1, + PSL_ERR_CONVERTER = -2, /* failed to open libicu utf-16 converter */ + PSL_ERR_TO_UTF16 = -3, /* failed to convert to utf-16 */ + PSL_ERR_TO_LOWER = -4, /* failed to convert utf-16 to lowercase */ + PSL_ERR_TO_UTF8 = -5 /* failed to convert utf-16 to utf-8 */ +} psl_error_t; typedef struct _psl_ctx_st psl_ctx_t; diff --git a/src/psl.c b/src/psl.c index 6f5947b..4eea3bc 100644 --- a/src/psl.c +++ b/src/psl.c @@ -852,24 +852,25 @@ int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, * @lower is %NULL on error. * The return value 'lower' must be freed after usage. * - * Returns: 0 on success, negative value on error. - * -1 @str is a %NULL value - * -2 failed to open converter with name @encoding - * -3 failed to convert @str to unicode - * -4 failed to convert unicode to lowercase - * -5 failed to convert unicode to UTF-8 + * Returns: psl_error_t value. + * PSL_SUCCESS: Success + * PSL_ERR_INVALID_ARG: @str is a %NULL value. + * PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding + * PSL_ERR_TO_UTF16: Failed to convert @str to unicode + * PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase + * PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8 * * Since: 0.4 */ -int psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower) +psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower) { - int ret = -1; + int ret = PSL_ERR_INVALID_ARG; if (lower) *lower = NULL; if (!str) - return -1; + return PSL_ERR_INVALID_ARG; /* shortcut to avoid costly conversion */ if (_str_is_ascii(str)) { @@ -883,7 +884,7 @@ int psl_str_to_utf8lower(const char *str, const char *encoding, const char *loca if (isupper(*p)) *p = tolower(*p); } - return 0; + return PSL_SUCCESS; } #ifdef WITH_LIBICU @@ -912,21 +913,21 @@ int psl_str_to_utf8lower(const char *str, const char *encoding, const char *loca if (U_SUCCESS(status)) { if (lower) *lower = strdup(utf8_lower); - ret = 0; + ret = PSL_SUCCESS; } else { - ret = -5; + ret = PSL_ERR_TO_UTF8; /* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */ } } else { - ret = -4; + ret = PSL_ERR_TO_LOWER; /* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */ } } else { - ret = -3; + ret = PSL_ERR_TO_UTF16; /* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */ } } else { - ret = -2; + ret = PSL_ERR_CONVERTER; /* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */ } } while (0); diff --git a/tests/test-registrable-domain.c b/tests/test-registrable-domain.c index d0dda62..8bc06b1 100644 --- a/tests/test-registrable-domain.c +++ b/tests/test-registrable-domain.c @@ -55,7 +55,7 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_ char *lower; /* our test data is fixed to UTF-8 (english), so provide it here */ - if (psl_str_to_utf8lower(domain, "utf-8", "en", &lower) == 0) + if (psl_str_to_utf8lower(domain, "utf-8", "en", &lower) == PSL_SUCCESS) domain = lower; result = psl_registrable_domain(psl, domain); diff --git a/tools/psl.c b/tools/psl.c index 80f0933..976ada6 100644 --- a/tools/psl.c +++ b/tools/psl.c @@ -149,7 +149,7 @@ int main(int argc, const char *const *argv) if (arg >= argv + argc) { char buf[256], *domain, *lower; size_t len; - int rc; + psl_error_t rc; /* read URLs from STDIN */ while (fgets(buf, sizeof(buf), stdin)) { @@ -158,7 +158,7 @@ int main(int argc, const char *const *argv) for (len = strlen(domain); len && isspace(domain[len - 1]); len--); /* skip trailing spaces */ domain[len] = 0; - if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &lower)) != 0) + if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &lower)) != PSL_SUCCESS) fprintf(stderr, "%s: Failed to convert to lowercase UTF-8 (%d)\n", domain, rc); else if (mode == 1) printf("%s: %d (%s)\n", domain, psl_is_public_suffix(psl, lower), lower); @@ -169,7 +169,7 @@ int main(int argc, const char *const *argv) else if (mode == 4) { char *cookie_domain_lower; - if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &cookie_domain_lower)) != 0) + if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &cookie_domain_lower)) != PSL_SUCCESS) fprintf(stderr, "%s: Failed to convert cookie domain '%s' to lowercase UTF-8 (%d)\n", domain, cookie_domain, rc); else printf("%s: %d\n", domain, psl_is_cookie_domain_acceptable(psl, lower, cookie_domain)); From f7f14080888f586afd3840a80eca11fdd256df12 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Fri, 20 Jun 2014 17:04:22 +0200 Subject: [PATCH 29/31] removed possible C89 compilation issue --- src/psl.c | 4 ++-- src/psl2c.c | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/psl.c b/src/psl.c index 4eea3bc..2875c35 100644 --- a/src/psl.c +++ b/src/psl.c @@ -110,8 +110,8 @@ struct _psl_ctx_st { # include "suffixes.c" #else /* if this source file is included by psl2c.c, provide empty builtin data */ - static _psl_entry_t suffixes[0]; - static _psl_entry_t suffix_exceptions[0]; + static _psl_entry_t suffixes[1]; + static _psl_entry_t suffix_exceptions[1]; static time_t _psl_file_time; static time_t _psl_compile_time; static const char _psl_sha1_checksum[] = ""; diff --git a/src/psl2c.c b/src/psl2c.c index 8ec4a3c..daeec83 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -28,8 +28,6 @@ * */ - - #if HAVE_CONFIG_H # include #endif @@ -182,8 +180,8 @@ int main(int argc, const char **argv) psl_free(psl); #else if ((fpout = fopen(argv[2], "w"))) { - fprintf(fpout, "static _psl_entry_t suffixes[0];\n"); - fprintf(fpout, "static _psl_entry_t suffix_exceptions[0];\n"); + fprintf(fpout, "static _psl_entry_t suffixes[1];\n"); + fprintf(fpout, "static _psl_entry_t suffix_exceptions[1];\n"); fprintf(fpout, "static time_t _psl_file_time;\n"); fprintf(fpout, "static time_t _psl_compile_time;\n"); fprintf(fpout, "static const char _psl_sha1_checksum[] = \"\";\n"); From 29a74ebe3afe962cacbc4a599f6d91180ef15ff8 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Fri, 20 Jun 2014 17:12:17 +0200 Subject: [PATCH 30/31] added docs for psl_error_t --- docs/libpsl/libpsl-sections.txt | 1 + include/libpsl.h | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/docs/libpsl/libpsl-sections.txt b/docs/libpsl/libpsl-sections.txt index 5e758e7..4c73e47 100644 --- a/docs/libpsl/libpsl-sections.txt +++ b/docs/libpsl/libpsl-sections.txt @@ -1,6 +1,7 @@
libpsl Public Suffix List functions +psl_error_t psl_ctx_t psl_load_file psl_load_fp diff --git a/include/libpsl.h b/include/libpsl.h index 716c711..b7fe952 100644 --- a/include/libpsl.h +++ b/include/libpsl.h @@ -38,6 +38,19 @@ extern "C" { #endif +/** + * psl_error_t: + * @PSL_SUCCESS: Successful return. + * @PSL_ERR_INVALID_ARG: Invalid argument. + * @PSL_ERR_CONVERTER: Failed to open libicu utf-16 converter + * @PSL_ERR_TO_UTF16: Failed to convert to utf-16. + * @PSL_ERR_TO_LOWER: Failed to convert utf-16 to lowercase. + * @PSL_ERR_TO_UTF8: Failed to convert utf-16 to utf-8. + * + * Return codes for PSL functions. + * Negative return codes mean failure. + * Positive values are reserved for non-error return codes. + */ typedef enum { PSL_SUCCESS = 0, PSL_ERR_INVALID_ARG = -1, From 7354a50c4885ca3240ed5cd04ed36f14622c5385 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Mon, 23 Jun 2014 12:12:54 +0200 Subject: [PATCH 31/31] Release V0.4.0 --- AUTHORS | 4 +++- NEWS | 9 +++++++++ configure.ac | 6 +++--- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/AUTHORS b/AUTHORS index b1bf772..562d9b7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -8,4 +8,6 @@ Please drop me a note if you feel you should have been mentioned here. Tim Ruehsen (Implementation of libpsl) -Daniel Kahn Gillmor (Discussion, Ideas, Organization) +Daniel Kahn Gillmor (Discussion, Ideas, Organization, Code) +Daniel Stenberg (Discussion, Ideas) +Darshit Shah (Patching Wget to work with libpsl) diff --git a/NEWS b/NEWS index 8323009..df6d258 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,14 @@ Copyright (C) 2014 Tim Ruehsen +23.06.2014 Release V0.4.0 + * depend on libicu for punycode, utf-8 and lowercase conversions + * added function psl_str_to_utf8lower() + * fixed locale issues + * introducing psl_error_t for error codes + defines + * removed redundant code from psl2c.c + * updated docs + * psl utility reads from stdin if no argument specified + 10.06.2014 Release V0.3.1 * link psl utility dynamically * fix output of psl_filename() diff --git a/configure.ac b/configure.ac index ca49b20..f89b1e1 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ -AC_INIT([libpsl], [0.3.1], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) +AC_INIT([libpsl], [0.4.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) AC_PREREQ([2.59]) AM_INIT_AUTOMAKE([1.10 -Wall no-define]) @@ -62,8 +62,8 @@ AS_IF([ test "$enable_man" != no ], [ # 3. If the library source code has changed at all since the last update, then increment revision (‘c:r:a’ becomes ‘c:r+1:a’). # 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0. # 5. If any interfaces have been added since the last public release, then increment age. -# 6. If any interfaces have been removed or changed since the last public release, then set age to 0. -AC_SUBST([LIBPSL_SO_VERSION], [1:1:1]) +# 6. If any existing interfaces have been removed or changed since the last public release, then set age to 0. +AC_SUBST([LIBPSL_SO_VERSION], [2:0:2]) AC_SUBST([LIBPSL_VERSION], $VERSION) # Check for libicu