diff --git a/.travis.yml b/.travis.yml index 54965ca..27bcd14 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,5 +5,6 @@ compiler: # Change this to your needs script: ./autogen.sh && ./configure --enable-gtk-doc && make -j4 && make check -j4 && make distcheck before_install: + - apt-cache search libicu | grep icu - sudo apt-get -qq update - - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev + - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu-dev diff --git a/NEWS b/NEWS index 8faee0e..0e6c51e 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,10 @@ Copyright (C) 2014 Tim Ruehsen +05.06.2014 Release V0.3.0 + * added support for libicu in psl2c (IDNA2008 UTS#46) + this needs pkg-config and libicu-dev installed + * added --version to psl utility + 31.05.2014 Release V0.2.5 * added psl_get_version() * removed version from library name diff --git a/README.md b/README.md index d71497e..7bc8fbc 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Libpsl... - finds the shortest private part of a given domain - works with international domains (UTF-8 and IDNA2008 Punycode) - is thread-safe +- handles IDNA2008 UTS#46 (libicu is used by psl2c if installed) Find more information about the Public Suffix List [here](http://publicsuffix.org/). 
diff --git a/configure.ac b/configure.ac index 705c5d5..970a0b3 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ -AC_INIT([libpsl], [0.2.5], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) +AC_INIT([libpsl], [0.3.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) AC_PREREQ([2.59]) AM_INIT_AUTOMAKE([1.10 -Wall no-define]) @@ -66,9 +66,6 @@ AS_IF([ test "$enable_man" != no ], [ AC_SUBST([LIBPSL_SO_VERSION], [1:0:1]) AC_SUBST([LIBPSL_VERSION], $VERSION) -# Check for idn2 -AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2'.)) - # Check for enable/disable builtin PSL data AC_ARG_ENABLE(builtin, AS_HELP_STRING([--disable-builtin], [do not compile PSL data into library]), @@ -77,6 +74,10 @@ AC_ARG_ENABLE(builtin, ], [ enable_builtin=yes AC_DEFINE([WITH_BUILTIN], [1], [compile PSL data into library]) + + PKG_CHECK_MODULES(LIBICU, [icu-uc], + [AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])], + [AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2'.))]) ]) AM_CONDITIONAL([WITH_BUILTIN], [test $enable_builtin = yes]) diff --git a/src/Makefile.am b/src/Makefile.am index 93010e4..0fe1ec7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -13,8 +13,8 @@ libpsl_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION) noinst_PROGRAMS = psl2c psl2c_SOURCES = psl2c.c -psl2c_CPPFLAGS = -I$(top_srcdir)/include -D _GNU_SOURCE -#psl2c_LDADD = -lidn2 +psl2c_CPPFLAGS = -I$(top_srcdir)/include -D _GNU_SOURCE $(LIBICU_CFLAGS) +psl2c_LDADD = $(LIBICU_LIBS) # Build rule for suffix.c # PSL_FILE can be set by ./configure --with-psl-file=[PATH] diff --git a/src/psl2c.c b/src/psl2c.c index c3364fa..a6b5b6c 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -45,6 +45,12 @@ #endif */ +#ifdef WITH_LIBICU +# include +# include +# include +#endif + #ifdef WITH_BUILTIN #include @@ -267,7 +273,19 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t 
*v, const char * { int it; - fprintf(fpout, "/* automatically generated by psl2c */\n"); +#ifdef WITH_LIBICU + do { + UVersionInfo version_info; + char version[U_MAX_VERSION_STRING_LENGTH]; + + u_getVersion(version_info); + u_versionToString(version_info, version); + fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version); + } while (0); +#else + fprintf(fpout, "/* automatically generated by psl2c (punycode generated with idn2) */\n"); +#endif + fprintf(fpout, "static _psl_entry_t %s[] = {\n", varname); for (it = 0; it < v->cur; it++) { @@ -306,6 +324,7 @@ static void _add_punycode_if_needed(_psl_vector_t *v) if (_str_needs_encoding(e->label_buf)) { _psl_entry_t suffix, *suffixp; + char lookupname[64] = ""; /* the following lines will have GPL3+ license issues */ /* char *asc = NULL; @@ -321,9 +340,49 @@ static void _add_punycode_if_needed(_psl_vector_t *v) fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc)); */ +#ifdef WITH_LIBICU + UIDNA *idna; + UErrorCode status = 0; + + /* IDNA2003 punycode conversion */ + /* destLen = uidna_toASCII(e->label_buf, (int32_t) strlen(e->label_buf), lookupname, (int32_t) sizeof(lookupname), + UIDNA_DEFAULT, NULL, &status); + */ + + /* IDNA2008 UTS#46 punycode conversion */ + if ((idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status))) { + UChar utf16_dst[64], utf16_src[64]; + int32_t utf16_src_length; + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + + u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, (int32_t) strlen(e->label_buf), &status); + if (U_SUCCESS(status)) { + int32_t dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status); + if (U_SUCCESS(status)) { + u_strToUTF8(lookupname, (int32_t) sizeof(lookupname), NULL, utf16_dst, dst_length, &status); + if (U_SUCCESS(status)) { + if (strcmp(e->label_buf, lookupname)) { + /* 
fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */ + _suffix_init(&suffix, lookupname, strlen(lookupname)); + suffix.wildcard = e->wildcard; + suffixp = _vector_get(v, _vector_add(v, &suffix)); + suffixp->label = suffixp->label_buf; /* set label to changed address */ + } /* else ignore */ + } else + fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); + } else + fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status); + } else + fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); + + uidna_close(idna); + } else + fprintf(stderr, "Failed to get UTS46 IDNA handle\n"); + +#else /* this is much slower than the libidn2 API but should have no license issues */ FILE *pp; - char cmd[16 + sizeof(e->label_buf)], lookupname[64] = ""; + char cmd[16 + sizeof(e->label_buf)]; snprintf(cmd, sizeof(cmd), "idn2 '%s'", e->label_buf); if ((pp = popen(cmd, "r"))) { if (fscanf(pp, "%63s", lookupname) >= 1 && strcmp(e->label_buf, lookupname)) { @@ -336,6 +395,7 @@ static void _add_punycode_if_needed(_psl_vector_t *v) pclose(pp); } else fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd); +#endif } } diff --git a/tools/psl.c b/tools/psl.c index 841340c..9d684e6 100644 --- a/tools/psl.c +++ b/tools/psl.c @@ -42,6 +42,7 @@ static void usage(int err) fprintf(stderr, "Usage: psl [options] \n"); fprintf(stderr, "\n"); fprintf(stderr, "Options:\n"); + fprintf(stderr, " --version show library version information\n"); fprintf(stderr, " --use-builtin-data use the builtin PSL data. [default]\n"); fprintf(stderr, " --load-psl-file load PSL data from file.\n"); fprintf(stderr, " --is-public-suffix check if domains are public suffixes or not. 
[default]\n"); @@ -49,6 +50,7 @@ static void usage(int err) fprintf(stderr, " check if cookie-domain is acceptable for domains.\n"); fprintf(stderr, " --print-unreg-domain print the longest publix suffix part\n"); fprintf(stderr, " --print-reg-domain print the shortest private suffix part\n"); + fprintf(stderr, " --print-info print info about library builtin data\n"); fprintf(stderr, "\n"); exit(err); @@ -107,6 +109,14 @@ int main(int argc, const char *const *argv) else if (!strcmp(*arg, "--help")) { usage(0); } + else if (!strcmp(*arg, "--version")) { + printf("psl %s\n", PACKAGE_VERSION); + printf("libpsl %s\n", psl_get_version()); + printf("\n"); + printf("Copyright (C) 2014 Tim Ruehsen\n"); + printf("License: MIT\n"); + exit(0); + } else if (!strcmp(*arg, "--")) { arg++; break;