From 58daea97ce271aeaa47d913d35d8904a6277a378 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 4 Jun 2014 13:20:34 +0200 Subject: [PATCH] added IDNA2008 UTS#46 via libicu --- .travis.yml | 3 ++- README.md | 1 + configure.ac | 13 ++++++++++--- src/psl2c.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 60 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 54965ca..27bcd14 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,5 +5,6 @@ compiler: # Change this to your needs script: ./autogen.sh && ./configure --enable-gtk-doc && make -j4 && make check -j4 && make distcheck before_install: + - apt-cache search libicu | grep icu - sudo apt-get -qq update - - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev + - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu-dev diff --git a/README.md b/README.md index d71497e..6bb6711 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Libpsl... - finds the shortest private part of a given domain - works with international domains (UTF-8 and IDNA2008 Punycode) - is thread-safe +- handles IDNA2008 UTS#46 (library has to be compiled with libicu) Find more information about the Publix Suffix List [here](http://publicsuffix.org/). diff --git a/configure.ac b/configure.ac index 705c5d5..ffd406a 100644 --- a/configure.ac +++ b/configure.ac @@ -66,9 +66,6 @@ AS_IF([ test "$enable_man" != no ], [ AC_SUBST([LIBPSL_SO_VERSION], [1:0:1]) AC_SUBST([LIBPSL_VERSION], $VERSION) -# Check for idn2 -AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2'.)) - # Check for enable/disable builtin PSL data AC_ARG_ENABLE(builtin, AS_HELP_STRING([--disable-builtin], [do not compile PSL data into library]), @@ -80,6 +77,16 @@ AC_ARG_ENABLE(builtin, ]) AM_CONDITIONAL([WITH_BUILTIN], [test $enable_builtin = yes]) +AC_ARG_WITH(icu, AS_HELP_STRING([--without-icu], [disable ICU punycode conversion]), with_icu=$withval, with_icu=yes) +if test $with_icu != "no" +then + AC_CHECK_LIB(icuuc, uidna_openUTS46, [with_icu=yes; AC_SUBST(ICU_LIBS, "-licu") AC_DEFINE([WITH_LIBICU], [1], [Use libicu])], [with_icu=no; AC_MSG_WARN(*** LIBICU was not found. Falling back to idn2.)]) +fi +AM_CONDITIONAL([WITH_LIBICU], [test $with_icu = "yes"]) + +# Check for idn2 +AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2'.)) + # Check for valgrind ac_enable_valgrind=no AC_ARG_ENABLE(valgrind-tests, diff --git a/src/psl2c.c b/src/psl2c.c index c3364fa..8bcad50 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -45,6 +45,11 @@ #endif */ +#ifdef WITH_LIBICU +# include +# include +#endif + #ifdef WITH_BUILTIN #include @@ -267,7 +272,19 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char * { int it; - fprintf(fpout, "/* automatically generated by psl2c */\n"); +#ifdef WITH_LIBICU + do { + UVersionInfo version_info; + char version[U_MAX_VERSION_STRING_LENGTH]; + + u_getVersion(version_info); + u_versionToString(version_info, version); + fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version); + } while (0); +#else + fprintf(fpout, "/* automatically generated by psl2c (punycode generated with idn2) */\n"); +#endif + fprintf(fpout, "static _psl_entry_t %s[] = {\n", varname); for (it = 0; it < v->cur; it++) { @@ -306,6 +323,7 @@ static void _add_punycode_if_needed(_psl_vector_t *v) if (_str_needs_encoding(e->label_buf)) { _psl_entry_t suffix, *suffixp; + char lookupname[64] = ""; /* the following lines will have GPL3+ license issues */ /* char *asc = NULL; @@ -321,9 +339,35 @@ static void _add_punycode_if_needed(_psl_vector_t *v) fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc)); */ +#ifdef WITH_LIBICU + UIDNA *idna; + UErrorCode status = 0; + + /* IDNA2003 punycode conversion */ + /* destLen = uidna_toASCII(e->label_buf, (int32_t) strlen(e->label_buf), lookupname, (int32_t) sizeof(lookupname), + UIDNA_DEFAULT, NULL, &status); + */ + + /* IDNA2008 UTS#46 punycode conversion */ + if ((idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status))) { + uidna_nameToASCII(idna, (UChar *) e->label_buf, (int32_t) strlen(e->label_buf), + (UChar *) lookupname, (int32_t) sizeof(lookupname), NULL, &status); + uidna_close(idna); + } + + if (U_FAILURE(status)) { + fprintf(stderr, "Failed to convert '%s' to ASCII\n", e->label_buf); + } else if (strcmp(e->label_buf, lookupname)) { + /* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */ + _suffix_init(&suffix, lookupname, strlen(lookupname)); + suffix.wildcard = e->wildcard; + suffixp = _vector_get(v, _vector_add(v, &suffix)); + suffixp->label = suffixp->label_buf; /* set label to changed address */ + } /* else ignore */ +#else /* this is much slower than the libidn2 API but should have no license issues */ FILE *pp; - char cmd[16 + sizeof(e->label_buf)], lookupname[64] = ""; + char cmd[16 + sizeof(e->label_buf)]; snprintf(cmd, sizeof(cmd), "idn2 '%s'", e->label_buf); if ((pp = popen(cmd, "r"))) { if (fscanf(pp, "%63s", lookupname) >= 1 && strcmp(e->label_buf, lookupname)) { @@ -336,6 +380,7 @@ static void _add_punycode_if_needed(_psl_vector_t *v) pclose(pp); } else fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd); +#endif } }