From 8d9e899039a376b22c0bad58f13d49cfcb21c4f7 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Sun, 1 Jun 2014 12:01:47 +0200 Subject: [PATCH 1/5] added --version to psl utility --- tools/psl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/psl.c b/tools/psl.c index 841340c..9d684e6 100644 --- a/tools/psl.c +++ b/tools/psl.c @@ -42,6 +42,7 @@ static void usage(int err) fprintf(stderr, "Usage: psl [options] \n"); fprintf(stderr, "\n"); fprintf(stderr, "Options:\n"); + fprintf(stderr, " --version show library version information\n"); fprintf(stderr, " --use-builtin-data use the builtin PSL data. [default]\n"); fprintf(stderr, " --load-psl-file load PSL data from file.\n"); fprintf(stderr, " --is-public-suffix check if domains are public suffixes or not. [default]\n"); @@ -49,6 +50,7 @@ static void usage(int err) fprintf(stderr, " check if cookie-domain is acceptable for domains.\n"); fprintf(stderr, " --print-unreg-domain print the longest publix suffix part\n"); fprintf(stderr, " --print-reg-domain print the shortest private suffix part\n"); + fprintf(stderr, " --print-info print info about library builtin data\n"); fprintf(stderr, "\n"); exit(err); @@ -107,6 +109,14 @@ int main(int argc, const char *const *argv) else if (!strcmp(*arg, "--help")) { usage(0); } + else if (!strcmp(*arg, "--version")) { + printf("psl %s\n", PACKAGE_VERSION); + printf("libpsl %s\n", psl_get_version()); + printf("\n"); + printf("Copyright (C) 2014 Tim Ruehsen\n"); + printf("License: MIT\n"); + exit(0); + } else if (!strcmp(*arg, "--")) { arg++; break; From 58daea97ce271aeaa47d913d35d8904a6277a378 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Wed, 4 Jun 2014 13:20:34 +0200 Subject: [PATCH 2/5] added IDNA2008 UTS#46 via libicu --- .travis.yml | 3 ++- README.md | 1 + configure.ac | 13 ++++++++++--- src/psl2c.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 60 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 
54965ca..27bcd14 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,5 +5,6 @@ compiler: # Change this to your needs script: ./autogen.sh && ./configure --enable-gtk-doc && make -j4 && make check -j4 && make distcheck before_install: + - apt-cache search libicu | grep icu - sudo apt-get -qq update - - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev + - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu-dev diff --git a/README.md b/README.md index d71497e..6bb6711 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Libpsl... - finds the shortest private part of a given domain - works with international domains (UTF-8 and IDNA2008 Punycode) - is thread-safe +- handles IDNA2008 UTS#46 (library has to be compiled with libicu) Find more information about the Publix Suffix List [here](http://publicsuffix.org/). diff --git a/configure.ac b/configure.ac index 705c5d5..ffd406a 100644 --- a/configure.ac +++ b/configure.ac @@ -66,9 +66,6 @@ AS_IF([ test "$enable_man" != no ], [ AC_SUBST([LIBPSL_SO_VERSION], [1:0:1]) AC_SUBST([LIBPSL_VERSION], $VERSION) -# Check for idn2 -AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2'.)) - # Check for enable/disable builtin PSL data AC_ARG_ENABLE(builtin, AS_HELP_STRING([--disable-builtin], [do not compile PSL data into library]), @@ -80,6 +77,16 @@ AC_ARG_ENABLE(builtin, ]) AM_CONDITIONAL([WITH_BUILTIN], [test $enable_builtin = yes]) +AC_ARG_WITH(icu, AS_HELP_STRING([--without-icu], [disable ICU punycode conversion]), with_icu=$withval, with_icu=yes) +if test $with_icu != "no" +then + AC_CHECK_LIB(icuuc, uidna_openUTS46, [with_icu=yes; AC_SUBST(ICU_LIBS, "-licu") AC_DEFINE([WITH_LIBICU], [1], [Use libicu])], [with_icu=no; AC_MSG_WARN(*** LIBICU was not found. 
Falling back to idn2.)]) +fi +AM_CONDITIONAL([WITH_LIBICU], [test $with_icu = "yes"]) + +# Check for idn2 +AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2'.)) + # Check for valgrind ac_enable_valgrind=no AC_ARG_ENABLE(valgrind-tests, diff --git a/src/psl2c.c b/src/psl2c.c index c3364fa..8bcad50 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -45,6 +45,11 @@ #endif */ +#ifdef WITH_LIBICU +# include <unicode/uversion.h> +# include <unicode/uidna.h> +#endif + #ifdef WITH_BUILTIN #include @@ -267,7 +272,19 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char * { int it; - fprintf(fpout, "/* automatically generated by psl2c */\n"); +#ifdef WITH_LIBICU + do { + UVersionInfo version_info; + char version[U_MAX_VERSION_STRING_LENGTH]; + + u_getVersion(version_info); + u_versionToString(version_info, version); + fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version); + } while (0); +#else + fprintf(fpout, "/* automatically generated by psl2c (punycode generated with idn2) */\n"); +#endif + fprintf(fpout, "static _psl_entry_t %s[] = {\n", varname); for (it = 0; it < v->cur; it++) { @@ -306,6 +323,7 @@ static void _add_punycode_if_needed(_psl_vector_t *v) if (_str_needs_encoding(e->label_buf)) { _psl_entry_t suffix, *suffixp; + char lookupname[64] = ""; /* the following lines will have GPL3+ license issues */ /* char *asc = NULL; @@ -321,9 +339,35 @@ static void _add_punycode_if_needed(_psl_vector_t *v) fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc)); */ +#ifdef WITH_LIBICU + UIDNA *idna; + UErrorCode status = 0; + + /* IDNA2003 punycode conversion */ + /* destLen = uidna_toASCII(e->label_buf, (int32_t) strlen(e->label_buf), lookupname, (int32_t) sizeof(lookupname), + UIDNA_DEFAULT, NULL, &status); + */ + + /* IDNA2008 UTS#46 punycode conversion */ + if ((idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status))) { + uidna_nameToASCII(idna, (UChar *) e->label_buf, (int32_t) 
strlen(e->label_buf), + (UChar *) lookupname, (int32_t) sizeof(lookupname), NULL, &status); + uidna_close(idna); + } + + if (U_FAILURE(status)) { + fprintf(stderr, "Failed to convert '%s' to ASCII\n", e->label_buf); + } else if (strcmp(e->label_buf, lookupname)) { + /* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */ + _suffix_init(&suffix, lookupname, strlen(lookupname)); + suffix.wildcard = e->wildcard; + suffixp = _vector_get(v, _vector_add(v, &suffix)); + suffixp->label = suffixp->label_buf; /* set label to changed address */ + } /* else ignore */ +#else /* this is much slower than the libidn2 API but should have no license issues */ FILE *pp; - char cmd[16 + sizeof(e->label_buf)], lookupname[64] = ""; + char cmd[16 + sizeof(e->label_buf)]; snprintf(cmd, sizeof(cmd), "idn2 '%s'", e->label_buf); if ((pp = popen(cmd, "r"))) { if (fscanf(pp, "%63s", lookupname) >= 1 && strcmp(e->label_buf, lookupname)) { @@ -336,6 +380,7 @@ static void _add_punycode_if_needed(_psl_vector_t *v) pclose(pp); } else fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd); +#endif } } From 79cd551b17622e694b053a21c5d722263ee21370 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Thu, 5 Jun 2014 11:39:28 +0200 Subject: [PATCH 3/5] fixed libicu implementation, use pkg-config for libicu detection --- configure.ac | 14 ++++---------- src/Makefile.am | 4 ++-- src/psl2c.c | 44 ++++++++++++++++++++++++++++++-------------- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/configure.ac b/configure.ac index ffd406a..52da77c 100644 --- a/configure.ac +++ b/configure.ac @@ -74,19 +74,13 @@ AC_ARG_ENABLE(builtin, ], [ enable_builtin=yes AC_DEFINE([WITH_BUILTIN], [1], [compile PSL data into library]) + + PKG_CHECK_MODULES(LIBICU, [icu-uc], + [AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])], + [AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2'.))]) ]) AM_CONDITIONAL([WITH_BUILTIN], [test 
$enable_builtin = yes]) -AC_ARG_WITH(icu, AS_HELP_STRING([--without-icu], [disable ICU punycode conversion]), with_icu=$withval, with_icu=yes) -if test $with_icu != "no" -then - AC_CHECK_LIB(icuuc, uidna_openUTS46, [with_icu=yes; AC_SUBST(ICU_LIBS, "-licu") AC_DEFINE([WITH_LIBICU], [1], [Use libicu])], [with_icu=no; AC_MSG_WARN(*** LIBICU was not found. Falling back to idn2.)]) -fi -AM_CONDITIONAL([WITH_LIBICU], [test $with_icu = "yes"]) - -# Check for idn2 -AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2'.)) - # Check for valgrind ac_enable_valgrind=no AC_ARG_ENABLE(valgrind-tests, diff --git a/src/Makefile.am b/src/Makefile.am index 93010e4..0fe1ec7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -13,8 +13,8 @@ libpsl_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION) noinst_PROGRAMS = psl2c psl2c_SOURCES = psl2c.c -psl2c_CPPFLAGS = -I$(top_srcdir)/include -D _GNU_SOURCE -#psl2c_LDADD = -lidn2 +psl2c_CPPFLAGS = -I$(top_srcdir)/include -D _GNU_SOURCE $(LIBICU_CFLAGS) +psl2c_LDADD = $(LIBICU_LIBS) # Build rule for suffix.c # PSL_FILE can be set by ./configure --with-psl-file=[PATH] diff --git a/src/psl2c.c b/src/psl2c.c index 8bcad50..d0d9f0d 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -47,6 +47,7 @@ #ifdef WITH_LIBICU # include <unicode/uversion.h> +# include <unicode/ustring.h> # include <unicode/uidna.h> #endif @@ -349,21 +350,36 @@ static void _add_punycode_if_needed(_psl_vector_t *v) */ /* IDNA2008 UTS#46 punycode conversion */ - if ((idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status))) { - uidna_nameToASCII(idna, (UChar *) e->label_buf, (int32_t) strlen(e->label_buf), - (UChar *) lookupname, (int32_t) sizeof(lookupname), NULL, &status); - uidna_close(idna); - } +// if ((idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status))) { + if ((idna = uidna_openUTS46(UIDNA_DEFAULT, &status))) { + UChar utf16_dst[64], utf16_src[64]; + int32_t utf16_src_length; + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + + u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, 
e->label_buf, (int32_t) strlen(e->label_buf), &status); + if (U_SUCCESS(status)) { + int32_t dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status); + if (U_SUCCESS(status)) { + u_strToUTF8(lookupname, (int32_t) sizeof(lookupname), NULL, utf16_dst, dst_length, &status); + if (U_SUCCESS(status)) { + if (strcmp(e->label_buf, lookupname)) { + /* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */ + _suffix_init(&suffix, lookupname, strlen(lookupname)); + suffix.wildcard = e->wildcard; + suffixp = _vector_get(v, _vector_add(v, &suffix)); + suffixp->label = suffixp->label_buf; /* set label to changed address */ + } // else ignore + } else + fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); + } else + fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status); + } else + fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); + + uidna_close(idna); + } else + fprintf(stderr, "Failed to get UTS46 IDNA handle\n"); - if (U_FAILURE(status)) { - fprintf(stderr, "Failed to convert '%s' to ASCII\n", e->label_buf); - } else if (strcmp(e->label_buf, lookupname)) { - /* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */ - _suffix_init(&suffix, lookupname, strlen(lookupname)); - suffix.wildcard = e->wildcard; - suffixp = _vector_get(v, _vector_add(v, &suffix)); - suffixp->label = suffixp->label_buf; /* set label to changed address */ - } /* else ignore */ #else /* this is much slower than the libidn2 API but should have no license issues */ FILE *pp; From 7621dce71d4f4103771b4400661252ce6333ee05 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Thu, 5 Jun 2014 11:53:29 +0200 Subject: [PATCH 4/5] fixed C89 comment incompatibility --- src/psl2c.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/psl2c.c b/src/psl2c.c index d0d9f0d..a6b5b6c 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -350,8 
+350,7 @@ static void _add_punycode_if_needed(_psl_vector_t *v) */ /* IDNA2008 UTS#46 punycode conversion */ -// if ((idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status))) { - if ((idna = uidna_openUTS46(UIDNA_DEFAULT, &status))) { + if ((idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status))) { UChar utf16_dst[64], utf16_src[64]; int32_t utf16_src_length; UIDNAInfo info = UIDNA_INFO_INITIALIZER; @@ -368,7 +367,7 @@ static void _add_punycode_if_needed(_psl_vector_t *v) suffix.wildcard = e->wildcard; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ - } // else ignore + } /* else ignore */ } else fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); } else From b67ef20c827187bd128a854b135c90352fe499e6 Mon Sep 17 00:00:00 2001 From: Tim Ruehsen Date: Thu, 5 Jun 2014 16:29:20 +0200 Subject: [PATCH 5/5] Release V0.3.0 --- NEWS | 5 +++++ README.md | 2 +- configure.ac | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 8faee0e..0e6c51e 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,10 @@ Copyright (C) 2014 Tim Ruehsen +05.06.2014 Release V0.3.0 + * added support for libicu in psl2c (IDNA2008 UTS#46) + this needs pkg-config and libicu-dev installed + * added --version to psl utility + 31.05.2014 Release V0.2.5 * added psl_get_version() * removed version from library name diff --git a/README.md b/README.md index 6bb6711..7bc8fbc 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Libpsl... - finds the shortest private part of a given domain - works with international domains (UTF-8 and IDNA2008 Punycode) - is thread-safe -- handles IDNA2008 UTS#46 (library has to be compiled with libicu) +- handles IDNA2008 UTS#46 (libicu is used by psl2c if installed) Find more information about the Publix Suffix List [here](http://publicsuffix.org/). 
diff --git a/configure.ac b/configure.ac index 52da77c..970a0b3 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ -AC_INIT([libpsl], [0.2.5], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) +AC_INIT([libpsl], [0.3.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) AC_PREREQ([2.59]) AM_INIT_AUTOMAKE([1.10 -Wall no-define])