diff --git a/.travis.yml b/.travis.yml index 6976291..d72425b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,12 +6,24 @@ compiler: script: - ./autogen.sh - ./configure && make -j4 && make check -j4 - - ./configure --without-libicu && make clean && make -j4 && make check -j4 - - ./configure --disable-builtin && make clean && make -j4 && make check -j4 - - ./configure --disable-builtin --without-libicu && make clean && make -j4 && make check -j4 + - ./configure --enable-runtime=libicu --enable-builtin=libicu && make clean && make -j4 && make check -j4 + - ./configure --enable-runtime=libicu --enable-builtin=libidn2 && make clean && make -j4 && make check -j4 + - ./configure --enable-runtime=libicu --enable-builtin=libidn && make clean && make -j4 && make check -j4 + - ./configure --enable-runtime=libicu --disable-builtin && make clean && make -j4 && make check -j4 + - ./configure --enable-runtime=libidn2 --enable-builtin=libicu && make clean && make -j4 && make check -j4 + - ./configure --enable-runtime=libidn2 --enable-builtin=libidn2 && make clean && make -j4 && make check -j4 + - ./configure --enable-runtime=libidn2 --enable-builtin=libidn && make clean && make -j4 && make check -j4 + - ./configure --enable-runtime=libidn2 --disable-builtin && make clean && make -j4 && make check -j4 + - ./configure --enable-runtime=libidn --enable-builtin=libicu && make clean && make -j4 && make check -j4 + - ./configure --enable-runtime=libidn --enable-builtin=libidn2 && make clean && make -j4 && make check -j4 + - ./configure --enable-runtime=libidn --enable-builtin=libidn && make clean && make -j4 && make check -j4 + - ./configure --enable-runtime=libidn --disable-builtin && make clean && make -j4 && make check -j4 + - ./configure --disable-runtime --enable-builtin=libicu && make clean && make -j4 && make check -j4 + - ./configure --disable-runtime --enable-builtin=libidn2 && make clean && make -j4 && make check -j4 + - ./configure --disable-runtime --enable-builtin=libidn 
&& make clean && make -j4 && make check -j4 + - ./configure --disable-runtime --disable-builtin && make clean && make -j4 && make check -j4 - ./configure --enable-gtk-doc && make -j4 && make check -j4 - make distcheck before_install: - - apt-cache search libicu | grep icu - sudo apt-get -qq update - - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu48 libicu-dev + - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext libidn11 libidn11-dev libidn2-0 libidn2-0-dev libicu48 libicu-dev libunistring0 libunistring-dev diff --git a/COPYING b/COPYING index b6a1570..90c5c79 100644 --- a/COPYING +++ b/COPYING @@ -1,4 +1,4 @@ -Copyright (C) 2014 Tim Ruehsen +Copyright (C) 2014 Tim Rühsen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), diff --git a/ChangeLog b/ChangeLog deleted file mode 100644 index 714fcb1..0000000 --- a/ChangeLog +++ /dev/null @@ -1,17 +0,0 @@ -2014-03-20 gettextize - - * m4/gettext.m4: New file, from gettext-0.18.3. - * m4/iconv.m4: New file, from gettext-0.18.3. - * m4/lib-ld.m4: New file, from gettext-0.18.3. - * m4/lib-link.m4: New file, from gettext-0.18.3. - * m4/lib-prefix.m4: New file, from gettext-0.18.3. - * m4/nls.m4: New file, from gettext-0.18.3. - * m4/po.m4: New file, from gettext-0.18.3. - * m4/progtest.m4: New file, from gettext-0.18.3. - * Makefile.am (SUBDIRS): Add po. - (ACLOCAL_AMFLAGS): Add -I m4. - (EXTRA_DIST): New variable. - * configure.ac (AC_CONFIG_FILES): Add po/Makefile.in. 
- -2014-02-20 Tim Ruehsen - * inital setup diff --git a/LICENSE b/LICENSE index b6a1570..90c5c79 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2014 Tim Ruehsen +Copyright (C) 2014 Tim Rühsen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), diff --git a/NEWS b/NEWS index df6d258..51f21dd 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,12 @@ -Copyright (C) 2014 Tim Ruehsen +Copyright (C) 2014 Tim Rühsen + +02.07.2014 Release V0.5.0 + * added configure --enable-runtime to allow for IDNA library + selection as runtime dependency + * added configure --enable-builtin to allow for IDNA library + selection for generating the built-in PSL data + * fixed psl_str_to_utf8lower prototype + * fixed authors name to UTF-8 23.06.2014 Release V0.4.0 * depend on libicu for punycode, utf-8 and lowercase conversions diff --git a/configure.ac b/configure.ac index f89b1e1..9419f8d 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ -AC_INIT([libpsl], [0.4.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) +AC_INIT([libpsl], [0.5.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl]) AC_PREREQ([2.59]) -AM_INIT_AUTOMAKE([1.10 -Wall no-define]) +AM_INIT_AUTOMAKE([1.10 -Wall no-define foreign]) # Generate two configuration headers; one for building the library itself with # an autogenerated template, and a second one that will be installed alongside @@ -63,55 +63,120 @@ AS_IF([ test "$enable_man" != no ], [ # 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0. # 5. If any interfaces have been added since the last public release, then increment age. # 6. If any existing interfaces have been removed or changed since the last public release, then set age to 0. 
-AC_SUBST([LIBPSL_SO_VERSION], [2:0:2]) +AC_SUBST([LIBPSL_SO_VERSION], [2:1:2]) AC_SUBST([LIBPSL_VERSION], $VERSION) -# Check for libicu -HAVE_LIBICU=no -AC_ARG_WITH(libicu, - AC_HELP_STRING([--without-libicu], [build libpsl without IDNA/Punycode support]), - [], +# Check for enable/disable runtime IDNA library +AC_ARG_ENABLE(runtime, [ - # using pkg-config won't work on older systems like Ubuntu 12.04 LTS Server Edition 64bit - OLDLIBS=$LIBS - LIBS="-licuuc $LIBS" - AC_MSG_CHECKING([for ICU unicode library]) - AC_LINK_IFELSE( - [AC_LANG_PROGRAM( - [[#include ]], - [[u_strToUTF8(NULL, 0, NULL, NULL, 0, NULL);]])], - [HAVE_LIBICU=yes; AC_MSG_RESULT([yes]) AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])], - [LIBS=$OLDLIBS; AC_MSG_ERROR([no working ICU unicode library was found])]) - -# AC_SEARCH_LIBS(uidna_close, icuuc, -# [HAVE_LIBICU=yes; AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])], -# [AC_MSG_ERROR(*** libicu was not found. 
Aborting.)], -# -licudata ) -# PKG_CHECK_MODULES(LIBICU, [icu-uc], -# [HAVE_LIBICU=yes; AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])]) + --enable-runtime[[=IDNA library]] + Specify the IDNA library used for libpsl run-time conversions: + libicu [[default]]: IDNA2008 UTS#46 library + libidn2: IDNA2008 library (also needs libunistring) + libidn: IDNA2003 library (also needs libunistring) + --disable-runtime Do not link runtime IDNA functionality + ], [ + if test "$enableval" = "libicu" -o "$enableval" = "yes"; then + enable_runtime=libicu + AC_DEFINE([WITH_LIBICU], [1], [generate PSL data using libicu]) + elif test "$enableval" = "libidn2"; then + enable_runtime=libidn2 + AC_DEFINE([WITH_LIBIDN2], [1], [generate PSL data using libidn2]) + elif test "$enableval" = "libidn"; then + enable_runtime=libidn + AC_DEFINE([WITH_LIBIDN], [1], [generate PSL data using libidn]) + elif test "$enableval" = "no"; then + enable_runtime=no + else + AC_MSG_ERROR([Unknown value $enableval for --enable-runtime]) + fi + ], [ + # this is the default if neither --enable-runtime nor --disable-runtime were specified + enable_runtime=libicu + AC_DEFINE([WITH_LIBICU], [1], [generate PSL data using libicu]) ]) # Check for enable/disable builtin PSL data AC_ARG_ENABLE(builtin, - AS_HELP_STRING([--disable-builtin], [do not compile PSL data into library]), [ - enable_builtin=no + --enable-builtin[[=IDNA library]] + Specify the IDNA library used for built-in data generation: + libicu [[default]]: IDNA2008 UTS#46 library + libidn2: IDNA2008 library (also needs libunistring) + libidn: IDNA2003 library (also needs libunistring) + --disable-builtin Do not generate built-in PSL data ], [ - enable_builtin=yes - AC_DEFINE([WITH_BUILTIN], [1], [compile PSL data into library]) - AS_IF([test $HAVE_LIBICU != yes], - [ - # Check for idn2 fallback to generate punycode - AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2' as fallback.)) - ]) + if 
test "$enableval" = "libicu" -o "$enableval" = "yes"; then + enable_builtin=libicu + AC_DEFINE([BUILTIN_GENERATOR_LIBICU], [1], [generate PSL data using libicu]) + elif test "$enableval" = "libidn2"; then + enable_builtin=libidn2 + AC_DEFINE([BUILTIN_GENERATOR_LIBIDN2], [1], [generate PSL data using libidn2]) + elif test "$enableval" = "libidn"; then + enable_builtin=libidn + AC_DEFINE([BUILTIN_GENERATOR_LIBIDN], [1], [generate PSL data using libidn]) + elif test "$enableval" = "no"; then + enable_builtin=no + else + AC_MSG_ERROR(Unknown value $enableval) + fi + ], [ + # this is the default if neither --enable-builtin nor --disable-builtin were specified + enable_builtin=libicu + AC_DEFINE([BUILTIN_GENERATOR_LIBICU], [1], [generate PSL data using libicu]) ]) -AM_CONDITIONAL([WITH_BUILTIN], [test $enable_builtin = yes]) + +if test "$enable_runtime" = "libicu" -o "$enable_builtin" = "libicu"; then + # Check for libicu + # using pkg-config won't work on older systems like Ubuntu 12.04 LTS Server Edition 64bit + # using AC_SEARCH_LIBS also don't work since functions have the library version appended + OLDLIBS=$LIBS + LIBS="-licuuc $LIBS" + AC_MSG_CHECKING([for ICU unicode library]) + AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [[#include ]], + [[u_strToUTF8(NULL, 0, NULL, NULL, 0, NULL);]])], + [HAVE_LIBICU=yes; AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]); AC_MSG_ERROR(You requested libicu but it is not installed.)]) + LIBS=$OLDLIBS +fi + +if test "$enable_runtime" = "libidn2" -o "$enable_builtin" = "libidn2"; then + # Check for libidn2 + OLDLIBS=$LIBS + AC_SEARCH_LIBS(idn2_lookup_u8, idn2, HAVE_LIBIDN2=yes, AC_MSG_ERROR(You requested libidn2 but it is not installed.)) + LIBS=$OLDLIBS +fi + +if test "$enable_runtime" = "libidn" -o "$enable_builtin" = "libidn"; then + # Check for libidn + OLDLIBS=$LIBS + AC_SEARCH_LIBS(idna_to_ascii_8z, idn, HAVE_LIBIDN=yes, AC_MSG_ERROR(You requested libidn but it is not installed.)) + LIBS=$OLDLIBS +fi + +if test "x$HAVE_LIBIDN2" = "xyes" 
-o "x$HAVE_LIBIDN" = "xyes"; then + # Check for libunistring, we need it for psl_str_to_utf8lower() + OLDLIBS=$LIBS + AC_SEARCH_LIBS(u8_tolower, unistring, HAVE_UNISTRING=yes, AC_MSG_ERROR(You requested libidn2 but libunistring is not installed.)) + LIBS=$OLDLIBS +fi + +AM_CONDITIONAL([WITH_LIBICU], test "x$enable_runtime" = "xlibicu") +AM_CONDITIONAL([WITH_LIBIDN2], test "x$enable_runtime" = "xlibidn2") +AM_CONDITIONAL([WITH_LIBIDN], test "x$enable_runtime" = "xlibidn") +AM_CONDITIONAL([BUILTIN_GENERATOR_LIBICU], test "x$enable_builtin" = "xlibicu") +AM_CONDITIONAL([BUILTIN_GENERATOR_LIBIDN2], test "x$enable_builtin" = "xlibidn2") +AM_CONDITIONAL([BUILTIN_GENERATOR_LIBIDN], test "x$enable_builtin" = "xlibidn") +AM_CONDITIONAL([WITH_BUILTIN], test $enable_builtin = yes) # Check for valgrind ac_enable_valgrind=no AC_ARG_ENABLE(valgrind-tests, AS_HELP_STRING([--enable-valgrind-tests], [enable using Valgrind for tests]), - [ac_enable_valgrind=$enableval], [ac_enable_valgrind=no]) + [ac_enable_valgrind=$enableval], + [ac_enable_valgrind=no]) if test "${ac_enable_valgrind}" = "yes" ; then AC_CHECK_PROG(HAVE_VALGRIND, valgrind, yes, no) @@ -162,8 +227,9 @@ AC_MSG_NOTICE([Summary of build options: Compiler: ${CC} CFlags: ${CFLAGS} ${CPPFLAGS} LDFlags: ${LDFLAGS} - ICU: ${HAVE_LIBICU} - Builtin PSL: ${enable_builtin} + Libs: ${LIBS} + Runtime: ${enable_runtime} + Builtin: ${enable_builtin} PSL File: ${PSL_FILE} PSL Test File: ${PSL_TESTFILE} Tests: ${TESTS_INFO} diff --git a/include/libpsl.h b/include/libpsl.h index b7fe952..4ead03d 100644 --- a/include/libpsl.h +++ b/include/libpsl.h @@ -87,7 +87,7 @@ const char * const char * psl_registrable_domain(const psl_ctx_t *psl, const char *domain); /* convert a string into lowercase UTF-8 */ -int +psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower); /* does not include exceptions */ int diff --git a/src/Makefile.am b/src/Makefile.am index 0fe1ec7..f600134 100644 --- 
a/src/Makefile.am +++ b/src/Makefile.am @@ -10,11 +10,28 @@ libpsl_la_SOURCES = psl.c libpsl_la_CPPFLAGS = -I$(top_srcdir)/include # include ABI version information libpsl_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION) +if WITH_LIBICU + libpsl_la_LDFLAGS += -licuuc +endif +if WITH_LIBIDN2 + libpsl_la_LDFLAGS += -lidn2 -lunistring +endif +if WITH_LIBIDN + libpsl_la_LDFLAGS += -lidn -lunistring +endif noinst_PROGRAMS = psl2c psl2c_SOURCES = psl2c.c -psl2c_CPPFLAGS = -I$(top_srcdir)/include -D _GNU_SOURCE $(LIBICU_CFLAGS) -psl2c_LDADD = $(LIBICU_LIBS) +psl2c_CPPFLAGS = -I$(top_srcdir)/include -D _GNU_SOURCE +if BUILTIN_GENERATOR_LIBICU + psl2c_LDADD = -licuuc +endif +if BUILTIN_GENERATOR_LIBIDN2 + psl2c_LDADD = -lidn2 -lunistring +endif +if BUILTIN_GENERATOR_LIBIDN + psl2c_LDADD = -lidn -lunistring +endif # Build rule for suffix.c # PSL_FILE can be set by ./configure --with-psl-file=[PATH] diff --git a/src/psl.c b/src/psl.c index 2875c35..9798e61 100644 --- a/src/psl.c +++ b/src/psl.c @@ -37,6 +37,20 @@ # include #endif +/* if this file is included by psl2c, redefine to use requested library for builtin data */ +#ifdef _LIBPSL_INCLUDED_BY_PSL2C +# undef WITH_LIBICU +# undef WITH_LIBIDN2 +# undef WITH_LIBIDN +# ifdef BUILTIN_GENERATOR_LIBICU +# define WITH_LIBICU +# elif defined(BUILTIN_GENERATOR_LIBIDN2) +# define WITH_LIBIDN2 +# elif defined(BUILTIN_GENERATOR_LIBIDN) +# define WITH_LIBIDN +# endif +#endif + #if ENABLE_NLS != 0 # include # define _(STRING) gettext(STRING) @@ -50,12 +64,25 @@ #include #include #include +#include +#include #ifdef WITH_LIBICU # include # include # include # include +#elif defined(WITH_LIBIDN2) +# include +# include +# include +# include +#elif defined(WITH_LIBIDN) +# include +# include +# include +# include +# include #endif #include @@ -457,7 +484,7 @@ static int _str_is_ascii(const char *s) return !*s; } -#ifdef WITH_LIBICU +#if defined(WITH_LIBICU) static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t *e) { 
if (_str_is_ascii(e->label_buf)) @@ -465,7 +492,6 @@ static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t /* IDNA2008 UTS#46 punycode conversion */ if (idna) { - _psl_entry_t suffix, *suffixp; char lookupname[128] = ""; UErrorCode status = 0; UIDNAInfo info = UIDNA_INFO_INITIALIZER; @@ -479,6 +505,8 @@ static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t u_strToUTF8(lookupname, sizeof(lookupname), NULL, utf16_dst, dst_length, &status); if (U_SUCCESS(status)) { if (strcmp(e->label_buf, lookupname)) { + _psl_entry_t suffix, *suffixp; + /* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); suffix.wildcard = e->wildcard; @@ -493,6 +521,73 @@ static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); */ } } +#elif defined(WITH_LIBIDN2) +static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e) +{ + char *lookupname = NULL; + int rc; + uint8_t *lower, resbuf[256]; + size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */ + + if (_str_is_ascii(e->label_buf)) + return; + + /* we need a conversion to lowercase */ + lower = u8_tolower((uint8_t *)e->label_buf, u8_strlen((uint8_t *)e->label_buf), 0, UNINORM_NFKC, resbuf, &len); + if (!lower) { + /* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", e->label_buf, errno); */ + return; + } + + /* u8_tolower() does not terminate the result string */ + if (lower == resbuf) { + lower[len]=0; + } else { + uint8_t *tmp = lower; + lower = (uint8_t *)strndup((char *)lower, len); + free(tmp); + } + + if ((rc = idn2_lookup_u8(lower, (uint8_t **)&lookupname, 0)) == IDN2_OK) { + if (strcmp(e->label_buf, lookupname)) { + _psl_entry_t suffix, *suffixp; + + /* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */ + _suffix_init(&suffix, lookupname, strlen(lookupname)); + 
suffix.wildcard = e->wildcard; + suffixp = _vector_get(v, _vector_add(v, &suffix)); + suffixp->label = suffixp->label_buf; /* set label to changed address */ + } /* else ignore */ + } /* else + fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */ + + if (lower != resbuf) + free(lower); +} +#elif defined(WITH_LIBIDN) +static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e) +{ + char *lookupname = NULL; + int rc; + + if (_str_is_ascii(e->label_buf)) + return; + + /* idna_to_ascii_8z() automatically converts UTF-8 to lowercase */ + + if ((rc = idna_to_ascii_8z(e->label_buf, &lookupname, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) { + if (strcmp(e->label_buf, lookupname)) { + _psl_entry_t suffix, *suffixp; + + /* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */ + _suffix_init(&suffix, lookupname, strlen(lookupname)); + suffix.wildcard = e->wildcard; + suffixp = _vector_get(v, _vector_add(v, &suffix)); + suffixp->label = suffixp->label_buf; /* set label to changed address */ + } /* else ignore */ + } /* else + fprintf(_(stderr, "toASCII failed (%d): %s\n"), rc, idna_strerror(rc)); */ +} #endif /** @@ -582,6 +677,8 @@ psl_ctx_t *psl_load_fp(FILE *fp) suffixp->label = suffixp->label_buf; /* set label to changed address */ #ifdef WITH_LIBICU _add_punycode_if_needed(idna, psl->suffix_exceptions, suffixp); +#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) + _add_punycode_if_needed(psl->suffix_exceptions, suffixp); #endif } } else { @@ -591,6 +688,8 @@ psl_ctx_t *psl_load_fp(FILE *fp) suffixp->label = suffixp->label_buf; /* set label to changed address */ #ifdef WITH_LIBICU _add_punycode_if_needed(idna, psl->suffixes, suffixp); +#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) + _add_punycode_if_needed(psl->suffixes, suffixp); #endif } } @@ -645,7 +744,7 @@ void psl_free(psl_ctx_t *psl) */ const psl_ctx_t *psl_builtin(void) { -#ifdef WITH_BUILTIN +#if defined(BUILTIN_GENERATOR_LIBICU) || 
defined(BUILTIN_GENERATOR_LIBIDN2) || defined(BUILTIN_GENERATOR_LIBIDN) return &_builtin_psl; #else return NULL; @@ -773,13 +872,15 @@ const char *psl_builtin_filename(void) **/ const char *psl_get_version (void) { - return PACKAGE_VERSION #ifdef WITH_LIBICU - " (+libicu/" U_ICU_VERSION ")" + return PACKAGE_VERSION " (+libicu/" U_ICU_VERSION ")"; +#elif defined(WITH_LIBIDN2) + return PACKAGE_VERSION " (+libidn2/" IDN2_VERSION ")"; +#elif defined(WITH_LIBIDN) + return PACKAGE_VERSION " (+libidn/" STRINGPREP_VERSION ")"; #else - " (limited IDNA support)" + return PACKAGE_VERSION " (no IDNA support)"; #endif - ; } /** @@ -849,7 +950,8 @@ int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, * This helper function converts a string to lowercase UTF-8 representation. * Lowercase UTF-8 is needed as input to the domain checking functions. * - * @lower is %NULL on error. + * @lower is set to %NULL on error. + * * The return value 'lower' must be freed after usage. * * Returns: psl_error_t value. 
@@ -930,6 +1032,73 @@ psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const ch ret = PSL_ERR_CONVERTER; /* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */ } + } while (0); +#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) + do { + /* find out local charset encoding */ + if (!encoding) { + encoding = nl_langinfo(CODESET); + + if (!encoding || !*encoding) + encoding = "ASCII"; + } + + /* convert to UTF-8 */ + if (strcasecmp(encoding, "utf-8")) { + iconv_t cd = iconv_open("utf-8", encoding); + + if (cd != (iconv_t)-1) { + char *tmp = (char *)str; /* iconv won't change where str points to, but changes tmp itself */ + size_t tmp_len = strlen(str); + size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len; + char *dst = malloc(dst_len + 1), *dst_tmp = dst; + + if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1) { + uint8_t *resbuf = malloc(dst_len * 2 + 1); + size_t len = dst_len * 2; /* leave space for additional \0 byte */ + + if ((dst = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) { + /* u8_tolower() does not terminate the result string */ + if (lower) + *lower = strndup((char *)dst, len); + } else { + ret = PSL_ERR_TO_LOWER; + /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */ + } + + if (lower) + *lower = strndup(dst, dst_len - dst_len_tmp); + ret = PSL_SUCCESS; + } else { + ret = PSL_ERR_TO_UTF8; + /* fprintf(stderr, "Failed to convert '%s' string into '%s' (%d)\n", src_encoding, dst_encoding, errno); */ + } + + free(dst); + iconv_close(cd); + } else { + ret = PSL_ERR_TO_UTF8; + /* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */ + } + } else + ret = PSL_SUCCESS; + + /* convert to lowercase */ + if (ret == PSL_SUCCESS) { + uint8_t *dst, resbuf[256]; + size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */ + + /* we need a conversion 
to lowercase */ + if ((dst = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) { + /* u8_tolower() does not terminate the result string */ + if (lower) + *lower = strndup((char *)dst, len); + } else { + ret = PSL_ERR_TO_LOWER; + /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */ + } + } + } while (0); #endif diff --git a/src/psl2c.c b/src/psl2c.c index daeec83..0f2e066 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -39,7 +39,11 @@ #include #include -#ifdef WITH_BUILTIN +#if defined(BUILTIN_GENERATOR_LIBICU) || defined(BUILTIN_GENERATOR_LIBIDN2) || defined(BUILTIN_GENERATOR_LIBIDN) +# define _GENERATE_BUILTIN_DATA +#endif + +#ifdef _GENERATE_BUILTIN_DATA #include @@ -52,7 +56,7 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char * { int it; -#ifdef WITH_LIBICU +#ifdef BUILTIN_GENERATOR_LIBICU do { UVersionInfo version_info; char version[U_MAX_VERSION_STRING_LENGTH]; @@ -61,9 +65,13 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char * u_versionToString(version_info, version); fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version); } while (0); +#elif defined(BUILTIN_GENERATOR_LIBIDN2) + fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn2/%s) */\n", idn2_check_version(NULL)); +#elif defined(BUILTIN_GENERATOR_LIBIDN) + fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn/%s) */\n", stringprep_check_version(NULL)); #else fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n"); -#endif /* WITH_LIBICU */ +#endif fprintf(fpout, "static _psl_entry_t %s[] = {\n", varname); @@ -77,7 +85,8 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char * fprintf(fpout, "};\n"); } -#ifndef WITH_LIBICU +#if 0 +#if !defined(WITH_LIBICU) && !defined(WITH_IDN2) static int _str_needs_encoding(const 
char *s) { while (*s > 0) s++; @@ -117,14 +126,15 @@ static void _add_punycode_if_needed(_psl_vector_t *v) _vector_sort(v); } -#endif /* ! WITH_LIBICU */ +#endif /* !defined(WITH_LIBICU) && !defined(WITH_IDN2) */ +#endif -#endif /* WITH_BUILTIN */ +#endif /* _GENERATE_BUILTIN_DATA */ int main(int argc, const char **argv) { FILE *fpout; -#ifdef WITH_BUILTIN +#ifdef _GENERATE_BUILTIN_DATA psl_ctx_t *psl; #endif int ret = 0; @@ -136,7 +146,7 @@ int main(int argc, const char **argv) return 1; } -#ifdef WITH_BUILTIN +#ifdef _GENERATE_BUILTIN_DATA if (!(psl = psl_load_file(argv[1]))) return 2; @@ -146,9 +156,8 @@ int main(int argc, const char **argv) size_t cmdsize = 16 + strlen(argv[1]); char *cmd = alloca(cmdsize), checksum[64] = ""; -#ifndef WITH_LIBICU - /* If libicu is not configured, we still need to have punycode in our built-in data. */ - /* Else the test suite fails. */ +#if 0 + /* include library code did not generate punycode, so let's do it for the builtin data */ _add_punycode_if_needed(psl->suffixes); _add_punycode_if_needed(psl->suffix_exceptions); #endif @@ -193,8 +202,7 @@ int main(int argc, const char **argv) fprintf(stderr, "Failed to write open '%s'\n", argv[2]); ret = 3; } - -#endif /* WITH_BUILTIN */ +#endif /* _GENERATE_BUILTIN_DATA */ return ret; } diff --git a/tests/Makefile.am b/tests/Makefile.am index 9234320..2bc8f82 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -2,18 +2,21 @@ DEFS = @DEFS@ -DDATADIR=\"$(top_srcdir)/data\" -DSRCDIR=\"$(srcdir)\" -DPSL_FILE AM_CPPFLAGS = -I$(top_srcdir)/include LDADD = ../src/libpsl.la -if WITH_BUILTIN - -PSL_TESTS = test-is-public test-is-public-builtin test-is-public-all test-registrable-domain \ - test-is-cookie-domain-acceptable - -else - # ./configure'd with '--disable-builtin' # Do not call test-is-public-builtin here: it does not make sense. # Do not call test-registrable-domain here: it would fail due to missing punycode entries in PSL file. 
PSL_TESTS = test-is-public test-is-public-all test-is-cookie-domain-acceptable +if BUILTIN_GENERATOR_LIBICU + PSL_TESTS += test-is-public-builtin test-registrable-domain +endif + +if BUILTIN_GENERATOR_LIBIDN2 + PSL_TESTS += test-is-public-builtin test-registrable-domain +endif + +if BUILTIN_GENERATOR_LIBIDN + PSL_TESTS += test-is-public-builtin test-registrable-domain endif check_PROGRAMS = $(PSL_TESTS) diff --git a/tools/Makefile.am b/tools/Makefile.am index d2b973e..f758ccd 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -2,3 +2,13 @@ bin_PROGRAMS = psl AM_CPPFLAGS = -I$(top_srcdir)/include LDADD = ../src/libpsl.la + +#if WITH_LIBICU +# LDADD += -licuuc +#endif +#if WITH_LIBIDN2 +# LDADD += -lidn2 +#endif +#if WITH_LIBIDN +# LDADD += -lidn +#endif diff --git a/tools/psl.c b/tools/psl.c index 976ada6..e1969c2 100644 --- a/tools/psl.c +++ b/tools/psl.c @@ -37,11 +37,6 @@ #include #include -#ifdef WITH_LIBICU -# include -# include -#endif - #include static void usage(int err, FILE* f) @@ -217,11 +212,6 @@ int main(int argc, const char *const *argv) printf("builtin compile time: %ld (%s)\n", psl_builtin_compile_time(), time2str(psl_builtin_compile_time())); printf("builtin file time: %ld (%s)\n", psl_builtin_file_time(), time2str(psl_builtin_file_time())); printf("builtin SHA1 file hash: %s\n", psl_builtin_sha1sum()); - -#ifdef WITH_LIBICU - printf("uloc_getDefault=%s\n", uloc_getDefault()); - printf("ucnv_getDefaultName=%s\n", ucnv_getDefaultName()); -#endif } else printf("No builtin PSL data available\n"); }