more work on support for libidn, libidn2, libicu

This commit is contained in:
Tim Ruehsen 2014-06-29 22:56:33 +02:00
parent 74f715bd9c
commit 373bcb912c
7 changed files with 219 additions and 57 deletions

View File

@ -12,6 +12,5 @@ script:
- ./configure --enable-gtk-doc && make -j4 && make check -j4
- make distcheck
before_install:
- apt-cache search libicu | grep icu
- sudo apt-get -qq update
- sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu48 libicu-dev
# libidn is packaged as libidn11/libidn11-dev; libunistring-dev is needed when building against libidn2/libidn
- sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext libidn11 libidn11-dev libidn2-0 libidn2-0-dev libunistring-dev libicu48 libicu-dev

View File

@ -66,29 +66,34 @@ AS_IF([ test "$enable_man" != no ], [
AC_SUBST([LIBPSL_SO_VERSION], [2:0:2])
AC_SUBST([LIBPSL_VERSION], $VERSION)
# Check for libicu
HAVE_LIBICU=no
AC_ARG_WITH(libicu,
AC_HELP_STRING([--without-libicu], [build libpsl without IDNA/Punycode support]),
[],
# Check for enable/disable builtin PSL data
AC_ARG_ENABLE(runtime,
[
# using pkg-config won't work on older systems like Ubuntu 12.04 LTS Server Edition 64bit
OLDLIBS=$LIBS
LIBS="-licuuc $LIBS"
AC_MSG_CHECKING([for ICU unicode library])
AC_LINK_IFELSE(
[AC_LANG_PROGRAM(
[[#include <unicode/ustring.h>]],
[[u_strToUTF8(NULL, 0, NULL, NULL, 0, NULL);]])],
[HAVE_LIBICU=yes; AC_MSG_RESULT([yes]) AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])],
[LIBS=$OLDLIBS; AC_MSG_ERROR([no working ICU unicode library was found])])
# AC_SEARCH_LIBS(uidna_close, icuuc,
# [HAVE_LIBICU=yes; AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])],
# [AC_MSG_ERROR(*** libicu was not found. Aborting.)],
# -licudata )
# PKG_CHECK_MODULES(LIBICU, [icu-uc],
# [HAVE_LIBICU=yes; AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])])
--enable-runtime[=IDNA library]
Specify the IDNA library used for libpsl run-time conversions:
libicu [[default]]: IDNA2008 UTS#46 library
libidn2: IDNA2008 library (also needs libunistring)
libidn: IDNA2003 library
--disable-runtime Do not link runtime IDNA functionality
], [
if test "$enableval" = "libicu" -o "$enableval" = "yes"; then
enable_runtime=libicu
AC_DEFINE([WITH_LIBICU], [1], [generate PSL data using libicu])
elif test "$enableval" = "libidn2"; then
enable_runtime=libidn2
AC_DEFINE([WITH_LIBIDN2], [1], [generate PSL data using libidn2])
elif test "$enableval" = "libidn"; then
enable_runtime=libidn
AC_DEFINE([WITH_LIBIDN], [1], [generate PSL data using libidn])
elif test "$enableval" = "no"; then
enable_runtime=no
else
AC_MSG_ERROR([Unknown value $enableval for --enable-runtime])
fi
], [
# this is the default if neither --enable-runtime nor --disable-runtime were specified
enable_runtime=libicu
AC_DEFINE([WITH_LIBICU], [1], [generate PSL data using libicu])
])
# Check for enable/disable builtin PSL data
@ -97,38 +102,73 @@ AC_ARG_ENABLE(builtin,
--enable-builtin[=IDNA library]
Specify the IDNA library used for built-in data generation:
libicu [[default]]: IDNA2008 UTS#46 library
libidn2: IDNA2008 library also needs libunistring
libidn2: IDNA2008 library (also needs libunistring)
libidn: IDNA2003 library
--disable-builtin Do not generate built-in data
], [
if test "$enableval" = "libicu" -o "$enableval" = "yes"; then
if test "$HAVE_LIBICU" != "yes"; then
AC_MSG_ERROR(You requested libicu but it is not installed.)
fi
enable_builtin=libicu
AC_DEFINE([BUILTIN_GENERATOR_LIBICU], [1], [generate PSL data using libicu])
elif test "$enableval" = "libidn2"; then
if test "$HAVE_LIBIDN2" != "yes"; then
AC_MSG_ERROR(You requested libidn2 but it is not installed.)
fi
enable_builtin=libidn2
AC_DEFINE([BUILTIN_GENERATOR_LIBIDN2], [1], [generate PSL data using libidn2])
elif test "$enableval" = "libidn"; then
enable_builtin=libidn
AC_DEFINE([BUILTIN_GENERATOR_LIBIDN], [1], [generate PSL data using libidn])
elif test "$enableval" = "no"; then
enable_builtin=no
else
AC_MSG_ERROR(Unknown value $enableval)
fi
], [
# this is the default if neither --enable-builtin nor --disable-built were specified
# this is the default if neither --enable-builtin nor --disable-builtin were specified
enable_builtin=libicu
if test "$HAVE_LIBIDN2" != "yes"; then
AC_MSG_ERROR(You requested libidn2 but it is not installed.)
fi
AC_DEFINE([BUILTIN_GENERATOR_LIBICU], [1], [generate PSL data using libicu])
AC_DEFINE([WITH_BUILTIN], [1], [compile PSL data into library])
])
if test "$enable_runtime" = "libicu" || test "$enable_builtin" = "libicu"; then
  # Check for libicu.
  # Using pkg-config won't work on older systems like Ubuntu 12.04 LTS Server Edition 64bit.
  # Using AC_SEARCH_LIBS doesn't work either since ICU functions have the library version appended.
  # So try to link a tiny program against -licuuc instead.
  OLDLIBS=$LIBS
  LIBS="-licuuc $LIBS"
  AC_MSG_CHECKING([for ICU unicode library])
  AC_LINK_IFELSE(
    [AC_LANG_PROGRAM(
      [[#include <unicode/ustring.h>]],
      [[u_strToUTF8(NULL, 0, NULL, NULL, 0, NULL);]])],
    [HAVE_LIBICU=yes; AC_MSG_RESULT([yes])],
    [AC_MSG_RESULT([no]); AC_MSG_ERROR([You requested libicu but it is not installed.])])
  # restore LIBS so that -licuuc is not added globally
  LIBS=$OLDLIBS
fi
if test "$enable_runtime" = "libidn2" || test "$enable_builtin" = "libidn2"; then
  # Check for libidn2. LIBS is restored afterwards so that -lidn2 is not added globally.
  OLDLIBS=$LIBS
  AC_SEARCH_LIBS([idn2_lookup_u8], [idn2],
    [HAVE_LIBIDN2=yes],
    [AC_MSG_ERROR([You requested libidn2 but it is not installed.])])
  LIBS=$OLDLIBS
fi
if test "$enable_runtime" = "libidn" || test "$enable_builtin" = "libidn"; then
  # Check for libidn. LIBS is restored afterwards so that -lidn is not added globally.
  OLDLIBS=$LIBS
  AC_SEARCH_LIBS([idna_to_ascii_8z], [idn],
    [HAVE_LIBIDN=yes],
    [AC_MSG_ERROR([You requested libidn but it is not installed.])])
  LIBS=$OLDLIBS
fi
if test "x$HAVE_LIBIDN2" = "xyes" || test "x$HAVE_LIBIDN" = "xyes"; then
  # Check for libunistring, we need it for psl_str_to_utf8lower().
  # This check runs for both libidn2 and libidn, so the error message must not name only one of them.
  OLDLIBS=$LIBS
  AC_SEARCH_LIBS([u8_tolower], [unistring],
    [HAVE_UNISTRING=yes],
    [AC_MSG_ERROR([You requested libidn2 or libidn but the required libunistring is not installed.])])
  LIBS=$OLDLIBS
fi
# Automake conditionals for the IDNA library selected via --enable-runtime (at most one is true)
AM_CONDITIONAL([WITH_LIBICU], test "x$enable_runtime" = "xlibicu")
AM_CONDITIONAL([WITH_LIBIDN2], test "x$enable_runtime" = "xlibidn2")
AM_CONDITIONAL([WITH_LIBIDN], test "x$enable_runtime" = "xlibidn")
# Automake conditionals for the IDNA library selected via --enable-builtin (at most one is true)
AM_CONDITIONAL([BUILTIN_GENERATOR_LIBICU], test "x$enable_builtin" = "xlibicu")
AM_CONDITIONAL([BUILTIN_GENERATOR_LIBIDN2], test "x$enable_builtin" = "xlibidn2")
AM_CONDITIONAL([BUILTIN_GENERATOR_LIBIDN], test "x$enable_builtin" = "xlibidn")
# NOTE(review): the option handling visible here only sets enable_builtin to libicu, libidn2, libidn or no,
# so this comparison against "yes" looks like it can never be true - confirm and adjust if WITH_BUILTIN is still used
AM_CONDITIONAL([WITH_BUILTIN], test $enable_builtin = yes)
# Check for valgrind
@ -187,8 +227,9 @@ AC_MSG_NOTICE([Summary of build options:
Compiler: ${CC}
CFlags: ${CFLAGS} ${CPPFLAGS}
LDFlags: ${LDFLAGS}
ICU: ${HAVE_LIBICU}
Builtin PSL: ${enable_builtin}
Libs: ${LIBS}
Runtime: ${enable_runtime}
Builtin: ${enable_builtin}
PSL File: ${PSL_FILE}
PSL Test File: ${PSL_TESTFILE}
Tests: ${TESTS_INFO}

View File

@ -10,17 +10,27 @@ libpsl_la_SOURCES = psl.c
libpsl_la_CPPFLAGS = -I$(top_srcdir)/include
# include ABI version information
libpsl_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
# link libpsl against the IDNA library selected for run-time conversions
if WITH_LIBICU
libpsl_la_LDFLAGS += -licuuc
endif
if WITH_LIBIDN2
# the libidn2 code path in psl.c lower-cases via u8_tolower()/u8_strlen(), which live in libunistring
libpsl_la_LDFLAGS += -lidn2 -lunistring
endif
if WITH_LIBIDN
libpsl_la_LDFLAGS += -lunistring -lidn
endif
noinst_PROGRAMS = psl2c
psl2c_SOURCES = psl2c.c
psl2c_CPPFLAGS = -I$(top_srcdir)/include -D _GNU_SOURCE
if BUILTIN_GENERATOR_LIBICU
psl2c_CPPFLAGS += $(LIBICU_CFLAGS)
psl2c_LDADD = $(LIBICU_LIBS)
psl2c_LDADD = -licuuc
endif
if BUILTIN_GENERATOR_LIBIDN2
psl2c_CPPFLAGS += $(LIBIDN2_CFLAGS)
psl2c_LDADD = $(LIBIDN2_LIBS)
psl2c_LDADD = -lidn2
endif
if BUILTIN_GENERATOR_LIBIDN
psl2c_LDADD = -lidn
endif
# Build rule for suffix.c

104
src/psl.c
View File

@ -37,6 +37,20 @@
# include <config.h>
#endif
/*
 * When psl2c includes this file (_LIBPSL_INCLUDED_BY_PSL2C is defined), drop
 * whatever WITH_* run-time selection is active and re-select the IDNA library
 * from the BUILTIN_GENERATOR_* setting, so the builtin data is generated with
 * the requested library. If no generator is defined, none of WITH_* stays set.
 */
#if defined(_LIBPSL_INCLUDED_BY_PSL2C)
#  undef WITH_LIBIDN
#  undef WITH_LIBIDN2
#  undef WITH_LIBICU
#  if defined(BUILTIN_GENERATOR_LIBICU)
#    define WITH_LIBICU
#  elif defined(BUILTIN_GENERATOR_LIBIDN2)
#    define WITH_LIBIDN2
#  elif defined(BUILTIN_GENERATOR_LIBIDN)
#    define WITH_LIBIDN
#  endif
#endif
#if ENABLE_NLS != 0
# include <libintl.h>
# define _(STRING) gettext(STRING)
@ -56,6 +70,13 @@
# include <unicode/ustring.h>
# include <unicode/uidna.h>
# include <unicode/ucnv.h>
#elif defined(WITH_LIBIDN2)
# include <idn2.h>
# include <unicase.h>
# include <unistr.h>
#elif defined(WITH_LIBIDN)
# include <stringprep.h>
# include <idna.h>
#endif
#include <libpsl.h>
@ -457,7 +478,7 @@ static int _str_is_ascii(const char *s)
return !*s;
}
#ifdef WITH_LIBICU
#if defined(WITH_LIBICU)
static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t *e)
{
if (_str_is_ascii(e->label_buf))
@ -465,7 +486,6 @@ static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t
/* IDNA2008 UTS#46 punycode conversion */
if (idna) {
_psl_entry_t suffix, *suffixp;
char lookupname[128] = "";
UErrorCode status = 0;
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
@ -479,6 +499,8 @@ static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t
u_strToUTF8(lookupname, sizeof(lookupname), NULL, utf16_dst, dst_length, &status);
if (U_SUCCESS(status)) {
if (strcmp(e->label_buf, lookupname)) {
_psl_entry_t suffix, *suffixp;
/* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */
_suffix_init(&suffix, lookupname, strlen(lookupname));
suffix.wildcard = e->wildcard;
@ -493,6 +515,66 @@ static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t
fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); */
}
}
#elif defined(WITH_LIBIDN2)
/*
 * libidn2 variant: if the label of entry e contains non-ASCII characters,
 * convert it to its ASCII (punycode) form and, when that differs from the
 * original label, append an additional entry with the converted label to
 * vector v. The wildcard flag is copied from e.
 * Conversion failures are silently ignored (no entry is added).
 */
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e)
{
	char *lookupname = NULL;
	uint8_t *lower, resbuf[256];
	size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */

	if (_str_is_ascii(e->label_buf))
		return;

	/* we need a conversion to lowercase */
	lower = u8_tolower((uint8_t *)e->label_buf, u8_strlen((uint8_t *)e->label_buf), 0, UNINORM_NFKC, resbuf, &len);
	if (!lower) {
		/* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", e->label_buf, errno); */
		return;
	}

	/* u8_tolower() does not terminate the result string */
	if (lower == resbuf) {
		lower[len] = 0;
	} else {
		/* result did not fit into resbuf and was allocated: make a terminated copy */
		uint8_t *tmp = lower;

		lower = (uint8_t *)strndup((char *)tmp, len);
		free(tmp);
		if (!lower)
			return;
	}

	if (idn2_lookup_u8(lower, (uint8_t **)&lookupname, 0) == IDN2_OK) {
		if (strcmp(e->label_buf, lookupname)) {
			_psl_entry_t suffix, *suffixp;

			/* fprintf(stderr, "libidn2 '%s' -> '%s'\n", e->label_buf, lookupname); */
			_suffix_init(&suffix, lookupname, strlen(lookupname));
			suffix.wildcard = e->wildcard;
			suffixp = _vector_get(v, _vector_add(v, &suffix));
			suffixp->label = suffixp->label_buf; /* set label to changed address */
		} /* else ignore */

		/* the converted string was allocated by idn2_lookup_u8() */
		free(lookupname);
	} /* else
		fprintf(stderr, "toASCII(%s) failed\n", lower); */

	if (lower != resbuf)
		free(lower);
}
#elif defined(WITH_LIBIDN)
/*
 * libidn variant: if the label of entry e contains non-ASCII characters,
 * convert it to its ASCII (punycode) form and, when that differs from the
 * original label, append an additional entry with the converted label to
 * vector v. The wildcard flag is copied from e.
 * Conversion failures are silently ignored (no entry is added).
 */
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e)
{
	char *lookupname = NULL;
	int rc;

	if (_str_is_ascii(e->label_buf))
		return;

	/* idna_to_ascii_8z() automatically converts UTF-8 to lowercase */

	if ((rc = idna_to_ascii_8z(e->label_buf, &lookupname, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) {
		if (strcmp(e->label_buf, lookupname)) {
			_psl_entry_t suffix, *suffixp;

			/* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */
			_suffix_init(&suffix, lookupname, strlen(lookupname));
			suffix.wildcard = e->wildcard;
			suffixp = _vector_get(v, _vector_add(v, &suffix));
			suffixp->label = suffixp->label_buf; /* set label to changed address */
		} /* else ignore */
	} /* else
		fprintf(stderr, "toASCII failed (%d): %s\n", rc, idna_strerror(rc)); */

	/* the output string is allocated by idna_to_ascii_8z(); lookupname stays NULL if nothing was stored */
	free(lookupname);
}
#endif
/**
@ -582,6 +664,8 @@ psl_ctx_t *psl_load_fp(FILE *fp)
suffixp->label = suffixp->label_buf; /* set label to changed address */
#ifdef WITH_LIBICU
_add_punycode_if_needed(idna, psl->suffix_exceptions, suffixp);
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
_add_punycode_if_needed(psl->suffix_exceptions, suffixp);
#endif
}
} else {
@ -591,6 +675,8 @@ psl_ctx_t *psl_load_fp(FILE *fp)
suffixp->label = suffixp->label_buf; /* set label to changed address */
#ifdef WITH_LIBICU
				_add_punycode_if_needed(idna, psl->suffixes, suffixp);
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
				/* this is the plain-rule branch: the punycode entry belongs into suffixes, not suffix_exceptions */
				_add_punycode_if_needed(psl->suffixes, suffixp);
#endif
}
}
@ -645,7 +731,7 @@ void psl_free(psl_ctx_t *psl)
*/
const psl_ctx_t *psl_builtin(void)
{
#ifdef WITH_BUILTIN
#if defined(BUILTIN_GENERATOR_LIBICU) || defined(BUILTIN_GENERATOR_LIBIDN2) || defined(BUILTIN_GENERATOR_LIBIDN)
return &_builtin_psl;
#else
return NULL;
@ -773,13 +859,15 @@ const char *psl_builtin_filename(void)
**/
const char *psl_get_version (void)
{
	/* one complete return statement per configuration; the suffix names the IDNA library compiled in */
#ifdef WITH_LIBICU
	return PACKAGE_VERSION " (+libicu/" U_ICU_VERSION ")";
#elif defined(WITH_LIBIDN2)
	return PACKAGE_VERSION " (+libidn2/" IDN2_VERSION ")";
#elif defined(WITH_LIBIDN)
	return PACKAGE_VERSION " (+libidn/" STRINGPREP_VERSION ")";
#else
	return PACKAGE_VERSION " (limited IDNA support)";
#endif
}
/**
@ -932,6 +1020,8 @@ psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const ch
/* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */
}
} while (0);
#elif defined(WITH_LIBIDN2)
#elif defined(WITH_LIBIDN)
#endif
return ret;

View File

@ -39,7 +39,11 @@
#include <ctype.h>
#include <sys/stat.h>
#if defined(BUILTIN_GENERATOR_LIBICU) || defined(BUILTIN_GENERATOR_LIBIDN2)
#if defined(BUILTIN_GENERATOR_LIBICU) || defined(BUILTIN_GENERATOR_LIBIDN2) || defined(BUILTIN_GENERATOR_LIBIDN)
# define _GENERATE_BUILTIN_DATA
#endif
#ifdef _GENERATE_BUILTIN_DATA
#include <libpsl.h>
@ -61,8 +65,10 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *
u_versionToString(version_info, version);
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version);
} while (0);
#elif BUILTIN_GENERATOR_LIBIDN2
#elif defined(BUILTIN_GENERATOR_LIBIDN2)
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn2/%s) */\n", idn2_check_version(NULL));
#elif defined(BUILTIN_GENERATOR_LIBIDN)
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn/%s) */\n", stringprep_check_version(NULL));
#else
fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n");
#endif
@ -79,6 +85,7 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *
fprintf(fpout, "};\n");
}
#if 0
#if !defined(WITH_LIBICU) && !defined(WITH_IDN2)
static int _str_needs_encoding(const char *s)
{
@ -120,13 +127,14 @@ static void _add_punycode_if_needed(_psl_vector_t *v)
_vector_sort(v);
}
#endif /* !defined(WITH_LIBICU) && !defined(WITH_IDN2) */
#endif
#endif /* defined(BUILTIN_GENERATOR_LIBICU) || defined(BUILTIN_GENERATOR_LIBIDN2) */
#endif /* _GENERATE_BUILTIN_DATA */
int main(int argc, const char **argv)
{
FILE *fpout;
#if defined(BUILTIN_GENERATOR_LIBICU) || defined(BUILTIN_GENERATOR_LIBIDN2)
#ifdef _GENERATE_BUILTIN_DATA
psl_ctx_t *psl;
#endif
int ret = 0;
@ -138,7 +146,7 @@ int main(int argc, const char **argv)
return 1;
}
#if defined(BUILTIN_GENERATOR_LIBICU) || defined(BUILTIN_GENERATOR_LIBIDN2)
#ifdef _GENERATE_BUILTIN_DATA
if (!(psl = psl_load_file(argv[1])))
return 2;
@ -148,8 +156,8 @@ int main(int argc, const char **argv)
size_t cmdsize = 16 + strlen(argv[1]);
char *cmd = alloca(cmdsize), checksum[64] = "";
#if !defined(WITH_LIBICU) && !defined(WITH_IDN2)
/* library is compiled without ability to generate punycode, so let's generate punycode at least for the builtin data */
#if 0
/* include library code did not generate punycode, so let's do it for the builtin data */
_add_punycode_if_needed(psl->suffixes);
_add_punycode_if_needed(psl->suffix_exceptions);
#endif
@ -194,7 +202,7 @@ int main(int argc, const char **argv)
fprintf(stderr, "Failed to write open '%s'\n", argv[2]);
ret = 3;
}
#endif /* WITH_BUILTIN */
#endif /* GENERATE_BUILTIN_DATA */
return ret;
}

View File

@ -15,6 +15,10 @@ if BUILTIN_GENERATOR_LIBIDN2
PSL_TESTS += test-is-public-builtin test-registrable-domain
endif
# add the builtin-data tests when the built-in PSL data is generated with libidn
if BUILTIN_GENERATOR_LIBIDN
PSL_TESTS += test-is-public-builtin test-registrable-domain
endif
check_PROGRAMS = $(PSL_TESTS)
TESTS_ENVIRONMENT = TESTS_VALGRIND="@VALGRIND_ENVIRONMENT@"

View File

@ -2,3 +2,13 @@ bin_PROGRAMS = psl
AM_CPPFLAGS = -I$(top_srcdir)/include
LDADD = ../src/libpsl.la
#if WITH_LIBICU
# LDADD += $(LIBICU_LIBS)
#endif
#if WITH_LIBIDN2
# LDADD += -lidn2
#endif
#if WITH_LIBIDN
# LDADD += -lidn
#endif