From 5c5ee3aad7dc739c4f26c6b43fe331de0c1112e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Mon, 30 Jun 2014 13:21:16 +0200 Subject: [PATCH] added code for all of runtime and builtin options --- .travis.yml | 2 +- src/Makefile.am | 8 ++-- src/psl.c | 106 +++++++++++++++++++++++++++++++++++++++++------- tools/psl.c | 13 ------ 4 files changed, 97 insertions(+), 32 deletions(-) diff --git a/.travis.yml b/.travis.yml index 59fa4b3..489ad18 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,4 +26,4 @@ script: - make distcheck before_install: - sudo apt-get -qq update - - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext libidn libidn-dev libidn2-0 libidn2-0-dev libicu48 libicu-dev + - sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext libidn libidn-dev libidn2-0 libidn2-0-dev libicu48 libicu-dev libunistring0 libunistring-dev diff --git a/src/Makefile.am b/src/Makefile.am index eef692b..f600134 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -14,10 +14,10 @@ if WITH_LIBICU libpsl_la_LDFLAGS += -licuuc endif if WITH_LIBIDN2 - libpsl_la_LDFLAGS += -lidn2 + libpsl_la_LDFLAGS += -lidn2 -lunistring endif if WITH_LIBIDN - libpsl_la_LDFLAGS += -lunistring -lidn + libpsl_la_LDFLAGS += -lidn -lunistring endif noinst_PROGRAMS = psl2c @@ -27,10 +27,10 @@ if BUILTIN_GENERATOR_LIBICU psl2c_LDADD = -licuuc endif if BUILTIN_GENERATOR_LIBIDN2 - psl2c_LDADD = -lidn2 + psl2c_LDADD = -lidn2 -lunistring endif if BUILTIN_GENERATOR_LIBIDN - psl2c_LDADD = -lidn + psl2c_LDADD = -lidn -lunistring endif # Build rule for suffix.c diff --git a/src/psl.c b/src/psl.c index 3b263c8..9798e61 100644 --- a/src/psl.c +++ b/src/psl.c @@ -64,6 +64,8 @@ #include #include #include +#include +#include #ifdef WITH_LIBICU # include @@ -71,12 +73,16 @@ # include # include #elif defined(WITH_LIBIDN2) +# include # include # include # include #elif defined(WITH_LIBIDN) +# include # include # include +# include +# include #endif #include @@ -527,10 +533,10 @@ static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e) return; /* we need a conversion to lowercase */ - lower = u8_tolower((uint8_t *)src, u8_strlen((uint8_t *)src), 0, UNINORM_NFKC, resbuf, &len); + lower = u8_tolower((uint8_t *)e->label_buf, u8_strlen((uint8_t *)e->label_buf), 0, UNINORM_NFKC, resbuf, &len); if (!lower) { - printf("u8_tolower(%s) failed (%d)\n", src, errno); - return src; + /* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", e->label_buf, errno); */ + return; } /* u8_tolower() does not terminate the result string */ @@ -539,17 +545,24 @@ static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e) } else { uint8_t *tmp = lower; lower = (uint8_t *)strndup((char *)lower, len); - xfree(tmp); + free(tmp); } - if ((rc = idn2_lookup_u8(lower, (uint8_t **)&asc, 0)) == IDN2_OK) { - debug_printf("idn2 '%s' -> '%s'\n", src, asc); - src = asc; - } else - error_printf(_("toASCII(%s) failed (%d): %s\n"), lower, rc, idn2_strerror(rc)); + if ((rc = idn2_lookup_u8(lower, (uint8_t **)&lookupname, 0)) == IDN2_OK) { + if (strcmp(e->label_buf, lookupname)) { + _psl_entry_t suffix, *suffixp; + + /* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */ + _suffix_init(&suffix, lookupname, strlen(lookupname)); + suffix.wildcard = e->wildcard; + suffixp = _vector_get(v, _vector_add(v, &suffix)); + suffixp->label = suffixp->label_buf; /* set label to changed address */ + } /* else ignore */ + } /* else + fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */ if (lower != resbuf) - xfree(lower); + free(lower); } #elif defined(WITH_LIBIDN) static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e) @@ -676,7 +689,7 @@ psl_ctx_t *psl_load_fp(FILE *fp) #ifdef WITH_LIBICU _add_punycode_if_needed(idna, psl->suffixes, suffixp); #elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) - _add_punycode_if_needed(psl->suffix_exceptions, suffixp); + _add_punycode_if_needed(psl->suffixes, suffixp); #endif } } @@ -866,7 +879,7 @@ const char *psl_get_version (void) #elif defined(WITH_LIBIDN) return PACKAGE_VERSION " (+libidn/" STRINGPREP_VERSION ")"; #else - return PACKAGE_VERSION " (limited IDNA support)"; + return PACKAGE_VERSION " (no IDNA support)"; #endif } @@ -1020,8 +1033,73 @@ psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const ch /* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */ } } while (0); -#elif defined(WITH_LIBIDN2) -#elif defined(WITH_LIBIDN) +#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) + do { + /* find out local charset encoding */ + if (!encoding) { + encoding = nl_langinfo(CODESET); + + if (!encoding || !*encoding) + encoding = "ASCII"; + } + + /* convert to UTF-8 */ + if (strcasecmp(encoding, "utf-8")) { + iconv_t cd = iconv_open("utf-8", encoding); + + if (cd != (iconv_t)-1) { + char *tmp = (char *)str; /* iconv won't change where str points to, but changes tmp itself */ + size_t tmp_len = strlen(str); + size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len; + char *dst = malloc(dst_len + 1), *dst_tmp = dst; + + if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1) { + uint8_t *resbuf = malloc(dst_len * 2 + 1); + size_t len = dst_len * 2; /* leave space for additional \0 byte */ + + if ((dst = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) { + /* u8_tolower() does not terminate the result string */ + if (lower) + *lower = strndup((char *)dst, len); + } else { + ret = PSL_ERR_TO_LOWER; + /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */ + } + + if (lower) + *lower = strndup(dst, dst_len - dst_len_tmp); + ret = PSL_SUCCESS; + } else { + ret = PSL_ERR_TO_UTF8; + /* fprintf(stderr, "Failed to convert '%s' string into '%s' (%d)\n", src_encoding, dst_encoding, errno); */ + } + + free(dst); + iconv_close(cd); + } else { + ret = PSL_ERR_TO_UTF8; + /* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */ + } + } else + ret = PSL_SUCCESS; + + /* convert to lowercase */ + if (ret == PSL_SUCCESS) { + uint8_t *dst, resbuf[256]; + size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */ + + /* we need a conversion to lowercase */ + if ((dst = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) { + /* u8_tolower() does not terminate the result string */ + if (lower) + *lower = strndup((char *)dst, len); + } else { + ret = PSL_ERR_TO_LOWER; + /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */ + } + } + + } while (0); #endif return ret; diff --git a/tools/psl.c b/tools/psl.c index 82618b5..e1969c2 100644 --- a/tools/psl.c +++ b/tools/psl.c @@ -37,13 +37,6 @@ #include #include -/* -#ifdef WITH_LIBICU -# include -# include -#endif -*/ - #include static void usage(int err, FILE* f) @@ -219,12 +212,6 @@ int main(int argc, const char *const *argv) printf("builtin compile time: %ld (%s)\n", psl_builtin_compile_time(), time2str(psl_builtin_compile_time())); printf("builtin file time: %ld (%s)\n", psl_builtin_file_time(), time2str(psl_builtin_file_time())); printf("builtin SHA1 file hash: %s\n", psl_builtin_sha1sum()); -/* -#ifdef WITH_LIBICU - printf("uloc_getDefault=%s\n", uloc_getDefault()); - printf("ucnv_getDefaultName=%s\n", ucnv_getDefaultName()); -#endif -*/ } else printf("No builtin PSL data available\n"); }