From 1baaacccd5e1349f12c32f57e4a5e7098525415c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Fri, 23 Sep 2016 11:12:52 +0200 Subject: [PATCH] Fix libidn/libidn2 code path of psl_str_to_utf8lower() * fixing memory leaks * proper handling of unterminated results of u8_tolower() * second call to iconv() ensures flush of internal memory * check more code paths of psl_str_to_utf8lower() via tests/test-registrable-domain.c --- src/psl.c | 50 ++++++++++++++++++++------------ tests/test-registrable-domain.c | 51 +++++++++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 22 deletions(-) diff --git a/src/psl.c b/src/psl.c index d637394..68bb014 100644 --- a/src/psl.c +++ b/src/psl.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include /* for UINT_MAX */ #include @@ -1678,7 +1679,6 @@ out: } while (0); #elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) do { - printf("### encoding=%s lower=%p\n", encoding, lower ? *lower : NULL); /* find out local charset encoding */ if (!encoding) { encoding = nl_langinfo(CODESET); @@ -1700,19 +1700,25 @@ out: if (!dst) { ret = PSL_ERR_NO_MEM; } - else if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1) { - uint8_t *resbuf = malloc(dst_len * 2 + 1); - size_t len = dst_len * 2; /* leave space for additional \0 byte */ + else if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1 + && iconv(cd, NULL, NULL, &dst_tmp, &dst_len_tmp) != (size_t)-1) + { + uint8_t resbuf[256]; + size_t len = sizeof(resbuf); - if (!resbuf) { - ret = PSL_ERR_NO_MEM; - } - else if ((dst = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) { + if ((tmp = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) { /* u8_tolower() does not terminate the result string */ ret = PSL_SUCCESS; - if (lower) - if (!(*lower = strndup((char *)dst, len))) + if (lower) { + if ((*lower = malloc(len + 1))) { + /* tmp is not 0 terminated */ + memcpy(*lower, tmp, len); + (*lower)[len] = 0; + } else ret = PSL_ERR_NO_MEM; + } + if (tmp != (char *)resbuf) + free(tmp); } else { ret = PSL_ERR_TO_LOWER; /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */ @@ -1728,20 +1734,26 @@ out: ret = PSL_ERR_TO_UTF8; /* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */ } - } else + } else { + /* convert to lowercase */ + uint8_t resbuf[256], *tmp; + size_t len = sizeof(resbuf); + ret = PSL_SUCCESS; - /* convert to lowercase */ - if (ret == PSL_SUCCESS) { - uint8_t *dst, resbuf[256]; - size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */ - /* we need a conversion to lowercase */ - if ((dst = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) { + if ((tmp = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) { /* u8_tolower() does not terminate the result string */ - if (lower) - if (!(*lower = strndup((char *)dst, len))) + if (lower) { + if ((*lower = malloc(len + 1))) { + /* tmp is not 0 terminated */ + memcpy(*lower, tmp, len); + (*lower)[len] = 0; + } else ret = PSL_ERR_NO_MEM; + } + if (tmp != resbuf) + free(tmp); } else { ret = PSL_ERR_TO_LOWER; /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */ diff --git a/tests/test-registrable-domain.c b/tests/test-registrable-domain.c index 6a227e1..819216b 100644 --- a/tests/test-registrable-domain.c +++ b/tests/test-registrable-domain.c @@ -50,14 +50,28 @@ static int ok, failed; -static void test(const psl_ctx_t *psl, const char *domain, const char *expected_result) +static void testx(const psl_ctx_t *psl, const char *domain, const char *encoding, const char *lang, const char *expected_result) { const char *result; char *lower; + int rc; - /* our test data is fixed to UTF-8 (english), so provide it here */ - if (psl_str_to_utf8lower(domain, "utf-8", "en", &lower) == PSL_SUCCESS) + /* just to cover special code paths for valgrind checking */ + psl_str_to_utf8lower(domain, encoding, lang, NULL); + + if ((rc = psl_str_to_utf8lower(domain, encoding, lang, &lower)) == PSL_SUCCESS) domain = lower; + /* non-ASCII domains fail here if no runtime IDN library is configured, so skip it */ +#if defined(WITH_LIBIDN) || defined(WITH_LIBIDN2) || defined(WITH_LIBICU) + else if (domain) { + /* if we do not runtime support, test failure have to be skipped */ + failed++; + printf("psl_str_to_utf8lower(%s)=%d\n", domain ? domain : "NULL", rc); + + free(lower); + return; + } +#endif result = psl_registrable_domain(psl, domain); @@ -72,13 +86,28 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_ free(lower); } +static void test(const psl_ctx_t *psl, const char *domain, const char *expected_result) +{ + testx(psl, domain, "utf-8", "en", expected_result); +} + +static void test_iso(const psl_ctx_t *psl, const char *domain, const char *expected_result) +{ + /* makes only sense with a runtime IDN library configured */ +#if defined(WITH_LIBIDN) || defined(WITH_LIBIDN2) || defined(WITH_LIBICU) + testx(psl, domain, "iso-8859-15", "de", expected_result); +#endif +} + static void test_psl(void) { FILE *fp; const psl_ctx_t *psl; const char *p; char buf[256], domain[128], expected_regdom[128], semicolon[2]; + char lbuf[258]; int er_is_null, d_is_null; + unsigned it; psl = psl_builtin(); @@ -101,6 +130,22 @@ static void test_psl(void) /* Norwegian with lowercase oe */ test(psl, "www.\303\270yer.no", "www.\303\270yer.no"); + /* Norwegian with lowercase oe, encoded as ISO-8859-15 */ + test_iso(psl, "www.\370yer.no", "www.\303\270yer.no"); + + /* Testing special code paths of psl_str_to_utf8lower() */ + for (it = 254; it <= 257; it++) { + memset(lbuf, 'a', it); + lbuf[it] = 0; + + lbuf[0] = '\370'; + test_iso(psl, lbuf, NULL); + + lbuf[0] = '\303'; + lbuf[1] = '\270'; + test(psl, lbuf, NULL); + } + /* special check with NULL psl context and TLD */ test(psl, "whoever.forgot.his.name", "whoever.forgot.his.name");