Fix libidn/libidn2 code path of psl_str_to_utf8lower()

* fix memory leaks
* properly handle the unterminated result of u8_tolower()
* call iconv() a second time (with a NULL input) to flush any pending
  conversion state
* check more code paths of psl_str_to_utf8lower() via
  tests/test-registrable-domain.c
This commit is contained in:
Tim Rühsen 2016-09-23 11:12:52 +02:00
parent e2812e8c4c
commit 1baaacccd5
2 changed files with 79 additions and 22 deletions

View File

@ -73,6 +73,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <ctype.h> #include <ctype.h>
#include <time.h>
#include <errno.h> #include <errno.h>
#include <limits.h> /* for UINT_MAX */ #include <limits.h> /* for UINT_MAX */
#include <langinfo.h> #include <langinfo.h>
@ -1678,7 +1679,6 @@ out:
} while (0); } while (0);
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) #elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
do { do {
printf("### encoding=%s lower=%p\n", encoding, lower ? *lower : NULL);
/* find out local charset encoding */ /* find out local charset encoding */
if (!encoding) { if (!encoding) {
encoding = nl_langinfo(CODESET); encoding = nl_langinfo(CODESET);
@ -1700,19 +1700,25 @@ out:
if (!dst) { if (!dst) {
ret = PSL_ERR_NO_MEM; ret = PSL_ERR_NO_MEM;
} }
else if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1) { else if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1
uint8_t *resbuf = malloc(dst_len * 2 + 1); && iconv(cd, NULL, NULL, &dst_tmp, &dst_len_tmp) != (size_t)-1)
size_t len = dst_len * 2; /* leave space for additional \0 byte */ {
uint8_t resbuf[256];
size_t len = sizeof(resbuf);
if (!resbuf) { if ((tmp = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) {
ret = PSL_ERR_NO_MEM;
}
else if ((dst = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) {
/* u8_tolower() does not terminate the result string */ /* u8_tolower() does not terminate the result string */
ret = PSL_SUCCESS; ret = PSL_SUCCESS;
if (lower) if (lower) {
if (!(*lower = strndup((char *)dst, len))) if ((*lower = malloc(len + 1))) {
/* tmp is not 0 terminated */
memcpy(*lower, tmp, len);
(*lower)[len] = 0;
} else
ret = PSL_ERR_NO_MEM; ret = PSL_ERR_NO_MEM;
}
if (tmp != (char *)resbuf)
free(tmp);
} else { } else {
ret = PSL_ERR_TO_LOWER; ret = PSL_ERR_TO_LOWER;
/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */ /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
@ -1728,20 +1734,26 @@ out:
ret = PSL_ERR_TO_UTF8; ret = PSL_ERR_TO_UTF8;
/* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */ /* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
} }
} else } else {
/* convert to lowercase */
uint8_t resbuf[256], *tmp;
size_t len = sizeof(resbuf);
ret = PSL_SUCCESS; ret = PSL_SUCCESS;
/* convert to lowercase */
if (ret == PSL_SUCCESS) {
uint8_t *dst, resbuf[256];
size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */
/* we need a conversion to lowercase */ /* we need a conversion to lowercase */
if ((dst = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) { if ((tmp = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) {
/* u8_tolower() does not terminate the result string */ /* u8_tolower() does not terminate the result string */
if (lower) if (lower) {
if (!(*lower = strndup((char *)dst, len))) if ((*lower = malloc(len + 1))) {
/* tmp is not 0 terminated */
memcpy(*lower, tmp, len);
(*lower)[len] = 0;
} else
ret = PSL_ERR_NO_MEM; ret = PSL_ERR_NO_MEM;
}
if (tmp != resbuf)
free(tmp);
} else { } else {
ret = PSL_ERR_TO_LOWER; ret = PSL_ERR_TO_LOWER;
/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */ /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */

View File

@ -50,14 +50,28 @@ static int
ok, ok,
failed; failed;
static void test(const psl_ctx_t *psl, const char *domain, const char *expected_result) static void testx(const psl_ctx_t *psl, const char *domain, const char *encoding, const char *lang, const char *expected_result)
{ {
const char *result; const char *result;
char *lower; char *lower;
int rc;
/* our test data is fixed to UTF-8 (english), so provide it here */ /* just to cover special code paths for valgrind checking */
if (psl_str_to_utf8lower(domain, "utf-8", "en", &lower) == PSL_SUCCESS) psl_str_to_utf8lower(domain, encoding, lang, NULL);
if ((rc = psl_str_to_utf8lower(domain, encoding, lang, &lower)) == PSL_SUCCESS)
domain = lower; domain = lower;
/* non-ASCII domains fail here if no runtime IDN library is configured, so skip it */
#if defined(WITH_LIBIDN) || defined(WITH_LIBIDN2) || defined(WITH_LIBICU)
else if (domain) {
/* with a runtime IDN library available, a failed conversion is a real test failure */
failed++;
printf("psl_str_to_utf8lower(%s)=%d\n", domain ? domain : "NULL", rc);
free(lower);
return;
}
#endif
result = psl_registrable_domain(psl, domain); result = psl_registrable_domain(psl, domain);
@ -72,13 +86,28 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_
free(lower); free(lower);
} }
/*
 * Convenience wrapper around testx() for the regular test data:
 * the domain is passed on as UTF-8 encoded text with language "en".
 */
static void test(const psl_ctx_t *psl, const char *domain, const char *expected_result)
{
	const char *charset = "utf-8";
	const char *language = "en";

	testx(psl, domain, charset, language, expected_result);
}
/*
 * Convenience wrapper around testx() for ISO-8859-15 encoded input with
 * language "de".
 *
 * The call is only compiled in when one of WITH_LIBIDN, WITH_LIBIDN2 or
 * WITH_LIBICU is defined; otherwise this function does nothing.
 */
static void test_iso(const psl_ctx_t *psl, const char *domain, const char *expected_result)
{
#if defined(WITH_LIBIDN) || defined(WITH_LIBIDN2) || defined(WITH_LIBICU)
	/* only meaningful when a runtime IDN library is configured */
	const char *charset = "iso-8859-15";
	const char *language = "de";

	testx(psl, domain, charset, language, expected_result);
#endif
}
static void test_psl(void) static void test_psl(void)
{ {
FILE *fp; FILE *fp;
const psl_ctx_t *psl; const psl_ctx_t *psl;
const char *p; const char *p;
char buf[256], domain[128], expected_regdom[128], semicolon[2]; char buf[256], domain[128], expected_regdom[128], semicolon[2];
char lbuf[258];
int er_is_null, d_is_null; int er_is_null, d_is_null;
unsigned it;
psl = psl_builtin(); psl = psl_builtin();
@ -101,6 +130,22 @@ static void test_psl(void)
/* Norwegian with lowercase oe */ /* Norwegian with lowercase oe */
test(psl, "www.\303\270yer.no", "www.\303\270yer.no"); test(psl, "www.\303\270yer.no", "www.\303\270yer.no");
/* Norwegian with lowercase oe, encoded as ISO-8859-15 */
test_iso(psl, "www.\370yer.no", "www.\303\270yer.no");
/* Testing special code paths of psl_str_to_utf8lower() */
for (it = 254; it <= 257; it++) {
memset(lbuf, 'a', it);
lbuf[it] = 0;
lbuf[0] = '\370';
test_iso(psl, lbuf, NULL);
lbuf[0] = '\303';
lbuf[1] = '\270';
test(psl, lbuf, NULL);
}
/* special check with NULL psl context and TLD */ /* special check with NULL psl context and TLD */
test(psl, "whoever.forgot.his.name", "whoever.forgot.his.name"); test(psl, "whoever.forgot.his.name", "whoever.forgot.his.name");