From 1baaacccd5e1349f12c32f57e4a5e7098525415c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20R=C3=BChsen?= <tim.ruehsen@gmx.de>
Date: Fri, 23 Sep 2016 11:12:52 +0200
Subject: [PATCH] Fix libidn/libidn2 code path of psl_str_to_utf8lower()

* fix memory leaks
* properly handle unterminated results of u8_tolower()
* add a second call to iconv() to flush its internal state
* check more code paths of psl_str_to_utf8lower() via
  tests/test-registrable-domain.c
---
 src/psl.c                       | 50 ++++++++++++++++++++------------
 tests/test-registrable-domain.c | 51 +++++++++++++++++++++++++++++++--
 2 files changed, 79 insertions(+), 22 deletions(-)

diff --git a/src/psl.c b/src/psl.c
index d637394..68bb014 100644
--- a/src/psl.c
+++ b/src/psl.c
@@ -73,6 +73,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
+#include <time.h>
 #include <errno.h>
 #include <limits.h> /* for UINT_MAX */
 #include <langinfo.h>
@@ -1678,7 +1679,6 @@ out:
 	} while (0);
 #elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
 	do {
-		printf("### encoding=%s lower=%p\n", encoding, lower ? *lower : NULL);
 		/* find out local charset encoding */
 		if (!encoding) {
 			encoding = nl_langinfo(CODESET);
@@ -1700,19 +1700,25 @@ out:
 				if (!dst) {
 					ret = PSL_ERR_NO_MEM;
 				}
-				else if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1) {
-					uint8_t *resbuf = malloc(dst_len * 2 + 1);
-					size_t len = dst_len * 2; /* leave space for additional \0 byte */
+				else if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1
+					&& iconv(cd, NULL, NULL, &dst_tmp, &dst_len_tmp) != (size_t)-1)
+				{
+					uint8_t resbuf[256];
+					size_t len = sizeof(resbuf);
 
-					if (!resbuf) {
-						ret = PSL_ERR_NO_MEM;
-					}
-					else if ((dst = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) {
+					if ((tmp = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) {
 						/* u8_tolower() does not terminate the result string */
 						ret = PSL_SUCCESS;
-						if (lower)
-							if (!(*lower = strndup((char *)dst, len)))
+						if (lower) {
+							if ((*lower = malloc(len + 1))) {
+								/* tmp is not 0 terminated */
+								memcpy(*lower, tmp, len);
+								(*lower)[len] = 0;
+							} else
 								ret = PSL_ERR_NO_MEM;
+						}
+						if (tmp != (char *)resbuf)
+							free(tmp);
 					} else {
 						ret = PSL_ERR_TO_LOWER;
 						/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
@@ -1728,20 +1734,26 @@ out:
 				ret = PSL_ERR_TO_UTF8;
 				/* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
 			}
-		} else
+		} else {
+			/* convert to lowercase */
+			uint8_t resbuf[256], *tmp;
+			size_t len = sizeof(resbuf);
+
 			ret = PSL_SUCCESS;
 
-		/* convert to lowercase */
-		if (ret == PSL_SUCCESS) {
-			uint8_t *dst, resbuf[256];
-			size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */
-
 			/* we need a conversion to lowercase */
-			if ((dst = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) {
+			if ((tmp = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) {
 				/* u8_tolower() does not terminate the result string */
-				if (lower)
-					if (!(*lower = strndup((char *)dst, len)))
+				if (lower) {
+					if ((*lower = malloc(len + 1))) {
+						/* tmp is not 0 terminated */
+						memcpy(*lower, tmp, len);
+						(*lower)[len] = 0;
+					} else
 						ret = PSL_ERR_NO_MEM;
+				}
+				if (tmp != resbuf)
+					free(tmp);
 			} else {
 				ret = PSL_ERR_TO_LOWER;
 				/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
diff --git a/tests/test-registrable-domain.c b/tests/test-registrable-domain.c
index 6a227e1..819216b 100644
--- a/tests/test-registrable-domain.c
+++ b/tests/test-registrable-domain.c
@@ -50,14 +50,28 @@ static int
 	ok,
 	failed;
 
-static void test(const psl_ctx_t *psl, const char *domain, const char *expected_result)
+static void testx(const psl_ctx_t *psl, const char *domain, const char *encoding, const char *lang, const char *expected_result)
 {
 	const char *result;
 	char *lower;
+	int rc;
 
-	/* our test data is fixed to UTF-8 (english), so provide it here */
-	if (psl_str_to_utf8lower(domain, "utf-8", "en", &lower) == PSL_SUCCESS)
+	/* just to cover special code paths for valgrind checking */
+	psl_str_to_utf8lower(domain, encoding, lang, NULL);
+
+	if ((rc = psl_str_to_utf8lower(domain, encoding, lang, &lower)) == PSL_SUCCESS)
 		domain = lower;
+	/* non-ASCII domains fail here if no runtime IDN library is configured, so skip it */
+#if defined(WITH_LIBIDN) || defined(WITH_LIBIDN2) || defined(WITH_LIBICU)
+	else if (domain) {
+		/* without runtime support this failure would be skipped; with it, it counts */
+		failed++;
+		printf("psl_str_to_utf8lower(%s)=%d\n", domain ? domain : "NULL", rc);
+
+		free(lower);
+		return;
+	}
+#endif
 
 	result = psl_registrable_domain(psl, domain);
 
@@ -72,13 +86,28 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_
 	free(lower);
 }
 
+static void test(const psl_ctx_t *psl, const char *domain, const char *expected_result)
+{
+	testx(psl, domain, "utf-8", "en", expected_result);
+}
+
+static void test_iso(const psl_ctx_t *psl, const char *domain, const char *expected_result)
+{
+	/* only makes sense with a runtime IDN library configured */
+#if defined(WITH_LIBIDN) || defined(WITH_LIBIDN2) || defined(WITH_LIBICU)
+	testx(psl, domain, "iso-8859-15", "de", expected_result);
+#endif
+}
+
 static void test_psl(void)
 {
 	FILE *fp;
 	const psl_ctx_t *psl;
 	const char *p;
 	char buf[256], domain[128], expected_regdom[128], semicolon[2];
+	char lbuf[258];
 	int er_is_null, d_is_null;
+	unsigned it;
 
 	psl = psl_builtin();
 
@@ -101,6 +130,22 @@ static void test_psl(void)
 	/* Norwegian with lowercase oe */
 	test(psl, "www.\303\270yer.no", "www.\303\270yer.no");
 
+	/* Norwegian with lowercase oe, encoded as ISO-8859-15 */
+	test_iso(psl, "www.\370yer.no", "www.\303\270yer.no");
+
+	/* Testing special code paths of psl_str_to_utf8lower() */
+	for (it = 254; it <= 257; it++) {
+		memset(lbuf, 'a', it);
+		lbuf[it] = 0;
+
+		lbuf[0] = '\370';
+		test_iso(psl, lbuf, NULL);
+
+		lbuf[0] = '\303';
+		lbuf[1] = '\270';
+		test(psl, lbuf, NULL);
+	}
+
 	/* special check with NULL psl context and TLD */
 	test(psl, "whoever.forgot.his.name", "whoever.forgot.his.name");