Fix libidn/libidn2 code path of psl_str_to_utf8lower()
* Fix memory leaks
* Properly handle the unterminated results of u8_tolower()
* Add a second call to iconv() to flush its internal conversion state
* Check more code paths of psl_str_to_utf8lower() via tests/test-registrable-domain.c
This commit is contained in:
parent
e2812e8c4c
commit
1baaacccd5
50
src/psl.c
50
src/psl.c
|
@ -73,6 +73,7 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
#include <time.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <limits.h> /* for UINT_MAX */
|
#include <limits.h> /* for UINT_MAX */
|
||||||
#include <langinfo.h>
|
#include <langinfo.h>
|
||||||
|
@ -1678,7 +1679,6 @@ out:
|
||||||
} while (0);
|
} while (0);
|
||||||
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
|
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
|
||||||
do {
|
do {
|
||||||
printf("### encoding=%s lower=%p\n", encoding, lower ? *lower : NULL);
|
|
||||||
/* find out local charset encoding */
|
/* find out local charset encoding */
|
||||||
if (!encoding) {
|
if (!encoding) {
|
||||||
encoding = nl_langinfo(CODESET);
|
encoding = nl_langinfo(CODESET);
|
||||||
|
@ -1700,19 +1700,25 @@ out:
|
||||||
if (!dst) {
|
if (!dst) {
|
||||||
ret = PSL_ERR_NO_MEM;
|
ret = PSL_ERR_NO_MEM;
|
||||||
}
|
}
|
||||||
else if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1) {
|
else if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1
|
||||||
uint8_t *resbuf = malloc(dst_len * 2 + 1);
|
&& iconv(cd, NULL, NULL, &dst_tmp, &dst_len_tmp) != (size_t)-1)
|
||||||
size_t len = dst_len * 2; /* leave space for additional \0 byte */
|
{
|
||||||
|
uint8_t resbuf[256];
|
||||||
|
size_t len = sizeof(resbuf);
|
||||||
|
|
||||||
if (!resbuf) {
|
if ((tmp = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) {
|
||||||
ret = PSL_ERR_NO_MEM;
|
|
||||||
}
|
|
||||||
else if ((dst = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) {
|
|
||||||
/* u8_tolower() does not terminate the result string */
|
/* u8_tolower() does not terminate the result string */
|
||||||
ret = PSL_SUCCESS;
|
ret = PSL_SUCCESS;
|
||||||
if (lower)
|
if (lower) {
|
||||||
if (!(*lower = strndup((char *)dst, len)))
|
if ((*lower = malloc(len + 1))) {
|
||||||
|
/* tmp is not 0 terminated */
|
||||||
|
memcpy(*lower, tmp, len);
|
||||||
|
(*lower)[len] = 0;
|
||||||
|
} else
|
||||||
ret = PSL_ERR_NO_MEM;
|
ret = PSL_ERR_NO_MEM;
|
||||||
|
}
|
||||||
|
if (tmp != (char *)resbuf)
|
||||||
|
free(tmp);
|
||||||
} else {
|
} else {
|
||||||
ret = PSL_ERR_TO_LOWER;
|
ret = PSL_ERR_TO_LOWER;
|
||||||
/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
|
/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
|
||||||
|
@ -1728,20 +1734,26 @@ out:
|
||||||
ret = PSL_ERR_TO_UTF8;
|
ret = PSL_ERR_TO_UTF8;
|
||||||
/* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
|
/* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
|
||||||
}
|
}
|
||||||
} else
|
} else {
|
||||||
|
/* convert to lowercase */
|
||||||
|
uint8_t resbuf[256], *tmp;
|
||||||
|
size_t len = sizeof(resbuf);
|
||||||
|
|
||||||
ret = PSL_SUCCESS;
|
ret = PSL_SUCCESS;
|
||||||
|
|
||||||
/* convert to lowercase */
|
|
||||||
if (ret == PSL_SUCCESS) {
|
|
||||||
uint8_t *dst, resbuf[256];
|
|
||||||
size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */
|
|
||||||
|
|
||||||
/* we need a conversion to lowercase */
|
/* we need a conversion to lowercase */
|
||||||
if ((dst = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) {
|
if ((tmp = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) {
|
||||||
/* u8_tolower() does not terminate the result string */
|
/* u8_tolower() does not terminate the result string */
|
||||||
if (lower)
|
if (lower) {
|
||||||
if (!(*lower = strndup((char *)dst, len)))
|
if ((*lower = malloc(len + 1))) {
|
||||||
|
/* tmp is not 0 terminated */
|
||||||
|
memcpy(*lower, tmp, len);
|
||||||
|
(*lower)[len] = 0;
|
||||||
|
} else
|
||||||
ret = PSL_ERR_NO_MEM;
|
ret = PSL_ERR_NO_MEM;
|
||||||
|
}
|
||||||
|
if (tmp != resbuf)
|
||||||
|
free(tmp);
|
||||||
} else {
|
} else {
|
||||||
ret = PSL_ERR_TO_LOWER;
|
ret = PSL_ERR_TO_LOWER;
|
||||||
/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
|
/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
|
||||||
|
|
|
@ -50,14 +50,28 @@ static int
|
||||||
ok,
|
ok,
|
||||||
failed;
|
failed;
|
||||||
|
|
||||||
static void test(const psl_ctx_t *psl, const char *domain, const char *expected_result)
|
static void testx(const psl_ctx_t *psl, const char *domain, const char *encoding, const char *lang, const char *expected_result)
|
||||||
{
|
{
|
||||||
const char *result;
|
const char *result;
|
||||||
char *lower;
|
char *lower;
|
||||||
|
int rc;
|
||||||
|
|
||||||
/* our test data is fixed to UTF-8 (english), so provide it here */
|
/* just to cover special code paths for valgrind checking */
|
||||||
if (psl_str_to_utf8lower(domain, "utf-8", "en", &lower) == PSL_SUCCESS)
|
psl_str_to_utf8lower(domain, encoding, lang, NULL);
|
||||||
|
|
||||||
|
if ((rc = psl_str_to_utf8lower(domain, encoding, lang, &lower)) == PSL_SUCCESS)
|
||||||
domain = lower;
|
domain = lower;
|
||||||
|
/* non-ASCII domains fail here if no runtime IDN library is configured, so skip it */
|
||||||
|
#if defined(WITH_LIBIDN) || defined(WITH_LIBIDN2) || defined(WITH_LIBICU)
|
||||||
|
else if (domain) {
|
||||||
|
/* if we do not have runtime support, test failures have to be skipped */
|
||||||
|
failed++;
|
||||||
|
printf("psl_str_to_utf8lower(%s)=%d\n", domain ? domain : "NULL", rc);
|
||||||
|
|
||||||
|
free(lower);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
result = psl_registrable_domain(psl, domain);
|
result = psl_registrable_domain(psl, domain);
|
||||||
|
|
||||||
|
@ -72,13 +86,28 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_
|
||||||
free(lower);
|
free(lower);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test(const psl_ctx_t *psl, const char *domain, const char *expected_result)
|
||||||
|
{
|
||||||
|
testx(psl, domain, "utf-8", "en", expected_result);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_iso(const psl_ctx_t *psl, const char *domain, const char *expected_result)
|
||||||
|
{
|
||||||
|
/* only makes sense with a runtime IDN library configured */
|
||||||
|
#if defined(WITH_LIBIDN) || defined(WITH_LIBIDN2) || defined(WITH_LIBICU)
|
||||||
|
testx(psl, domain, "iso-8859-15", "de", expected_result);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
static void test_psl(void)
|
static void test_psl(void)
|
||||||
{
|
{
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
const psl_ctx_t *psl;
|
const psl_ctx_t *psl;
|
||||||
const char *p;
|
const char *p;
|
||||||
char buf[256], domain[128], expected_regdom[128], semicolon[2];
|
char buf[256], domain[128], expected_regdom[128], semicolon[2];
|
||||||
|
char lbuf[258];
|
||||||
int er_is_null, d_is_null;
|
int er_is_null, d_is_null;
|
||||||
|
unsigned it;
|
||||||
|
|
||||||
psl = psl_builtin();
|
psl = psl_builtin();
|
||||||
|
|
||||||
|
@ -101,6 +130,22 @@ static void test_psl(void)
|
||||||
/* Norwegian with lowercase oe */
|
/* Norwegian with lowercase oe */
|
||||||
test(psl, "www.\303\270yer.no", "www.\303\270yer.no");
|
test(psl, "www.\303\270yer.no", "www.\303\270yer.no");
|
||||||
|
|
||||||
|
/* Norwegian with lowercase oe, encoded as ISO-8859-15 */
|
||||||
|
test_iso(psl, "www.\370yer.no", "www.\303\270yer.no");
|
||||||
|
|
||||||
|
/* Testing special code paths of psl_str_to_utf8lower() */
|
||||||
|
for (it = 254; it <= 257; it++) {
|
||||||
|
memset(lbuf, 'a', it);
|
||||||
|
lbuf[it] = 0;
|
||||||
|
|
||||||
|
lbuf[0] = '\370';
|
||||||
|
test_iso(psl, lbuf, NULL);
|
||||||
|
|
||||||
|
lbuf[0] = '\303';
|
||||||
|
lbuf[1] = '\270';
|
||||||
|
test(psl, lbuf, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
/* special check with NULL psl context and TLD */
|
/* special check with NULL psl context and TLD */
|
||||||
test(psl, "whoever.forgot.his.name", "whoever.forgot.his.name");
|
test(psl, "whoever.forgot.his.name", "whoever.forgot.his.name");
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue