use libicu for UTF-8 lowercase conversion

This commit is contained in:
Tim Ruehsen 2014-06-16 21:59:23 +02:00
parent b77aaf7809
commit 657a1d4b88
1 changed file with 26 additions and 0 deletions

View File

@ -38,6 +38,11 @@
#include <ctype.h> #include <ctype.h>
#include <alloca.h> #include <alloca.h>
#ifdef WITH_LIBICU
# include <unicode/uversion.h>
# include <unicode/ustring.h>
#endif
#include <libpsl.h> #include <libpsl.h>
static int static int
@ -61,6 +66,26 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_
/* if we found utf-8, make sure to convert domain correctly to lowercase */ /* if we found utf-8, make sure to convert domain correctly to lowercase */
/* does it work, if we are not in a utf-8 env ? */ /* does it work, if we are not in a utf-8 env ? */
if (utf8) { if (utf8) {
#ifdef WITH_LIBICU
UErrorCode status = 0;
UChar utf16_dst[64], utf16_src[64];
int32_t utf16_src_length;
/* UTF-8 to lowercase conversion */
u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, domain, (int32_t) strlen(domain), &status);
if (U_SUCCESS(status)) {
int32_t dst_length = u_strToLower(utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), utf16_src, -1, "en", &status);
if (U_SUCCESS(status)) {
u_strToUTF8(lookupname, (int32_t) sizeof(lookupname), NULL, utf16_dst, dst_length, &status);
if (U_SUCCESS(status)) {
domain = lookupname;
} else
fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status);
} else
fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status);
} else
fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status);
#else
FILE *pp; FILE *pp;
size_t cmdsize = 48 + strlen(domain); size_t cmdsize = 48 + strlen(domain);
char *cmd = alloca(cmdsize); char *cmd = alloca(cmdsize);
@ -71,6 +96,7 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_
domain = lookupname; domain = lookupname;
pclose(pp); pclose(pp);
} }
#endif
} }
} }