added IDNA2008 punycode support for psl_inline_is_public()

This commit is contained in:
Tim Ruehsen 2014-03-22 22:55:34 +01:00
parent 99d057d514
commit a906062b85
6 changed files with 59 additions and 27 deletions

View File

@ -21,8 +21,8 @@ libpsl_inline_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSIO
noinst_PROGRAMS = psl2c noinst_PROGRAMS = psl2c
psl2c_SOURCES = psl2c.c psl2c_SOURCES = psl2c.c
psl2c_CPPFLAGS = -I$(top_srcdir)/include psl2c_CPPFLAGS = -I$(top_srcdir)/include -D _GNU_SOURCE
#psl2c_LDADD = libpsl-@LIBPSL_API_VERSION@.la psl2c_LDADD = -lidn2
#psl2c_LDFLAGS = -static #psl2c_LDFLAGS = -static
# Build rule for suffix.c # Build rule for suffix.c

View File

@ -44,7 +44,7 @@
typedef struct { typedef struct {
char char
label_buf[42]; label_buf[48];
const char * const char *
label; label;
unsigned short unsigned short
@ -102,15 +102,15 @@ int psl_inline_is_public(const char *domain)
if (*p == '.') if (*p == '.')
suffix.nlabels++; suffix.nlabels++;
// if domain has enough labels, it won't match // if domain has enough labels, it is public
rule = &suffixes[0]; rule = &suffixes[0];
if (!rule || rule->nlabels < suffix.nlabels - 1) if (!rule || rule->nlabels < suffix.nlabels - 1)
return 0; return 1;
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare); rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
if (rule) { if (rule) {
// definitely a match, no matter if the found rule is a wildcard or not // definitely a match, no matter if the found rule is a wildcard or not
return 1; return 0;
} }
label_bak = suffix.label; label_bak = suffix.label;
@ -130,15 +130,15 @@ int psl_inline_is_public(const char *domain)
suffix.nlabels++; suffix.nlabels++;
if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare)) if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare))
return 0; // found an exception, so 'domain' is not a public suffix return 1; // found an exception, so 'domain' is public
return 1;
}
}
}
return 0; return 0;
} }
}
}
return 1;
}
/* does not include exceptions */ /* does not include exceptions */
int psl_inline_suffix_count(void) int psl_inline_suffix_count(void)

View File

@ -44,7 +44,7 @@
typedef struct { typedef struct {
char char
label_buf[42]; label_buf[48];
const char * const char *
label; label;
unsigned short unsigned short
@ -187,7 +187,7 @@ static void _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
if (length >= sizeof(suffix->label_buf) - 1) { if (length >= sizeof(suffix->label_buf) - 1) {
suffix->nlabels = 0; suffix->nlabels = 0;
fprintf(stderr, _("Suffix rule too long (ignored): %s\n"), rule); fprintf(stderr, _("Suffix rule too long (%zd, ignored): %s\n"), length, rule);
return; return;
} }

View File

@ -28,7 +28,10 @@
# include <config.h> # include <config.h>
#endif #endif
// # include <idn2.h> //#ifdef WITH_LIBIDN2
# include <idn2.h>
//#endif
#include "psl.c" #include "psl.c"
static void _print_psl_entries(_psl_vector_t *v, const char *varname) static void _print_psl_entries(_psl_vector_t *v, const char *varname)
@ -43,22 +46,44 @@ static void _print_psl_entries(_psl_vector_t *v, const char *varname)
printf("\t{ \"%s\", NULL, %hd, %hhd, %hhd },\n", printf("\t{ \"%s\", NULL, %hd, %hhd, %hhd },\n",
e->label_buf, e->length, e->nlabels, e->wildcard); e->label_buf, e->length, e->nlabels, e->wildcard);
/*
if (str_needs_encoding(e->label_buf)) {
char *asc = NULL;
int rc;
if ((rc = idn2_lookup_u8((uint8_t *)e->label_buf, (uint8_t **)&asc, 0)) == IDN2_OK) {
fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, asc);
} else
fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc));
}
*/
} }
printf("};\n"); printf("};\n");
} }
/*
 * Report whether the NUL-terminated string 's' contains at least one byte
 * outside the 7-bit ASCII range (i.e. >= 0x80).
 *
 * Returns 1 if such a byte is present, 0 otherwise (including for "").
 *
 * The bytes are inspected as 'unsigned char': the signedness of plain 'char'
 * is implementation-defined, and a signed comparison such as '*s > 0' never
 * stops at a high byte on targets where 'char' is unsigned.
 */
static int _str_needs_encoding(const char *s)
{
	const unsigned char *p = (const unsigned char *)s;

	/* skip the leading run of plain ASCII bytes */
	while (*p != 0 && *p < 0x80)
		p++;

	/* either we hit the terminator (pure ASCII) or a high byte */
	return *p != 0;
}
/*
 * For every entry of 'v' whose label contains non-ASCII bytes, ask libidn2
 * for the ASCII (punycode) form, add a second entry built from that form and
 * carrying the same wildcard flag, then re-sort the vector.
 *
 * Conversion results and conversion failures are reported on stderr; a
 * failed conversion leaves the original entry in place and adds nothing.
 */
static void _add_punycode_if_needed(_psl_vector_t *v)
{
	int it;

	/*
	 * v->cur is re-read on every pass, so entries added below are visited
	 * too; they are the ASCII output of idn2_lookup_u8() and are therefore
	 * skipped by _str_needs_encoding().
	 */
	for (it = 0; it < v->cur; it++) {
		_psl_entry_t *e = _vector_get(v, it);

		if (_str_needs_encoding(e->label_buf)) {
			_psl_entry_t suffix;
			char *asc = NULL;
			int rc;

			if ((rc = idn2_lookup_u8((uint8_t *)e->label_buf, (uint8_t **)&asc, 0)) == IDN2_OK) {
				fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, asc);
				_suffix_init(&suffix, asc, strlen(asc));
				suffix.wildcard = e->wildcard;
				_vector_add(v, &suffix);

				/*
				 * The string returned by idn2_lookup_u8() is owned by the
				 * caller and was previously leaked.
				 * NOTE(review): assumes _suffix_init() copies 'asc' into
				 * suffix.label_buf rather than keeping the pointer - confirm.
				 */
				idn2_free(asc);
			} else {
				fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc));
			}
		}
	}

	_vector_sort(v);
}
// int main(int argc, const char **argv) // int main(int argc, const char **argv)
int main(void) int main(void)
{ {
@ -67,6 +92,9 @@ int main(void)
if (!(psl = psl_load_fp(stdin))) if (!(psl = psl_load_fp(stdin)))
return 1; return 1;
_add_punycode_if_needed(psl->suffixes);
_add_punycode_if_needed(psl->suffix_exceptions);
_print_psl_entries(psl->suffixes, "suffixes"); _print_psl_entries(psl->suffixes, "suffixes");
_print_psl_entries(psl->suffix_exceptions, "suffix_exceptions"); _print_psl_entries(psl->suffix_exceptions, "suffix_exceptions");

View File

@ -62,8 +62,8 @@ static void test_psl(void)
{ "www.xxx.ck", 1 }, { "www.xxx.ck", 1 },
{ "\345\225\206\346\240\207", 0 }, // xn--czr694b or 商标 { "\345\225\206\346\240\207", 0 }, // xn--czr694b or 商标
{ "www.\345\225\206\346\240\207", 1 }, { "www.\345\225\206\346\240\207", 1 },
// { "xn--czr694b", 1 }, { "xn--czr694b", 0 },
// { "www.xn--czr694b", 1 }, { "www.xn--czr694b", 1 },
}; };
unsigned it; unsigned it;
@ -79,7 +79,7 @@ static void test_psl(void)
ok++; ok++;
} else { } else {
failed++; failed++;
printf("psl_is_tld(%s)=%d (expected %d)\n", t->domain, result, t->result); printf("psl_is_public(%s)=%d (expected %d)\n", t->domain, result, t->result);
} }
} }

View File

@ -42,6 +42,8 @@ static int
static void test_psl(void) static void test_psl(void)
{ {
// punycode generation: idn 商标
// octal code generation: echo -n "商标" | od -b
static const struct test_data { static const struct test_data {
const char const char
*domain; *domain;
@ -58,6 +60,8 @@ static void test_psl(void)
{ "abc.www.ck", 1 }, { "abc.www.ck", 1 },
{ "xxx.ck", 0 }, { "xxx.ck", 0 },
{ "www.xxx.ck", 1 }, { "www.xxx.ck", 1 },
{ "\345\225\206\346\240\207", 0 }, // xn--czr694b or 商标
{ "www.\345\225\206\346\240\207", 1 },
}; };
unsigned it; unsigned it;
psl_ctx_t *psl; psl_ctx_t *psl;
@ -74,7 +78,7 @@ static void test_psl(void)
ok++; ok++;
} else { } else {
failed++; failed++;
printf("psl_is_tld(%s)=%d (expected %d)\n", t->domain, result, t->result); printf("psl_is_public(%s)=%d (expected %d)\n", t->domain, result, t->result);
} }
} }