added IDNA2008 punycode support for psl_inline_is_public()
This commit is contained in:
parent
99d057d514
commit
a906062b85
|
@ -21,8 +21,8 @@ libpsl_inline_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSIO
|
||||||
|
|
||||||
noinst_PROGRAMS = psl2c
|
noinst_PROGRAMS = psl2c
|
||||||
psl2c_SOURCES = psl2c.c
|
psl2c_SOURCES = psl2c.c
|
||||||
psl2c_CPPFLAGS = -I$(top_srcdir)/include
|
psl2c_CPPFLAGS = -I$(top_srcdir)/include -D _GNU_SOURCE
|
||||||
#psl2c_LDADD = libpsl-@LIBPSL_API_VERSION@.la
|
psl2c_LDADD = -lidn2
|
||||||
#psl2c_LDFLAGS = -static
|
#psl2c_LDFLAGS = -static
|
||||||
|
|
||||||
# Build rule for suffix.c
|
# Build rule for suffix.c
|
||||||
|
|
|
@ -44,7 +44,7 @@
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char
|
char
|
||||||
label_buf[42];
|
label_buf[48];
|
||||||
const char *
|
const char *
|
||||||
label;
|
label;
|
||||||
unsigned short
|
unsigned short
|
||||||
|
@ -102,15 +102,15 @@ int psl_inline_is_public(const char *domain)
|
||||||
if (*p == '.')
|
if (*p == '.')
|
||||||
suffix.nlabels++;
|
suffix.nlabels++;
|
||||||
|
|
||||||
// if domain has enough labels, it won't match
|
// if domain has enough labels, it is public
|
||||||
rule = &suffixes[0];
|
rule = &suffixes[0];
|
||||||
if (!rule || rule->nlabels < suffix.nlabels - 1)
|
if (!rule || rule->nlabels < suffix.nlabels - 1)
|
||||||
return 0;
|
return 1;
|
||||||
|
|
||||||
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
|
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
|
||||||
if (rule) {
|
if (rule) {
|
||||||
// definitely a match, no matter if the found rule is a wildcard or not
|
// definitely a match, no matter if the found rule is a wildcard or not
|
||||||
return 1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
label_bak = suffix.label;
|
label_bak = suffix.label;
|
||||||
|
@ -130,15 +130,15 @@ int psl_inline_is_public(const char *domain)
|
||||||
suffix.nlabels++;
|
suffix.nlabels++;
|
||||||
|
|
||||||
if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare))
|
if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare))
|
||||||
return 0; // found an exception, so 'domain' is not a public suffix
|
return 1; // found an exception, so 'domain' is public
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
/* does not include exceptions */
|
/* does not include exceptions */
|
||||||
int psl_inline_suffix_count(void)
|
int psl_inline_suffix_count(void)
|
||||||
|
|
|
@ -44,7 +44,7 @@
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char
|
char
|
||||||
label_buf[42];
|
label_buf[48];
|
||||||
const char *
|
const char *
|
||||||
label;
|
label;
|
||||||
unsigned short
|
unsigned short
|
||||||
|
@ -187,7 +187,7 @@ static void _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
|
||||||
|
|
||||||
if (length >= sizeof(suffix->label_buf) - 1) {
|
if (length >= sizeof(suffix->label_buf) - 1) {
|
||||||
suffix->nlabels = 0;
|
suffix->nlabels = 0;
|
||||||
fprintf(stderr, _("Suffix rule too long (ignored): %s\n"), rule);
|
fprintf(stderr, _("Suffix rule too long (%zd, ignored): %s\n"), length, rule);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
52
src/psl2c.c
52
src/psl2c.c
|
@ -28,7 +28,10 @@
|
||||||
# include <config.h>
|
# include <config.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// # include <idn2.h>
|
//#ifdef WITH_LIBIDN2
|
||||||
|
# include <idn2.h>
|
||||||
|
//#endif
|
||||||
|
|
||||||
#include "psl.c"
|
#include "psl.c"
|
||||||
|
|
||||||
static void _print_psl_entries(_psl_vector_t *v, const char *varname)
|
static void _print_psl_entries(_psl_vector_t *v, const char *varname)
|
||||||
|
@ -43,22 +46,44 @@ static void _print_psl_entries(_psl_vector_t *v, const char *varname)
|
||||||
|
|
||||||
printf("\t{ \"%s\", NULL, %hd, %hhd, %hhd },\n",
|
printf("\t{ \"%s\", NULL, %hd, %hhd, %hhd },\n",
|
||||||
e->label_buf, e->length, e->nlabels, e->wildcard);
|
e->label_buf, e->length, e->nlabels, e->wildcard);
|
||||||
/*
|
|
||||||
if (str_needs_encoding(e->label_buf)) {
|
|
||||||
char *asc = NULL;
|
|
||||||
int rc;
|
|
||||||
|
|
||||||
if ((rc = idn2_lookup_u8((uint8_t *)e->label_buf, (uint8_t **)&asc, 0)) == IDN2_OK) {
|
|
||||||
fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, asc);
|
|
||||||
} else
|
|
||||||
fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc));
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("};\n");
|
printf("};\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Tell whether the NUL-terminated string 's' contains at least one byte
 * outside the 7-bit ASCII range (high bit set).
 *
 * Returns 1 if such a byte is present, 0 if the string is empty or
 * consists of 7-bit bytes only.
 */
static int _str_needs_encoding(const char *s)
{
	const unsigned char *p = (const unsigned char *)s;

	/*
	 * Inspect the bytes as unsigned char. Whether plain char is signed is
	 * implementation-defined; a test like '*s > 0' only stops at high-bit
	 * bytes where char is signed, and never detects them where it is
	 * unsigned.
	 */
	while (*p != 0 && *p < 0x80)
		p++;

	return *p != 0;
}
|
||||||
|
|
||||||
|
/*
 * For every entry of 'v' whose label contains non-ASCII bytes, append a
 * second entry built from the ASCII form returned by idn2_lookup_u8(),
 * carrying over the wildcard flag of the original entry. The vector is
 * re-sorted once all entries have been processed.
 *
 * Conversion failures are reported on stderr and the entry is left without
 * an ASCII counterpart.
 */
static void _add_punycode_if_needed(_psl_vector_t *v)
{
	/*
	 * Only the entries present on entry need to be examined: everything
	 * appended below is the ASCII output of idn2_lookup_u8() and would be
	 * rejected by _str_needs_encoding() anyway.
	 */
	const int count = v->cur;
	int it;

	for (it = 0; it < count; it++) {
		_psl_entry_t *e = _vector_get(v, it);
		_psl_entry_t suffix;
		uint8_t *asc = NULL;
		int rc;

		if (!_str_needs_encoding(e->label_buf))
			continue;

		rc = idn2_lookup_u8((uint8_t *)e->label_buf, &asc, 0);
		if (rc != IDN2_OK) {
			fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc));
			continue;
		}

		fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, (const char *)asc);
		_suffix_init(&suffix, (const char *)asc, strlen((const char *)asc));

		/*
		 * _suffix_init() marks a rule it rejected as too long with
		 * nlabels == 0; such an entry must not end up in the table.
		 * NOTE(review): assumes nlabels is non-zero for every accepted
		 * rule - confirm against _suffix_init().
		 */
		if (suffix.nlabels) {
			suffix.wildcard = e->wildcard;
			_vector_add(v, &suffix);
		}

		/*
		 * The lookup result is allocated by libidn2 and owned by the
		 * caller; release it to avoid leaking one buffer per entry.
		 * NOTE(review): assumes _suffix_init() copies the rule into
		 * label_buf rather than keeping the pointer - confirm.
		 */
		idn2_free(asc);
	}

	_vector_sort(v);
}
|
||||||
|
|
||||||
// int main(int argc, const char **argv)
|
// int main(int argc, const char **argv)
|
||||||
int main(void)
|
int main(void)
|
||||||
{
|
{
|
||||||
|
@ -67,6 +92,9 @@ int main(void)
|
||||||
if (!(psl = psl_load_fp(stdin)))
|
if (!(psl = psl_load_fp(stdin)))
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
|
_add_punycode_if_needed(psl->suffixes);
|
||||||
|
_add_punycode_if_needed(psl->suffix_exceptions);
|
||||||
|
|
||||||
_print_psl_entries(psl->suffixes, "suffixes");
|
_print_psl_entries(psl->suffixes, "suffixes");
|
||||||
_print_psl_entries(psl->suffix_exceptions, "suffix_exceptions");
|
_print_psl_entries(psl->suffix_exceptions, "suffix_exceptions");
|
||||||
|
|
||||||
|
|
|
@ -62,8 +62,8 @@ static void test_psl(void)
|
||||||
{ "www.xxx.ck", 1 },
|
{ "www.xxx.ck", 1 },
|
||||||
{ "\345\225\206\346\240\207", 0 }, // xn--czr694b oder 商标
|
{ "\345\225\206\346\240\207", 0 }, // xn--czr694b oder 商标
|
||||||
{ "www.\345\225\206\346\240\207", 1 },
|
{ "www.\345\225\206\346\240\207", 1 },
|
||||||
// { "xn--czr694b", 1 },
|
{ "xn--czr694b", 0 },
|
||||||
// { "www.xn--czr694b", 1 },
|
{ "www.xn--czr694b", 1 },
|
||||||
};
|
};
|
||||||
unsigned it;
|
unsigned it;
|
||||||
|
|
||||||
|
@ -79,7 +79,7 @@ static void test_psl(void)
|
||||||
ok++;
|
ok++;
|
||||||
} else {
|
} else {
|
||||||
failed++;
|
failed++;
|
||||||
printf("psl_is_tld(%s)=%d (expected %d)\n", t->domain, result, t->result);
|
printf("psl_is_public(%s)=%d (expected %d)\n", t->domain, result, t->result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,8 @@ static int
|
||||||
|
|
||||||
static void test_psl(void)
|
static void test_psl(void)
|
||||||
{
|
{
|
||||||
|
// punycode generation: idn 商标
|
||||||
|
// octal code generation: echo -n "商标" | od -b
|
||||||
static const struct test_data {
|
static const struct test_data {
|
||||||
const char
|
const char
|
||||||
*domain;
|
*domain;
|
||||||
|
@ -58,6 +60,8 @@ static void test_psl(void)
|
||||||
{ "abc.www.ck", 1 },
|
{ "abc.www.ck", 1 },
|
||||||
{ "xxx.ck", 0 },
|
{ "xxx.ck", 0 },
|
||||||
{ "www.xxx.ck", 1 },
|
{ "www.xxx.ck", 1 },
|
||||||
|
{ "\345\225\206\346\240\207", 0 }, // xn--czr694b oder 商标
|
||||||
|
{ "www.\345\225\206\346\240\207", 1 },
|
||||||
};
|
};
|
||||||
unsigned it;
|
unsigned it;
|
||||||
psl_ctx_t *psl;
|
psl_ctx_t *psl;
|
||||||
|
@ -74,7 +78,7 @@ static void test_psl(void)
|
||||||
ok++;
|
ok++;
|
||||||
} else {
|
} else {
|
||||||
failed++;
|
failed++;
|
||||||
printf("psl_is_tld(%s)=%d (expected %d)\n", t->domain, result, t->result);
|
printf("psl_is_public(%s)=%d (expected %d)\n", t->domain, result, t->result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue