added IDNA2008 punycode support for psl_inline_is_public()
This commit is contained in:
parent
99d057d514
commit
a906062b85
|
@ -21,8 +21,8 @@ libpsl_inline_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSIO
|
|||
|
||||
noinst_PROGRAMS = psl2c
|
||||
psl2c_SOURCES = psl2c.c
|
||||
psl2c_CPPFLAGS = -I$(top_srcdir)/include
|
||||
#psl2c_LDADD = libpsl-@LIBPSL_API_VERSION@.la
|
||||
psl2c_CPPFLAGS = -I$(top_srcdir)/include -D _GNU_SOURCE
|
||||
psl2c_LDADD = -lidn2
|
||||
#psl2c_LDFLAGS = -static
|
||||
|
||||
# Build rule for suffix.c
|
||||
|
|
|
@ -44,7 +44,7 @@
|
|||
|
||||
typedef struct {
|
||||
char
|
||||
label_buf[42];
|
||||
label_buf[48];
|
||||
const char *
|
||||
label;
|
||||
unsigned short
|
||||
|
@ -102,15 +102,15 @@ int psl_inline_is_public(const char *domain)
|
|||
if (*p == '.')
|
||||
suffix.nlabels++;
|
||||
|
||||
// if domain has enough labels, it won't match
|
||||
// if domain has enough labels, it is public
|
||||
rule = &suffixes[0];
|
||||
if (!rule || rule->nlabels < suffix.nlabels - 1)
|
||||
return 0;
|
||||
return 1;
|
||||
|
||||
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
|
||||
if (rule) {
|
||||
// definitely a match, no matter if the found rule is a wildcard or not
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
label_bak = suffix.label;
|
||||
|
@ -130,14 +130,14 @@ int psl_inline_is_public(const char *domain)
|
|||
suffix.nlabels++;
|
||||
|
||||
if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare))
|
||||
return 0; // found an exception, so 'domain' is not a public suffix
|
||||
return 1; // found an exception, so 'domain' is public
|
||||
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* does not include exceptions */
|
||||
|
|
|
@ -44,7 +44,7 @@
|
|||
|
||||
typedef struct {
|
||||
char
|
||||
label_buf[42];
|
||||
label_buf[48];
|
||||
const char *
|
||||
label;
|
||||
unsigned short
|
||||
|
@ -187,7 +187,7 @@ static void _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
|
|||
|
||||
if (length >= sizeof(suffix->label_buf) - 1) {
|
||||
suffix->nlabels = 0;
|
||||
fprintf(stderr, _("Suffix rule too long (ignored): %s\n"), rule);
|
||||
fprintf(stderr, _("Suffix rule too long (%zd, ignored): %s\n"), length, rule);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
52
src/psl2c.c
52
src/psl2c.c
|
@ -28,7 +28,10 @@
|
|||
# include <config.h>
|
||||
#endif
|
||||
|
||||
// # include <idn2.h>
|
||||
//#ifdef WITH_LIBIDN2
|
||||
# include <idn2.h>
|
||||
//#endif
|
||||
|
||||
#include "psl.c"
|
||||
|
||||
static void _print_psl_entries(_psl_vector_t *v, const char *varname)
|
||||
|
@ -43,22 +46,44 @@ static void _print_psl_entries(_psl_vector_t *v, const char *varname)
|
|||
|
||||
printf("\t{ \"%s\", NULL, %hd, %hhd, %hhd },\n",
|
||||
e->label_buf, e->length, e->nlabels, e->wildcard);
|
||||
/*
|
||||
if (str_needs_encoding(e->label_buf)) {
|
||||
char *asc = NULL;
|
||||
int rc;
|
||||
|
||||
if ((rc = idn2_lookup_u8((uint8_t *)e->label_buf, (uint8_t **)&asc, 0)) == IDN2_OK) {
|
||||
fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, asc);
|
||||
} else
|
||||
fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc));
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
printf("};\n");
|
||||
}
|
||||
|
||||
/*
 * Report whether a NUL-terminated string contains any byte outside the
 * 7-bit ASCII range.
 *
 * Returns 1 if at least one byte has its high bit set, 0 otherwise
 * (including for the empty string).
 *
 * The byte is inspected as `unsigned char` because the signedness of plain
 * `char` is implementation-defined: a test such as `*s > 0` only detects
 * high-bit bytes where `char` is signed, and never fires where it is unsigned.
 */
static int _str_needs_encoding(const char *s)
{
	while (*s && (unsigned char)*s < 0x80)
		s++;

	/* Stopped on the terminator => pure ASCII; stopped earlier => high-bit byte found. */
	return !!*s;
}
|
||||
|
||||
/*
 * For every entry in `v` whose label contains non-ASCII bytes, convert the
 * label with idn2_lookup_u8() and append the converted form as an additional
 * entry carrying the same wildcard flag. The vector is re-sorted afterwards.
 *
 * Conversion progress and failures are reported on stderr; a label that
 * cannot be converted is left without an ASCII twin.
 */
static void _add_punycode_if_needed(_psl_vector_t *v)
{
	int it;

	/*
	 * v->cur grows as entries are appended, so the loop also visits the new
	 * entries. Presumably the idn2 output is plain ASCII and is therefore
	 * skipped by _str_needs_encoding() - TODO confirm.
	 */
	for (it = 0; it < v->cur; it++) {
		_psl_entry_t *e = _vector_get(v, it);

		if (_str_needs_encoding(e->label_buf)) {
			_psl_entry_t suffix;
			char *asc = NULL;
			int rc;

			if ((rc = idn2_lookup_u8((uint8_t *)e->label_buf, (uint8_t **)&asc, 0)) == IDN2_OK) {
				fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, asc);
				_suffix_init(&suffix, asc, strlen(asc));

				/*
				 * _suffix_init() sets nlabels to 0 and returns early when the
				 * rule is too long, leaving the rest of `suffix` unset; such
				 * an entry must not be stored.
				 * NOTE(review): assumes nlabels is non-zero for every rule
				 * that was accepted - confirm against _suffix_init().
				 */
				if (suffix.nlabels) {
					/* Read `e` before _vector_add(), in case adding relocates the storage. */
					suffix.wildcard = e->wildcard;
					_vector_add(v, &suffix);
				}

				/* The converted string is allocated by libidn2 and owned by the caller. */
				idn2_free(asc);
			} else
				fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc));
		}
	}

	_vector_sort(v);
}
|
||||
|
||||
// int main(int argc, const char **argv)
|
||||
int main(void)
|
||||
{
|
||||
|
@ -67,6 +92,9 @@ int main(void)
|
|||
if (!(psl = psl_load_fp(stdin)))
|
||||
return 1;
|
||||
|
||||
_add_punycode_if_needed(psl->suffixes);
|
||||
_add_punycode_if_needed(psl->suffix_exceptions);
|
||||
|
||||
_print_psl_entries(psl->suffixes, "suffixes");
|
||||
_print_psl_entries(psl->suffix_exceptions, "suffix_exceptions");
|
||||
|
||||
|
|
|
@ -62,8 +62,8 @@ static void test_psl(void)
|
|||
{ "www.xxx.ck", 1 },
|
||||
{ "\345\225\206\346\240\207", 0 }, // xn--czr694b oder 商标
|
||||
{ "www.\345\225\206\346\240\207", 1 },
|
||||
// { "xn--czr694b", 1 },
|
||||
// { "www.xn--czr694b", 1 },
|
||||
{ "xn--czr694b", 0 },
|
||||
{ "www.xn--czr694b", 1 },
|
||||
};
|
||||
unsigned it;
|
||||
|
||||
|
@ -79,7 +79,7 @@ static void test_psl(void)
|
|||
ok++;
|
||||
} else {
|
||||
failed++;
|
||||
printf("psl_is_tld(%s)=%d (expected %d)\n", t->domain, result, t->result);
|
||||
printf("psl_is_public(%s)=%d (expected %d)\n", t->domain, result, t->result);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -42,6 +42,8 @@ static int
|
|||
|
||||
static void test_psl(void)
|
||||
{
|
||||
// punycode generation: idn 商标
|
||||
// octal code generation: echo -n "商标" | od -b
|
||||
static const struct test_data {
|
||||
const char
|
||||
*domain;
|
||||
|
@ -58,6 +60,8 @@ static void test_psl(void)
|
|||
{ "abc.www.ck", 1 },
|
||||
{ "xxx.ck", 0 },
|
||||
{ "www.xxx.ck", 1 },
|
||||
{ "\345\225\206\346\240\207", 0 }, // xn--czr694b oder 商标
|
||||
{ "www.\345\225\206\346\240\207", 1 },
|
||||
};
|
||||
unsigned it;
|
||||
psl_ctx_t *psl;
|
||||
|
@ -74,7 +78,7 @@ static void test_psl(void)
|
|||
ok++;
|
||||
} else {
|
||||
failed++;
|
||||
printf("psl_is_tld(%s)=%d (expected %d)\n", t->domain, result, t->result);
|
||||
printf("psl_is_public(%s)=%d (expected %d)\n", t->domain, result, t->result);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue