Generate punycode (ASCII) variants of non-ASCII suffix rules in psl2c.

- src/Makefile.am: build psl2c with _GNU_SOURCE and link it against libidn2.
- src/psl-inline.c: invert the return values of psl_inline_is_public().
- src/psl.c, src/psl-inline.c: enlarge label_buf from 42 to 48 bytes.
- src/psl2c.c: for every rule containing non-ASCII bytes, add its
  idn2_lookup_u8() result as an extra entry, then re-sort the vector.
- tests: add IDN test cases and correct the name printed on failure.

NOTE(review): indentation and blank context lines were reconstructed from a
whitespace-collapsed copy; apply with `git apply --ignore-whitespace` or
`patch -l` if the context does not match exactly.

diff --git a/src/Makefile.am b/src/Makefile.am
index f86bb4b..bbd54e0 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -21,8 +21,8 @@ libpsl_inline_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSIO
 
 noinst_PROGRAMS = psl2c
 psl2c_SOURCES = psl2c.c
-psl2c_CPPFLAGS = -I$(top_srcdir)/include
-#psl2c_LDADD = libpsl-@LIBPSL_API_VERSION@.la
+psl2c_CPPFLAGS = -I$(top_srcdir)/include -D _GNU_SOURCE
+psl2c_LDADD = -lidn2
 #psl2c_LDFLAGS = -static
 
 # Build rule for suffix.c
diff --git a/src/psl-inline.c b/src/psl-inline.c
index b441182..901c57d 100644
--- a/src/psl-inline.c
+++ b/src/psl-inline.c
@@ -44,7 +44,7 @@
 
 typedef struct {
 	char
-		label_buf[42];
+		label_buf[48];
 	const char *
 		label;
 	unsigned short
@@ -102,15 +102,15 @@ int psl_inline_is_public(const char *domain)
 		if (*p == '.')
 			suffix.nlabels++;
 
-	// if domain has enough labels, it won't match
+	// if domain has enough labels, it is public
 	rule = &suffixes[0];
 	if (!rule || rule->nlabels < suffix.nlabels - 1)
-		return 0;
+		return 1;
 
 	rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
 	if (rule) {
 		// definitely a match, no matter if the found rule is a wildcard or not
-		return 1;
+		return 0;
 	}
 
 	label_bak = suffix.label;
@@ -130,14 +130,14 @@ int psl_inline_is_public(const char *domain)
 				suffix.nlabels++;
 
 				if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare))
-					return 0; // found an exception, so 'domain' is not a public suffix
+					return 1; // found an exception, so 'domain' is public
 
-				return 1;
+				return 0;
 			}
 		}
 	}
 
-	return 0;
+	return 1;
 }
 
 /* does not include exceptions */
diff --git a/src/psl.c b/src/psl.c
index fed72b7..3a1cd1c 100644
--- a/src/psl.c
+++ b/src/psl.c
@@ -44,7 +44,7 @@
 
 typedef struct {
 	char
-		label_buf[42];
+		label_buf[48];
 	const char *
 		label;
 	unsigned short
@@ -187,7 +187,7 @@ static void _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
 
 	if (length >= sizeof(suffix->label_buf) - 1) {
 		suffix->nlabels = 0;
-		fprintf(stderr, _("Suffix rule too long (ignored): %s\n"), rule);
+		fprintf(stderr, _("Suffix rule too long (%zu, ignored): %s\n"), length, rule);
 		return;
 	}
 
diff --git a/src/psl2c.c b/src/psl2c.c
index 511e19b..08108f4 100644
--- a/src/psl2c.c
+++ b/src/psl2c.c
@@ -28,7 +28,10 @@
 # include <config.h>
 #endif
 
-// # include <idn2.h>
+//#ifdef WITH_LIBIDN2
+# include <idn2.h>
+//#endif
+
 #include "psl.c"
 
 static void _print_psl_entries(_psl_vector_t *v, const char *varname)
@@ -43,22 +46,44 @@ static void _print_psl_entries(_psl_vector_t *v, const char *varname)
 
 		printf("\t{ \"%s\", NULL, %hd, %hhd, %hhd },\n",
 			e->label_buf, e->length, e->nlabels, e->wildcard);
-/*
-		if (str_needs_encoding(e->label_buf)) {
-			char *asc = NULL;
-			int rc;
-
-			if ((rc = idn2_lookup_u8((uint8_t *)e->label_buf, (uint8_t **)&asc, 0)) == IDN2_OK) {
-				fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, asc);
-			} else
-				fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc));
-		}
-*/
 	}
 
 	printf("};\n");
 }
 
+static int _str_needs_encoding(const char *s)
+{
+	while (*s && (unsigned char)*s < 0x80) s++;
+
+	return !!*s;
+}
+
+static void _add_punycode_if_needed(_psl_vector_t *v)
+{
+	int it;
+
+	for (it = 0; it < v->cur; it++) {
+		_psl_entry_t *e = _vector_get(v, it);
+
+		if (_str_needs_encoding(e->label_buf)) {
+			_psl_entry_t suffix;
+			char *asc = NULL;
+			int rc;
+
+
+			if ((rc = idn2_lookup_u8((uint8_t *)e->label_buf, (uint8_t **)&asc, 0)) == IDN2_OK) {
+				fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, asc);
+				_suffix_init(&suffix, asc, strlen(asc));
+				suffix.wildcard = e->wildcard;
+				_vector_add(v, &suffix);
+			} else
+				fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc));
+		}
+	}
+
+	_vector_sort(v);
+}
+
 // int main(int argc, const char **argv)
 int main(void)
 {
@@ -67,6 +92,9 @@ int main(void)
 	if (!(psl = psl_load_fp(stdin)))
 		return 1;
 
+	_add_punycode_if_needed(psl->suffixes);
+	_add_punycode_if_needed(psl->suffix_exceptions);
+
 	_print_psl_entries(psl->suffixes, "suffixes");
 	_print_psl_entries(psl->suffix_exceptions, "suffix_exceptions");
 
diff --git a/tests/test-is-public-inline.c b/tests/test-is-public-inline.c
index 1777317..e946617 100644
--- a/tests/test-is-public-inline.c
+++ b/tests/test-is-public-inline.c
@@ -62,8 +62,8 @@ static void test_psl(void)
 		{ "www.xxx.ck", 1 },
 		{ "\345\225\206\346\240\207", 0 }, // xn--czr694b oder 商标
 		{ "www.\345\225\206\346\240\207", 1 },
-//		{ "xn--czr694b", 1 },
-//		{ "www.xn--czr694b", 1 },
+		{ "xn--czr694b", 0 },
+		{ "www.xn--czr694b", 1 },
 	};
 	unsigned it;
 
@@ -79,7 +79,7 @@ static void test_psl(void)
 			ok++;
 		} else {
 			failed++;
-			printf("psl_is_tld(%s)=%d (expected %d)\n", t->domain, result, t->result);
+			printf("psl_is_public(%s)=%d (expected %d)\n", t->domain, result, t->result);
 		}
 	}
 
diff --git a/tests/test-is-public.c b/tests/test-is-public.c
index 00db2d0..d535f29 100644
--- a/tests/test-is-public.c
+++ b/tests/test-is-public.c
@@ -42,6 +42,8 @@ static int
 
 static void test_psl(void)
 {
+	// punycode generation: idn 商标
+	// octal code generation: echo -n "商标" | od -b
 	static const struct test_data {
 		const char
 			*domain;
@@ -58,6 +60,8 @@ static void test_psl(void)
 		{ "abc.www.ck", 1 },
 		{ "xxx.ck", 0 },
 		{ "www.xxx.ck", 1 },
+		{ "\345\225\206\346\240\207", 0 }, // xn--czr694b or 商标
+		{ "www.\345\225\206\346\240\207", 1 },
 	};
 	unsigned it;
 	psl_ctx_t *psl;
@@ -74,7 +78,7 @@ static void test_psl(void)
 			ok++;
 		} else {
 			failed++;
-			printf("psl_is_tld(%s)=%d (expected %d)\n", t->domain, result, t->result);
+			printf("psl_is_public(%s)=%d (expected %d)\n", t->domain, result, t->result);
 		}
 	}
 