diff --git a/include/Makefile.am b/include/Makefile.am
index a45de59..dd315d8 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -1 +1 @@
-include_HEADERS = libpsl.h
+include_HEADERS = libpsl.h libpsl-inline.h
diff --git a/include/libpsl-inline.h b/include/libpsl-inline.h
new file mode 100644
index 0000000..d9997a6
--- /dev/null
+++ b/include/libpsl-inline.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright(c) 2014 Tim Ruehsen
+ *
+ * This file is part of libpsl.
+ *
+ * Libpsl is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Libpsl is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with libpsl. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Header file for the libpsl-inline library routines
+ * (Public Suffix List data compiled into the library)
+ *
+ * Changelog
+ * 22.03.2014 Tim Ruehsen created
+ *
+ */
+
+#ifndef LIBPSL_LIBPSL_INLINE_H
+#define LIBPSL_LIBPSL_INLINE_H
+
+#include <stdio.h>
+
+/* Let C++ include C headers */
+#ifdef __cplusplus
+# define PSL_BEGIN_DECLS extern "C" {
+# define PSL_END_DECLS }
+#else
+# define PSL_BEGIN_DECLS
+# define PSL_END_DECLS
+#endif
+
+/* Message translation helpers; without NLS they fall back to the untranslated strings */
+#if ENABLE_NLS != 0
+# include <libintl.h>
+# define _(STRING) gettext(STRING)
+#else
+# define _(STRING) STRING
+/* pick the singular form for N == 1, the plural form otherwise */
+# define ngettext(STRING1,STRING2,N) ((N) == 1 ? (STRING1) : (STRING2))
+#endif
+
+PSL_BEGIN_DECLS
+
+/* Prepare the compiled-in suffix tables; call once before psl_inline_is_public() */
+void
+	psl_inline_init(void);
+
+/* Counterpart of psl_inline_init(); currently has nothing to release */
+void
+	psl_inline_deinit(void);
+
+/* Returns 1 if 'domain' is a public suffix, 0 otherwise (a single leading dot is ignored) */
+int
+	psl_inline_is_public(const char *domain);
+
+/* Number of compiled-in suffix rules; does not include exceptions */
+int
+	psl_inline_suffix_count(void);
+
+/* Number of compiled-in exception rules only */
+int
+	psl_inline_suffix_exception_count(void);
+
+PSL_END_DECLS
+
+#endif /* LIBPSL_LIBPSL_INLINE_H */
diff --git a/src/Makefile.am b/src/Makefile.am
index f8339d5..f86bb4b 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,14 +1,30 @@
-lib_LTLIBRARIES = libpsl-@LIBPSL_API_VERSION@.la
+#EXTRA_DIST = $(top_srcdir)/data/effective_tld_names.dat
+
+# suffixes.c must be created before psl-inline.c is compiled
+BUILT_SOURCES = suffixes.c
+
+# suffixes.c is a built source that must be cleaned
+CLEANFILES = suffixes.c
+
+# build two libraries, 'inline' version with PSL entries compiled in
+lib_LTLIBRARIES = libpsl-@LIBPSL_API_VERSION@.la libpsl-inline-@LIBPSL_API_VERSION@.la
+
 libpsl_@LIBPSL_API_VERSION@_la_SOURCES = psl.c
-
 libpsl_@LIBPSL_API_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include
-
 # include ABI version information
 libpsl_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
 
-#bin_PROGRAMS = test_linking
-#noinst_PROGRAMS = test_linking
-#test_linking_SOURCES = test_linking.c
-#test_linking_CPPFLAGS = -I$(top_srcdir)/include
-#test_linking_LDADD = libpsl-@LIBPSL_API_VERSION@.la
-#test_linking_LDFLAGS = -static
+libpsl_inline_@LIBPSL_API_VERSION@_la_SOURCES = psl-inline.c
+libpsl_inline_@LIBPSL_API_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include
+# include ABI version information
+libpsl_inline_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
+
+noinst_PROGRAMS = psl2c
+psl2c_SOURCES = psl2c.c
+psl2c_CPPFLAGS = -I$(top_srcdir)/include
+#psl2c_LDADD = libpsl-@LIBPSL_API_VERSION@.la
+#psl2c_LDFLAGS = -static
+
+# Build rule for suffixes.c
+suffixes.c: $(top_srcdir)/data/effective_tld_names.dat psl2c$(EXEEXT)
+	./psl2c$(EXEEXT) <$(top_srcdir)/data/effective_tld_names.dat >suffixes.c
diff --git a/src/psl-inline.c b/src/psl-inline.c
new file mode 100644
index 0000000..b441182
--- /dev/null
+++ b/src/psl-inline.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright(c) 2014 Tim Ruehsen
+ *
+ * This file is part of MGet.
+ *
+ * Mget is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Mget is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Mget. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Public Suffix List routines (right now experimental)
+ *
+ * Changelog
+ * 22.03.2014 Tim Ruehsen created
+ *
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <limits.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <libpsl-inline.h>
+
+#define countof(a) (sizeof(a)/sizeof(*(a)))
+
+// One Public Suffix List rule. The field order must stay in sync with the
+// positional initializers that psl2c writes into suffixes.c.
+typedef struct {
+	char
+		label_buf[42]; // rule text as compiled into the table
+	const char *
+		label; // text used for comparing; NULL in the generated tables
+	unsigned short
+		length; // length of the label string
+	unsigned char
+		nlabels, // number of labels
+		wildcard; // this is a wildcard rule (e.g. *.sapporo.jp)
+} _psl_entry_t;
+
+// generated by psl2c, defines suffixes[] and suffix_exceptions[];
+// bsearch() below relies on both being sorted in _suffix_compare() order
+#include "suffixes.c"
+
+// Lookup keys point 'label' at caller memory, while table entries only have it
+// set after psl_inline_init(); fall back to the compiled-in text for those.
+static const char *_entry_label(const _psl_entry_t *e)
+{
+	return e->label ? e->label : e->label_buf;
+}
+
+// by this kind of sorting, we can easily see if a domain matches or not (match = supercookie !)
+// The 'const void *' signature allows passing it to bsearch() without a cast.
+static int _suffix_compare(const void *v1, const void *v2)
+{
+	const _psl_entry_t *s1 = v1, *s2 = v2;
+	int n;
+
+	if ((n = s2->nlabels - s1->nlabels))
+		return n; // most labels first
+
+	if ((n = s1->length - s2->length))
+		return n; // shorter rules first
+
+	return strcmp(_entry_label(s1), _entry_label(s2));
+}
+
+// binary search for 'key' in a table of 'n' rules, NULL if there is no such rule
+static const _psl_entry_t *_find_rule(const _psl_entry_t *key, const _psl_entry_t *table, size_t n)
+{
+	return bsearch(key, table, n, sizeof(*table), _suffix_compare);
+}
+
+// Point each table entry's 'label' at its own 'label_buf'.
+void psl_inline_init(void)
+{
+	size_t it;
+
+	for (it = 0; it < countof(suffixes); it++)
+		suffixes[it].label = suffixes[it].label_buf;
+
+	for (it = 0; it < countof(suffix_exceptions); it++)
+		suffix_exceptions[it].label = suffix_exceptions[it].label_buf;
+}
+
+// Nothing to release (the tables are static data); counterpart of psl_inline_init().
+void psl_inline_deinit(void)
+{
+}
+
+// Returns 1 if 'domain' is a public suffix, else 0 (also for a NULL 'domain').
+// 'domain' is public if a rule matches it literally, or if a wildcard rule
+// matches its parent domain and 'domain' is not listed as an exception.
+int psl_inline_is_public(const char *domain)
+{
+	_psl_entry_t suffix, parent;
+	const _psl_entry_t *rule;
+	const char *p;
+	unsigned nlabels = 1;
+
+	if (!domain)
+		return 0;
+
+	// this function should be called without leading dots, just make sure
+	suffix.label = domain + (*domain == '.');
+	suffix.length = strlen(suffix.label);
+	suffix.wildcard = 0;
+
+	for (p = suffix.label; *p; p++)
+		if (*p == '.')
+			nlabels++;
+
+	// the first rule has the most labels and a wildcard rule covers just one
+	// label more, so a domain with more labels than that can't match
+	if (nlabels > UCHAR_MAX || nlabels > suffixes[0].nlabels + 1u)
+		return 0;
+
+	suffix.nlabels = (unsigned char) nlabels;
+
+	if (_find_rule(&suffix, suffixes, countof(suffixes))) {
+		// definitely a match, no matter if the found rule is a wildcard or not
+		return 1;
+	}
+
+	// no literal match, so try the parent domain (everything after the first dot)
+	if (!(p = strchr(suffix.label, '.')))
+		return 0;
+
+	parent.label = p + 1;
+	parent.length = strlen(parent.label);
+	parent.wildcard = 0;
+	parent.nlabels = suffix.nlabels - 1;
+
+	rule = _find_rule(&parent, suffixes, countof(suffixes));
+	if (!rule || !rule->wildcard)
+		return 0;
+
+	// now that we matched a wildcard, we have to check for an exception
+	if (_find_rule(&suffix, suffix_exceptions, countof(suffix_exceptions)))
+		return 0; // found an exception, so 'domain' is not a public suffix
+
+	return 1;
+}
+
+/* does not include exceptions */
+int psl_inline_suffix_count(void)
+{
+	return (int) countof(suffixes);
+}
+
+/* just counts exceptions */
+int psl_inline_suffix_exception_count(void)
+{
+	return (int) countof(suffix_exceptions);
+}
diff --git a/src/psl.c b/src/psl.c
index 5fa03a9..aba1594 100644
--- a/src/psl.c
+++ b/src/psl.c
@@ -158,6 +158,11 @@ static void _vector_sort(_psl_vector_t *v)
 		qsort_r(v->entry, v->cur, sizeof(_psl_vector_t *), _compare, v);
 }
 
+static inline int _vector_size(_psl_vector_t *v)
+{
+	return v ? v->cur : 0;
+}
+
 // by this kind of sorting, we can easily see if a domain matches or not (match = supercookie !)
 
 static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
@@ -167,7 +172,7 @@ static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
 	if ((n = s2->nlabels - s1->nlabels))
 		return n; // most labels first
 
-	if ((n=s1->length - s2->length))
+	if ((n = s1->length - s2->length))
 		return n; // shorter rules first
 
 	return strcmp(s1->label, s2->label);
@@ -216,7 +221,7 @@ int psl_is_public(const psl_ctx_t *psl, const char *domain)
 	const char *p, *label_bak;
 	unsigned short length_bak;
 
-	// this function should be called without leading dots, just make shure
+	// this function should be called without leading dots, just make sure
 	suffix.label = domain + (*domain == '.');
 	suffix.length = strlen(suffix.label);
 	suffix.wildcard = 0;
@@ -327,19 +332,18 @@ psl_ctx_t *psl_load_fp(FILE *fp)
 	return psl;
 }
 
-
 /* does not include exceptions */
 int psl_suffix_count(const psl_ctx_t *psl)
 {
-	return psl->suffixes->cur;
+	return _vector_size(psl->suffixes);
 }
+
 /* just counts exceptions */
 int psl_suffix_exception_count(const
psl_ctx_t *psl)
 {
-	return psl->suffix_exceptions->cur;
+	return _vector_size(psl->suffix_exceptions);
 }
 
-
 void psl_free(psl_ctx_t **psl)
 {
 	if (psl && *psl) {
diff --git a/src/psl2c.c b/src/psl2c.c
new file mode 100644
index 0000000..ffe1487
--- /dev/null
+++ b/src/psl2c.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright(c) 2014 Tim Ruehsen
+ *
+ * This file is part of libpsl.
+ *
+ * Libpsl is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Libpsl is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with libpsl. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Precompile the Public Suffix List into C source code:
+ * reads the list from stdin and writes the tables to stdout
+ *
+ * Changelog
+ * 22.03.2014 Tim Ruehsen created
+ *
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+// pull in the parser and its static helpers (_psl_vector_t, _vector_get(), ...)
+#include "psl.c"
+
+// Write the entries of 'v' to stdout as the initializer of a static
+// _psl_entry_t array named 'varname'.
+static void _print_psl_entries(_psl_vector_t *v, const char *varname)
+{
+	int it, n = _vector_size(v);
+
+	printf("// automatically generated by psl2c\n");
+	printf("static _psl_entry_t %s[] = {\n", varname);
+
+	for (it = 0; it < n; it++) {
+		_psl_entry_t *e = _vector_get(v, it);
+
+		// 'label' is emitted as NULL, psl_inline_init() sets it at runtime;
+		// the casts make the arguments match %d whatever the field types are
+		printf("\t{ \"%s\", NULL, %d, %d, %d },\n",
+			e->label_buf, (int) e->length, (int) e->nlabels, (int) e->wildcard);
+	}
+
+	printf("};\n");
+}
+
+// Returns 0 on success, 1 if the list could not be loaded or the output could not be written.
+int main(void)
+{
+	psl_ctx_t *psl;
+	int failed;
+
+	if (!(psl = psl_load_fp(stdin)))
+		return 1;
+
+	_print_psl_entries(psl->suffixes, "suffixes");
+	_print_psl_entries(psl->suffix_exceptions, "suffix_exceptions");
+
+	// a truncated suffixes.c must not look like a successful run to make
+	failed = fflush(stdout) != 0 || ferror(stdout);
+
+	psl_free(&psl);
+	return failed;
+}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index f6d6f11..6595619 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -4,12 +4,12 @@ AM_CPPFLAGS =
-Wno-missing-field-initializers -I$(top_srcdir)/include
 AM_LDFLAGS = -static
 LDADD = ../src/libpsl-@LIBPSL_API_VERSION@.la
 
-PSL_TESTS = test-is-public
+PSL_TESTS = test-is-public test-is-public-inline
 
 check_PROGRAMS = $(PSL_TESTS)
 
-test_is_tld_SOURCES = test-is-public.c
-test_is_tld_LDADD = ../src/libpsl-@LIBPSL_API_VERSION@.la
+#test_is_public_inline_SOURCES = test-is-public-inline.c
+test_is_public_inline_LDADD = ../src/libpsl-inline-@LIBPSL_API_VERSION@.la
 
 TESTS_ENVIRONMENT = TESTS_VALGRIND="@VALGRIND_ENVIRONMENT@"
 TESTS = $(PSL_TESTS)
diff --git a/tests/test-is-public-inline.c b/tests/test-is-public-inline.c
new file mode 100644
index 0000000..62d0c42
--- /dev/null
+++ b/tests/test-is-public-inline.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright(c) 2014 Tim Ruehsen
+ *
+ * This file is part of MGet.
+ *
+ * Mget is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Mget is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Mget. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Public Suffix List routines (right now experimental)
+ *
+ * Changelog
+ * 19.03.2014 Tim Ruehsen created from libmget/cookie.c
+ *
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <libpsl-inline.h>
+
+#define countof(a) (sizeof(a)/sizeof(*(a)))
+
+// counters of passed and failed checks, reported by main()
+static int
+	ok,
+	failed;
+
+// Run psl_inline_is_public() on a table of domains with known results
+// and count each outcome in 'ok' or 'failed'.
+static void test_psl(void)
+{
+	static const struct test_data {
+		const char
+			*domain;
+		int
+			result;
+	} test_data[] = {
+		{ "www.example.com", 0 },
+		{ "com.ar", 1 },
+		{ "www.com.ar", 0 },
+		{ "cc.ar.us", 1 },
+		{ ".cc.ar.us", 1 },
+		{ "www.cc.ar.us", 0 },
+		{ "www.ck", 0 }, // exception from *.ck
+		{ "abc.www.ck", 0 },
+		{ "xxx.ck", 1 },
+		{ "www.xxx.ck", 0 },
+	};
+	size_t it;
+
+	psl_inline_init();
+
+	printf("have %d suffixes and %d exceptions\n", psl_inline_suffix_count(), psl_inline_suffix_exception_count());
+
+	for (it = 0; it < countof(test_data); it++) {
+		const struct test_data *t = &test_data[it];
+		int result = psl_inline_is_public(t->domain);
+
+		if (result == t->result) {
+			ok++;
+		} else {
+			failed++;
+			printf("psl_inline_is_public(%s)=%d (expected %d)\n", t->domain, result, t->result);
+		}
+	}
+
+	psl_inline_deinit();
+}
+
+// Returns 0 if all checks passed, 1 otherwise.
+int main(int argc, const char * const *argv)
+{
+	// if VALGRIND testing is enabled, we have to call ourselves with valgrind checking
+	if (argc == 1) {
+		const char *valgrind = getenv("TESTS_VALGRIND");
+
+		if (valgrind && *valgrind) {
+			char cmd[strlen(valgrind)+strlen(argv[0])+32];
+
+			// TESTS_VALGRIND is emptied for the child, else it would call itself again and again
+			snprintf(cmd, sizeof(cmd), "TESTS_VALGRIND= %s %s", valgrind, argv[0]);
+			return system(cmd) != 0;
+		}
+	}
+
+	test_psl();
+
+	if (failed) {
+		printf("Summary: %d out of %d tests failed\n", failed, ok + failed);
+		return 1;
+	}
+
+	printf("Summary: All %d tests passed\n", ok + failed);
+	return 0;
+}