diff --git a/include/Makefile.am b/include/Makefile.am
index a45de59..dd315d8 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -1 +1 @@
-include_HEADERS = libpsl.h
+include_HEADERS = libpsl.h libpsl-inline.h
diff --git a/include/libpsl-inline.h b/include/libpsl-inline.h
new file mode 100644
index 0000000..d9997a6
--- /dev/null
+++ b/include/libpsl-inline.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright(c) 2014 Tim Ruehsen
+ *
+ * This file is part of libpsl.
+ *
+ * Libpsl is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Libpsl is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with libpsl. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Header file for libpsl library routines
+ *
+ * Changelog
+ * 22.03.2014 Tim Ruehsen created
+ *
+ */
+
+#ifndef _LIBPSL_LIBPSL_INLINE_H
+#define _LIBPSL_LIBPSL_INLINE_H
+
+#include
+
+// Let C++ include C headers
+#ifdef __cplusplus
+# define PSL_BEGIN_DECLS extern "C" {
+# define PSL_END_DECLS }
+#else
+# define PSL_BEGIN_DECLS
+# define PSL_END_DECLS
+#endif
+
+#if ENABLE_NLS != 0
+# include
+# define _(STRING) gettext(STRING)
+#else
+# define _(STRING) STRING
+# define ngettext(STRING1,STRING2,N) STRING2
+#endif
+
+PSL_BEGIN_DECLS
+
+void
+ psl_inline_init(void); /* sets up the compiled-in tables; call before psl_inline_is_public() */
+void
+ psl_inline_deinit(void); /* counterpart of psl_inline_init() */
+int
+ psl_inline_is_public(const char *domain); /* 1 if domain is a public suffix, else 0 */
+
+/* number of compiled-in suffix rules; does not include exceptions */
+int
+ psl_inline_suffix_count(void);
+/* number of compiled-in exception rules only */
+int
+ psl_inline_suffix_exception_count(void);
+
+PSL_END_DECLS
+
+#endif /* _LIBPSL_LIBPSL_INLINE_H */
diff --git a/src/Makefile.am b/src/Makefile.am
index f8339d5..f86bb4b 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,14 +1,30 @@
-lib_LTLIBRARIES = libpsl-@LIBPSL_API_VERSION@.la
+#EXTRA_DIST = $(top_srcdir)/data/effective_tld_names.dat
+
+# suffixes.c must be created before psl-inline.c (which #includes it) is compiled
+BUILT_SOURCES = suffixes.c
+
+# suffixes.c is a built source that must be cleaned
+CLEANFILES = suffixes.c
+
+# build two libraries, 'inline' version with PSL entries compiled in
+lib_LTLIBRARIES = libpsl-@LIBPSL_API_VERSION@.la libpsl-inline-@LIBPSL_API_VERSION@.la
+
libpsl_@LIBPSL_API_VERSION@_la_SOURCES = psl.c
-
libpsl_@LIBPSL_API_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include
-
# include ABI version information
libpsl_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
-#bin_PROGRAMS = test_linking
-#noinst_PROGRAMS = test_linking
-#test_linking_SOURCES = test_linking.c
-#test_linking_CPPFLAGS = -I$(top_srcdir)/include
-#test_linking_LDADD = libpsl-@LIBPSL_API_VERSION@.la
-#test_linking_LDFLAGS = -static
+libpsl_inline_@LIBPSL_API_VERSION@_la_SOURCES = psl-inline.c
+libpsl_inline_@LIBPSL_API_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include
+# include ABI version information
+libpsl_inline_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
+
+noinst_PROGRAMS = psl2c
+psl2c_SOURCES = psl2c.c
+psl2c_CPPFLAGS = -I$(top_srcdir)/include
+#psl2c_LDADD = libpsl-@LIBPSL_API_VERSION@.la
+#psl2c_LDFLAGS = -static
+
+# Build rule for suffixes.c; remove partial output if psl2c fails so a later make run regenerates it
+suffixes.c: $(top_srcdir)/data/effective_tld_names.dat psl2c$(EXEEXT)
+	./psl2c$(EXEEXT) <$(top_srcdir)/data/effective_tld_names.dat >suffixes.c || { rm -f suffixes.c; exit 1; }
diff --git a/src/psl-inline.c b/src/psl-inline.c
new file mode 100644
index 0000000..b441182
--- /dev/null
+++ b/src/psl-inline.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright(c) 2014 Tim Ruehsen
+ *
+ * This file is part of MGet.
+ *
+ * Mget is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Mget is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Mget. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Public Suffix List routines (right now experimental)
+ *
+ * Changelog
+ * 22.03.2014 Tim Ruehsen created
+ *
+ */
+
+// need _GNU_SOURCE for qsort_r()
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
+#if HAVE_CONFIG_H
+# include
+#endif
+
+#include
+#include
+#include
+#include
+
+#include
+
+#define countof(a) (sizeof(a)/sizeof(*(a)))
+
+typedef struct {
+ char
+ label_buf[42]; // storage for the rule text of a compiled-in entry
+ const char *
+ label; // string that gets compared: label_buf after psl_inline_init(), or the caller's domain in a lookup key
+ unsigned short
+ length; // strlen() of label
+ unsigned char
+ nlabels, // number of labels
+ wildcard; // this is a wildcard rule (e.g. *.sapporo.jp)
+} _psl_entry_t;
+
+#include "suffixes.c"
+
+// bsearch() comparator for _psl_entry_t: most labels first, then shorter rules, then strcmp() order.
+// Takes const void * so it has exactly the type bsearch() calls it through.
+static int _suffix_compare(const void *p1, const void *p2)
+{
+	const _psl_entry_t *s1 = p1, *s2 = p2;
+	int n;
+
+	if ((n = s2->nlabels - s1->nlabels))
+		return n; // most labels first
+	if ((n = s1->length - s2->length))
+		return n; // shorter rules first
+
+	return strcmp(s1->label, s2->label);
+}
+
+// Point the 'label' of every compiled-in rule and exception at that entry's own 'label_buf'.
+void psl_inline_init(void)
+{
+	_psl_entry_t *e;
+
+	for (e = suffixes; e < suffixes + countof(suffixes); e++)
+		e->label = e->label_buf;
+	for (e = suffix_exceptions; e < suffix_exceptions + countof(suffix_exceptions); e++)
+		e->label = e->label_buf;
+}
+
+void psl_inline_deinit(void) // intentionally empty: this function performs no work
+{
+}
+
+// Return 1 if 'domain' is a public suffix according to the compiled-in list, else 0 (also for NULL).
+// psl_inline_init() must have run first: the rules' 'label' pointers are compared below.
+int psl_inline_is_public(const char *domain)
+{
+	_psl_entry_t suffix, *rule;
+	const char *p, *label_bak;
+	unsigned short length_bak;
+
+	if (!domain)
+		return 0; // nothing to look up
+
+	// this function should be called without leading dots, just make sure
+	suffix.label = domain + (*domain == '.');
+	suffix.length = strlen(suffix.label);
+	suffix.wildcard = 0;
+	suffix.nlabels = 1;
+
+	for (p = suffix.label; *p; p++)
+		if (*p == '.')
+			suffix.nlabels++;
+
+	// suffixes[] is ordered by _suffix_compare (most labels first) and the wildcard lookup below
+	// drops one label, so a domain with more than suffixes[0].nlabels + 1 labels cannot match
+	if (suffixes[0].nlabels < suffix.nlabels - 1)
+		return 0;
+
+	// an exact hit is definitely a match, no matter if the found rule is a wildcard or not
+	if (bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare))
+		return 1;
+
+	label_bak = suffix.label;
+	length_bak = suffix.length;
+
+	// strip the leftmost label and look for a wildcard rule that covers the remainder
+	if (!(suffix.label = strchr(suffix.label, '.')))
+		return 0;
+
+	suffix.label++;
+	suffix.length = strlen(suffix.label);
+	suffix.nlabels--;
+
+	rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
+	if (!rule || !rule->wildcard)
+		return 0;
+
+	// now that we matched a wildcard, we have to check for an exception
+	suffix.label = label_bak;
+	suffix.length = length_bak;
+	suffix.nlabels++;
+
+	// a hit in the exception table means 'domain' is not a public suffix
+	return !bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare);
+}
+
+/* number of compiled-in suffix rules; exceptions are not included */
+int psl_inline_suffix_count(void)
+{
+	return (int) (sizeof(suffixes) / sizeof(suffixes[0]));
+}
+
+/* number of compiled-in exception rules only */
+int psl_inline_suffix_exception_count(void)
+{
+	return (int) (sizeof(suffix_exceptions) / sizeof(suffix_exceptions[0]));
+}
diff --git a/src/psl.c b/src/psl.c
index 5fa03a9..aba1594 100644
--- a/src/psl.c
+++ b/src/psl.c
@@ -158,6 +158,11 @@ static void _vector_sort(_psl_vector_t *v)
qsort_r(v->entry, v->cur, sizeof(_psl_vector_t *), _compare, v);
}
+static inline int _vector_size(const _psl_vector_t *v) // read-only: number of entries in 'v'
+{
+	return v ? v->cur : 0; // a NULL vector counts as empty
+}
+
// by this kind of sorting, we can easily see if a domain matches or not (match = supercookie !)
static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
@@ -167,7 +172,7 @@ static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
if ((n = s2->nlabels - s1->nlabels))
return n; // most labels first
- if ((n=s1->length - s2->length))
+ if ((n = s1->length - s2->length))
return n; // shorter rules first
return strcmp(s1->label, s2->label);
@@ -216,7 +221,7 @@ int psl_is_public(const psl_ctx_t *psl, const char *domain)
const char *p, *label_bak;
unsigned short length_bak;
- // this function should be called without leading dots, just make shure
+ // this function should be called without leading dots, just make sure
suffix.label = domain + (*domain == '.');
suffix.length = strlen(suffix.label);
suffix.wildcard = 0;
@@ -327,19 +332,18 @@ psl_ctx_t *psl_load_fp(FILE *fp)
return psl;
}
-
/* does not include exceptions */
int psl_suffix_count(const psl_ctx_t *psl)
{
- return psl->suffixes->cur;
+ return _vector_size(psl->suffixes);
}
+
/* just counts exceptions */
int psl_suffix_exception_count(const psl_ctx_t *psl)
{
- return psl->suffix_exceptions->cur;
+ return _vector_size(psl->suffix_exceptions);
}
-
void psl_free(psl_ctx_t **psl)
{
if (psl && *psl) {
diff --git a/src/psl2c.c b/src/psl2c.c
new file mode 100644
index 0000000..ffe1487
--- /dev/null
+++ b/src/psl2c.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright(c) 2014 Tim Ruehsen
+ *
+ * This file is part of libpsl.
+ *
+ * Libpsl is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Libpsl is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with libpsl. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Precompile Public Suffix List into C source (static _psl_entry_t arrays, see suffixes.c)
+ *
+ * Changelog
+ * 22.03.2014 Tim Ruehsen created
+ *
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif
+
+#include "psl.c"
+
+// Write the entries of 'v' to stdout as the initializer of a static C array named 'varname'.
+static void _print_psl_entries(_psl_vector_t *v, const char *varname)
+{
+	int it, n = _vector_size(v); // NULL-safe: a missing vector yields an array without entries
+
+	printf("// automatically generated by psl2c\n");
+	printf("static _psl_entry_t %s[] = {\n", varname);
+
+	for (it = 0; it < n; it++) {
+		_psl_entry_t *e = _vector_get(v, it);
+		// 'label' is emitted as NULL; numeric fields are cast so that %u matches the argument type
+		printf("\t{ \"%s\", NULL, %u, %u, %u },\n",
+			e->label_buf, (unsigned) e->length, (unsigned) e->nlabels, (unsigned) e->wildcard);
+	}
+	printf("};\n");
+}
+
+// Read a Public Suffix List from stdin and write it to stdout as C source.
+int main(void)
+{
+	psl_ctx_t *psl;
+
+	if (!(psl = psl_load_fp(stdin)))
+		return 1;
+
+	_print_psl_entries(psl->suffixes, "suffixes");
+	_print_psl_entries(psl->suffix_exceptions, "suffix_exceptions");
+	psl_free(&psl);
+	// fail on a write error so that make does not treat truncated output as success
+	return fflush(stdout) != 0 || ferror(stdout);
+}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index f6d6f11..6595619 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -4,12 +4,12 @@ AM_CPPFLAGS = -Wno-missing-field-initializers -I$(top_srcdir)/include
AM_LDFLAGS = -static
LDADD = ../src/libpsl-@LIBPSL_API_VERSION@.la
-PSL_TESTS = test-is-public
+PSL_TESTS = test-is-public test-is-public-inline
check_PROGRAMS = $(PSL_TESTS)
-test_is_tld_SOURCES = test-is-public.c
-test_is_tld_LDADD = ../src/libpsl-@LIBPSL_API_VERSION@.la
+#test_is_public_inline_SOURCES = test-is-public-inline.c
+test_is_public_inline_LDADD = ../src/libpsl-inline-@LIBPSL_API_VERSION@.la
TESTS_ENVIRONMENT = TESTS_VALGRIND="@VALGRIND_ENVIRONMENT@"
TESTS = $(PSL_TESTS)
diff --git a/tests/test-is-public-inline.c b/tests/test-is-public-inline.c
new file mode 100644
index 0000000..62d0c42
--- /dev/null
+++ b/tests/test-is-public-inline.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright(c) 2014 Tim Ruehsen
+ *
+ * This file is part of MGet.
+ *
+ * Mget is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Mget is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Mget. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Public Suffix List routines (right now experimental)
+ *
+ * Changelog
+ * 19.03.2014 Tim Ruehsen created from libmget/cookie.c
+ *
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif
+
+#include
+#include
+#include
+
+#include
+
+#define countof(a) (sizeof(a)/sizeof(*(a)))
+
+static int
+ ok,
+ failed;
+
+// Run psl_inline_is_public() over a fixed table of domains, tallying results in 'ok' / 'failed'.
+static void test_psl(void)
+{
+	static const struct test_data {
+		const char *domain;
+		int result; // expected return value of psl_inline_is_public(domain)
+	} test_data[] = {
+		{ "www.example.com", 0 },
+		{ "com.ar", 1 },
+		{ "www.com.ar", 0 },
+		{ "cc.ar.us", 1 },
+		{ ".cc.ar.us", 1 },
+		{ "www.cc.ar.us", 0 },
+		{ "www.ck", 0 }, // exception from *.ck
+		{ "abc.www.ck", 0 },
+		{ "xxx.ck", 1 },
+		{ "www.xxx.ck", 0 },
+	};
+	unsigned it;
+
+	psl_inline_init();
+
+	printf("have %d suffixes and %d exceptions\n", psl_inline_suffix_count(), psl_inline_suffix_exception_count());
+
+	for (it = 0; it < countof(test_data); it++) {
+		const struct test_data *t = &test_data[it];
+		int result = psl_inline_is_public(t->domain);
+
+		if (result == t->result) {
+			ok++;
+		} else {
+			failed++;
+			// report under the name of the function that is actually being tested
+			printf("psl_inline_is_public(%s)=%d (expected %d)\n", t->domain, result, t->result);
+		}
+	}
+
+	psl_inline_deinit();
+}
+
+// Test driver: with no arguments and a non-empty $TESTS_VALGRIND, runs itself through that command.
+int main(int argc, const char * const *argv)
+{
+	// if VALGRIND testing is enabled, we have to call ourselves with valgrind checking
+	const char *valgrind = argc == 1 ? getenv("TESTS_VALGRIND") : NULL;
+
+	if (valgrind && *valgrind) {
+		char cmd[strlen(valgrind)+strlen(argv[0])+32];
+
+		// the command line starts with an empty TESTS_VALGRIND assignment for the child
+		snprintf(cmd, sizeof(cmd), "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
+		return system(cmd) != 0;
+	}
+
+	test_psl();
+
+	if (!failed) {
+		printf("Summary: All %d tests passed\n", ok + failed);
+		return 0;
+	}
+
+	printf("Summary: %d out of %d tests failed\n", failed, ok + failed);
+	return 1;
+}