added PSL inline library
This commit is contained in:
parent
a707b267c9
commit
3998137fd0
|
@ -1 +1 @@
|
|||
include_HEADERS = libpsl.h
|
||||
include_HEADERS = libpsl.h libpsl-inline.h
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright(c) 2014 Tim Ruehsen
|
||||
*
|
||||
* This file is part of libpsl.
|
||||
*
|
||||
* Libpsl is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Libpsl is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with libpsl. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
* Header file for the libpsl inline library routines (PSL entries compiled in)
|
||||
*
|
||||
* Changelog
|
||||
* 22.03.2014 Tim Ruehsen created
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _LIBPSL_LIBPSL_INLINE_H
|
||||
#define _LIBPSL_LIBPSL_INLINE_H
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
// Let C++ include C headers
|
||||
#ifdef __cplusplus
|
||||
# define PSL_BEGIN_DECLS extern "C" {
|
||||
# define PSL_END_DECLS }
|
||||
#else
|
||||
# define PSL_BEGIN_DECLS
|
||||
# define PSL_END_DECLS
|
||||
#endif
|
||||
|
||||
#if ENABLE_NLS != 0
|
||||
# include <libintl.h>
|
||||
# define _(STRING) gettext(STRING)
|
||||
#else
|
||||
# define _(STRING) STRING
|
||||
# define ngettext(STRING1,STRING2,N) STRING2
|
||||
#endif
|
||||
|
||||
PSL_BEGIN_DECLS
|
||||
|
||||
/* Prepares the compiled-in rule tables; call before psl_inline_is_public(). */
void psl_inline_init(void);

/* Counterpart of psl_inline_init(). */
void psl_inline_deinit(void);

/* Returns 1 if 'domain' is a public suffix, 0 otherwise. */
int psl_inline_is_public(const char *domain);

/* Number of compiled-in suffix rules; does not include exceptions. */
int psl_inline_suffix_count(void);

/* Number of compiled-in exception rules; just counts exceptions. */
int psl_inline_suffix_exception_count(void);
|
||||
|
||||
PSL_END_DECLS
|
||||
|
||||
#endif /* _LIBPSL_LIBPSL_INLINE_H */
|
|
@ -1,14 +1,30 @@
|
|||
lib_LTLIBRARIES = libpsl-@LIBPSL_API_VERSION@.la
|
||||
#EXTRA_DIST = $(top_srcdir)/data/effective_tld_names.dat
|
||||
|
||||
# suffixes.c must be created before psl.c is compiled
|
||||
BUILT_SOURCES = suffixes.c
|
||||
|
||||
# suffixes.c is a built source that must be cleaned
|
||||
CLEANFILES = suffixes.c
|
||||
|
||||
# build two libraries, 'inline' version with PSL entries compiled in
|
||||
lib_LTLIBRARIES = libpsl-@LIBPSL_API_VERSION@.la libpsl-inline-@LIBPSL_API_VERSION@.la
|
||||
|
||||
libpsl_@LIBPSL_API_VERSION@_la_SOURCES = psl.c
|
||||
|
||||
libpsl_@LIBPSL_API_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include
|
||||
|
||||
# include ABI version information
|
||||
libpsl_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
|
||||
|
||||
#bin_PROGRAMS = test_linking
|
||||
#noinst_PROGRAMS = test_linking
|
||||
#test_linking_SOURCES = test_linking.c
|
||||
#test_linking_CPPFLAGS = -I$(top_srcdir)/include
|
||||
#test_linking_LDADD = libpsl-@LIBPSL_API_VERSION@.la
|
||||
#test_linking_LDFLAGS = -static
|
||||
libpsl_inline_@LIBPSL_API_VERSION@_la_SOURCES = psl-inline.c
|
||||
libpsl_inline_@LIBPSL_API_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include
|
||||
# include ABI version information
|
||||
libpsl_inline_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
|
||||
|
||||
noinst_PROGRAMS = psl2c
|
||||
psl2c_SOURCES = psl2c.c
|
||||
psl2c_CPPFLAGS = -I$(top_srcdir)/include
|
||||
#psl2c_LDADD = libpsl-@LIBPSL_API_VERSION@.la
|
||||
#psl2c_LDFLAGS = -static
|
||||
|
||||
# Build rule for suffixes.c
|
||||
suffixes.c: $(top_srcdir)/data/effective_tld_names.dat psl2c$(EXEEXT)
|
||||
./psl2c$(EXEEXT) <$(top_srcdir)/data/effective_tld_names.dat >suffixes.c
|
||||
|
|
|
@ -0,0 +1,153 @@
|
|||
/*
|
||||
* Copyright(c) 2014 Tim Ruehsen
|
||||
*
|
||||
* This file is part of MGet.
|
||||
*
|
||||
* Mget is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Mget is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Mget. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
* Public Suffix List routines (right now experimental)
|
||||
*
|
||||
* Changelog
|
||||
* 22.03.2014 Tim Ruehsen created
|
||||
*
|
||||
*/
|
||||
|
||||
// need _GNU_SOURCE for qsort_r()
|
||||
#ifndef _GNU_SOURCE
|
||||
# define _GNU_SOURCE
|
||||
#endif
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include <libpsl-inline.h>
|
||||
|
||||
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
||||
|
||||
/*
 * One Public Suffix List rule.
 * Keep the field order: the entries generated into suffixes.c are
 * initialized positionally (label_buf, label, length, nlabels, wildcard).
 */
typedef struct {
	char label_buf[42];      // storage for the rule text
	const char *label;       // text used by the comparison function; set to label_buf by psl_inline_init()
	unsigned short length;   // string length of the rule text
	unsigned char nlabels;   // number of labels
	unsigned char wildcard;  // this is a wildcard rule (e.g. *.sapporo.jp)
} _psl_entry_t;
|
||||
|
||||
#include "suffixes.c"
|
||||
|
||||
// by this kind of sorting, we can easily see if a domain matches or not (match = supercookie !)
|
||||
|
||||
static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
|
||||
{
|
||||
int n;
|
||||
|
||||
if ((n = s2->nlabels - s1->nlabels))
|
||||
return n; // most labels first
|
||||
|
||||
if ((n = s1->length - s2->length))
|
||||
return n; // shorter rules first
|
||||
|
||||
return strcmp(s1->label, s2->label);
|
||||
}
|
||||
|
||||
void psl_inline_init(void)
|
||||
{
|
||||
size_t it;
|
||||
|
||||
for (it = 0; it < countof(suffixes); it++)
|
||||
suffixes[it].label = suffixes[it].label_buf;
|
||||
|
||||
for (it = 0; it < countof(suffix_exceptions); it++)
|
||||
suffix_exceptions[it].label = suffix_exceptions[it].label_buf;
|
||||
}
|
||||
|
||||
/* Counterpart of psl_inline_init(); currently there is nothing to undo. */
void psl_inline_deinit(void)
{
	/* intentionally empty */
}
|
||||
|
||||
int psl_inline_is_public(const char *domain)
|
||||
{
|
||||
_psl_entry_t suffix, *rule;
|
||||
const char *p, *label_bak;
|
||||
unsigned short length_bak;
|
||||
|
||||
// this function should be called without leading dots, just make sure
|
||||
suffix.label = domain + (*domain == '.');
|
||||
suffix.length = strlen(suffix.label);
|
||||
suffix.wildcard = 0;
|
||||
suffix.nlabels = 1;
|
||||
|
||||
for (p = suffix.label; *p; p++)
|
||||
if (*p == '.')
|
||||
suffix.nlabels++;
|
||||
|
||||
// if domain has enough labels, it won't match
|
||||
rule = &suffixes[0];
|
||||
if (!rule || rule->nlabels < suffix.nlabels - 1)
|
||||
return 0;
|
||||
|
||||
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
|
||||
if (rule) {
|
||||
// definitely a match, no matter if the found rule is a wildcard or not
|
||||
return 1;
|
||||
}
|
||||
|
||||
label_bak = suffix.label;
|
||||
length_bak = suffix.length;
|
||||
|
||||
if ((suffix.label = strchr(suffix.label, '.'))) {
|
||||
suffix.label++;
|
||||
suffix.length = strlen(suffix.label);
|
||||
suffix.nlabels--;
|
||||
|
||||
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
|
||||
if (rule) {
|
||||
if (rule->wildcard) {
|
||||
// now that we matched a wildcard, we have to check for an exception
|
||||
suffix.label = label_bak;
|
||||
suffix.length = length_bak;
|
||||
suffix.nlabels++;
|
||||
|
||||
if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare))
|
||||
return 0; // found an exception, so 'domain' is not a public suffix
|
||||
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* does not include exceptions */
|
||||
int psl_inline_suffix_count(void)
|
||||
{
|
||||
return countof(suffixes);
|
||||
}
|
||||
|
||||
/* just counts exceptions */
|
||||
int psl_inline_suffix_exception_count(void)
|
||||
{
|
||||
return countof(suffix_exceptions);
|
||||
}
|
14
src/psl.c
14
src/psl.c
|
@ -158,6 +158,11 @@ static void _vector_sort(_psl_vector_t *v)
|
|||
qsort_r(v->entry, v->cur, sizeof(_psl_vector_t *), _compare, v);
|
||||
}
|
||||
|
||||
/* Number of entries currently stored in 'v'; a NULL vector counts as empty. */
static inline int _vector_size(_psl_vector_t *v)
{
	if (!v)
		return 0;

	return v->cur;
}
|
||||
|
||||
// by this kind of sorting, we can easily see if a domain matches or not (match = supercookie !)
|
||||
|
||||
static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
|
||||
|
@ -216,7 +221,7 @@ int psl_is_public(const psl_ctx_t *psl, const char *domain)
|
|||
const char *p, *label_bak;
|
||||
unsigned short length_bak;
|
||||
|
||||
// this function should be called without leading dots, just make shure
|
||||
// this function should be called without leading dots, just make sure
|
||||
suffix.label = domain + (*domain == '.');
|
||||
suffix.length = strlen(suffix.label);
|
||||
suffix.wildcard = 0;
|
||||
|
@ -327,19 +332,18 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
|||
return psl;
|
||||
}
|
||||
|
||||
|
||||
/* does not include exceptions */
|
||||
int psl_suffix_count(const psl_ctx_t *psl)
|
||||
{
|
||||
return psl->suffixes->cur;
|
||||
return _vector_size(psl->suffixes);
|
||||
}
|
||||
|
||||
/* just counts exceptions */
|
||||
int psl_suffix_exception_count(const psl_ctx_t *psl)
|
||||
{
|
||||
return psl->suffix_exceptions->cur;
|
||||
return _vector_size(psl->suffix_exceptions);
|
||||
}
|
||||
|
||||
|
||||
void psl_free(psl_ctx_t **psl)
|
||||
{
|
||||
if (psl && *psl) {
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Copyright(c) 2014 Tim Ruehsen
|
||||
*
|
||||
* This file is part of libpsl.
|
||||
*
|
||||
* Libpsl is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Libpsl is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libpsl. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
* Precompile Public Suffix List into C source code (static arrays of _psl_entry_t)
|
||||
*
|
||||
* Changelog
|
||||
* 22.03.2014 Tim Ruehsen created
|
||||
*
|
||||
*/
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#include "psl.c"
|
||||
|
||||
/*
 * Write the entries of 'v' to stdout as the C definition of a static
 * _psl_entry_t array named 'varname'.
 *
 * Each element is emitted as a positional initializer:
 *   { "<label_buf>", NULL, <length>, <nlabels>, <wildcard> }
 * The 'label' pointer is written as NULL.
 *
 * The numeric fields are printed with an unsigned conversion and an explicit
 * cast, so the format matches the argument type exactly.
 */
static void _print_psl_entries(_psl_vector_t *v, const char *varname)
{
	int it;

	printf("// automatically generated by psl2c\n");
	printf("static _psl_entry_t %s[] = {\n", varname);

	for (it = 0; it < v->cur; it++) {
		_psl_entry_t *e = _vector_get(v, it);

		printf("\t{ \"%s\", NULL, %u, %u, %u },\n",
			e->label_buf, (unsigned) e->length, (unsigned) e->nlabels, (unsigned) e->wildcard);
	}

	printf("};\n");
}
|
||||
|
||||
// int main(int argc, const char **argv)
|
||||
int main(void)
|
||||
{
|
||||
psl_ctx_t *psl;
|
||||
|
||||
if (!(psl = psl_load_fp(stdin)))
|
||||
return 1;
|
||||
|
||||
_print_psl_entries(psl->suffixes, "suffixes");
|
||||
_print_psl_entries(psl->suffix_exceptions, "suffix_exceptions");
|
||||
|
||||
psl_free(&psl);
|
||||
return 0;
|
||||
}
|
|
@ -4,12 +4,12 @@ AM_CPPFLAGS = -Wno-missing-field-initializers -I$(top_srcdir)/include
|
|||
AM_LDFLAGS = -static
|
||||
LDADD = ../src/libpsl-@LIBPSL_API_VERSION@.la
|
||||
|
||||
PSL_TESTS = test-is-public
|
||||
PSL_TESTS = test-is-public test-is-public-inline
|
||||
|
||||
check_PROGRAMS = $(PSL_TESTS)
|
||||
|
||||
test_is_tld_SOURCES = test-is-public.c
|
||||
test_is_tld_LDADD = ../src/libpsl-@LIBPSL_API_VERSION@.la
|
||||
#test_is_public_inline_SOURCES = test-is-public-inline.c
|
||||
test_is_public_inline_LDADD = ../src/libpsl-inline-@LIBPSL_API_VERSION@.la
|
||||
|
||||
TESTS_ENVIRONMENT = TESTS_VALGRIND="@VALGRIND_ENVIRONMENT@"
|
||||
TESTS = $(PSL_TESTS)
|
||||
|
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright(c) 2014 Tim Ruehsen
|
||||
*
|
||||
* This file is part of MGet.
|
||||
*
|
||||
* Mget is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Mget is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Mget. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
* Public Suffix List routines (right now experimental)
|
||||
*
|
||||
* Changelog
|
||||
* 19.03.2014 Tim Ruehsen created from libmget/cookie.c
|
||||
*
|
||||
*/
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <libpsl-inline.h>
|
||||
|
||||
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
||||
|
||||
// test counters, updated by test_psl() and reported by main()
static int ok;      // checks that returned the expected result
static int failed;  // checks that returned something else
|
||||
|
||||
static void test_psl(void)
|
||||
{
|
||||
static const struct test_data {
|
||||
const char
|
||||
*domain;
|
||||
int
|
||||
result;
|
||||
} test_data[] = {
|
||||
{ "www.example.com", 0 },
|
||||
{ "com.ar", 1 },
|
||||
{ "www.com.ar", 0 },
|
||||
{ "cc.ar.us", 1 },
|
||||
{ ".cc.ar.us", 1 },
|
||||
{ "www.cc.ar.us", 0 },
|
||||
{ "www.ck", 0 }, // exception from *.ck
|
||||
{ "abc.www.ck", 0 },
|
||||
{ "xxx.ck", 1 },
|
||||
{ "www.xxx.ck", 0 },
|
||||
};
|
||||
unsigned it;
|
||||
|
||||
psl_inline_init();
|
||||
|
||||
printf("have %d suffixes and %d exceptions\n", psl_inline_suffix_count(), psl_inline_suffix_exception_count());
|
||||
|
||||
for (it = 0; it < countof(test_data); it++) {
|
||||
const struct test_data *t = &test_data[it];
|
||||
int result = psl_inline_is_public(t->domain);
|
||||
|
||||
if (result == t->result) {
|
||||
ok++;
|
||||
} else {
|
||||
failed++;
|
||||
printf("psl_is_tld(%s)=%d (expected %d)\n", t->domain, result, t->result);
|
||||
}
|
||||
}
|
||||
|
||||
psl_inline_deinit();
|
||||
}
|
||||
|
||||
int main(int argc, const char * const *argv)
|
||||
{
|
||||
// if VALGRIND testing is enabled, we have to call ourselves with valgrind checking
|
||||
if (argc == 1) {
|
||||
const char *valgrind = getenv("TESTS_VALGRIND");
|
||||
|
||||
if (valgrind && *valgrind) {
|
||||
char cmd[strlen(valgrind)+strlen(argv[0])+32];
|
||||
|
||||
snprintf(cmd, sizeof(cmd), "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
return system(cmd) != 0;
|
||||
}
|
||||
}
|
||||
|
||||
test_psl();
|
||||
|
||||
if (failed) {
|
||||
printf("Summary: %d out of %d tests failed\n", failed, ok + failed);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("Summary: All %d tests passed\n", ok + failed);
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue