merged libpsl and libpsl-inline
This commit is contained in:
parent
3b94a03638
commit
9d1c62eb07
|
@ -1,22 +1,26 @@
|
||||||
/*
|
/*
|
||||||
* Copyright(c) 2014 Tim Ruehsen
|
* Copyright(c) 2014 Tim Ruehsen
|
||||||
*
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
* This file is part of libpsl.
|
* This file is part of libpsl.
|
||||||
*
|
*
|
||||||
* Libpsl is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU Lesser General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* Libpsl is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU Lesser General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU Lesser General Public License
|
|
||||||
* along with libpsl. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* Header file for libpsl library routines
|
* Header file for libpsl library routines
|
||||||
*
|
*
|
||||||
* Changelog
|
* Changelog
|
||||||
|
@ -50,21 +54,35 @@ PSL_BEGIN_DECLS
|
||||||
|
|
||||||
typedef struct _psl_ctx_st psl_ctx_t;
|
typedef struct _psl_ctx_st psl_ctx_t;
|
||||||
|
|
||||||
|
int
|
||||||
|
psl_global_init(void);
|
||||||
|
void
|
||||||
|
psl_global_deinit(void);
|
||||||
void
|
void
|
||||||
psl_free(psl_ctx_t **psl);
|
psl_free(psl_ctx_t **psl);
|
||||||
psl_ctx_t *
|
psl_ctx_t *
|
||||||
psl_load_file(const char *fname);
|
psl_load_file(const char *fname);
|
||||||
psl_ctx_t *
|
psl_ctx_t *
|
||||||
psl_load_fp(FILE *fp);
|
psl_load_fp(FILE *fp);
|
||||||
|
psl_ctx_t *
|
||||||
|
psl_builtin(void);
|
||||||
int
|
int
|
||||||
psl_is_public(const psl_ctx_t *psl, const char *domain);
|
psl_is_public(const psl_ctx_t *psl, const char *domain);
|
||||||
|
// does not include exceptions
|
||||||
/* does not include exceptions */
|
|
||||||
int
|
int
|
||||||
psl_suffix_count(const psl_ctx_t *psl);
|
psl_suffix_count(const psl_ctx_t *psl);
|
||||||
/* just counts exceptions */
|
// just counts exceptions
|
||||||
int
|
int
|
||||||
psl_suffix_exception_count(const psl_ctx_t *psl);
|
psl_suffix_exception_count(const psl_ctx_t *psl);
|
||||||
|
// returns compilation time
|
||||||
|
time_t
|
||||||
|
psl_builtin_compile_time(void);
|
||||||
|
// returns mtime of PSL source file
|
||||||
|
time_t
|
||||||
|
psl_builtin_file_time(void);
|
||||||
|
// returns SHA-1 checksum (hex-encoded, lowercase) of PSL source file
|
||||||
|
const char *
|
||||||
|
psl_builtin_sha1sum(void);
|
||||||
|
|
||||||
|
|
||||||
PSL_END_DECLS
|
PSL_END_DECLS
|
||||||
|
|
|
@ -7,17 +7,18 @@ BUILT_SOURCES = suffixes.c
|
||||||
CLEANFILES = suffixes.c
|
CLEANFILES = suffixes.c
|
||||||
|
|
||||||
# build two libraries, 'inline' version with PSL entries compiled in
|
# build two libraries, 'inline' version with PSL entries compiled in
|
||||||
lib_LTLIBRARIES = libpsl-@LIBPSL_API_VERSION@.la libpsl-inline-@LIBPSL_API_VERSION@.la
|
#lib_LTLIBRARIES = libpsl-@LIBPSL_API_VERSION@.la libpsl-inline-@LIBPSL_API_VERSION@.la
|
||||||
|
lib_LTLIBRARIES = libpsl-@LIBPSL_API_VERSION@.la
|
||||||
|
|
||||||
libpsl_@LIBPSL_API_VERSION@_la_SOURCES = psl.c
|
libpsl_@LIBPSL_API_VERSION@_la_SOURCES = psl.c
|
||||||
libpsl_@LIBPSL_API_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include
|
libpsl_@LIBPSL_API_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include
|
||||||
# include ABI version information
|
# include ABI version information
|
||||||
libpsl_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
|
libpsl_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
|
||||||
|
|
||||||
libpsl_inline_@LIBPSL_API_VERSION@_la_SOURCES = psl-inline.c
|
#libpsl_inline_@LIBPSL_API_VERSION@_la_SOURCES = psl-inline.c
|
||||||
libpsl_inline_@LIBPSL_API_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include
|
#libpsl_inline_@LIBPSL_API_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include
|
||||||
# include ABI version information
|
# include ABI version information
|
||||||
libpsl_inline_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
|
#libpsl_inline_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
|
||||||
|
|
||||||
noinst_PROGRAMS = psl2c
|
noinst_PROGRAMS = psl2c
|
||||||
psl2c_SOURCES = psl2c.c
|
psl2c_SOURCES = psl2c.c
|
||||||
|
|
117
src/psl.c
117
src/psl.c
|
@ -1,23 +1,27 @@
|
||||||
/*
|
/*
|
||||||
* Copyright(c) 2014 Tim Ruehsen
|
* Copyright(c) 2014 Tim Ruehsen
|
||||||
*
|
*
|
||||||
* This file is part of MGet.
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
*
|
*
|
||||||
* Mget is free software: you can redistribute it and/or modify
|
* The above copyright notice and this permission notice shall be included in
|
||||||
* it under the terms of the GNU General Public License as published by
|
* all copies or substantial portions of the Software.
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
*
|
||||||
* Mget is distributed in the hope that it will be useful,
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
* GNU General Public License for more details.
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
*
|
*
|
||||||
* You should have received a copy of the GNU General Public License
|
* This file is part of libpsl.
|
||||||
* along with Mget. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*
|
*
|
||||||
*
|
* Public Suffix List routines
|
||||||
* Public Suffix List routines (right now experimental)
|
|
||||||
*
|
*
|
||||||
* Changelog
|
* Changelog
|
||||||
* 19.03.2014 Tim Ruehsen created from libmget/cookie.c
|
* 19.03.2014 Tim Ruehsen created from libmget/cookie.c
|
||||||
|
@ -42,6 +46,9 @@
|
||||||
|
|
||||||
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
||||||
|
|
||||||
|
// an invalid pointer
|
||||||
|
#define _PSL_INTERNAL 1
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char
|
char
|
||||||
label_buf[48];
|
label_buf[48];
|
||||||
|
@ -71,6 +78,12 @@ struct _psl_ctx_st {
|
||||||
*suffix_exceptions;
|
*suffix_exceptions;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#include "suffixes.c"
|
||||||
|
|
||||||
|
// references to this PSL will result in lookups to built-in data
|
||||||
|
static psl_ctx_t
|
||||||
|
_builtin_psl;
|
||||||
|
|
||||||
static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t *, const _psl_entry_t *))
|
static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t *, const _psl_entry_t *))
|
||||||
{
|
{
|
||||||
_psl_vector_t *v;
|
_psl_vector_t *v;
|
||||||
|
@ -232,11 +245,19 @@ int psl_is_public(const psl_ctx_t *psl, const char *domain)
|
||||||
suffix.nlabels++;
|
suffix.nlabels++;
|
||||||
|
|
||||||
// if domain has enough labels, it is public
|
// if domain has enough labels, it is public
|
||||||
|
if (psl == &_builtin_psl)
|
||||||
|
rule = &suffixes[0];
|
||||||
|
else
|
||||||
rule = _vector_get(psl->suffixes, 0);
|
rule = _vector_get(psl->suffixes, 0);
|
||||||
|
|
||||||
if (!rule || rule->nlabels < suffix.nlabels - 1)
|
if (!rule || rule->nlabels < suffix.nlabels - 1)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
|
if (psl == &_builtin_psl)
|
||||||
|
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
|
||||||
|
else
|
||||||
rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));
|
rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));
|
||||||
|
|
||||||
if (rule) {
|
if (rule) {
|
||||||
// definitely a match, no matter if the found rule is a wildcard or not
|
// definitely a match, no matter if the found rule is a wildcard or not
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -250,7 +271,11 @@ int psl_is_public(const psl_ctx_t *psl, const char *domain)
|
||||||
suffix.length = strlen(suffix.label);
|
suffix.length = strlen(suffix.label);
|
||||||
suffix.nlabels--;
|
suffix.nlabels--;
|
||||||
|
|
||||||
|
if (psl == &_builtin_psl)
|
||||||
|
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
|
||||||
|
else
|
||||||
rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));
|
rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));
|
||||||
|
|
||||||
if (rule) {
|
if (rule) {
|
||||||
if (rule->wildcard) {
|
if (rule->wildcard) {
|
||||||
// now that we matched a wildcard, we have to check for an exception
|
// now that we matched a wildcard, we have to check for an exception
|
||||||
|
@ -258,8 +283,13 @@ int psl_is_public(const psl_ctx_t *psl, const char *domain)
|
||||||
suffix.length = length_bak;
|
suffix.length = length_bak;
|
||||||
suffix.nlabels++;
|
suffix.nlabels++;
|
||||||
|
|
||||||
|
if (psl == &_builtin_psl) {
|
||||||
|
if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare))
|
||||||
|
return 1; // found an exception, so 'domain' is public
|
||||||
|
} else {
|
||||||
if (_vector_get(psl->suffix_exceptions, _vector_find(psl->suffix_exceptions, &suffix)) != 0)
|
if (_vector_get(psl->suffix_exceptions, _vector_find(psl->suffix_exceptions, &suffix)) != 0)
|
||||||
return 1; // found an exception, so 'domain' is public
|
return 1; // found an exception, so 'domain' is public
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -269,6 +299,23 @@ int psl_is_public(const psl_ctx_t *psl, const char *domain)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int psl_global_init(void)
|
||||||
|
{
|
||||||
|
size_t it;
|
||||||
|
|
||||||
|
for (it = 0; it < countof(suffixes); it++)
|
||||||
|
suffixes[it].label = suffixes[it].label_buf;
|
||||||
|
|
||||||
|
for (it = 0; it < countof(suffix_exceptions); it++)
|
||||||
|
suffix_exceptions[it].label = suffix_exceptions[it].label_buf;
|
||||||
|
|
||||||
|
return 0; // 0 = OK
|
||||||
|
}
|
||||||
|
|
||||||
|
void psl_global_deinit(void)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
psl_ctx_t *psl_load_file(const char *fname)
|
psl_ctx_t *psl_load_file(const char *fname)
|
||||||
{
|
{
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
|
@ -332,24 +379,56 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
||||||
return psl;
|
return psl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// return built-in PSL structure
|
||||||
|
psl_ctx_t *psl_builtin(void)
|
||||||
|
{
|
||||||
|
return &_builtin_psl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void psl_free(psl_ctx_t **psl)
|
||||||
|
{
|
||||||
|
if (psl && *psl) {
|
||||||
|
if (*psl != &_builtin_psl) {
|
||||||
|
_vector_free(&(*psl)->suffixes);
|
||||||
|
_vector_free(&(*psl)->suffix_exceptions);
|
||||||
|
}
|
||||||
|
free(*psl);
|
||||||
|
*psl = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* does not include exceptions */
|
/* does not include exceptions */
|
||||||
int psl_suffix_count(const psl_ctx_t *psl)
|
int psl_suffix_count(const psl_ctx_t *psl)
|
||||||
{
|
{
|
||||||
|
if (psl == &_builtin_psl)
|
||||||
|
return countof(suffixes);
|
||||||
|
else
|
||||||
return _vector_size(psl->suffixes);
|
return _vector_size(psl->suffixes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* just counts exceptions */
|
/* just counts exceptions */
|
||||||
int psl_suffix_exception_count(const psl_ctx_t *psl)
|
int psl_suffix_exception_count(const psl_ctx_t *psl)
|
||||||
{
|
{
|
||||||
|
if (psl == &_builtin_psl)
|
||||||
|
return countof(suffix_exceptions);
|
||||||
|
else
|
||||||
return _vector_size(psl->suffix_exceptions);
|
return _vector_size(psl->suffix_exceptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
void psl_free(psl_ctx_t **psl)
|
// returns compilation time
|
||||||
|
time_t psl_builtin_compile_time(void)
|
||||||
{
|
{
|
||||||
if (psl && *psl) {
|
return _psl_compile_time;
|
||||||
_vector_free(&(*psl)->suffixes);
|
|
||||||
_vector_free(&(*psl)->suffix_exceptions);
|
|
||||||
free(*psl);
|
|
||||||
*psl = NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// returns mtime of PSL source file
|
||||||
|
time_t psl_builtin_file_time(void)
|
||||||
|
{
|
||||||
|
return _psl_file_time;
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns SHA-1 checksum (hex-encoded, lowercase) of PSL source file
|
||||||
|
const char *psl_builtin_sha1sum(void)
|
||||||
|
{
|
||||||
|
return _psl_sha1_checksum;
|
||||||
}
|
}
|
||||||
|
|
256
src/psl2c.c
256
src/psl2c.c
|
@ -29,14 +29,253 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
#include <ctype.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
|
||||||
//#ifdef WITH_LIBIDN2
|
//#ifdef WITH_LIBIDN2
|
||||||
# include <idn2.h>
|
//# include <idn2.h>
|
||||||
//#endif
|
//#endif
|
||||||
|
|
||||||
#include "psl.c"
|
#include <libpsl.h>
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
char
|
||||||
|
label_buf[48];
|
||||||
|
const char *
|
||||||
|
label;
|
||||||
|
unsigned short
|
||||||
|
length;
|
||||||
|
unsigned char
|
||||||
|
nlabels, // number of labels
|
||||||
|
wildcard; // this is a wildcard rule (e.g. *.sapporo.jp)
|
||||||
|
} _psl_entry_t;
|
||||||
|
|
||||||
|
// stripped-down version of the libmget vector routines
|
||||||
|
typedef struct {
|
||||||
|
int
|
||||||
|
(*cmp)(const _psl_entry_t *, const _psl_entry_t *); // comparison function
|
||||||
|
_psl_entry_t
|
||||||
|
**entry; // pointer to array of pointers to elements
|
||||||
|
int
|
||||||
|
max, // allocated elements
|
||||||
|
cur; // number of elements in use
|
||||||
|
} _psl_vector_t;
|
||||||
|
|
||||||
|
struct _psl_ctx_st {
|
||||||
|
_psl_vector_t
|
||||||
|
*suffixes,
|
||||||
|
*suffix_exceptions;
|
||||||
|
};
|
||||||
|
|
||||||
|
static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t *, const _psl_entry_t *))
|
||||||
|
{
|
||||||
|
_psl_vector_t *v;
|
||||||
|
|
||||||
|
if (!(v = calloc(1, sizeof(_psl_vector_t))))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (!(v->entry = malloc(max * sizeof(_psl_entry_t *)))) {
|
||||||
|
free(v);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
v->max = max;
|
||||||
|
v->cmp = cmp;
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void _vector_free(_psl_vector_t **v)
|
||||||
|
{
|
||||||
|
if (v && *v) {
|
||||||
|
if ((*v)->entry) {
|
||||||
|
int it;
|
||||||
|
|
||||||
|
for (it = 0; it < (*v)->cur; it++)
|
||||||
|
free((*v)->entry[it]);
|
||||||
|
|
||||||
|
free((*v)->entry);
|
||||||
|
}
|
||||||
|
free(*v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static _psl_entry_t *_vector_get(const _psl_vector_t *v, int pos)
|
||||||
|
{
|
||||||
|
if (pos < 0 || !v || pos >= v->cur) return NULL;
|
||||||
|
|
||||||
|
return v->entry[pos];
|
||||||
|
}
|
||||||
|
|
||||||
|
// the entries must already be sorted by the vector's cmp function (binary search)
|
||||||
|
static int _vector_find(const _psl_vector_t *v, const _psl_entry_t *elem)
|
||||||
|
{
|
||||||
|
if (v) {
|
||||||
|
int l, r, m;
|
||||||
|
int res;
|
||||||
|
|
||||||
|
// binary search for element (exact match)
|
||||||
|
for (l = 0, r = v->cur - 1; l <= r;) {
|
||||||
|
m = (l + r) / 2;
|
||||||
|
if ((res = v->cmp(elem, v->entry[m])) > 0) l = m + 1;
|
||||||
|
else if (res < 0) r = m - 1;
|
||||||
|
else return m;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1; // not found
|
||||||
|
}
|
||||||
|
|
||||||
|
static int _vector_add(_psl_vector_t *v, const _psl_entry_t *elem)
|
||||||
|
{
|
||||||
|
if (v) {
|
||||||
|
void *elemp;
|
||||||
|
|
||||||
|
elemp = malloc(sizeof(_psl_entry_t));
|
||||||
|
memcpy(elemp, elem, sizeof(_psl_entry_t));
|
||||||
|
|
||||||
|
if (v->max == v->cur)
|
||||||
|
v->entry = realloc(v->entry, (v->max *= 2) * sizeof(_psl_entry_t *));
|
||||||
|
|
||||||
|
v->entry[v->cur++] = elemp;
|
||||||
|
return v->cur - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int _compare(const void *p1, const void *p2, void *v)
|
||||||
|
{
|
||||||
|
return ((_psl_vector_t *)v)->cmp(*((_psl_entry_t **)p1), *((_psl_entry_t **)p2));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void _vector_sort(_psl_vector_t *v)
|
||||||
|
{
|
||||||
|
if (v && v->cmp)
|
||||||
|
qsort_r(v->entry, v->cur, sizeof(_psl_vector_t *), _compare, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int _vector_size(_psl_vector_t *v)
|
||||||
|
{
|
||||||
|
return v ? v->cur : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// by this kind of sorting, we can easily see if a domain matches or not (match = supercookie !)
|
||||||
|
|
||||||
|
static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
|
||||||
|
{
|
||||||
|
int n;
|
||||||
|
|
||||||
|
if ((n = s2->nlabels - s1->nlabels))
|
||||||
|
return n; // most labels first
|
||||||
|
|
||||||
|
if ((n = s1->length - s2->length))
|
||||||
|
return n; // shorter rules first
|
||||||
|
|
||||||
|
return strcmp(s1->label, s2->label);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
|
||||||
|
{
|
||||||
|
const char *src;
|
||||||
|
char *dst;
|
||||||
|
|
||||||
|
suffix->label = suffix->label_buf;
|
||||||
|
|
||||||
|
if (length >= sizeof(suffix->label_buf) - 1) {
|
||||||
|
suffix->nlabels = 0;
|
||||||
|
fprintf(stderr, "Suffix rule too long (%zd, ignored): %s\n", length, rule);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*rule == '*') {
|
||||||
|
if (*++rule != '.') {
|
||||||
|
suffix->nlabels = 0;
|
||||||
|
fprintf(stderr, "Unsupported kind of rule (ignored): %s\n", rule);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
rule++;
|
||||||
|
suffix->wildcard = 1;
|
||||||
|
suffix->length = (unsigned char)length - 2;
|
||||||
|
} else {
|
||||||
|
suffix->wildcard = 0;
|
||||||
|
suffix->length = (unsigned char)length;
|
||||||
|
}
|
||||||
|
|
||||||
|
suffix->nlabels = 1;
|
||||||
|
|
||||||
|
for (dst = suffix->label_buf, src = rule; *src;) {
|
||||||
|
if (*src == '.')
|
||||||
|
suffix->nlabels++;
|
||||||
|
*dst++ = tolower(*src++);
|
||||||
|
}
|
||||||
|
*dst = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
psl_ctx_t *psl_load_file(const char *fname)
|
||||||
|
{
|
||||||
|
FILE *fp;
|
||||||
|
psl_ctx_t *psl = NULL;
|
||||||
|
|
||||||
|
if ((fp = fopen(fname, "r"))) {
|
||||||
|
psl = psl_load_fp(fp);
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
return psl;
|
||||||
|
}
|
||||||
|
|
||||||
|
psl_ctx_t *psl_load_fp(FILE *fp)
|
||||||
|
{
|
||||||
|
psl_ctx_t *psl;
|
||||||
|
_psl_entry_t suffix, *suffixp;
|
||||||
|
int nsuffixes = 0;
|
||||||
|
char buf[256], *linep, *p;
|
||||||
|
|
||||||
|
if (!fp)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (!(psl = calloc(1, sizeof(psl_ctx_t))))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
// as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
||||||
|
// as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
||||||
|
psl->suffixes = _vector_alloc(8*1024, _suffix_compare);
|
||||||
|
psl->suffix_exceptions = _vector_alloc(64, _suffix_compare);
|
||||||
|
|
||||||
|
while ((linep = fgets(buf, sizeof(buf), fp))) {
|
||||||
|
while (isspace(*linep)) linep++; // ignore leading whitespace
|
||||||
|
if (!*linep) continue; // skip empty lines
|
||||||
|
|
||||||
|
if (*linep == '/' && linep[1] == '/')
|
||||||
|
continue; // skip comments
|
||||||
|
|
||||||
|
// parse suffix rule
|
||||||
|
for (p = linep; *linep && !isspace(*linep);) linep++;
|
||||||
|
*linep = 0;
|
||||||
|
|
||||||
|
if (*p == '!') {
|
||||||
|
// add to exceptions
|
||||||
|
_suffix_init(&suffix, p + 1, linep - p - 1);
|
||||||
|
suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix));
|
||||||
|
} else {
|
||||||
|
_suffix_init(&suffix, p, linep - p);
|
||||||
|
suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (suffixp)
|
||||||
|
suffixp->label = suffixp->label_buf; // set label to changed address
|
||||||
|
|
||||||
|
nsuffixes++;;
|
||||||
|
}
|
||||||
|
|
||||||
|
_vector_sort(psl->suffix_exceptions);
|
||||||
|
_vector_sort(psl->suffixes);
|
||||||
|
|
||||||
|
return psl;
|
||||||
|
}
|
||||||
|
|
||||||
static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *varname)
|
static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *varname)
|
||||||
{
|
{
|
||||||
|
@ -55,6 +294,16 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *
|
||||||
fprintf(fpout, "};\n");
|
fprintf(fpout, "};\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void psl_free(psl_ctx_t **psl)
|
||||||
|
{
|
||||||
|
if (psl && *psl) {
|
||||||
|
_vector_free(&(*psl)->suffixes);
|
||||||
|
_vector_free(&(*psl)->suffix_exceptions);
|
||||||
|
free(*psl);
|
||||||
|
*psl = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int _str_needs_encoding(const char *s)
|
static int _str_needs_encoding(const char *s)
|
||||||
{
|
{
|
||||||
while (*s > 0) s++;
|
while (*s > 0) s++;
|
||||||
|
@ -90,7 +339,8 @@ static void _add_punycode_if_needed(_psl_vector_t *v)
|
||||||
char cmd[16 + strlen(e->label_buf)], lookupname[64] = "";
|
char cmd[16 + strlen(e->label_buf)], lookupname[64] = "";
|
||||||
snprintf(cmd, sizeof(cmd), "idn2 '%s'", e->label_buf);
|
snprintf(cmd, sizeof(cmd), "idn2 '%s'", e->label_buf);
|
||||||
if ((pp = popen(cmd, "r"))) {
|
if ((pp = popen(cmd, "r"))) {
|
||||||
if (fscanf(pp, "%63s", lookupname) >= 1) {
|
if (fscanf(pp, "%63s", lookupname) >= 1 && strcmp(e->label_buf, lookupname)) {
|
||||||
|
// fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, lookupname);
|
||||||
_suffix_init(&suffix, lookupname, strlen(lookupname));
|
_suffix_init(&suffix, lookupname, strlen(lookupname));
|
||||||
suffix.wildcard = e->wildcard;
|
suffix.wildcard = e->wildcard;
|
||||||
_vector_add(v, &suffix);
|
_vector_add(v, &suffix);
|
||||||
|
|
|
@ -9,7 +9,7 @@ PSL_TESTS = test-is-public test-is-public-inline
|
||||||
check_PROGRAMS = $(PSL_TESTS)
|
check_PROGRAMS = $(PSL_TESTS)
|
||||||
|
|
||||||
#test_is_public_inline_SOURCES = test-is-public-inline.c
|
#test_is_public_inline_SOURCES = test-is-public-inline.c
|
||||||
test_is_public_inline_LDADD = ../src/libpsl-inline-@LIBPSL_API_VERSION@.la
|
#test_is_public_inline_LDADD = ../src/libpsl-inline-@LIBPSL_API_VERSION@.la
|
||||||
|
|
||||||
TESTS_ENVIRONMENT = TESTS_VALGRIND="@VALGRIND_ENVIRONMENT@"
|
TESTS_ENVIRONMENT = TESTS_VALGRIND="@VALGRIND_ENVIRONMENT@"
|
||||||
TESTS = $(PSL_TESTS)
|
TESTS = $(PSL_TESTS)
|
||||||
|
|
|
@ -32,7 +32,7 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include <libpsl-inline.h>
|
#include <libpsl.h>
|
||||||
|
|
||||||
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
||||||
|
|
||||||
|
@ -66,14 +66,16 @@ static void test_psl(void)
|
||||||
{ "www.xn--czr694b", 1 },
|
{ "www.xn--czr694b", 1 },
|
||||||
};
|
};
|
||||||
unsigned it;
|
unsigned it;
|
||||||
|
psl_ctx_t *psl;
|
||||||
|
|
||||||
psl_inline_init();
|
if (psl_global_init() == 0) {
|
||||||
|
psl = psl_builtin();
|
||||||
|
|
||||||
printf("have %d suffixes and %d exceptions\n", psl_inline_suffix_count(), psl_inline_suffix_exception_count());
|
printf("have %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));
|
||||||
|
|
||||||
for (it = 0; it < countof(test_data); it++) {
|
for (it = 0; it < countof(test_data); it++) {
|
||||||
const struct test_data *t = &test_data[it];
|
const struct test_data *t = &test_data[it];
|
||||||
int result = psl_inline_is_public(t->domain);
|
int result = psl_is_public(psl, t->domain);
|
||||||
|
|
||||||
if (result == t->result) {
|
if (result == t->result) {
|
||||||
ok++;
|
ok++;
|
||||||
|
@ -83,16 +85,17 @@ static void test_psl(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("psl_builtin_compile_time()=%ld\n", psl_inline_builtin_compile_time());
|
printf("psl_builtin_compile_time()=%ld\n", psl_builtin_compile_time());
|
||||||
psl_inline_builtin_compile_time() == 0 ? failed++ : ok++;
|
psl_builtin_compile_time() == 0 ? failed++ : ok++;
|
||||||
|
|
||||||
printf("psl_builtin_file_time()=%ld\n", psl_inline_builtin_file_time());
|
printf("psl_builtin_file_time()=%ld\n", psl_builtin_file_time());
|
||||||
psl_inline_builtin_file_time() == 0 ? failed++ : ok++;
|
psl_builtin_file_time() == 0 ? failed++ : ok++;
|
||||||
|
|
||||||
printf("psl_builtin_sha1sum()=%s\n", psl_inline_builtin_sha1sum());
|
printf("psl_builtin_sha1sum()=%s\n", psl_builtin_sha1sum());
|
||||||
*psl_inline_builtin_sha1sum() == 0 ? failed++ : ok++;
|
*psl_builtin_sha1sum() == 0 ? failed++ : ok++;
|
||||||
|
|
||||||
psl_inline_deinit();
|
psl_global_deinit();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, const char * const *argv)
|
int main(int argc, const char * const *argv)
|
||||||
|
|
|
@ -66,6 +66,7 @@ static void test_psl(void)
|
||||||
unsigned it;
|
unsigned it;
|
||||||
psl_ctx_t *psl;
|
psl_ctx_t *psl;
|
||||||
|
|
||||||
|
if (psl_global_init() == 0) {
|
||||||
psl = psl_load_file(DATADIR "/effective_tld_names.dat");
|
psl = psl_load_file(DATADIR "/effective_tld_names.dat");
|
||||||
|
|
||||||
printf("loaded %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));
|
printf("loaded %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));
|
||||||
|
@ -83,6 +84,8 @@ static void test_psl(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
psl_free(&psl);
|
psl_free(&psl);
|
||||||
|
psl_global_deinit();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, const char * const *argv)
|
int main(int argc, const char * const *argv)
|
||||||
|
|
Loading…
Reference in New Issue