Release V0.2.2
This commit is contained in:
commit
d51a73dd27
5
NEWS
5
NEWS
|
@ -1,5 +1,10 @@
|
|||
Copyright (C) 2014 Tim Ruehsen
|
||||
|
||||
26.05.2014 Release V0.2.2
|
||||
* changed code to C89
|
||||
* added a few test cases
|
||||
* build static library by default
|
||||
|
||||
25.04.2014 Hotfix release V0.2.1
|
||||
* Updated to the latest Public Suffix List
|
||||
|
||||
|
|
10
configure.ac
10
configure.ac
|
@ -1,5 +1,5 @@
|
|||
|
||||
AC_INIT([libpsl], [0.2.1], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl])
|
||||
AC_INIT([libpsl], [0.2.2], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl])
|
||||
AC_PREREQ([2.59])
|
||||
AM_INIT_AUTOMAKE([1.10 -Wall no-define])
|
||||
|
||||
|
@ -8,7 +8,9 @@ AM_INIT_AUTOMAKE([1.10 -Wall no-define])
|
|||
# the library.
|
||||
AC_CONFIG_HEADERS([config.h])
|
||||
AC_PROG_CXX
|
||||
LT_INIT([disable-static])
|
||||
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
|
||||
#LT_INIT([disable-static])
|
||||
LT_INIT
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||
|
||||
|
@ -61,8 +63,8 @@ AS_IF([ test "$enable_man" != no ], [
|
|||
# 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0.
|
||||
# 5. If any interfaces have been added since the last public release, then increment age.
|
||||
# 6. If any interfaces have been removed or changed since the last public release, then set age to 0.
|
||||
AC_SUBST([LIBPSL_SO_VERSION], [0:2:0])
|
||||
AC_SUBST([LIBPSL_API_VERSION], [0.2.1])
|
||||
AC_SUBST([LIBPSL_SO_VERSION], [0:3:0])
|
||||
AC_SUBST([LIBPSL_API_VERSION], [0.2.2])
|
||||
|
||||
# Check for idn2
|
||||
AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2'.))
|
||||
|
|
|
@ -41,42 +41,46 @@ extern "C" {
|
|||
|
||||
typedef struct _psl_ctx_st psl_ctx_t;
|
||||
|
||||
/* frees PSL context */
|
||||
void
|
||||
psl_free(psl_ctx_t *psl);
|
||||
/* loads PSL data from file */
|
||||
psl_ctx_t *
|
||||
psl_load_file(const char *fname);
|
||||
/* loads PSL data from FILE pointer */
|
||||
psl_ctx_t *
|
||||
psl_load_fp(FILE *fp);
|
||||
/* retrieves builtin PSL data */
|
||||
const psl_ctx_t *
|
||||
psl_builtin(void);
|
||||
// checks wether domain is a public suffix or not
|
||||
/* checks whether domain is a public suffix or not */
|
||||
int
|
||||
psl_is_public_suffix(const psl_ctx_t *psl, const char *domain);
|
||||
// checks wether cookie_domain is acceptable for domain or not
|
||||
/* checks whether cookie_domain is acceptable for domain or not */
|
||||
int
|
||||
psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain);
|
||||
// returns the longest unregistrable domain within 'domain' or NULL if none found
|
||||
/* returns the longest unregistrable domain within 'domain' or NULL if none found */
|
||||
const char *
|
||||
psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain);
|
||||
// returns the shortest possible registrable domain part or NULL if domain is not registrable at all
|
||||
/* returns the shortest possible registrable domain part or NULL if domain is not registrable at all */
|
||||
const char *
|
||||
psl_registrable_domain(const psl_ctx_t *psl, const char *domain);
|
||||
// does not include exceptions
|
||||
/* does not include exceptions */
|
||||
int
|
||||
psl_suffix_count(const psl_ctx_t *psl);
|
||||
// just counts exceptions
|
||||
/* just counts exceptions */
|
||||
int
|
||||
psl_suffix_exception_count(const psl_ctx_t *psl);
|
||||
// returns compilation time
|
||||
/* returns compilation time */
|
||||
time_t
|
||||
psl_builtin_compile_time(void);
|
||||
// returns mtime of PSL source file
|
||||
/* returns mtime of PSL source file */
|
||||
time_t
|
||||
psl_builtin_file_time(void);
|
||||
// returns SHA1 checksum (hex-encoded, lowercase) of PSL source file
|
||||
/* returns SHA1 checksum (hex-encoded, lowercase) of PSL source file */
|
||||
const char *
|
||||
psl_builtin_sha1sum(void);
|
||||
// returns file name of PSL source file
|
||||
/* returns file name of PSL source file */
|
||||
const char *
|
||||
psl_builtin_filename(void);
|
||||
|
||||
|
|
93
src/psl.c
93
src/psl.c
|
@ -28,7 +28,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
// need _GNU_SOURCE for qsort_r()
|
||||
/* need _GNU_SOURCE for qsort_r() */
|
||||
#ifndef _GNU_SOURCE
|
||||
# define _GNU_SOURCE
|
||||
#endif
|
||||
|
@ -73,19 +73,19 @@ typedef struct {
|
|||
unsigned short
|
||||
length;
|
||||
unsigned char
|
||||
nlabels, // number of labels
|
||||
wildcard; // this is a wildcard rule (e.g. *.sapporo.jp)
|
||||
nlabels, /* number of labels */
|
||||
wildcard; /* this is a wildcard rule (e.g. *.sapporo.jp) */
|
||||
} _psl_entry_t;
|
||||
|
||||
// stripped down version libmget vector routines
|
||||
/* stripped-down version of the libmget vector routines */
|
||||
typedef struct {
|
||||
int
|
||||
(*cmp)(const _psl_entry_t *, const _psl_entry_t *); // comparison function
|
||||
(*cmp)(const _psl_entry_t *, const _psl_entry_t *); /* comparison function */
|
||||
_psl_entry_t
|
||||
**entry; // pointer to array of pointers to elements
|
||||
**entry; /* pointer to array of pointers to elements */
|
||||
int
|
||||
max, // allocated elements
|
||||
cur; // number of elements in use
|
||||
max, /* allocated elements */
|
||||
cur; /* number of elements in use */
|
||||
} _psl_vector_t;
|
||||
|
||||
struct _psl_ctx_st {
|
||||
|
@ -94,10 +94,10 @@ struct _psl_ctx_st {
|
|||
*suffix_exceptions;
|
||||
};
|
||||
|
||||
// include the PSL data compiled by 'psl2c'
|
||||
/* include the PSL data compiled by 'psl2c' */
|
||||
#include "suffixes.c"
|
||||
|
||||
// references to this PSL will result in lookups to built-in data
|
||||
/* references to this PSL will result in lookups to built-in data */
|
||||
static const psl_ctx_t
|
||||
_builtin_psl;
|
||||
|
||||
|
@ -140,14 +140,14 @@ static _psl_entry_t *_vector_get(const _psl_vector_t *v, int pos)
|
|||
return v->entry[pos];
|
||||
}
|
||||
|
||||
// the entries must be sorted by
|
||||
/* the entries must be sorted by the vector's comparison function (v->cmp), since a binary search is used */
|
||||
static int _vector_find(const _psl_vector_t *v, const _psl_entry_t *elem)
|
||||
{
|
||||
if (v) {
|
||||
int l, r, m;
|
||||
int res;
|
||||
|
||||
// binary search for element (exact match)
|
||||
/* binary search for element (exact match) */
|
||||
for (l = 0, r = v->cur - 1; l <= r;) {
|
||||
m = (l + r) / 2;
|
||||
if ((res = v->cmp(elem, v->entry[m])) > 0) l = m + 1;
|
||||
|
@ -156,7 +156,7 @@ static int _vector_find(const _psl_vector_t *v, const _psl_entry_t *elem)
|
|||
}
|
||||
}
|
||||
|
||||
return -1; // not found
|
||||
return -1; /* not found */
|
||||
}
|
||||
|
||||
static int _vector_add(_psl_vector_t *v, const _psl_entry_t *elem)
|
||||
|
@ -188,22 +188,21 @@ static void _vector_sort(_psl_vector_t *v)
|
|||
qsort_r(v->entry, v->cur, sizeof(_psl_vector_t *), _compare, v);
|
||||
}
|
||||
|
||||
static inline int _vector_size(_psl_vector_t *v)
|
||||
static int _vector_size(_psl_vector_t *v)
|
||||
{
|
||||
return v ? v->cur : 0;
|
||||
}
|
||||
|
||||
// by this kind of sorting, we can easily see if a domain matches or not
|
||||
|
||||
/* by this kind of sorting, we can easily see if a domain matches or not */
|
||||
static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
|
||||
{
|
||||
int n;
|
||||
|
||||
if ((n = s2->nlabels - s1->nlabels))
|
||||
return n; // most labels first
|
||||
return n; /* most labels first */
|
||||
|
||||
if ((n = s1->length - s2->length))
|
||||
return n; // shorter rules first
|
||||
return n; /* shorter rules first */
|
||||
|
||||
return strcmp(s1->label, s2->label ? s2->label : s2->label_buf);
|
||||
}
|
||||
|
@ -217,14 +216,14 @@ static int _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
|
|||
|
||||
if (length >= sizeof(suffix->label_buf) - 1) {
|
||||
suffix->nlabels = 0;
|
||||
// fprintf(stderr, _("Suffix rule too long (%zd, ignored): %s\n"), length, rule);
|
||||
/* fprintf(stderr, _("Suffix rule too long (%zd, ignored): %s\n"), length, rule); */
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (*rule == '*') {
|
||||
if (*++rule != '.') {
|
||||
suffix->nlabels = 0;
|
||||
// fprintf(stderr, _("Unsupported kind of rule (ignored): %s\n"), rule);
|
||||
/* fprintf(stderr, _("Unsupported kind of rule (ignored): %s\n"), rule); */
|
||||
return -2;
|
||||
}
|
||||
rule++;
|
||||
|
@ -273,7 +272,7 @@ int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
|||
if (!psl || !domain)
|
||||
return 1;
|
||||
|
||||
// this function should be called without leading dots, just make sure
|
||||
/* this function should be called without leading dots, just make sure */
|
||||
suffix.label = domain + (*domain == '.');
|
||||
suffix.length = strlen(suffix.label);
|
||||
suffix.wildcard = 0;
|
||||
|
@ -283,7 +282,7 @@ int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
|||
if (*p == '.')
|
||||
suffix.nlabels++;
|
||||
|
||||
// if domain has enough labels, it is public
|
||||
/* if domain has enough labels, it is public */
|
||||
if (psl == &_builtin_psl)
|
||||
rule = &suffixes[0];
|
||||
else
|
||||
|
@ -298,10 +297,10 @@ int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
|||
rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));
|
||||
|
||||
if (rule) {
|
||||
// definitely a match, no matter if the found rule is a wildcard or not
|
||||
/* definitely a match, no matter if the found rule is a wildcard or not */
|
||||
return 1;
|
||||
} else if (suffix.nlabels == 1) {
|
||||
// unknown TLD, this is the prevailing '*' match
|
||||
/* unknown TLD, this is the prevailing '*' match */
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -320,17 +319,17 @@ int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
|||
|
||||
if (rule) {
|
||||
if (rule->wildcard) {
|
||||
// now that we matched a wildcard, we have to check for an exception
|
||||
/* now that we matched a wildcard, we have to check for an exception */
|
||||
suffix.label = label_bak;
|
||||
suffix.length = length_bak;
|
||||
suffix.nlabels++;
|
||||
|
||||
if (psl == &_builtin_psl) {
|
||||
if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare))
|
||||
return 0; // found an exception, so 'domain' is not a public suffix
|
||||
return 0; /* found an exception, so 'domain' is not a public suffix */
|
||||
} else {
|
||||
if (_vector_get(psl->suffix_exceptions, _vector_find(psl->suffix_exceptions, &suffix)) != 0)
|
||||
return 0; // found an exception, so 'domain' is not a public suffix
|
||||
return 0; /* found an exception, so 'domain' is not a public suffix */
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
@ -362,14 +361,16 @@ const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
|
|||
if (!psl || !domain)
|
||||
return NULL;
|
||||
|
||||
// We check from left to right to catch special PSL entries like 'forgot.his.name':
|
||||
// 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
|
||||
/*
|
||||
* We check from left to right to catch special PSL entries like 'forgot.his.name':
|
||||
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
|
||||
*/
|
||||
|
||||
while (!psl_is_public_suffix(psl, domain)) {
|
||||
if ((domain = strchr(domain, '.')))
|
||||
domain++;
|
||||
else
|
||||
break; // prevent endless loop if psl_is_public_suffix() is broken.
|
||||
break; /* prevent endless loop if psl_is_public_suffix() is broken. */
|
||||
}
|
||||
|
||||
return domain;
|
||||
|
@ -398,15 +399,17 @@ const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
|
|||
if (!psl || !domain || *domain == '.')
|
||||
return NULL;
|
||||
|
||||
// We check from left to right to catch special PSL entries like 'forgot.his.name':
|
||||
// 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
|
||||
/*
|
||||
* We check from left to right to catch special PSL entries like 'forgot.his.name':
|
||||
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
|
||||
*/
|
||||
|
||||
while (!psl_is_public_suffix(psl, domain)) {
|
||||
if ((p = strchr(domain, '.'))) {
|
||||
regdom = domain;
|
||||
domain = p + 1;
|
||||
} else
|
||||
break; // prevent endless loop if psl_is_public_suffix() is broken.
|
||||
break; /* prevent endless loop if psl_is_public_suffix() is broken. */
|
||||
}
|
||||
|
||||
return regdom;
|
||||
|
@ -473,24 +476,26 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
|||
if (!(psl = calloc(1, sizeof(psl_ctx_t))))
|
||||
return NULL;
|
||||
|
||||
// as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
||||
// as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
||||
/*
|
||||
* as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
||||
* as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
||||
*/
|
||||
psl->suffixes = _vector_alloc(8*1024, _suffix_compare);
|
||||
psl->suffix_exceptions = _vector_alloc(64, _suffix_compare);
|
||||
|
||||
while ((linep = fgets(buf, sizeof(buf), fp))) {
|
||||
while (isspace(*linep)) linep++; // ignore leading whitespace
|
||||
if (!*linep) continue; // skip empty lines
|
||||
while (isspace(*linep)) linep++; /* ignore leading whitespace */
|
||||
if (!*linep) continue; /* skip empty lines */
|
||||
|
||||
if (*linep == '/' && linep[1] == '/')
|
||||
continue; // skip comments
|
||||
continue; /* skip comments */
|
||||
|
||||
// parse suffix rule
|
||||
/* parse suffix rule */
|
||||
for (p = linep; *linep && !isspace(*linep);) linep++;
|
||||
*linep = 0;
|
||||
|
||||
if (*p == '!') {
|
||||
// add to exceptions
|
||||
/* add to exceptions */
|
||||
if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0)
|
||||
suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix));
|
||||
else
|
||||
|
@ -503,7 +508,7 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
|||
}
|
||||
|
||||
if (suffixp)
|
||||
suffixp->label = suffixp->label_buf; // set label to changed address
|
||||
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
||||
|
||||
nsuffixes++;;
|
||||
}
|
||||
|
@ -697,17 +702,17 @@ int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname,
|
|||
cookie_domain++;
|
||||
|
||||
if (!strcmp(hostname, cookie_domain))
|
||||
return 1; // an exact match is acceptable (and pretty common)
|
||||
return 1; /* an exact match is acceptable (and pretty common) */
|
||||
|
||||
cookie_domain_length = strlen(cookie_domain);
|
||||
hostname_length = strlen(hostname);
|
||||
|
||||
if (cookie_domain_length >= hostname_length)
|
||||
return 0; // cookie_domain is too long
|
||||
return 0; /* cookie_domain is too long */
|
||||
|
||||
p = hostname + hostname_length - cookie_domain_length;
|
||||
if (!strcmp(p, cookie_domain) && p[-1] == '.') {
|
||||
// OK, cookie_domain matches, but it must be longer than the longest public suffix in 'hostname'
|
||||
/* OK, cookie_domain matches, but it must be longer than the longest public suffix in 'hostname' */
|
||||
|
||||
if (!(p = psl_unregistrable_domain(psl, hostname)))
|
||||
return 1;
|
||||
|
|
75
src/psl2c.c
75
src/psl2c.c
|
@ -39,9 +39,11 @@
|
|||
#include <ctype.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
//#ifdef WITH_LIBIDN2
|
||||
//# include <idn2.h>
|
||||
//#endif
|
||||
/*
|
||||
#ifdef WITH_LIBIDN2
|
||||
# include <idn2.h>
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifdef WITH_BUILTIN
|
||||
|
||||
|
@ -55,19 +57,19 @@ typedef struct {
|
|||
unsigned short
|
||||
length;
|
||||
unsigned char
|
||||
nlabels, // number of labels
|
||||
wildcard; // this is a wildcard rule (e.g. *.sapporo.jp)
|
||||
nlabels, /* number of labels */
|
||||
wildcard; /* this is a wildcard rule (e.g. *.sapporo.jp) */
|
||||
} _psl_entry_t;
|
||||
|
||||
// stripped down version libmget vector routines
|
||||
/* stripped-down version of the libmget vector routines */
|
||||
typedef struct {
|
||||
int
|
||||
(*cmp)(const _psl_entry_t *, const _psl_entry_t *); // comparison function
|
||||
(*cmp)(const _psl_entry_t *, const _psl_entry_t *); /* comparison function */
|
||||
_psl_entry_t
|
||||
**entry; // pointer to array of pointers to elements
|
||||
**entry; /* pointer to array of pointers to elements */
|
||||
int
|
||||
max, // allocated elements
|
||||
cur; // number of elements in use
|
||||
max, /* allocated elements */
|
||||
cur; /* number of elements in use */
|
||||
} _psl_vector_t;
|
||||
|
||||
struct _psl_ctx_st {
|
||||
|
@ -144,17 +146,17 @@ static void _vector_sort(_psl_vector_t *v)
|
|||
qsort_r(v->entry, v->cur, sizeof(_psl_vector_t *), _compare, v);
|
||||
}
|
||||
|
||||
// by this kind of sorting, we can easily see if a domain matches or not (match = supercookie !)
|
||||
/* by this kind of sorting, we can easily see if a domain matches or not (match = supercookie !) */
|
||||
|
||||
static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
|
||||
{
|
||||
int n;
|
||||
|
||||
if ((n = s2->nlabels - s1->nlabels))
|
||||
return n; // most labels first
|
||||
return n; /* most labels first */
|
||||
|
||||
if ((n = s1->length - s2->length))
|
||||
return n; // shorter rules first
|
||||
return n; /* shorter rules first */
|
||||
|
||||
return strcmp(s1->label, s2->label);
|
||||
}
|
||||
|
@ -168,7 +170,7 @@ static void _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
|
|||
|
||||
if (length >= sizeof(suffix->label_buf) - 1) {
|
||||
suffix->nlabels = 0;
|
||||
fprintf(stderr, "Suffix rule too long (%zd, ignored): %s\n", length, rule);
|
||||
fprintf(stderr, "Suffix rule too long (%d, ignored): %s\n", (int) length, rule);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -222,24 +224,26 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
|||
if (!(psl = calloc(1, sizeof(psl_ctx_t))))
|
||||
return NULL;
|
||||
|
||||
// as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
||||
// as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
||||
/*
|
||||
* as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
||||
* as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
||||
*/
|
||||
psl->suffixes = _vector_alloc(8*1024, _suffix_compare);
|
||||
psl->suffix_exceptions = _vector_alloc(64, _suffix_compare);
|
||||
|
||||
while ((linep = fgets(buf, sizeof(buf), fp))) {
|
||||
while (isspace(*linep)) linep++; // ignore leading whitespace
|
||||
if (!*linep) continue; // skip empty lines
|
||||
while (isspace(*linep)) linep++; /* ignore leading whitespace */
|
||||
if (!*linep) continue; /* skip empty lines */
|
||||
|
||||
if (*linep == '/' && linep[1] == '/')
|
||||
continue; // skip comments
|
||||
continue; /* skip comments */
|
||||
|
||||
// parse suffix rule
|
||||
/* parse suffix rule */
|
||||
for (p = linep; *linep && !isspace(*linep);) linep++;
|
||||
*linep = 0;
|
||||
|
||||
if (*p == '!') {
|
||||
// add to exceptions
|
||||
/* add to exceptions */
|
||||
_suffix_init(&suffix, p + 1, linep - p - 1);
|
||||
suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix));
|
||||
} else {
|
||||
|
@ -248,7 +252,7 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
|||
}
|
||||
|
||||
if (suffixp)
|
||||
suffixp->label = suffixp->label_buf; // set label to changed address
|
||||
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
||||
|
||||
nsuffixes++;;
|
||||
}
|
||||
|
@ -263,14 +267,14 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *
|
|||
{
|
||||
int it;
|
||||
|
||||
fprintf(fpout, "// automatically generated by psl2c\n");
|
||||
fprintf(fpout, "/* automatically generated by psl2c */\n");
|
||||
fprintf(fpout, "static _psl_entry_t %s[] = {\n", varname);
|
||||
|
||||
for (it = 0; it < v->cur; it++) {
|
||||
_psl_entry_t *e = _vector_get(v, it);
|
||||
|
||||
fprintf(fpout, "\t{ \"%s\", NULL, %hd, %hhd, %hhd },\n",
|
||||
e->label_buf, e->length, e->nlabels, e->wildcard);
|
||||
fprintf(fpout, "\t{ \"%s\", NULL, %hd, %d, %d },\n",
|
||||
e->label_buf, e->length, (int) e->nlabels, (int) e->wildcard);
|
||||
}
|
||||
|
||||
fprintf(fpout, "};\n");
|
||||
|
@ -296,14 +300,14 @@ static void _add_punycode_if_needed(_psl_vector_t *v)
|
|||
{
|
||||
int it, n;
|
||||
|
||||
// do not use 'it < v->cur' since v->cur is changed by _vector_add() !
|
||||
/* do not use 'it < v->cur' since v->cur is changed by _vector_add() ! */
|
||||
for (it = 0, n = v->cur; it < n; it++) {
|
||||
_psl_entry_t *e = _vector_get(v, it);
|
||||
|
||||
if (_str_needs_encoding(e->label_buf)) {
|
||||
_psl_entry_t suffix, *suffixp;
|
||||
|
||||
// the following lines will have GPL3+ license issues
|
||||
/* the following lines will have GPL3+ license issues */
|
||||
/* char *asc = NULL;
|
||||
int rc;
|
||||
|
||||
|
@ -317,17 +321,17 @@ static void _add_punycode_if_needed(_psl_vector_t *v)
|
|||
fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc));
|
||||
*/
|
||||
|
||||
// this is much slower than the libidn2 API but should have no license issues
|
||||
/* this is much slower than the libidn2 API but should have no license issues */
|
||||
FILE *pp;
|
||||
char cmd[16 + strlen(e->label_buf)], lookupname[64] = "";
|
||||
char cmd[16 + sizeof(e->label_buf)], lookupname[64] = "";
|
||||
snprintf(cmd, sizeof(cmd), "idn2 '%s'", e->label_buf);
|
||||
if ((pp = popen(cmd, "r"))) {
|
||||
if (fscanf(pp, "%63s", lookupname) >= 1 && strcmp(e->label_buf, lookupname)) {
|
||||
// fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, lookupname);
|
||||
/* fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, lookupname); */
|
||||
_suffix_init(&suffix, lookupname, strlen(lookupname));
|
||||
suffix.wildcard = e->wildcard;
|
||||
suffixp = _vector_get(v, _vector_add(v, &suffix));
|
||||
suffixp->label = suffixp->label_buf; // set label to changed address
|
||||
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
||||
}
|
||||
pclose(pp);
|
||||
} else
|
||||
|
@ -337,7 +341,7 @@ static void _add_punycode_if_needed(_psl_vector_t *v)
|
|||
|
||||
_vector_sort(v);
|
||||
}
|
||||
#endif // WITH_BUILTIN
|
||||
#endif /* WITH_BUILTIN */
|
||||
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
|
@ -361,7 +365,8 @@ int main(int argc, const char **argv)
|
|||
if ((fpout = fopen(argv[2], "w"))) {
|
||||
FILE *pp;
|
||||
struct stat st;
|
||||
char cmd[16 + strlen(argv[1])], checksum[64] = "";
|
||||
size_t cmdsize = 16 + strlen(argv[1]);
|
||||
char *cmd = alloca(cmdsize), checksum[64] = "";
|
||||
|
||||
_add_punycode_if_needed(psl->suffixes);
|
||||
_add_punycode_if_needed(psl->suffix_exceptions);
|
||||
|
@ -369,7 +374,7 @@ int main(int argc, const char **argv)
|
|||
_print_psl_entries(fpout, psl->suffixes, "suffixes");
|
||||
_print_psl_entries(fpout, psl->suffix_exceptions, "suffix_exceptions");
|
||||
|
||||
snprintf(cmd, sizeof(cmd), "sha1sum %s", argv[1]);
|
||||
snprintf(cmd, cmdsize, "sha1sum %s", argv[1]);
|
||||
if ((pp = popen(cmd, "r"))) {
|
||||
if (fscanf(pp, "%63[0-9a-zA-Z]", checksum) < 1)
|
||||
*checksum = 0;
|
||||
|
@ -407,7 +412,7 @@ int main(int argc, const char **argv)
|
|||
ret = 3;
|
||||
}
|
||||
|
||||
#endif // WITH_BUILTIN
|
||||
#endif /* WITH_BUILTIN */
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <alloca.h>
|
||||
|
||||
#include <libpsl.h>
|
||||
|
||||
|
@ -46,8 +47,6 @@ static int
|
|||
|
||||
static void test_psl(void)
|
||||
{
|
||||
// punycode generation: idn 商标
|
||||
// octal code generation: echo -n "商标" | od -b
|
||||
static const struct test_data {
|
||||
const char
|
||||
*request_domain,
|
||||
|
@ -65,10 +64,10 @@ static void test_psl(void)
|
|||
{ "www.his.name", "name", 0 },
|
||||
{ "www.example.com", "www.example.com", 1 },
|
||||
{ "www.example.com", "example.com", 1 },
|
||||
{ "www.example.com", "com", 0 }, // not accepted by normalization (PSL rule 'com')
|
||||
{ "www.example.com", "com", 0 }, /* not accepted by normalization (PSL rule 'com') */
|
||||
{ "www.example.com", "example.org", 0 },
|
||||
{ "www.sa.gov.au", "sa.gov.au", 0 }, // not accepted by normalization (PSL rule '*.ar')
|
||||
{ "www.educ.ar", "educ.ar", 1 }, // PSL exception rule '!educ.ar'
|
||||
{ "www.sa.gov.au", "sa.gov.au", 0 }, /* not accepted by normalization (PSL rule '*.ar') */
|
||||
{ "www.educ.ar", "educ.ar", 1 }, /* PSL exception rule '!educ.ar' */
|
||||
};
|
||||
unsigned it;
|
||||
psl_ctx_t *psl;
|
||||
|
@ -95,14 +94,15 @@ static void test_psl(void)
|
|||
|
||||
int main(int argc, const char * const *argv)
|
||||
{
|
||||
// if VALGRIND testing is enabled, we have to call ourselves with valgrind checking
|
||||
/* if VALGRIND testing is enabled, we have to call ourselves with valgrind checking */
|
||||
if (argc == 1) {
|
||||
const char *valgrind = getenv("TESTS_VALGRIND");
|
||||
|
||||
if (valgrind && *valgrind) {
|
||||
char cmd[strlen(valgrind)+strlen(argv[0])+32];
|
||||
size_t cmdsize = strlen(valgrind) + strlen(argv[0]) + 32;
|
||||
char *cmd = alloca(cmdsize);
|
||||
|
||||
snprintf(cmd, sizeof(cmd), "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
snprintf(cmd, cmdsize, "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
return system(cmd) != 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <alloca.h>
|
||||
|
||||
#include <libpsl.h>
|
||||
|
||||
|
@ -50,23 +51,23 @@ static void test_psl(void)
|
|||
int result;
|
||||
char buf[256], domain[64], *linep, *p;
|
||||
|
||||
psl = psl_load_file(PSL_FILE); // PSL_FILE can be set by ./configure --with-psl-file=[PATH]
|
||||
psl = psl_load_file(PSL_FILE); /* PSL_FILE can be set by ./configure --with-psl-file=[PATH] */
|
||||
|
||||
printf("loaded %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));
|
||||
|
||||
if ((fp = fopen(PSL_FILE, "r"))) {
|
||||
while ((linep = fgets(buf, sizeof(buf), fp))) {
|
||||
while (isspace(*linep)) linep++; // ignore leading whitespace
|
||||
if (!*linep) continue; // skip empty lines
|
||||
while (isspace(*linep)) linep++; /* ignore leading whitespace */
|
||||
if (!*linep) continue; /* skip empty lines */
|
||||
|
||||
if (*linep == '/' && linep[1] == '/')
|
||||
continue; // skip comments
|
||||
continue; /* skip comments */
|
||||
|
||||
// parse suffix rule
|
||||
/* parse suffix rule */
|
||||
for (p = linep; *linep && !isspace(*linep);) linep++;
|
||||
*linep = 0;
|
||||
|
||||
if (*p == '!') { // an exception to a wildcard, e.g. !www.ck (wildcard is *.ck)
|
||||
if (*p == '!') { /* an exception to a wildcard, e.g. !www.ck (wildcard is *.ck) */
|
||||
if ((result = psl_is_public_suffix(psl, p + 1))) {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix(%s)=%d (expected 0)\n", p, result);
|
||||
|
@ -77,7 +78,7 @@ static void test_psl(void)
|
|||
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", strchr(p, '.') + 1, result);
|
||||
} else ok++;
|
||||
}
|
||||
else if (*p == '*') { // a wildcard, e.g. *.ck
|
||||
else if (*p == '*') { /* a wildcard, e.g. *.ck */
|
||||
if (!(result = psl_is_public_suffix(psl, p + 1))) {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", p + 1, result);
|
||||
|
@ -114,14 +115,15 @@ static void test_psl(void)
|
|||
|
||||
int main(int argc, const char * const *argv)
|
||||
{
|
||||
// if VALGRIND testing is enabled, we have to call ourselves with valgrind checking
|
||||
/* if VALGRIND testing is enabled, we have to call ourselves with valgrind checking */
|
||||
if (argc == 1) {
|
||||
const char *valgrind = getenv("TESTS_VALGRIND");
|
||||
|
||||
if (valgrind && *valgrind) {
|
||||
char cmd[strlen(valgrind) + strlen(argv[0]) + 32];
|
||||
size_t cmdsize = strlen(valgrind) + strlen(argv[0]) + 32;
|
||||
char *cmd = alloca(cmdsize);
|
||||
|
||||
snprintf(cmd, sizeof(cmd), "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
snprintf(cmd, cmdsize, "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
return system(cmd) != 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <alloca.h>
|
||||
|
||||
#include <libpsl.h>
|
||||
|
||||
|
@ -46,8 +47,8 @@ static int
|
|||
|
||||
static void test_psl(void)
|
||||
{
|
||||
// punycode generation: idn 商标
|
||||
// octal code generation: echo -n "商标" | od -b
|
||||
/* punycode generation: idn 商标 */
|
||||
/* octal code generation: echo -n "商标" | od -b */
|
||||
static const struct test_data {
|
||||
const char
|
||||
*domain;
|
||||
|
@ -60,15 +61,15 @@ static void test_psl(void)
|
|||
{ "cc.ar.us", 1 },
|
||||
{ ".cc.ar.us", 1 },
|
||||
{ "www.cc.ar.us", 0 },
|
||||
{ "www.ck", 0 }, // exception from *.ck
|
||||
{ "www.ck", 0 }, /* exception from *.ck */
|
||||
{ "abc.www.ck", 0 },
|
||||
{ "xxx.ck", 1 },
|
||||
{ "www.xxx.ck", 0 },
|
||||
{ "\345\225\206\346\240\207", 1 }, // xn--czr694b oder 商标
|
||||
{ "\345\225\206\346\240\207", 1 }, /* xn--czr694b or 商标 */
|
||||
{ "www.\345\225\206\346\240\207", 0 },
|
||||
{ "xn--czr694b", 1 },
|
||||
{ "www.xn--czr694b", 0 },
|
||||
// some special test follow ('name' and 'forgot.his.name' are public, but e.g. his.name is not)
|
||||
/* some special tests follow ('name' and 'forgot.his.name' are public, but e.g. his.name is not) */
|
||||
{ "name", 1 },
|
||||
{ ".name", 1 },
|
||||
{ "his.name", 0 },
|
||||
|
@ -77,6 +78,10 @@ static void test_psl(void)
|
|||
{ ".forgot.his.name", 1 },
|
||||
{ "whoever.his.name", 0 },
|
||||
{ "whoever.forgot.his.name", 0 },
|
||||
{ ".", 1 }, /* special case */
|
||||
{ "", 1 }, /* special case */
|
||||
{ NULL, 1 }, /* special case */
|
||||
{ "adfhoweirh", 1 }, /* unknown TLD */
|
||||
};
|
||||
unsigned it;
|
||||
const psl_ctx_t *psl;
|
||||
|
@ -109,14 +114,15 @@ static void test_psl(void)
|
|||
|
||||
int main(int argc, const char * const *argv)
|
||||
{
|
||||
// if VALGRIND testing is enabled, we have to call ourselves with valgrind checking
|
||||
/* if VALGRIND testing is enabled, we have to call ourselves with valgrind checking */
|
||||
if (argc == 1) {
|
||||
const char *valgrind = getenv("TESTS_VALGRIND");
|
||||
|
||||
if (valgrind && *valgrind) {
|
||||
char cmd[strlen(valgrind)+strlen(argv[0])+32];
|
||||
size_t cmdsize = strlen(valgrind) + strlen(argv[0]) + 32;
|
||||
char *cmd = alloca(cmdsize);
|
||||
|
||||
snprintf(cmd, sizeof(cmd), "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
snprintf(cmd, cmdsize, "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
return system(cmd) != 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <alloca.h>
|
||||
|
||||
#include <libpsl.h>
|
||||
|
||||
|
@ -46,8 +47,8 @@ static int
|
|||
|
||||
static void test_psl(void)
|
||||
{
|
||||
// punycode generation: idn 商标
|
||||
// octal code generation: echo -n "商标" | od -b
|
||||
/* punycode generation: idn 商标 */
|
||||
/* octal code generation: echo -n "商标" | od -b */
|
||||
static const struct test_data {
|
||||
const char
|
||||
*domain;
|
||||
|
@ -60,13 +61,13 @@ static void test_psl(void)
|
|||
{ "cc.ar.us", 1 },
|
||||
{ ".cc.ar.us", 1 },
|
||||
{ "www.cc.ar.us", 0 },
|
||||
{ "www.ck", 0 }, // exception from *.ck
|
||||
{ "www.ck", 0 }, /* exception from *.ck */
|
||||
{ "abc.www.ck", 0 },
|
||||
{ "xxx.ck", 1 },
|
||||
{ "www.xxx.ck", 0 },
|
||||
{ "\345\225\206\346\240\207", 1 }, // xn--czr694b oder 商标
|
||||
{ "\345\225\206\346\240\207", 1 }, /* xn--czr694b or 商标 */
|
||||
{ "www.\345\225\206\346\240\207", 0 },
|
||||
// some special test follow ('name' and 'forgot.his.name' are public, but e.g. his.name is not)
|
||||
/* some special tests follow ('name' and 'forgot.his.name' are public suffixes, but e.g. 'his.name' is not) */
|
||||
{ "name", 1 },
|
||||
{ ".name", 1 },
|
||||
{ "his.name", 0 },
|
||||
|
@ -75,6 +76,10 @@ static void test_psl(void)
|
|||
{ ".forgot.his.name", 1 },
|
||||
{ "whoever.his.name", 0 },
|
||||
{ "whoever.forgot.his.name", 0 },
|
||||
{ ".", 1 }, /* special case */
|
||||
{ "", 1 }, /* special case */
|
||||
{ NULL, 1 }, /* special case */
|
||||
{ "adfhoweirh", 1 }, /* unknown TLD */
|
||||
};
|
||||
unsigned it;
|
||||
psl_ctx_t *psl;
|
||||
|
@ -100,14 +105,15 @@ static void test_psl(void)
|
|||
|
||||
int main(int argc, const char * const *argv)
|
||||
{
|
||||
// if VALGRIND testing is enabled, we have to call ourselves with valgrind checking
|
||||
/* if VALGRIND testing is enabled, we have to call ourselves with valgrind checking */
|
||||
if (argc == 1) {
|
||||
const char *valgrind = getenv("TESTS_VALGRIND");
|
||||
|
||||
if (valgrind && *valgrind) {
|
||||
char cmd[strlen(valgrind)+strlen(argv[0])+32];
|
||||
size_t cmdsize = strlen(valgrind) + strlen(argv[0]) + 32;
|
||||
char *cmd = alloca(cmdsize);
|
||||
|
||||
snprintf(cmd, sizeof(cmd), "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
snprintf(cmd, cmdsize, "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
return system(cmd) != 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <alloca.h>
|
||||
|
||||
#include <libpsl.h>
|
||||
|
||||
|
@ -48,7 +49,7 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_
|
|||
const char *result;
|
||||
char lookupname[128];
|
||||
|
||||
// check if there might be some utf-8 characters
|
||||
/* check if there might be some utf-8 characters */
|
||||
if (domain) {
|
||||
int utf8;
|
||||
const char *p;
|
||||
|
@ -57,13 +58,14 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_
|
|||
if (*p < 0)
|
||||
utf8 = 1;
|
||||
|
||||
// if we found utf-8, make sure to convert domain correctly to lowercase
|
||||
// does it work, if we are not in a utf-8 env ?
|
||||
/* if we found utf-8, make sure to convert domain correctly to lowercase */
|
||||
/* does it work, if we are not in a utf-8 env ? */
|
||||
if (utf8) {
|
||||
FILE *pp;
|
||||
char cmd[48 + strlen(domain)];
|
||||
size_t cmdsize = 48 + strlen(domain);
|
||||
char *cmd = alloca(cmdsize);
|
||||
|
||||
snprintf(cmd, sizeof(cmd), "echo -n '%s' | sed -e 's/./\\L\\0/g'", domain);
|
||||
snprintf(cmd, cmdsize, "echo -n '%s' | sed -e 's/./\\L\\0/g'", domain);
|
||||
if ((pp = popen(cmd, "r"))) {
|
||||
if (fscanf(pp, "%127s", lookupname) >= 1)
|
||||
domain = lookupname;
|
||||
|
@ -93,28 +95,28 @@ static void test_psl(void)
|
|||
|
||||
printf("have %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));
|
||||
|
||||
// special check with NULL values
|
||||
/* special check with NULL values */
|
||||
test(NULL, NULL, NULL);
|
||||
|
||||
// special check with NULL psl context
|
||||
/* special check with NULL psl context */
|
||||
test(NULL, "www.example.com", NULL);
|
||||
|
||||
// special check with NULL psl context and TLD
|
||||
/* special check with NULL psl context and TLD */
|
||||
test(NULL, "com", NULL);
|
||||
|
||||
// Norwegian with uppercase oe
|
||||
/* Norwegian with uppercase oe */
|
||||
test(psl, "www.\303\230yer.no", "www.\303\270yer.no");
|
||||
|
||||
// Norwegian with lowercase oe
|
||||
/* Norwegian with lowercase oe */
|
||||
test(psl, "www.\303\270yer.no", "www.\303\270yer.no");
|
||||
|
||||
// special check with NULL psl context and TLD
|
||||
/* domain one label below the public suffix 'forgot.his.name' */
|
||||
test(psl, "whoever.forgot.his.name", "whoever.forgot.his.name");
|
||||
|
||||
// special check with NULL psl context and TLD
|
||||
/* the public suffix 'forgot.his.name' itself */
|
||||
test(psl, "forgot.his.name", NULL);
|
||||
|
||||
// special check with NULL psl context and TLD
|
||||
/* 'his.name' is not a public suffix, although 'name' and 'forgot.his.name' are */
|
||||
test(psl, "his.name", "his.name");
|
||||
|
||||
if ((fp = fopen(PSL_TESTFILE, "r"))) {
|
||||
|
@ -124,7 +126,7 @@ static void test_psl(void)
|
|||
continue;
|
||||
}
|
||||
|
||||
// we have to lowercase the domain - the PSL API just takes lowercase
|
||||
/* we have to lowercase the domain - the PSL API just takes lowercase */
|
||||
for (p = domain; *p; p++)
|
||||
if (*p > 0 && isupper(*p))
|
||||
*p = tolower(*p);
|
||||
|
@ -144,14 +146,15 @@ static void test_psl(void)
|
|||
|
||||
int main(int argc, const char * const *argv)
|
||||
{
|
||||
// if VALGRIND testing is enabled, we have to call ourselves with valgrind checking
|
||||
/* if VALGRIND testing is enabled, we have to call ourselves with valgrind checking */
|
||||
if (argc == 1) {
|
||||
const char *valgrind = getenv("TESTS_VALGRIND");
|
||||
|
||||
if (valgrind && *valgrind) {
|
||||
char cmd[strlen(valgrind) + strlen(argv[0]) + 32];
|
||||
size_t cmdsize = strlen(valgrind) + strlen(argv[0]) + 32;
|
||||
char *cmd = alloca(cmdsize);
|
||||
|
||||
snprintf(cmd, sizeof(cmd), "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
snprintf(cmd, cmdsize, "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
return system(cmd) != 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,12 +54,13 @@ static void usage(int err)
|
|||
exit(err);
|
||||
}
|
||||
|
||||
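/* RFC 2822-like date format; uses only C89 strftime conversions, so the zone is printed via %Z (name/abbreviation) rather than the numeric offset RFC 2822 requires */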
|
||||
static const char *time2str(time_t t)
|
||||
{
|
||||
static char buf[64];
|
||||
struct tm *tp = localtime(&t);
|
||||
|
||||
strftime(buf, sizeof(buf), "%a, %d %b %Y %T %z", tp);
|
||||
strftime(buf, sizeof(buf), "%a, %d %b %Y %H:%M:%S %Z", tp);
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue