This commit is contained in:
Alessandro Vesely 2017-10-27 18:01:27 +00:00 committed by GitHub
commit 58ca2395e1
4 changed files with 162 additions and 170 deletions

View File

@ -73,7 +73,8 @@ typedef enum {
PSL_ERR_NO_MEM = -6 /* failed to allocate memory */
} psl_error_t;
typedef struct _psl_ctx_st psl_ctx_t;
struct psl_ctx_st;
typedef struct psl_ctx_st psl_ctx_t;
/* frees PSL context */
void

View File

@ -8,15 +8,15 @@
#include <stddef.h>
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
# define _GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
# define GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#else
# define _GCC_VERSION_AT_LEAST(major, minor) 0
# define GCC_VERSION_AT_LEAST(major, minor) 0
#endif
#if _GCC_VERSION_AT_LEAST(4,0)
# define _HIDDEN __attribute__ ((visibility ("hidden")))
#if GCC_VERSION_AT_LEAST(4,0)
# define HIDDEN __attribute__ ((visibility ("hidden")))
#else
# define _HIDDEN
# define HIDDEN
#endif
#define CHECK_LT(a, b) if ((a) >= b) return 0
@ -203,9 +203,9 @@ static int GetReturnValue(const unsigned char* offset,
*/
/* prototype to skip warning with -Wmissing-prototypes */
int _HIDDEN LookupStringInFixedSet(const unsigned char*, size_t,const char*, size_t);
int HIDDEN LookupStringInFixedSet(const unsigned char*, size_t,const char*, size_t);
int _HIDDEN LookupStringInFixedSet(const unsigned char* graph,
int HIDDEN LookupStringInFixedSet(const unsigned char* graph,
size_t length,
const char* key,
size_t key_length)
@ -277,9 +277,9 @@ int _HIDDEN LookupStringInFixedSet(const unsigned char* graph,
}
/* prototype to skip warning with -Wmissing-prototypes */
int _HIDDEN GetUtfMode(const unsigned char *graph, size_t length);
int HIDDEN GetUtfMode(const unsigned char *graph, size_t length);
int _HIDDEN GetUtfMode(const unsigned char *graph, size_t length)
/*
 * Returns 1 when the buffer is non-empty and its last byte is below 0x80,
 * otherwise 0. An empty buffer (length == 0) is never dereferenced.
 */
int HIDDEN GetUtfMode(const unsigned char *graph, size_t length)
{
	if (length == 0)
		return 0;

	return graph[length - 1] < 0x80;
}

View File

@ -514,12 +514,12 @@ def sha1_file(name):
def to_cxx_plus(data, codecs):
"""Generates C++ code from a word list plus some variable assignments as needed by libpsl"""
text = to_cxx(data, codecs)
text += b'static time_t _psl_file_time = %d;\n' % os.stat(psl_input_file).st_mtime
text += b'static int _psl_nsuffixes = %d;\n' % psl_nsuffixes
text += b'static int _psl_nexceptions = %d;\n' % psl_nexceptions
text += b'static int _psl_nwildcards = %d;\n' % psl_nwildcards
text += b'static const char _psl_sha1_checksum[] = "%s";\n' % bytes(sha1_file(psl_input_file), **codecs)
text += b'static const char _psl_filename[] = "%s";\n' % bytes(psl_input_file, **codecs)
text += b'static time_t psl_file_time = %d;\n' % os.stat(psl_input_file).st_mtime
text += b'static int psl_nsuffixes = %d;\n' % psl_nsuffixes
text += b'static int psl_nexceptions = %d;\n' % psl_nexceptions
text += b'static int psl_nwildcards = %d;\n' % psl_nwildcards
text += b'static const char psl_sha1_checksum[] = "%s";\n' % bytes(sha1_file(psl_input_file), **codecs)
text += b'static const char psl_filename[] = "%s";\n' % bytes(psl_input_file, **codecs)
return text
def words_to_whatever(words, converter, utf_mode, codecs):

299
src/psl.c
View File

@ -33,15 +33,15 @@
#endif
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
# define _GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
# define GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#else
# define _GCC_VERSION_AT_LEAST(major, minor) 0
# define GCC_VERSION_AT_LEAST(major, minor) 0
#endif
#if _GCC_VERSION_AT_LEAST(2,95)
# define _UNUSED __attribute__ ((unused))
#if GCC_VERSION_AT_LEAST(2,95)
# define UNUSED __attribute__ ((unused))
#else
# define _UNUSED
# define UNUSED
#endif
#if ENABLE_NLS != 0
@ -64,12 +64,19 @@
#include <time.h>
#include <errno.h>
#include <limits.h> /* for UINT_MAX */
#include <stdint.h>
#include <langinfo.h>
#include <arpa/inet.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif
/* stat, fstat */
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#ifdef WITH_LIBICU
# include <unicode/uversion.h>
# include <unicode/ustring.h>
@ -125,11 +132,11 @@ static char *strndup(const char *s, size_t n)
#define countof(a) (sizeof(a)/sizeof(*(a)))
#define _PSL_FLAG_EXCEPTION (1<<0)
#define _PSL_FLAG_WILDCARD (1<<1)
#define _PSL_FLAG_ICANN (1<<2) /* entry of ICANN section */
#define _PSL_FLAG_PRIVATE (1<<3) /* entry of PRIVATE section */
#define _PSL_FLAG_PLAIN (1<<4) /* just used for PSL syntax checking */
#define PSL_FLAG_EXCEPTION (1<<0)
#define PSL_FLAG_WILDCARD (1<<1)
#define PSL_FLAG_ICANN (1<<2) /* entry of ICANN section */
#define PSL_FLAG_PRIVATE (1<<3) /* entry of PRIVATE section */
#define PSL_FLAG_PLAIN (1<<4) /* just used for PSL syntax checking */
typedef struct {
char
@ -141,21 +148,21 @@ typedef struct {
unsigned char
nlabels, /* number of labels */
flags;
} _psl_entry_t;
} psl_entry_t;
/* stripped down version libmget vector routines */
typedef struct {
int
(*cmp)(const _psl_entry_t **, const _psl_entry_t **); /* comparison function */
_psl_entry_t
(*cmp)(const psl_entry_t **, const psl_entry_t **); /* comparison function */
psl_entry_t
**entry; /* pointer to array of pointers to elements */
int
max, /* allocated elements */
cur; /* number of elements in use */
} _psl_vector_t;
} psl_vector_t;
struct _psl_ctx_st {
_psl_vector_t
struct psl_ctx_st {
psl_vector_t
*suffixes;
unsigned char
*dafsa;
@ -174,22 +181,22 @@ struct _psl_ctx_st {
/* references to these PSLs will result in lookups to built-in data */
static const psl_ctx_t
_builtin_psl;
builtin_psl;
#ifdef PSL_DISTFILE
static const char _psl_dist_filename[] = PSL_DISTFILE;
static const char psl_dist_filename_string[] = PSL_DISTFILE;
#else
static const char _psl_dist_filename[] = "";
static const char psl_dist_filename_string[] = "";
#endif
static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t **, const _psl_entry_t **))
static psl_vector_t *vector_alloc(int max, int (*cmp)(const psl_entry_t **, const psl_entry_t **))
{
_psl_vector_t *v;
psl_vector_t *v;
if (!(v = calloc(1, sizeof(_psl_vector_t))))
if (!(v = calloc(1, sizeof(psl_vector_t))))
return NULL;
if (!(v->entry = malloc(max * sizeof(_psl_entry_t *)))) {
if (!(v->entry = malloc(max * sizeof(psl_entry_t *)))) {
free(v);
return NULL;
}
@ -199,7 +206,7 @@ static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t **, c
return v;
}
static void _vector_free(_psl_vector_t **v)
static void vector_free(psl_vector_t **v)
{
if (v && *v) {
if ((*v)->entry) {
@ -214,7 +221,7 @@ static void _vector_free(_psl_vector_t **v)
}
}
static _psl_entry_t *_vector_get(const _psl_vector_t *v, int pos)
static psl_entry_t *vector_get(const psl_vector_t *v, int pos)
{
if (pos < 0 || !v || pos >= v->cur) return NULL;
@ -222,7 +229,7 @@ static _psl_entry_t *_vector_get(const _psl_vector_t *v, int pos)
}
/* the entries must be sorted by the vector's comparison function (v->cmp) */
static int _vector_find(const _psl_vector_t *v, const _psl_entry_t *elem)
static int vector_find(const psl_vector_t *v, const psl_entry_t *elem)
{
if (v) {
int l, r, m;
@ -231,7 +238,7 @@ static int _vector_find(const _psl_vector_t *v, const _psl_entry_t *elem)
/* binary search for element (exact match) */
for (l = 0, r = v->cur - 1; l <= r;) {
m = (l + r) / 2;
if ((res = v->cmp(&elem, (const _psl_entry_t **)&(v->entry[m]))) > 0) l = m + 1;
if ((res = v->cmp(&elem, (const psl_entry_t **)&(v->entry[m]))) > 0) l = m + 1;
else if (res < 0) r = m - 1;
else return m;
}
@ -240,18 +247,18 @@ static int _vector_find(const _psl_vector_t *v, const _psl_entry_t *elem)
return -1; /* not found */
}
static int _vector_add(_psl_vector_t *v, const _psl_entry_t *elem)
static int vector_add(psl_vector_t *v, const psl_entry_t *elem)
{
if (v) {
void *elemp;
if (!(elemp = malloc(sizeof(_psl_entry_t))))
if (!(elemp = malloc(sizeof(psl_entry_t))))
return -1;
memcpy(elemp, elem, sizeof(_psl_entry_t));
memcpy(elemp, elem, sizeof(psl_entry_t));
if (v->max == v->cur) {
void *m = realloc(v->entry, (v->max *= 2) * sizeof(_psl_entry_t *));
void *m = realloc(v->entry, (v->max *= 2) * sizeof(psl_entry_t *));
if (m)
v->entry = m;
@ -268,14 +275,14 @@ static int _vector_add(_psl_vector_t *v, const _psl_entry_t *elem)
return -1;
}
static void _vector_sort(_psl_vector_t *v)
static void vector_sort(psl_vector_t *v)
{
if (v && v->cmp)
qsort(v->entry, v->cur, sizeof(_psl_vector_t **), (int(*)(const void *, const void *))v->cmp);
qsort(v->entry, v->cur, sizeof(psl_vector_t **), (int(*)(const void *, const void *))v->cmp);
}
/* by this kind of sorting, we can easily see if a domain matches or not */
static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
static int suffix_compare(const psl_entry_t *s1, const psl_entry_t *s2)
{
int n;
@ -289,12 +296,12 @@ static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
}
/* needed to sort array of pointers, given to qsort() */
static int _suffix_compare_array(const _psl_entry_t **s1, const _psl_entry_t **s2)
static int suffix_compare_array(const psl_entry_t **s1, const psl_entry_t **s2)
{
return _suffix_compare(*s1, *s2);
return suffix_compare(*s1, *s2);
}
static int _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
static int suffix_init(psl_entry_t *suffix, const char *rule, size_t length)
{
const char *src;
char *dst;
@ -506,7 +513,7 @@ static enum punycode_status punycode_encode(
return punycode_success;
}
static ssize_t _utf8_to_utf32(const char *in, size_t inlen, punycode_uint *out, size_t outlen)
static ssize_t utf8_to_utf32(const char *in, size_t inlen, punycode_uint *out, size_t outlen)
{
size_t n = 0;
const unsigned char *s = (void *)in;
@ -547,7 +554,7 @@ static ssize_t _utf8_to_utf32(const char *in, size_t inlen, punycode_uint *out,
return n;
}
static int _mem_is_ascii(const char *s, size_t n)
static int mem_is_ascii(const char *s, size_t n)
{
while (n--)
if (*((unsigned char *)s++) >= 128)
@ -556,7 +563,7 @@ static int _mem_is_ascii(const char *s, size_t n)
return 1;
}
static int _domain_to_punycode(const char *domain, char *out, size_t outsize)
static int domain_to_punycode(const char *domain, char *out, size_t outsize)
{
size_t outlen = 0, labellen;
punycode_uint input[256];
@ -567,7 +574,7 @@ static int _domain_to_punycode(const char *domain, char *out, size_t outsize)
labellen = e ? (size_t) (e - label) : strlen(label);
/* printf("s=%s inlen=%zd\n", label, labellen); */
if (_mem_is_ascii(label, labellen)) {
if (mem_is_ascii(label, labellen)) {
if (outlen + labellen + (e != NULL) >= outsize)
return 1;
@ -580,7 +587,7 @@ static int _domain_to_punycode(const char *domain, char *out, size_t outsize)
if (outlen + labellen + (e != NULL) + 4 >= outsize)
return 1;
if ((inputlen = _utf8_to_utf32(label, labellen, input, countof(input))) < 0)
if ((inputlen = utf8_to_utf32(label, labellen, input, countof(input))) < 0)
return 1;
memcpy(out + outlen, "xn--", 4);
@ -602,12 +609,12 @@ static int _domain_to_punycode(const char *domain, char *out, size_t outsize)
}
#endif
static int _isspace_ascii(const char c)
/*
 * Whitespace test restricted to four characters:
 * returns 1 for space, tab, carriage return and line feed, 0 for anything else.
 */
static int isspace_ascii(const char c)
{
	switch (c) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
static int _str_is_ascii(const char *s)
static int str_is_ascii(const char *s)
{
while (*s && *((unsigned char *)s) < 128) s++;
@ -625,7 +632,7 @@ static int _str_is_ascii(const char *s)
* [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
* [3] https://curl.haxx.se/mail/lib-2015-06/0143.html
*/
static int _utf8_is_valid(const char *utf8)
static int utf8_is_valid(const char *utf8)
{
const unsigned char *s = (const unsigned char *) utf8;
@ -652,9 +659,9 @@ static int _utf8_is_valid(const char *utf8)
}
#endif
typedef void *_psl_idna_t;
typedef void *psl_idna_t;
static _psl_idna_t *_psl_idna_open(void)
static psl_idna_t *psl_idna_open(void)
{
#if defined(WITH_LIBICU)
UErrorCode status = 0;
@ -663,7 +670,7 @@ static _psl_idna_t *_psl_idna_open(void)
return NULL;
}
static void _psl_idna_close(_psl_idna_t *idna _UNUSED)
static void psl_idna_close(psl_idna_t *idna UNUSED)
{
#if defined(WITH_LIBICU)
if (idna)
@ -671,7 +678,7 @@ static void _psl_idna_close(_psl_idna_t *idna _UNUSED)
#endif
}
static int _psl_idna_toASCII(_psl_idna_t *idna _UNUSED, const char *utf8, char **ascii)
static int psl_idna_toASCII(psl_idna_t *idna UNUSED, const char *utf8, char **ascii)
{
int ret = -1;
@ -767,7 +774,7 @@ cleanup:
#elif defined(WITH_LIBIDN)
int rc;
if (!_utf8_is_valid(utf8)) {
if (!utf8_is_valid(utf8)) {
/* fprintf(_(stderr, "Invalid UTF-8 sequence not converted: '%s'\n"), utf8); */
return -1;
}
@ -781,7 +788,7 @@ cleanup:
#else
char lookupname[128];
if (_domain_to_punycode(utf8, lookupname, sizeof(lookupname)) == 0) {
if (domain_to_punycode(utf8, lookupname, sizeof(lookupname)) == 0) {
if (ascii)
if ((*ascii = strdup(lookupname)))
ret = 0;
@ -791,21 +798,21 @@ cleanup:
return ret;
}
static void _add_punycode_if_needed(_psl_idna_t *idna, _psl_vector_t *v, _psl_entry_t *e)
static void add_punycode_if_needed(psl_idna_t *idna, psl_vector_t *v, psl_entry_t *e)
{
char *lookupname;
if (_str_is_ascii(e->label_buf))
if (str_is_ascii(e->label_buf))
return;
if (_psl_idna_toASCII(idna, e->label_buf, &lookupname) == 0) {
if (psl_idna_toASCII(idna, e->label_buf, &lookupname) == 0) {
if (strcmp(e->label_buf, lookupname)) {
_psl_entry_t suffix, *suffixp;
psl_entry_t suffix, *suffixp;
/* fprintf(stderr, "toASCII '%s' -> '%s'\n", e->label_buf, lookupname); */
if (_suffix_init(&suffix, lookupname, strlen(lookupname)) == 0) {
if (suffix_init(&suffix, lookupname, strlen(lookupname)) == 0) {
suffix.flags = e->flags;
if ((suffixp = _vector_get(v, _vector_add(v, &suffix))))
if ((suffixp = vector_get(v, vector_add(v, &suffix))))
suffixp->label = suffixp->label_buf; /* set label to changed address */
}
} /* else ignore */
@ -818,9 +825,9 @@ static void _add_punycode_if_needed(_psl_idna_t *idna, _psl_vector_t *v, _psl_en
int LookupStringInFixedSet(const unsigned char* graph, size_t length, const char* key, size_t key_length);
int GetUtfMode(const unsigned char *graph, size_t length);
static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int type)
static int psl_is_public_suffix_internal(const psl_ctx_t *psl, const char *domain, int type)
{
_psl_entry_t suffix;
psl_entry_t suffix;
const char *p;
char *punycode = NULL;
int need_conversion = 0;
@ -845,18 +852,18 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int t
return 1;
}
if (psl->utf8 || psl == &_builtin_psl)
if (psl->utf8 || psl == &builtin_psl)
need_conversion = 0;
#if defined(WITH_LIBIDN) || defined(WITH_LIBIDN2) || defined(WITH_LIBICU)
if (psl == &_builtin_psl)
if (psl == &builtin_psl)
need_conversion = 0;
#endif
if (need_conversion) {
_psl_idna_t *idna = _psl_idna_open();
psl_idna_t *idna = psl_idna_open();
if (_psl_idna_toASCII(idna, domain, &punycode) == 0) {
if (psl_idna_toASCII(idna, domain, &punycode) == 0) {
suffix.label = punycode;
suffix.length = strlen(punycode);
} else {
@ -866,24 +873,24 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int t
suffix.length = p - suffix.label;
}
_psl_idna_close(idna);
psl_idna_close(idna);
} else {
suffix.label = domain;
suffix.length = p - suffix.label;
}
if (psl == &_builtin_psl || psl->dafsa) {
size_t dafsa_size = psl == &_builtin_psl ? sizeof(kDafsa) : psl->dafsa_size;
const unsigned char *dafsa = psl == &_builtin_psl ? kDafsa : psl->dafsa;
if (psl == &builtin_psl || psl->dafsa) {
size_t dafsa_size = psl == &builtin_psl ? sizeof(kDafsa) : psl->dafsa_size;
const unsigned char *dafsa = psl == &builtin_psl ? kDafsa : psl->dafsa;
int rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
if (rc != -1) {
/* check for correct rule type */
if (type == PSL_TYPE_ICANN && !(rc & _PSL_FLAG_ICANN))
if (type == PSL_TYPE_ICANN && !(rc & PSL_FLAG_ICANN))
goto suffix_no;
else if (type == PSL_TYPE_PRIVATE && !(rc & _PSL_FLAG_PRIVATE))
else if (type == PSL_TYPE_PRIVATE && !(rc & PSL_FLAG_PRIVATE))
goto suffix_no;
if (rc & _PSL_FLAG_EXCEPTION)
if (rc & PSL_FLAG_EXCEPTION)
goto suffix_no;
/* wildcard *.foo.bar implicitly makes foo.bar a public suffix */
@ -898,31 +905,31 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int t
rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
if (rc != -1) {
/* check for correct rule type */
if (type == PSL_TYPE_ICANN && !(rc & _PSL_FLAG_ICANN))
if (type == PSL_TYPE_ICANN && !(rc & PSL_FLAG_ICANN))
goto suffix_no;
else if (type == PSL_TYPE_PRIVATE && !(rc & _PSL_FLAG_PRIVATE))
else if (type == PSL_TYPE_PRIVATE && !(rc & PSL_FLAG_PRIVATE))
goto suffix_no;
if (rc & _PSL_FLAG_WILDCARD)
if (rc & PSL_FLAG_WILDCARD)
goto suffix_yes;
}
}
} else {
_psl_entry_t *rule = _vector_get(psl->suffixes, 0);
psl_entry_t *rule = vector_get(psl->suffixes, 0);
if (!rule || rule->nlabels < suffix.nlabels - 1)
goto suffix_no;
rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));
rule = vector_get(psl->suffixes, vector_find(psl->suffixes, &suffix));
if (rule) {
/* check for correct rule type */
if (type == PSL_TYPE_ICANN && !(rule->flags & _PSL_FLAG_ICANN))
if (type == PSL_TYPE_ICANN && !(rule->flags & PSL_FLAG_ICANN))
goto suffix_no;
else if (type == PSL_TYPE_PRIVATE && !(rule->flags & _PSL_FLAG_PRIVATE))
else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PSL_FLAG_PRIVATE))
goto suffix_no;
if (rule->flags & _PSL_FLAG_EXCEPTION)
if (rule->flags & PSL_FLAG_EXCEPTION)
goto suffix_no;
/* wildcard *.foo.bar implicitly makes foo.bar a public suffix */
@ -937,16 +944,16 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int t
suffix.length = strlen(suffix.label);
suffix.nlabels--;
rule = _vector_get(psl->suffixes, (pos = _vector_find(psl->suffixes, &suffix)));
rule = vector_get(psl->suffixes, (pos = vector_find(psl->suffixes, &suffix)));
if (rule) {
/* check for correct rule type */
if (type == PSL_TYPE_ICANN && !(rule->flags & _PSL_FLAG_ICANN))
if (type == PSL_TYPE_ICANN && !(rule->flags & PSL_FLAG_ICANN))
goto suffix_no;
else if (type == PSL_TYPE_PRIVATE && !(rule->flags & _PSL_FLAG_PRIVATE))
else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PSL_FLAG_PRIVATE))
goto suffix_no;
if (rule->flags & _PSL_FLAG_WILDCARD)
if (rule->flags & PSL_FLAG_WILDCARD)
goto suffix_yes;
}
}
@ -989,7 +996,7 @@ int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
if (!psl || !domain)
return 1;
return _psl_is_public_suffix(psl, domain, PSL_TYPE_ANY);
return psl_is_public_suffix_internal(psl, domain, PSL_TYPE_ANY);
}
/**
@ -1020,7 +1027,7 @@ int psl_is_public_suffix2(const psl_ctx_t *psl, const char *domain, int type)
if (!psl || !domain)
return 1;
return _psl_is_public_suffix(psl, domain, type);
return psl_is_public_suffix_internal(psl, domain, type);
}
/**
@ -1053,7 +1060,7 @@ const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
*/
while (!_psl_is_public_suffix(psl, domain, 0)) {
while (!psl_is_public_suffix_internal(psl, domain, 0)) {
if ((domain = strchr(domain, '.')))
domain++;
else
@ -1095,12 +1102,12 @@ const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
*/
while (!_psl_is_public_suffix(psl, domain, 0)) {
while (!psl_is_public_suffix_internal(psl, domain, 0)) {
if ((p = strchr(domain, '.'))) {
regdom = domain;
domain = p + 1;
} else
break; /* prevent endless loop if psl_is_public_suffix() is broken. */
break; /* prevent endless loop if psl_is_public_suffix_internal() is broken. */
}
return regdom;
@ -1151,10 +1158,10 @@ psl_ctx_t *psl_load_file(const char *fname)
psl_ctx_t *psl_load_fp(FILE *fp)
{
psl_ctx_t *psl;
_psl_entry_t suffix, *suffixp;
psl_entry_t suffix, *suffixp;
char buf[256], *linep, *p;
int type = 0, is_dafsa;
_psl_idna_t *idna;
psl_idna_t *idna;
if (!fp)
return NULL;
@ -1170,73 +1177,57 @@ psl_ctx_t *psl_load_fp(FILE *fp)
if (is_dafsa) {
void *m;
size_t size = 65536, n, len = 0;
int version = atoi(buf + 11);
struct stat st;
if (version != 0)
goto fail;
if (!(psl->dafsa = malloc(size)))
goto fail;
memcpy(psl->dafsa, buf, len);
while ((n = fread(psl->dafsa + len, 1, size - len, fp)) > 0) {
len += n;
if (len >= size) {
if (!(m = realloc(psl->dafsa, size *= 2)))
goto fail;
psl->dafsa = m;
}
}
/* release unused memory */
if ((m = realloc(psl->dafsa, len)))
psl->dafsa = m;
else if (!len)
psl->dafsa = NULL; /* realloc() just free'd psl->dafsa */
psl->dafsa_size = len;
psl->utf8 = !!GetUtfMode(psl->dafsa, len);
if (fstat(fileno(fp), &st) != 0 ||
(uintmax_t)st.st_size >= (uintmax_t)SIZE_MAX ||
(psl->dafsa = malloc(st.st_size)) == NULL ||
fread(psl->dafsa, psl->dafsa_size = st.st_size - 16, 1, fp) != 1)
goto fail;
psl->utf8 = !!GetUtfMode(psl->dafsa, psl->dafsa_size);
return psl;
}
idna = _psl_idna_open();
idna = psl_idna_open();
/*
* as of 02.11.2012, the list at https://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
* as of 19.02.2014, the list at https://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
*/
psl->suffixes = _vector_alloc(8*1024, _suffix_compare_array);
psl->suffixes = vector_alloc(8*1024, suffix_compare_array);
psl->utf8 = 1; /* we put UTF-8 and punycode rules in the lookup vector */
do {
while (_isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
while (isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
if (!*linep) continue; /* skip empty lines */
if (*linep == '/' && linep[1] == '/') {
if (!type) {
if (strstr(linep + 2, "===BEGIN ICANN DOMAINS==="))
type = _PSL_FLAG_ICANN;
type = PSL_FLAG_ICANN;
else if (!type && strstr(linep + 2, "===BEGIN PRIVATE DOMAINS==="))
type = _PSL_FLAG_PRIVATE;
type = PSL_FLAG_PRIVATE;
}
else if (type == _PSL_FLAG_ICANN && strstr(linep + 2, "===END ICANN DOMAINS==="))
else if (type == PSL_FLAG_ICANN && strstr(linep + 2, "===END ICANN DOMAINS==="))
type = 0;
else if (type == _PSL_FLAG_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS==="))
else if (type == PSL_FLAG_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS==="))
type = 0;
continue; /* skip comments */
}
/* parse suffix rule */
for (p = linep; *linep && !_isspace_ascii(*linep);) linep++;
for (p = linep; *linep && !isspace_ascii(*linep);) linep++;
*linep = 0;
if (*p == '!') {
p++;
suffix.flags = _PSL_FLAG_EXCEPTION | type;
suffix.flags = PSL_FLAG_EXCEPTION | type;
psl->nexceptions++;
} else if (*p == '*') {
if (*++p != '.') {
@ -1245,20 +1236,20 @@ psl_ctx_t *psl_load_fp(FILE *fp)
}
p++;
/* wildcard *.foo.bar implicitly makes foo.bar a public suffix */
suffix.flags = _PSL_FLAG_WILDCARD | _PSL_FLAG_PLAIN | type;
suffix.flags = PSL_FLAG_WILDCARD | PSL_FLAG_PLAIN | type;
psl->nwildcards++;
psl->nsuffixes++;
} else {
if (!strchr(p, '.'))
continue; /* we do not need an explicit plain TLD rule, already covered by implicit '*' rule */
suffix.flags = _PSL_FLAG_PLAIN | type;
suffix.flags = PSL_FLAG_PLAIN | type;
psl->nsuffixes++;
}
if (_suffix_init(&suffix, p, linep - p) == 0) {
if (suffix_init(&suffix, p, linep - p) == 0) {
int index;
if ((index = _vector_find(psl->suffixes, &suffix)) >= 0) {
if ((index = vector_find(psl->suffixes, &suffix)) >= 0) {
/* Found existing entry:
* Combination of exception and plain rule is ambiguous
* !foo.bar
@ -1271,23 +1262,23 @@ psl_ctx_t *psl_load_fp(FILE *fp)
* We do not check here, let's do it later.
*/
suffixp = _vector_get(psl->suffixes, index);
suffixp = vector_get(psl->suffixes, index);
suffixp->flags |= suffix.flags;
} else {
/* New entry */
suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix));
suffixp = vector_get(psl->suffixes, vector_add(psl->suffixes, &suffix));
}
if (suffixp) {
suffixp->label = suffixp->label_buf; /* set label to changed address */
_add_punycode_if_needed(idna, psl->suffixes, suffixp);
add_punycode_if_needed(idna, psl->suffixes, suffixp);
}
}
} while ((linep = fgets(buf, sizeof(buf), fp)));
_vector_sort(psl->suffixes);
vector_sort(psl->suffixes);
_psl_idna_close(idna);
psl_idna_close(idna);
return psl;
@ -1307,8 +1298,8 @@ fail:
*/
void psl_free(psl_ctx_t *psl)
{
if (psl && psl != &_builtin_psl) {
_vector_free(&psl->suffixes);
if (psl && psl != &builtin_psl) {
vector_free(&psl->suffixes);
free(psl->dafsa);
free(psl);
}
@ -1333,7 +1324,7 @@ void psl_free(psl_ctx_t *psl)
const psl_ctx_t *psl_builtin(void)
{
#if defined(BUILTIN_GENERATOR_LIBICU) || defined(BUILTIN_GENERATOR_LIBIDN2) || defined(BUILTIN_GENERATOR_LIBIDN)
return &_builtin_psl;
return &builtin_psl;
#else
return NULL;
#endif
@ -1355,8 +1346,8 @@ const psl_ctx_t *psl_builtin(void)
*/
int psl_suffix_count(const psl_ctx_t *psl)
{
if (psl == &_builtin_psl)
return _psl_nsuffixes;
if (psl == &builtin_psl)
return psl_nsuffixes;
else if (psl)
return psl->dafsa ? -1 : psl->nsuffixes;
else
@ -1378,8 +1369,8 @@ int psl_suffix_count(const psl_ctx_t *psl)
*/
int psl_suffix_exception_count(const psl_ctx_t *psl)
{
if (psl == &_builtin_psl)
return _psl_nexceptions;
if (psl == &builtin_psl)
return psl_nexceptions;
else if (psl)
return psl->dafsa ? -1 : psl->nexceptions;
else
@ -1401,8 +1392,8 @@ int psl_suffix_exception_count(const psl_ctx_t *psl)
*/
int psl_suffix_wildcard_count(const psl_ctx_t *psl)
{
if (psl == &_builtin_psl)
return _psl_nwildcards;
if (psl == &builtin_psl)
return psl_nwildcards;
else if (psl)
return psl->dafsa ? -1 : psl->nwildcards;
else
@ -1422,7 +1413,7 @@ int psl_suffix_wildcard_count(const psl_ctx_t *psl)
*/
time_t psl_builtin_file_time(void)
{
return _psl_file_time;
return psl_file_time;
}
/**
@ -1439,7 +1430,7 @@ time_t psl_builtin_file_time(void)
*/
const char *psl_builtin_sha1sum(void)
{
return _psl_sha1_checksum;
return psl_sha1_checksum;
}
/**
@ -1455,7 +1446,7 @@ const char *psl_builtin_sha1sum(void)
*/
const char *psl_builtin_filename(void)
{
return _psl_filename;
return psl_filename;
}
/**
@ -1475,7 +1466,7 @@ int psl_builtin_outdated(void)
{
struct stat st;
if (stat(_psl_filename, &st) == 0 && st.st_mtime > _psl_file_time)
if (stat(psl_filename, &st) == 0 && st.st_mtime > psl_file_time)
return 1;
return 0;
@ -1495,7 +1486,7 @@ int psl_builtin_outdated(void)
*/
const char *psl_dist_filename(void)
{
return _psl_dist_filename;
return psl_dist_filename_string;
}
/**
@ -1552,7 +1543,7 @@ int psl_check_version_number(int version)
}
/* return whether hostname is an IP address or not */
static int _isip(const char *hostname)
static int isip(const char *hostname)
{
struct in_addr addr;
struct in6_addr addr6;
@ -1599,7 +1590,7 @@ int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname,
if (!strcmp(hostname, cookie_domain))
return 1; /* an exact match is acceptable (and pretty common) */
if (_isip(hostname))
if (isip(hostname))
return 0; /* Hostname is an IP address and these must match fully (RFC 6265, 5.1.3) */
cookie_domain_length = strlen(cookie_domain);
@ -1662,7 +1653,7 @@ void psl_free_string(char *str)
*
* Since: 0.4
*/
psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding _UNUSED, const char *locale _UNUSED, char **lower)
psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding UNUSED, const char *locale UNUSED, char **lower)
{
int ret = PSL_ERR_INVALID_ARG;
@ -1670,7 +1661,7 @@ psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding _UNUSED,
return PSL_ERR_INVALID_ARG;
/* shortcut to avoid costly conversion */
if (_str_is_ascii(str)) {
if (str_is_ascii(str)) {
if (lower) {
char *p, *tmp;
@ -1835,12 +1826,12 @@ out:
}
/* if file is newer than the builtin data, insert it reverse sorted by mtime */
static int _insert_file(const char *fname, const char **psl_fname, time_t *psl_mtime, int n)
static int insert_file(const char *fname, const char **psl_fname, time_t *psl_mtime, int n)
{
struct stat st;
int it;
if (fname && *fname && stat(fname, &st) == 0 && st.st_mtime > _psl_file_time) {
if (fname && *fname && stat(fname, &st) == 0 && st.st_mtime > psl_file_time) {
/* add file name and mtime to end of array */
psl_fname[n] = fname;
psl_mtime[n++] = st.st_mtime;
@ -1885,13 +1876,13 @@ psl_ctx_t *psl_latest(const char *fname)
psl_fname[0] = NULL; /* silence gcc 6.2 false warning */
/* create array of PSL files reverse sorted by mtime (latest first) */
ntimes = _insert_file(fname, psl_fname, psl_mtime, 0);
ntimes = _insert_file(_psl_dist_filename, psl_fname, psl_mtime, ntimes);
ntimes = _insert_file(_psl_filename, psl_fname, psl_mtime, ntimes);
ntimes = insert_file(fname, psl_fname, psl_mtime, 0);
ntimes = insert_file(psl_dist_filename_string, psl_fname, psl_mtime, ntimes);
ntimes = insert_file(psl_filename, psl_fname, psl_mtime, ntimes);
/* load PSL data from the latest file, falling back to the second most recent, ... */
for (psl = NULL, it = 0; it < ntimes; it++) {
if (psl_mtime[it] > _psl_file_time)
if (psl_mtime[it] > psl_file_time)
if ((psl = psl_load_file(psl_fname[it])))
break;
}