Merge 85e337b010
into 16bf63a6bf
This commit is contained in:
commit
58ca2395e1
|
@ -73,7 +73,8 @@ typedef enum {
|
|||
PSL_ERR_NO_MEM = -6 /* failed to allocate memory */
|
||||
} psl_error_t;
|
||||
|
||||
typedef struct _psl_ctx_st psl_ctx_t;
|
||||
struct psl_ctx_st;
|
||||
typedef struct psl_ctx_st psl_ctx_t;
|
||||
|
||||
/* frees PSL context */
|
||||
void
|
||||
|
|
|
@ -8,15 +8,15 @@
|
|||
#include <stddef.h>
|
||||
|
||||
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
|
||||
# define _GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
|
||||
# define GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
|
||||
#else
|
||||
# define _GCC_VERSION_AT_LEAST(major, minor) 0
|
||||
# define GCC_VERSION_AT_LEAST(major, minor) 0
|
||||
#endif
|
||||
|
||||
#if _GCC_VERSION_AT_LEAST(4,0)
|
||||
# define _HIDDEN __attribute__ ((visibility ("hidden")))
|
||||
#if GCC_VERSION_AT_LEAST(4,0)
|
||||
# define HIDDEN __attribute__ ((visibility ("hidden")))
|
||||
#else
|
||||
# define _HIDDEN
|
||||
# define HIDDEN
|
||||
#endif
|
||||
|
||||
#define CHECK_LT(a, b) if ((a) >= b) return 0
|
||||
|
@ -203,9 +203,9 @@ static int GetReturnValue(const unsigned char* offset,
|
|||
*/
|
||||
|
||||
/* prototype to skip warning with -Wmissing-prototypes */
|
||||
int _HIDDEN LookupStringInFixedSet(const unsigned char*, size_t,const char*, size_t);
|
||||
int HIDDEN LookupStringInFixedSet(const unsigned char*, size_t,const char*, size_t);
|
||||
|
||||
int _HIDDEN LookupStringInFixedSet(const unsigned char* graph,
|
||||
int HIDDEN LookupStringInFixedSet(const unsigned char* graph,
|
||||
size_t length,
|
||||
const char* key,
|
||||
size_t key_length)
|
||||
|
@ -277,9 +277,9 @@ int _HIDDEN LookupStringInFixedSet(const unsigned char* graph,
|
|||
}
|
||||
|
||||
/* prototype to skip warning with -Wmissing-prototypes */
|
||||
int _HIDDEN GetUtfMode(const unsigned char *graph, size_t length);
|
||||
int HIDDEN GetUtfMode(const unsigned char *graph, size_t length);
|
||||
|
||||
int _HIDDEN GetUtfMode(const unsigned char *graph, size_t length)
|
||||
int HIDDEN GetUtfMode(const unsigned char *graph, size_t length)
|
||||
{
|
||||
return length > 0 && graph[length - 1] < 0x80;
|
||||
}
|
||||
|
|
|
@ -514,12 +514,12 @@ def sha1_file(name):
|
|||
def to_cxx_plus(data, codecs):
|
||||
"""Generates C++ code from a word list plus some variable assignments as needed by libpsl"""
|
||||
text = to_cxx(data, codecs)
|
||||
text += b'static time_t _psl_file_time = %d;\n' % os.stat(psl_input_file).st_mtime
|
||||
text += b'static int _psl_nsuffixes = %d;\n' % psl_nsuffixes
|
||||
text += b'static int _psl_nexceptions = %d;\n' % psl_nexceptions
|
||||
text += b'static int _psl_nwildcards = %d;\n' % psl_nwildcards
|
||||
text += b'static const char _psl_sha1_checksum[] = "%s";\n' % bytes(sha1_file(psl_input_file), **codecs)
|
||||
text += b'static const char _psl_filename[] = "%s";\n' % bytes(psl_input_file, **codecs)
|
||||
text += b'static time_t psl_file_time = %d;\n' % os.stat(psl_input_file).st_mtime
|
||||
text += b'static int psl_nsuffixes = %d;\n' % psl_nsuffixes
|
||||
text += b'static int psl_nexceptions = %d;\n' % psl_nexceptions
|
||||
text += b'static int psl_nwildcards = %d;\n' % psl_nwildcards
|
||||
text += b'static const char psl_sha1_checksum[] = "%s";\n' % bytes(sha1_file(psl_input_file), **codecs)
|
||||
text += b'static const char psl_filename[] = "%s";\n' % bytes(psl_input_file, **codecs)
|
||||
return text
|
||||
|
||||
def words_to_whatever(words, converter, utf_mode, codecs):
|
||||
|
|
299
src/psl.c
299
src/psl.c
|
@ -33,15 +33,15 @@
|
|||
#endif
|
||||
|
||||
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
|
||||
# define _GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
|
||||
# define GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
|
||||
#else
|
||||
# define _GCC_VERSION_AT_LEAST(major, minor) 0
|
||||
# define GCC_VERSION_AT_LEAST(major, minor) 0
|
||||
#endif
|
||||
|
||||
#if _GCC_VERSION_AT_LEAST(2,95)
|
||||
# define _UNUSED __attribute__ ((unused))
|
||||
#if GCC_VERSION_AT_LEAST(2,95)
|
||||
# define UNUSED __attribute__ ((unused))
|
||||
#else
|
||||
# define _UNUSED
|
||||
# define UNUSED
|
||||
#endif
|
||||
|
||||
#if ENABLE_NLS != 0
|
||||
|
@ -64,12 +64,19 @@
|
|||
#include <time.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h> /* for UINT_MAX */
|
||||
#include <stdint.h>
|
||||
#include <langinfo.h>
|
||||
#include <arpa/inet.h>
|
||||
#ifdef HAVE_ALLOCA_H
|
||||
# include <alloca.h>
|
||||
#endif
|
||||
|
||||
/* stat, fstat */
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
|
||||
#ifdef WITH_LIBICU
|
||||
# include <unicode/uversion.h>
|
||||
# include <unicode/ustring.h>
|
||||
|
@ -125,11 +132,11 @@ static char *strndup(const char *s, size_t n)
|
|||
|
||||
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
||||
|
||||
#define _PSL_FLAG_EXCEPTION (1<<0)
|
||||
#define _PSL_FLAG_WILDCARD (1<<1)
|
||||
#define _PSL_FLAG_ICANN (1<<2) /* entry of ICANN section */
|
||||
#define _PSL_FLAG_PRIVATE (1<<3) /* entry of PRIVATE section */
|
||||
#define _PSL_FLAG_PLAIN (1<<4) /* just used for PSL syntax checking */
|
||||
#define PSL_FLAG_EXCEPTION (1<<0)
|
||||
#define PSL_FLAG_WILDCARD (1<<1)
|
||||
#define PSL_FLAG_ICANN (1<<2) /* entry of ICANN section */
|
||||
#define PSL_FLAG_PRIVATE (1<<3) /* entry of PRIVATE section */
|
||||
#define PSL_FLAG_PLAIN (1<<4) /* just used for PSL syntax checking */
|
||||
|
||||
typedef struct {
|
||||
char
|
||||
|
@ -141,21 +148,21 @@ typedef struct {
|
|||
unsigned char
|
||||
nlabels, /* number of labels */
|
||||
flags;
|
||||
} _psl_entry_t;
|
||||
} psl_entry_t;
|
||||
|
||||
/* stripped-down version of the libmget vector routines */
|
||||
typedef struct {
|
||||
int
|
||||
(*cmp)(const _psl_entry_t **, const _psl_entry_t **); /* comparison function */
|
||||
_psl_entry_t
|
||||
(*cmp)(const psl_entry_t **, const psl_entry_t **); /* comparison function */
|
||||
psl_entry_t
|
||||
**entry; /* pointer to array of pointers to elements */
|
||||
int
|
||||
max, /* allocated elements */
|
||||
cur; /* number of elements in use */
|
||||
} _psl_vector_t;
|
||||
} psl_vector_t;
|
||||
|
||||
struct _psl_ctx_st {
|
||||
_psl_vector_t
|
||||
struct psl_ctx_st {
|
||||
psl_vector_t
|
||||
*suffixes;
|
||||
unsigned char
|
||||
*dafsa;
|
||||
|
@ -174,22 +181,22 @@ struct _psl_ctx_st {
|
|||
|
||||
/* references to these PSLs will result in lookups to built-in data */
|
||||
static const psl_ctx_t
|
||||
_builtin_psl;
|
||||
builtin_psl;
|
||||
|
||||
#ifdef PSL_DISTFILE
|
||||
static const char _psl_dist_filename[] = PSL_DISTFILE;
|
||||
static const char psl_dist_filename_string[] = PSL_DISTFILE;
|
||||
#else
|
||||
static const char _psl_dist_filename[] = "";
|
||||
static const char psl_dist_filename_string[] = "";
|
||||
#endif
|
||||
|
||||
static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t **, const _psl_entry_t **))
|
||||
static psl_vector_t *vector_alloc(int max, int (*cmp)(const psl_entry_t **, const psl_entry_t **))
|
||||
{
|
||||
_psl_vector_t *v;
|
||||
psl_vector_t *v;
|
||||
|
||||
if (!(v = calloc(1, sizeof(_psl_vector_t))))
|
||||
if (!(v = calloc(1, sizeof(psl_vector_t))))
|
||||
return NULL;
|
||||
|
||||
if (!(v->entry = malloc(max * sizeof(_psl_entry_t *)))) {
|
||||
if (!(v->entry = malloc(max * sizeof(psl_entry_t *)))) {
|
||||
free(v);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -199,7 +206,7 @@ static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t **, c
|
|||
return v;
|
||||
}
|
||||
|
||||
static void _vector_free(_psl_vector_t **v)
|
||||
static void vector_free(psl_vector_t **v)
|
||||
{
|
||||
if (v && *v) {
|
||||
if ((*v)->entry) {
|
||||
|
@ -214,7 +221,7 @@ static void _vector_free(_psl_vector_t **v)
|
|||
}
|
||||
}
|
||||
|
||||
static _psl_entry_t *_vector_get(const _psl_vector_t *v, int pos)
|
||||
static psl_entry_t *vector_get(const psl_vector_t *v, int pos)
|
||||
{
|
||||
if (pos < 0 || !v || pos >= v->cur) return NULL;
|
||||
|
||||
|
@ -222,7 +229,7 @@ static _psl_entry_t *_vector_get(const _psl_vector_t *v, int pos)
|
|||
}
|
||||
|
||||
/* the entries must be sorted by the vector's cmp function (a binary search is used) */
|
||||
static int _vector_find(const _psl_vector_t *v, const _psl_entry_t *elem)
|
||||
static int vector_find(const psl_vector_t *v, const psl_entry_t *elem)
|
||||
{
|
||||
if (v) {
|
||||
int l, r, m;
|
||||
|
@ -231,7 +238,7 @@ static int _vector_find(const _psl_vector_t *v, const _psl_entry_t *elem)
|
|||
/* binary search for element (exact match) */
|
||||
for (l = 0, r = v->cur - 1; l <= r;) {
|
||||
m = (l + r) / 2;
|
||||
if ((res = v->cmp(&elem, (const _psl_entry_t **)&(v->entry[m]))) > 0) l = m + 1;
|
||||
if ((res = v->cmp(&elem, (const psl_entry_t **)&(v->entry[m]))) > 0) l = m + 1;
|
||||
else if (res < 0) r = m - 1;
|
||||
else return m;
|
||||
}
|
||||
|
@ -240,18 +247,18 @@ static int _vector_find(const _psl_vector_t *v, const _psl_entry_t *elem)
|
|||
return -1; /* not found */
|
||||
}
|
||||
|
||||
static int _vector_add(_psl_vector_t *v, const _psl_entry_t *elem)
|
||||
static int vector_add(psl_vector_t *v, const psl_entry_t *elem)
|
||||
{
|
||||
if (v) {
|
||||
void *elemp;
|
||||
|
||||
if (!(elemp = malloc(sizeof(_psl_entry_t))))
|
||||
if (!(elemp = malloc(sizeof(psl_entry_t))))
|
||||
return -1;
|
||||
|
||||
memcpy(elemp, elem, sizeof(_psl_entry_t));
|
||||
memcpy(elemp, elem, sizeof(psl_entry_t));
|
||||
|
||||
if (v->max == v->cur) {
|
||||
void *m = realloc(v->entry, (v->max *= 2) * sizeof(_psl_entry_t *));
|
||||
void *m = realloc(v->entry, (v->max *= 2) * sizeof(psl_entry_t *));
|
||||
|
||||
if (m)
|
||||
v->entry = m;
|
||||
|
@ -268,14 +275,14 @@ static int _vector_add(_psl_vector_t *v, const _psl_entry_t *elem)
|
|||
return -1;
|
||||
}
|
||||
|
||||
static void _vector_sort(_psl_vector_t *v)
|
||||
static void vector_sort(psl_vector_t *v)
|
||||
{
|
||||
if (v && v->cmp)
|
||||
qsort(v->entry, v->cur, sizeof(_psl_vector_t **), (int(*)(const void *, const void *))v->cmp);
|
||||
qsort(v->entry, v->cur, sizeof(psl_vector_t **), (int(*)(const void *, const void *))v->cmp);
|
||||
}
|
||||
|
||||
/* by this kind of sorting, we can easily see if a domain matches or not */
|
||||
static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
|
||||
static int suffix_compare(const psl_entry_t *s1, const psl_entry_t *s2)
|
||||
{
|
||||
int n;
|
||||
|
||||
|
@ -289,12 +296,12 @@ static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
|
|||
}
|
||||
|
||||
/* needed to sort array of pointers, given to qsort() */
|
||||
static int _suffix_compare_array(const _psl_entry_t **s1, const _psl_entry_t **s2)
|
||||
static int suffix_compare_array(const psl_entry_t **s1, const psl_entry_t **s2)
|
||||
{
|
||||
return _suffix_compare(*s1, *s2);
|
||||
return suffix_compare(*s1, *s2);
|
||||
}
|
||||
|
||||
static int _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
|
||||
static int suffix_init(psl_entry_t *suffix, const char *rule, size_t length)
|
||||
{
|
||||
const char *src;
|
||||
char *dst;
|
||||
|
@ -506,7 +513,7 @@ static enum punycode_status punycode_encode(
|
|||
return punycode_success;
|
||||
}
|
||||
|
||||
static ssize_t _utf8_to_utf32(const char *in, size_t inlen, punycode_uint *out, size_t outlen)
|
||||
static ssize_t utf8_to_utf32(const char *in, size_t inlen, punycode_uint *out, size_t outlen)
|
||||
{
|
||||
size_t n = 0;
|
||||
const unsigned char *s = (void *)in;
|
||||
|
@ -547,7 +554,7 @@ static ssize_t _utf8_to_utf32(const char *in, size_t inlen, punycode_uint *out,
|
|||
return n;
|
||||
}
|
||||
|
||||
static int _mem_is_ascii(const char *s, size_t n)
|
||||
static int mem_is_ascii(const char *s, size_t n)
|
||||
{
|
||||
while (n--)
|
||||
if (*((unsigned char *)s++) >= 128)
|
||||
|
@ -556,7 +563,7 @@ static int _mem_is_ascii(const char *s, size_t n)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int _domain_to_punycode(const char *domain, char *out, size_t outsize)
|
||||
static int domain_to_punycode(const char *domain, char *out, size_t outsize)
|
||||
{
|
||||
size_t outlen = 0, labellen;
|
||||
punycode_uint input[256];
|
||||
|
@ -567,7 +574,7 @@ static int _domain_to_punycode(const char *domain, char *out, size_t outsize)
|
|||
labellen = e ? (size_t) (e - label) : strlen(label);
|
||||
/* printf("s=%s inlen=%zd\n", label, labellen); */
|
||||
|
||||
if (_mem_is_ascii(label, labellen)) {
|
||||
if (mem_is_ascii(label, labellen)) {
|
||||
if (outlen + labellen + (e != NULL) >= outsize)
|
||||
return 1;
|
||||
|
||||
|
@ -580,7 +587,7 @@ static int _domain_to_punycode(const char *domain, char *out, size_t outsize)
|
|||
if (outlen + labellen + (e != NULL) + 4 >= outsize)
|
||||
return 1;
|
||||
|
||||
if ((inputlen = _utf8_to_utf32(label, labellen, input, countof(input))) < 0)
|
||||
if ((inputlen = utf8_to_utf32(label, labellen, input, countof(input))) < 0)
|
||||
return 1;
|
||||
|
||||
memcpy(out + outlen, "xn--", 4);
|
||||
|
@ -602,12 +609,12 @@ static int _domain_to_punycode(const char *domain, char *out, size_t outsize)
|
|||
}
|
||||
#endif
|
||||
|
||||
static int _isspace_ascii(const char c)
|
||||
static int isspace_ascii(const char c)
|
||||
{
|
||||
return c == ' ' || c == '\t' || c == '\r' || c == '\n';
|
||||
}
|
||||
|
||||
static int _str_is_ascii(const char *s)
|
||||
static int str_is_ascii(const char *s)
|
||||
{
|
||||
while (*s && *((unsigned char *)s) < 128) s++;
|
||||
|
||||
|
@ -625,7 +632,7 @@ static int _str_is_ascii(const char *s)
|
|||
* [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
|
||||
* [3] https://curl.haxx.se/mail/lib-2015-06/0143.html
|
||||
*/
|
||||
static int _utf8_is_valid(const char *utf8)
|
||||
static int utf8_is_valid(const char *utf8)
|
||||
{
|
||||
const unsigned char *s = (const unsigned char *) utf8;
|
||||
|
||||
|
@ -652,9 +659,9 @@ static int _utf8_is_valid(const char *utf8)
|
|||
}
|
||||
#endif
|
||||
|
||||
typedef void *_psl_idna_t;
|
||||
typedef void *psl_idna_t;
|
||||
|
||||
static _psl_idna_t *_psl_idna_open(void)
|
||||
static psl_idna_t *psl_idna_open(void)
|
||||
{
|
||||
#if defined(WITH_LIBICU)
|
||||
UErrorCode status = 0;
|
||||
|
@ -663,7 +670,7 @@ static _psl_idna_t *_psl_idna_open(void)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static void _psl_idna_close(_psl_idna_t *idna _UNUSED)
|
||||
static void psl_idna_close(psl_idna_t *idna UNUSED)
|
||||
{
|
||||
#if defined(WITH_LIBICU)
|
||||
if (idna)
|
||||
|
@ -671,7 +678,7 @@ static void _psl_idna_close(_psl_idna_t *idna _UNUSED)
|
|||
#endif
|
||||
}
|
||||
|
||||
static int _psl_idna_toASCII(_psl_idna_t *idna _UNUSED, const char *utf8, char **ascii)
|
||||
static int psl_idna_toASCII(psl_idna_t *idna UNUSED, const char *utf8, char **ascii)
|
||||
{
|
||||
int ret = -1;
|
||||
|
||||
|
@ -767,7 +774,7 @@ cleanup:
|
|||
#elif defined(WITH_LIBIDN)
|
||||
int rc;
|
||||
|
||||
if (!_utf8_is_valid(utf8)) {
|
||||
if (!utf8_is_valid(utf8)) {
|
||||
/* fprintf(_(stderr, "Invalid UTF-8 sequence not converted: '%s'\n"), utf8); */
|
||||
return -1;
|
||||
}
|
||||
|
@ -781,7 +788,7 @@ cleanup:
|
|||
#else
|
||||
char lookupname[128];
|
||||
|
||||
if (_domain_to_punycode(utf8, lookupname, sizeof(lookupname)) == 0) {
|
||||
if (domain_to_punycode(utf8, lookupname, sizeof(lookupname)) == 0) {
|
||||
if (ascii)
|
||||
if ((*ascii = strdup(lookupname)))
|
||||
ret = 0;
|
||||
|
@ -791,21 +798,21 @@ cleanup:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void _add_punycode_if_needed(_psl_idna_t *idna, _psl_vector_t *v, _psl_entry_t *e)
|
||||
static void add_punycode_if_needed(psl_idna_t *idna, psl_vector_t *v, psl_entry_t *e)
|
||||
{
|
||||
char *lookupname;
|
||||
|
||||
if (_str_is_ascii(e->label_buf))
|
||||
if (str_is_ascii(e->label_buf))
|
||||
return;
|
||||
|
||||
if (_psl_idna_toASCII(idna, e->label_buf, &lookupname) == 0) {
|
||||
if (psl_idna_toASCII(idna, e->label_buf, &lookupname) == 0) {
|
||||
if (strcmp(e->label_buf, lookupname)) {
|
||||
_psl_entry_t suffix, *suffixp;
|
||||
psl_entry_t suffix, *suffixp;
|
||||
|
||||
/* fprintf(stderr, "toASCII '%s' -> '%s'\n", e->label_buf, lookupname); */
|
||||
if (_suffix_init(&suffix, lookupname, strlen(lookupname)) == 0) {
|
||||
if (suffix_init(&suffix, lookupname, strlen(lookupname)) == 0) {
|
||||
suffix.flags = e->flags;
|
||||
if ((suffixp = _vector_get(v, _vector_add(v, &suffix))))
|
||||
if ((suffixp = vector_get(v, vector_add(v, &suffix))))
|
||||
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
||||
}
|
||||
} /* else ignore */
|
||||
|
@ -818,9 +825,9 @@ static void _add_punycode_if_needed(_psl_idna_t *idna, _psl_vector_t *v, _psl_en
|
|||
int LookupStringInFixedSet(const unsigned char* graph, size_t length, const char* key, size_t key_length);
|
||||
int GetUtfMode(const unsigned char *graph, size_t length);
|
||||
|
||||
static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int type)
|
||||
static int psl_is_public_suffix_internal(const psl_ctx_t *psl, const char *domain, int type)
|
||||
{
|
||||
_psl_entry_t suffix;
|
||||
psl_entry_t suffix;
|
||||
const char *p;
|
||||
char *punycode = NULL;
|
||||
int need_conversion = 0;
|
||||
|
@ -845,18 +852,18 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int t
|
|||
return 1;
|
||||
}
|
||||
|
||||
if (psl->utf8 || psl == &_builtin_psl)
|
||||
if (psl->utf8 || psl == &builtin_psl)
|
||||
need_conversion = 0;
|
||||
|
||||
#if defined(WITH_LIBIDN) || defined(WITH_LIBIDN2) || defined(WITH_LIBICU)
|
||||
if (psl == &_builtin_psl)
|
||||
if (psl == &builtin_psl)
|
||||
need_conversion = 0;
|
||||
#endif
|
||||
|
||||
if (need_conversion) {
|
||||
_psl_idna_t *idna = _psl_idna_open();
|
||||
psl_idna_t *idna = psl_idna_open();
|
||||
|
||||
if (_psl_idna_toASCII(idna, domain, &punycode) == 0) {
|
||||
if (psl_idna_toASCII(idna, domain, &punycode) == 0) {
|
||||
suffix.label = punycode;
|
||||
suffix.length = strlen(punycode);
|
||||
} else {
|
||||
|
@ -866,24 +873,24 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int t
|
|||
suffix.length = p - suffix.label;
|
||||
}
|
||||
|
||||
_psl_idna_close(idna);
|
||||
psl_idna_close(idna);
|
||||
} else {
|
||||
suffix.label = domain;
|
||||
suffix.length = p - suffix.label;
|
||||
}
|
||||
|
||||
if (psl == &_builtin_psl || psl->dafsa) {
|
||||
size_t dafsa_size = psl == &_builtin_psl ? sizeof(kDafsa) : psl->dafsa_size;
|
||||
const unsigned char *dafsa = psl == &_builtin_psl ? kDafsa : psl->dafsa;
|
||||
if (psl == &builtin_psl || psl->dafsa) {
|
||||
size_t dafsa_size = psl == &builtin_psl ? sizeof(kDafsa) : psl->dafsa_size;
|
||||
const unsigned char *dafsa = psl == &builtin_psl ? kDafsa : psl->dafsa;
|
||||
int rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
|
||||
if (rc != -1) {
|
||||
/* check for correct rule type */
|
||||
if (type == PSL_TYPE_ICANN && !(rc & _PSL_FLAG_ICANN))
|
||||
if (type == PSL_TYPE_ICANN && !(rc & PSL_FLAG_ICANN))
|
||||
goto suffix_no;
|
||||
else if (type == PSL_TYPE_PRIVATE && !(rc & _PSL_FLAG_PRIVATE))
|
||||
else if (type == PSL_TYPE_PRIVATE && !(rc & PSL_FLAG_PRIVATE))
|
||||
goto suffix_no;
|
||||
|
||||
if (rc & _PSL_FLAG_EXCEPTION)
|
||||
if (rc & PSL_FLAG_EXCEPTION)
|
||||
goto suffix_no;
|
||||
|
||||
/* wildcard *.foo.bar implicitly makes foo.bar a public suffix */
|
||||
|
@ -898,31 +905,31 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int t
|
|||
rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
|
||||
if (rc != -1) {
|
||||
/* check for correct rule type */
|
||||
if (type == PSL_TYPE_ICANN && !(rc & _PSL_FLAG_ICANN))
|
||||
if (type == PSL_TYPE_ICANN && !(rc & PSL_FLAG_ICANN))
|
||||
goto suffix_no;
|
||||
else if (type == PSL_TYPE_PRIVATE && !(rc & _PSL_FLAG_PRIVATE))
|
||||
else if (type == PSL_TYPE_PRIVATE && !(rc & PSL_FLAG_PRIVATE))
|
||||
goto suffix_no;
|
||||
|
||||
if (rc & _PSL_FLAG_WILDCARD)
|
||||
if (rc & PSL_FLAG_WILDCARD)
|
||||
goto suffix_yes;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
_psl_entry_t *rule = _vector_get(psl->suffixes, 0);
|
||||
psl_entry_t *rule = vector_get(psl->suffixes, 0);
|
||||
|
||||
if (!rule || rule->nlabels < suffix.nlabels - 1)
|
||||
goto suffix_no;
|
||||
|
||||
rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));
|
||||
rule = vector_get(psl->suffixes, vector_find(psl->suffixes, &suffix));
|
||||
|
||||
if (rule) {
|
||||
/* check for correct rule type */
|
||||
if (type == PSL_TYPE_ICANN && !(rule->flags & _PSL_FLAG_ICANN))
|
||||
if (type == PSL_TYPE_ICANN && !(rule->flags & PSL_FLAG_ICANN))
|
||||
goto suffix_no;
|
||||
else if (type == PSL_TYPE_PRIVATE && !(rule->flags & _PSL_FLAG_PRIVATE))
|
||||
else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PSL_FLAG_PRIVATE))
|
||||
goto suffix_no;
|
||||
|
||||
if (rule->flags & _PSL_FLAG_EXCEPTION)
|
||||
if (rule->flags & PSL_FLAG_EXCEPTION)
|
||||
goto suffix_no;
|
||||
|
||||
/* wildcard *.foo.bar implicitly makes foo.bar a public suffix */
|
||||
|
@ -937,16 +944,16 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int t
|
|||
suffix.length = strlen(suffix.label);
|
||||
suffix.nlabels--;
|
||||
|
||||
rule = _vector_get(psl->suffixes, (pos = _vector_find(psl->suffixes, &suffix)));
|
||||
rule = vector_get(psl->suffixes, (pos = vector_find(psl->suffixes, &suffix)));
|
||||
|
||||
if (rule) {
|
||||
/* check for correct rule type */
|
||||
if (type == PSL_TYPE_ICANN && !(rule->flags & _PSL_FLAG_ICANN))
|
||||
if (type == PSL_TYPE_ICANN && !(rule->flags & PSL_FLAG_ICANN))
|
||||
goto suffix_no;
|
||||
else if (type == PSL_TYPE_PRIVATE && !(rule->flags & _PSL_FLAG_PRIVATE))
|
||||
else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PSL_FLAG_PRIVATE))
|
||||
goto suffix_no;
|
||||
|
||||
if (rule->flags & _PSL_FLAG_WILDCARD)
|
||||
if (rule->flags & PSL_FLAG_WILDCARD)
|
||||
goto suffix_yes;
|
||||
}
|
||||
}
|
||||
|
@ -989,7 +996,7 @@ int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
|||
if (!psl || !domain)
|
||||
return 1;
|
||||
|
||||
return _psl_is_public_suffix(psl, domain, PSL_TYPE_ANY);
|
||||
return psl_is_public_suffix_internal(psl, domain, PSL_TYPE_ANY);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1020,7 +1027,7 @@ int psl_is_public_suffix2(const psl_ctx_t *psl, const char *domain, int type)
|
|||
if (!psl || !domain)
|
||||
return 1;
|
||||
|
||||
return _psl_is_public_suffix(psl, domain, type);
|
||||
return psl_is_public_suffix_internal(psl, domain, type);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1053,7 +1060,7 @@ const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
|
|||
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
|
||||
*/
|
||||
|
||||
while (!_psl_is_public_suffix(psl, domain, 0)) {
|
||||
while (!psl_is_public_suffix_internal(psl, domain, 0)) {
|
||||
if ((domain = strchr(domain, '.')))
|
||||
domain++;
|
||||
else
|
||||
|
@ -1095,12 +1102,12 @@ const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
|
|||
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
|
||||
*/
|
||||
|
||||
while (!_psl_is_public_suffix(psl, domain, 0)) {
|
||||
while (!psl_is_public_suffix_internal(psl, domain, 0)) {
|
||||
if ((p = strchr(domain, '.'))) {
|
||||
regdom = domain;
|
||||
domain = p + 1;
|
||||
} else
|
||||
break; /* prevent endless loop if psl_is_public_suffix() is broken. */
|
||||
break; /* prevent endless loop if psl_is_public_suffix_internal() is broken. */
|
||||
}
|
||||
|
||||
return regdom;
|
||||
|
@ -1151,10 +1158,10 @@ psl_ctx_t *psl_load_file(const char *fname)
|
|||
psl_ctx_t *psl_load_fp(FILE *fp)
|
||||
{
|
||||
psl_ctx_t *psl;
|
||||
_psl_entry_t suffix, *suffixp;
|
||||
psl_entry_t suffix, *suffixp;
|
||||
char buf[256], *linep, *p;
|
||||
int type = 0, is_dafsa;
|
||||
_psl_idna_t *idna;
|
||||
psl_idna_t *idna;
|
||||
|
||||
if (!fp)
|
||||
return NULL;
|
||||
|
@ -1170,73 +1177,57 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
|||
|
||||
if (is_dafsa) {
|
||||
void *m;
|
||||
size_t size = 65536, n, len = 0;
|
||||
int version = atoi(buf + 11);
|
||||
struct stat st;
|
||||
|
||||
if (version != 0)
|
||||
goto fail;
|
||||
|
||||
if (!(psl->dafsa = malloc(size)))
|
||||
goto fail;
|
||||
|
||||
memcpy(psl->dafsa, buf, len);
|
||||
|
||||
while ((n = fread(psl->dafsa + len, 1, size - len, fp)) > 0) {
|
||||
len += n;
|
||||
if (len >= size) {
|
||||
if (!(m = realloc(psl->dafsa, size *= 2)))
|
||||
goto fail;
|
||||
psl->dafsa = m;
|
||||
}
|
||||
}
|
||||
|
||||
/* release unused memory */
|
||||
if ((m = realloc(psl->dafsa, len)))
|
||||
psl->dafsa = m;
|
||||
else if (!len)
|
||||
psl->dafsa = NULL; /* realloc() just free'd psl->dafsa */
|
||||
|
||||
psl->dafsa_size = len;
|
||||
psl->utf8 = !!GetUtfMode(psl->dafsa, len);
|
||||
if (fstat(fileno(fp), &st) != 0 ||
|
||||
(uintmax_t)st.st_size >= (uintmax_t)SIZE_MAX ||
|
||||
(psl->dafsa = malloc(st.st_size)) == NULL ||
|
||||
fread(psl->dafsa, psl->dafsa_size = st.st_size - 16, 1, fp) != 1)
|
||||
goto fail;
|
||||
|
||||
psl->utf8 = !!GetUtfMode(psl->dafsa, psl->dafsa_size);
|
||||
return psl;
|
||||
}
|
||||
|
||||
idna = _psl_idna_open();
|
||||
idna = psl_idna_open();
|
||||
|
||||
/*
|
||||
* as of 02.11.2012, the list at https://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
||||
* as of 19.02.2014, the list at https://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
||||
*/
|
||||
psl->suffixes = _vector_alloc(8*1024, _suffix_compare_array);
|
||||
psl->suffixes = vector_alloc(8*1024, suffix_compare_array);
|
||||
psl->utf8 = 1; /* we put UTF-8 and punycode rules in the lookup vector */
|
||||
|
||||
do {
|
||||
while (_isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
|
||||
while (isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
|
||||
if (!*linep) continue; /* skip empty lines */
|
||||
|
||||
if (*linep == '/' && linep[1] == '/') {
|
||||
if (!type) {
|
||||
if (strstr(linep + 2, "===BEGIN ICANN DOMAINS==="))
|
||||
type = _PSL_FLAG_ICANN;
|
||||
type = PSL_FLAG_ICANN;
|
||||
else if (!type && strstr(linep + 2, "===BEGIN PRIVATE DOMAINS==="))
|
||||
type = _PSL_FLAG_PRIVATE;
|
||||
type = PSL_FLAG_PRIVATE;
|
||||
}
|
||||
else if (type == _PSL_FLAG_ICANN && strstr(linep + 2, "===END ICANN DOMAINS==="))
|
||||
else if (type == PSL_FLAG_ICANN && strstr(linep + 2, "===END ICANN DOMAINS==="))
|
||||
type = 0;
|
||||
else if (type == _PSL_FLAG_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS==="))
|
||||
else if (type == PSL_FLAG_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS==="))
|
||||
type = 0;
|
||||
|
||||
continue; /* skip comments */
|
||||
}
|
||||
|
||||
/* parse suffix rule */
|
||||
for (p = linep; *linep && !_isspace_ascii(*linep);) linep++;
|
||||
for (p = linep; *linep && !isspace_ascii(*linep);) linep++;
|
||||
*linep = 0;
|
||||
|
||||
if (*p == '!') {
|
||||
p++;
|
||||
suffix.flags = _PSL_FLAG_EXCEPTION | type;
|
||||
suffix.flags = PSL_FLAG_EXCEPTION | type;
|
||||
psl->nexceptions++;
|
||||
} else if (*p == '*') {
|
||||
if (*++p != '.') {
|
||||
|
@ -1245,20 +1236,20 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
|||
}
|
||||
p++;
|
||||
/* wildcard *.foo.bar implicitly makes foo.bar a public suffix */
|
||||
suffix.flags = _PSL_FLAG_WILDCARD | _PSL_FLAG_PLAIN | type;
|
||||
suffix.flags = PSL_FLAG_WILDCARD | PSL_FLAG_PLAIN | type;
|
||||
psl->nwildcards++;
|
||||
psl->nsuffixes++;
|
||||
} else {
|
||||
if (!strchr(p, '.'))
|
||||
continue; /* we do not need an explicit plain TLD rule, already covered by implicit '*' rule */
|
||||
suffix.flags = _PSL_FLAG_PLAIN | type;
|
||||
suffix.flags = PSL_FLAG_PLAIN | type;
|
||||
psl->nsuffixes++;
|
||||
}
|
||||
|
||||
if (_suffix_init(&suffix, p, linep - p) == 0) {
|
||||
if (suffix_init(&suffix, p, linep - p) == 0) {
|
||||
int index;
|
||||
|
||||
if ((index = _vector_find(psl->suffixes, &suffix)) >= 0) {
|
||||
if ((index = vector_find(psl->suffixes, &suffix)) >= 0) {
|
||||
/* Found existing entry:
|
||||
* Combination of exception and plain rule is ambiguous
|
||||
* !foo.bar
|
||||
|
@ -1271,23 +1262,23 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
|||
* We do not check here, let's do it later.
|
||||
*/
|
||||
|
||||
suffixp = _vector_get(psl->suffixes, index);
|
||||
suffixp = vector_get(psl->suffixes, index);
|
||||
suffixp->flags |= suffix.flags;
|
||||
} else {
|
||||
/* New entry */
|
||||
suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix));
|
||||
suffixp = vector_get(psl->suffixes, vector_add(psl->suffixes, &suffix));
|
||||
}
|
||||
|
||||
if (suffixp) {
|
||||
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
||||
_add_punycode_if_needed(idna, psl->suffixes, suffixp);
|
||||
add_punycode_if_needed(idna, psl->suffixes, suffixp);
|
||||
}
|
||||
}
|
||||
} while ((linep = fgets(buf, sizeof(buf), fp)));
|
||||
|
||||
_vector_sort(psl->suffixes);
|
||||
vector_sort(psl->suffixes);
|
||||
|
||||
_psl_idna_close(idna);
|
||||
psl_idna_close(idna);
|
||||
|
||||
return psl;
|
||||
|
||||
|
@ -1307,8 +1298,8 @@ fail:
|
|||
*/
|
||||
void psl_free(psl_ctx_t *psl)
|
||||
{
|
||||
if (psl && psl != &_builtin_psl) {
|
||||
_vector_free(&psl->suffixes);
|
||||
if (psl && psl != &builtin_psl) {
|
||||
vector_free(&psl->suffixes);
|
||||
free(psl->dafsa);
|
||||
free(psl);
|
||||
}
|
||||
|
@ -1333,7 +1324,7 @@ void psl_free(psl_ctx_t *psl)
|
|||
const psl_ctx_t *psl_builtin(void)
|
||||
{
|
||||
#if defined(BUILTIN_GENERATOR_LIBICU) || defined(BUILTIN_GENERATOR_LIBIDN2) || defined(BUILTIN_GENERATOR_LIBIDN)
|
||||
return &_builtin_psl;
|
||||
return &builtin_psl;
|
||||
#else
|
||||
return NULL;
|
||||
#endif
|
||||
|
@ -1355,8 +1346,8 @@ const psl_ctx_t *psl_builtin(void)
|
|||
*/
|
||||
int psl_suffix_count(const psl_ctx_t *psl)
|
||||
{
|
||||
if (psl == &_builtin_psl)
|
||||
return _psl_nsuffixes;
|
||||
if (psl == &builtin_psl)
|
||||
return psl_nsuffixes;
|
||||
else if (psl)
|
||||
return psl->dafsa ? -1 : psl->nsuffixes;
|
||||
else
|
||||
|
@ -1378,8 +1369,8 @@ int psl_suffix_count(const psl_ctx_t *psl)
|
|||
*/
|
||||
int psl_suffix_exception_count(const psl_ctx_t *psl)
|
||||
{
|
||||
if (psl == &_builtin_psl)
|
||||
return _psl_nexceptions;
|
||||
if (psl == &builtin_psl)
|
||||
return psl_nexceptions;
|
||||
else if (psl)
|
||||
return psl->dafsa ? -1 : psl->nexceptions;
|
||||
else
|
||||
|
@ -1401,8 +1392,8 @@ int psl_suffix_exception_count(const psl_ctx_t *psl)
|
|||
*/
|
||||
int psl_suffix_wildcard_count(const psl_ctx_t *psl)
|
||||
{
|
||||
if (psl == &_builtin_psl)
|
||||
return _psl_nwildcards;
|
||||
if (psl == &builtin_psl)
|
||||
return psl_nwildcards;
|
||||
else if (psl)
|
||||
return psl->dafsa ? -1 : psl->nwildcards;
|
||||
else
|
||||
|
@ -1422,7 +1413,7 @@ int psl_suffix_wildcard_count(const psl_ctx_t *psl)
|
|||
*/
|
||||
time_t psl_builtin_file_time(void)
|
||||
{
|
||||
return _psl_file_time;
|
||||
return psl_file_time;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1439,7 +1430,7 @@ time_t psl_builtin_file_time(void)
|
|||
*/
|
||||
const char *psl_builtin_sha1sum(void)
|
||||
{
|
||||
return _psl_sha1_checksum;
|
||||
return psl_sha1_checksum;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1455,7 +1446,7 @@ const char *psl_builtin_sha1sum(void)
|
|||
*/
|
||||
const char *psl_builtin_filename(void)
|
||||
{
|
||||
return _psl_filename;
|
||||
return psl_filename;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1475,7 +1466,7 @@ int psl_builtin_outdated(void)
|
|||
{
|
||||
struct stat st;
|
||||
|
||||
if (stat(_psl_filename, &st) == 0 && st.st_mtime > _psl_file_time)
|
||||
if (stat(psl_filename, &st) == 0 && st.st_mtime > psl_file_time)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
|
@ -1495,7 +1486,7 @@ int psl_builtin_outdated(void)
|
|||
*/
|
||||
const char *psl_dist_filename(void)
|
||||
{
|
||||
return _psl_dist_filename;
|
||||
return psl_dist_filename_string;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1552,7 +1543,7 @@ int psl_check_version_number(int version)
|
|||
}
|
||||
|
||||
/* return whether hostname is an IP address or not */
|
||||
static int _isip(const char *hostname)
|
||||
static int isip(const char *hostname)
|
||||
{
|
||||
struct in_addr addr;
|
||||
struct in6_addr addr6;
|
||||
|
@ -1599,7 +1590,7 @@ int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname,
|
|||
if (!strcmp(hostname, cookie_domain))
|
||||
return 1; /* an exact match is acceptable (and pretty common) */
|
||||
|
||||
if (_isip(hostname))
|
||||
if (isip(hostname))
|
||||
return 0; /* Hostname is an IP address and these must match fully (RFC 6265, 5.1.3) */
|
||||
|
||||
cookie_domain_length = strlen(cookie_domain);
|
||||
|
@ -1662,7 +1653,7 @@ void psl_free_string(char *str)
|
|||
*
|
||||
* Since: 0.4
|
||||
*/
|
||||
psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding _UNUSED, const char *locale _UNUSED, char **lower)
|
||||
psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding UNUSED, const char *locale UNUSED, char **lower)
|
||||
{
|
||||
int ret = PSL_ERR_INVALID_ARG;
|
||||
|
||||
|
@ -1670,7 +1661,7 @@ psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding _UNUSED,
|
|||
return PSL_ERR_INVALID_ARG;
|
||||
|
||||
/* shortcut to avoid costly conversion */
|
||||
if (_str_is_ascii(str)) {
|
||||
if (str_is_ascii(str)) {
|
||||
if (lower) {
|
||||
char *p, *tmp;
|
||||
|
||||
|
@ -1835,12 +1826,12 @@ out:
|
|||
}
|
||||
|
||||
/* if file is newer than the builtin data, insert it reverse sorted by mtime */
|
||||
static int _insert_file(const char *fname, const char **psl_fname, time_t *psl_mtime, int n)
|
||||
static int insert_file(const char *fname, const char **psl_fname, time_t *psl_mtime, int n)
|
||||
{
|
||||
struct stat st;
|
||||
int it;
|
||||
|
||||
if (fname && *fname && stat(fname, &st) == 0 && st.st_mtime > _psl_file_time) {
|
||||
if (fname && *fname && stat(fname, &st) == 0 && st.st_mtime > psl_file_time) {
|
||||
/* add file name and mtime to end of array */
|
||||
psl_fname[n] = fname;
|
||||
psl_mtime[n++] = st.st_mtime;
|
||||
|
@ -1885,13 +1876,13 @@ psl_ctx_t *psl_latest(const char *fname)
|
|||
psl_fname[0] = NULL; /* silence gcc 6.2 false warning */
|
||||
|
||||
/* create array of PSL files reverse sorted by mtime (latest first) */
|
||||
ntimes = _insert_file(fname, psl_fname, psl_mtime, 0);
|
||||
ntimes = _insert_file(_psl_dist_filename, psl_fname, psl_mtime, ntimes);
|
||||
ntimes = _insert_file(_psl_filename, psl_fname, psl_mtime, ntimes);
|
||||
ntimes = insert_file(fname, psl_fname, psl_mtime, 0);
|
||||
ntimes = insert_file(psl_dist_filename_string, psl_fname, psl_mtime, ntimes);
|
||||
ntimes = insert_file(psl_filename, psl_fname, psl_mtime, ntimes);
|
||||
|
||||
/* load PSL data from the latest file, falling back to the second recent, ... */
|
||||
for (psl = NULL, it = 0; it < ntimes; it++) {
|
||||
if (psl_mtime[it] > _psl_file_time)
|
||||
if (psl_mtime[it] > psl_file_time)
|
||||
if ((psl = psl_load_file(psl_fname[it])))
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue