2014-03-20 22:43:04 +01:00
|
|
|
/*
|
|
|
|
* Copyright(c) 2014 Tim Ruehsen
|
|
|
|
*
|
2014-03-24 17:29:56 +01:00
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
|
|
* DEALINGS IN THE SOFTWARE.
|
|
|
|
*
|
|
|
|
* This file is part of libpsl.
|
2014-03-20 22:43:04 +01:00
|
|
|
*
|
2014-03-24 17:29:56 +01:00
|
|
|
* Public Suffix List routines
|
2014-03-20 22:43:04 +01:00
|
|
|
*
|
|
|
|
* Changelog
|
|
|
|
* 19.03.2014 Tim Ruehsen created from libmget/cookie.c
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2014-05-12 12:20:59 +02:00
|
|
|
/* need _GNU_SOURCE for qsort_r() */
|
2014-03-21 11:05:09 +01:00
|
|
|
#ifndef _GNU_SOURCE
|
|
|
|
# define _GNU_SOURCE
|
|
|
|
#endif
|
2014-03-20 22:43:04 +01:00
|
|
|
|
|
|
|
#if HAVE_CONFIG_H
|
|
|
|
# include <config.h>
|
|
|
|
#endif
|
|
|
|
|
2014-06-29 22:56:33 +02:00
|
|
|
/* if this file is included by psl2c, redefine to use requested library for builtin data */
|
|
|
|
#ifdef _LIBPSL_INCLUDED_BY_PSL2C
|
|
|
|
# undef WITH_LIBICU
|
|
|
|
# undef WITH_LIBIDN2
|
|
|
|
# undef WITH_LIBIDN
|
|
|
|
# ifdef BUILTIN_GENERATOR_LIBICU
|
|
|
|
# define WITH_LIBICU
|
|
|
|
# elif defined(BUILTIN_GENERATOR_LIBIDN2)
|
|
|
|
# define WITH_LIBIDN2
|
|
|
|
# elif defined(BUILTIN_GENERATOR_LIBIDN)
|
|
|
|
# define WITH_LIBIDN
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
2014-04-02 10:26:40 +02:00
|
|
|
#if ENABLE_NLS != 0
|
|
|
|
# include <libintl.h>
|
|
|
|
# define _(STRING) gettext(STRING)
|
|
|
|
#else
|
|
|
|
# define _(STRING) STRING
|
|
|
|
# define ngettext(STRING1,STRING2,N) STRING2
|
|
|
|
#endif
|
|
|
|
|
2014-03-20 22:43:04 +01:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <ctype.h>
|
2014-06-30 13:21:16 +02:00
|
|
|
#include <errno.h>
|
|
|
|
#include <langinfo.h>
|
2014-08-19 17:46:36 +02:00
|
|
|
#include <arpa/inet.h>
|
2014-10-28 15:41:35 +01:00
|
|
|
#ifdef HAVE_ALLOCA_H
|
|
|
|
# include <alloca.h>
|
|
|
|
#endif
|
2014-06-17 17:14:02 +02:00
|
|
|
|
|
|
|
#ifdef WITH_LIBICU
|
|
|
|
# include <unicode/uversion.h>
|
|
|
|
# include <unicode/ustring.h>
|
|
|
|
# include <unicode/uidna.h>
|
|
|
|
# include <unicode/ucnv.h>
|
2014-06-29 22:56:33 +02:00
|
|
|
#elif defined(WITH_LIBIDN2)
|
2014-06-30 13:21:16 +02:00
|
|
|
# include <iconv.h>
|
2014-06-29 22:56:33 +02:00
|
|
|
# include <idn2.h>
|
|
|
|
# include <unicase.h>
|
|
|
|
# include <unistr.h>
|
|
|
|
#elif defined(WITH_LIBIDN)
|
2014-06-30 13:21:16 +02:00
|
|
|
# include <iconv.h>
|
2014-06-29 22:56:33 +02:00
|
|
|
# include <stringprep.h>
|
|
|
|
# include <idna.h>
|
2014-06-30 13:21:16 +02:00
|
|
|
# include <unicase.h>
|
|
|
|
# include <unistr.h>
|
2014-06-17 17:14:02 +02:00
|
|
|
#endif
|
2014-03-20 22:43:04 +01:00
|
|
|
|
|
|
|
#include <libpsl.h>
|
|
|
|
|
2014-06-17 17:14:02 +02:00
|
|
|
/* number of elements within an array */
|
|
|
|
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
|
|
|
|
2014-04-02 10:26:40 +02:00
|
|
|
/**
|
|
|
|
* SECTION:libpsl
|
|
|
|
* @short_description: Public Suffix List library functions
|
|
|
|
* @title: libpsl
|
|
|
|
* @stability: unstable
|
|
|
|
* @include: libpsl.h
|
|
|
|
*
|
2014-04-11 16:30:20 +02:00
|
|
|
* [Public Suffix List](http://publicsuffix.org/) library functions.
|
2014-04-02 10:26:40 +02:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2014-03-20 22:43:04 +01:00
|
|
|
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
|
|
|
|
|
|
|
/*
 * One rule of the Public Suffix List.
 * The member order matters to any positional initializer of this type,
 * so it must not be changed.
 */
typedef struct {
	char label_buf[48];     /* storage for the rule text, NUL-terminated */
	const char *label;      /* rule text used for comparisons; the second operand of
	                         * _suffix_compare() falls back to label_buf when this is NULL */
	unsigned short length;  /* length of the rule text in bytes */
	unsigned char nlabels;  /* number of labels */
	unsigned char wildcard; /* this is a wildcard rule (e.g. *.sapporo.jp) */
} _psl_entry_t;
|
|
|
|
|
2014-05-12 12:20:59 +02:00
|
|
|
/* stripped down version libmget vector routines */
|
2014-03-20 22:43:04 +01:00
|
|
|
typedef struct {
|
|
|
|
int
|
2014-08-22 17:44:48 +02:00
|
|
|
(*cmp)(const _psl_entry_t **, const _psl_entry_t **); /* comparison function */
|
2014-03-20 22:43:04 +01:00
|
|
|
_psl_entry_t
|
2014-05-12 12:20:59 +02:00
|
|
|
**entry; /* pointer to array of pointers to elements */
|
2014-03-20 22:43:04 +01:00
|
|
|
int
|
2014-05-12 12:20:59 +02:00
|
|
|
max, /* allocated elements */
|
|
|
|
cur; /* number of elements in use */
|
2014-03-20 22:43:04 +01:00
|
|
|
} _psl_vector_t;
|
|
|
|
|
|
|
|
struct _psl_ctx_st {
|
|
|
|
_psl_vector_t
|
|
|
|
*suffixes,
|
|
|
|
*suffix_exceptions;
|
|
|
|
};
|
|
|
|
|
2014-05-12 12:20:59 +02:00
|
|
|
/* include the PSL data compiled by 'psl2c' */
|
2014-06-19 12:06:54 +02:00
|
|
|
#ifndef _LIBPSL_INCLUDED_BY_PSL2C
#  include "suffixes.c"
#else
   /* if this source file is included by psl2c.c, provide empty builtin data */
   static _psl_entry_t suffixes[1];
   static _psl_entry_t suffix_exceptions[1];
   static time_t _psl_file_time;
   static time_t _psl_compile_time;
   static const char _psl_sha1_checksum[] = "";
   static const char _psl_filename[] = "";
#endif

/*
 * References to this PSL will result in lookups to built-in data.
 * Only the address of this object is used, as a marker for "use the builtin tables".
 */
static const psl_ctx_t _builtin_psl;
|
2014-03-24 17:29:56 +01:00
|
|
|
|
2014-08-22 17:44:48 +02:00
|
|
|
static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t **, const _psl_entry_t **))
|
2014-03-20 22:43:04 +01:00
|
|
|
{
|
|
|
|
_psl_vector_t *v;
|
|
|
|
|
|
|
|
if (!(v = calloc(1, sizeof(_psl_vector_t))))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (!(v->entry = malloc(max * sizeof(_psl_entry_t *)))) {
|
|
|
|
free(v);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
v->max = max;
|
|
|
|
v->cmp = cmp;
|
|
|
|
return v;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Release a vector, all entries it owns and the vector object itself.
 * '*v' is reset to NULL afterwards so the caller is not left with a
 * dangling pointer (and a second call becomes a harmless no-op).
 * Passing NULL or a pointer to NULL is allowed.
 */
static void _vector_free(_psl_vector_t **v)
{
	_psl_vector_t *vec;
	int it;

	if (!v || !*v)
		return;

	vec = *v;

	if (vec->entry) {
		/* each entry was allocated separately by _vector_add() */
		for (it = 0; it < vec->cur; it++)
			free(vec->entry[it]);

		free(vec->entry);
	}

	free(vec);
	*v = NULL;
}
|
|
|
|
|
|
|
|
static _psl_entry_t *_vector_get(const _psl_vector_t *v, int pos)
|
|
|
|
{
|
|
|
|
if (pos < 0 || !v || pos >= v->cur) return NULL;
|
|
|
|
|
|
|
|
return v->entry[pos];
|
|
|
|
}
|
|
|
|
|
2014-05-12 12:20:59 +02:00
|
|
|
/* the entries must be sorted by the vector's comparison function (see _vector_sort()) */
|
2014-03-20 22:43:04 +01:00
|
|
|
static int _vector_find(const _psl_vector_t *v, const _psl_entry_t *elem)
|
|
|
|
{
|
|
|
|
if (v) {
|
|
|
|
int l, r, m;
|
|
|
|
int res;
|
|
|
|
|
2014-05-12 12:20:59 +02:00
|
|
|
/* binary search for element (exact match) */
|
2014-03-20 22:43:04 +01:00
|
|
|
for (l = 0, r = v->cur - 1; l <= r;) {
|
|
|
|
m = (l + r) / 2;
|
2014-08-22 17:44:48 +02:00
|
|
|
if ((res = v->cmp(&elem, (const _psl_entry_t **)&(v->entry[m]))) > 0) l = m + 1;
|
2014-03-20 22:43:04 +01:00
|
|
|
else if (res < 0) r = m - 1;
|
|
|
|
else return m;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-05-12 12:20:59 +02:00
|
|
|
return -1; /* not found */
|
2014-03-20 22:43:04 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Append a heap-allocated copy of '*elem' to the vector, growing the
 * pointer array if needed.
 *
 * Returns the index of the new entry, or -1 if 'v' or 'elem' is NULL or if
 * memory could not be allocated. On failure the vector is left unchanged.
 */
static int _vector_add(_psl_vector_t *v, const _psl_entry_t *elem)
{
	_psl_entry_t *copy;

	if (!v || !elem)
		return -1;

	if (v->cur >= v->max) {
		/* doubling 0 would never grow, so start with a small default */
		int newmax = v->max > 0 ? v->max * 2 : 16;
		/* keep the old array reachable in case realloc() fails */
		_psl_entry_t **grown = realloc(v->entry, newmax * sizeof(*grown));

		if (!grown)
			return -1;

		v->entry = grown;
		v->max = newmax;
	}

	if (!(copy = malloc(sizeof(*copy))))
		return -1;

	memcpy(copy, elem, sizeof(*copy));

	/* a label pointing into the source's own buffer must follow the copy */
	if (elem->label == elem->label_buf)
		copy->label = copy->label_buf;

	v->entry[v->cur] = copy;

	return v->cur++;
}
|
|
|
|
|
|
|
|
/*
 * Sort the entry pointers with the vector's comparison function.
 * Does nothing if 'v' is NULL, has no comparison function or holds fewer
 * than two entries.
 */
static void _vector_sort(_psl_vector_t *v)
{
	if (!v || !v->cmp || !v->entry || v->cur < 2)
		return;

	/* the array elements are '_psl_entry_t *', so take the size from the array itself */
	qsort(v->entry, v->cur, sizeof(v->entry[0]), (int(*)(const void *, const void *))v->cmp);
}
|
|
|
|
|
2014-05-12 12:20:59 +02:00
|
|
|
/* Number of entries in use; a NULL vector counts as empty. */
static int _vector_size(_psl_vector_t *v)
{
	if (!v)
		return 0;

	return v->cur;
}
|
|
|
|
|
2014-05-12 12:20:59 +02:00
|
|
|
/* by this kind of sorting, we can easily see if a domain matches or not */
|
2014-03-20 22:43:04 +01:00
|
|
|
static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
|
|
|
|
{
|
|
|
|
int n;
|
|
|
|
|
|
|
|
if ((n = s2->nlabels - s1->nlabels))
|
2014-08-22 17:44:48 +02:00
|
|
|
return n; // most labels first
|
2014-03-20 22:43:04 +01:00
|
|
|
|
2014-03-22 20:35:56 +01:00
|
|
|
if ((n = s1->length - s2->length))
|
2014-08-22 17:44:48 +02:00
|
|
|
return n; // shorter rules first
|
2014-03-20 22:43:04 +01:00
|
|
|
|
2014-03-24 23:33:27 +01:00
|
|
|
return strcmp(s1->label, s2->label ? s2->label : s2->label_buf);
|
2014-03-20 22:43:04 +01:00
|
|
|
}
|
|
|
|
|
2014-08-22 17:44:48 +02:00
|
|
|
/* needed to sort array of pointers, given to qsort() */
|
|
|
|
/* Adapter for arrays of entry pointers: compares the entries pointed to. */
static int _suffix_compare_array(const _psl_entry_t **s1, const _psl_entry_t **s2)
{
	const _psl_entry_t *lhs = *s1;
	const _psl_entry_t *rhs = *s2;

	return _suffix_compare(lhs, rhs);
}
|
|
|
|
|
2014-03-29 18:12:45 +01:00
|
|
|
/*
 * Fill 'suffix' from the textual rule 'rule' of 'length' bytes.
 *
 * A leading "*." marks a wildcard rule and is not stored in the label.
 * At most 'length' bytes are copied, so the label buffer cannot overflow
 * even if 'rule' is not NUL-terminated exactly at 'length'.
 *
 * Returns  0 on success,
 *         -1 if the rule is too long for the label buffer,
 *         -2 if the rule starts with '*' but not with "*.".
 * On failure 'nlabels' is set to 0.
 */
static int _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
{
	size_t n;
	char *dst;

	suffix->label = suffix->label_buf;

	if (length >= sizeof(suffix->label_buf) - 1) {
		/* rule too long, ignored */
		suffix->nlabels = 0;
		return -1;
	}

	if (*rule == '*') {
		/* 'length < 2' also protects the subtraction below from wrapping */
		if (length < 2 || rule[1] != '.') {
			/* unsupported kind of rule, ignored */
			suffix->nlabels = 0;
			return -2;
		}
		rule += 2;
		length -= 2;
		suffix->wildcard = 1;
	} else {
		suffix->wildcard = 0;
	}

	suffix->length = (unsigned short)length;
	suffix->nlabels = 1;

	/* copy the label and count the dots: each one starts another label */
	dst = suffix->label_buf;
	for (n = 0; n < length && rule[n]; n++) {
		if (rule[n] == '.')
			suffix->nlabels++;
		*dst++ = rule[n];
	}
	*dst = 0;

	return 0;
}
|
|
|
|
|
2014-06-17 17:14:02 +02:00
|
|
|
static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
2014-03-20 22:43:04 +01:00
|
|
|
{
|
|
|
|
_psl_entry_t suffix, *rule;
|
|
|
|
const char *p, *label_bak;
|
|
|
|
unsigned short length_bak;
|
|
|
|
|
2014-05-12 12:20:59 +02:00
|
|
|
/* this function should be called without leading dots, just make sure */
|
2014-03-20 22:43:04 +01:00
|
|
|
suffix.label = domain + (*domain == '.');
|
|
|
|
suffix.length = strlen(suffix.label);
|
|
|
|
suffix.wildcard = 0;
|
|
|
|
suffix.nlabels = 1;
|
|
|
|
|
|
|
|
for (p = suffix.label; *p; p++)
|
|
|
|
if (*p == '.')
|
|
|
|
suffix.nlabels++;
|
|
|
|
|
2014-05-12 12:20:59 +02:00
|
|
|
/* if domain has enough labels, it is public */
|
2014-03-24 17:29:56 +01:00
|
|
|
if (psl == &_builtin_psl)
|
|
|
|
rule = &suffixes[0];
|
|
|
|
else
|
|
|
|
rule = _vector_get(psl->suffixes, 0);
|
|
|
|
|
2014-03-20 22:43:04 +01:00
|
|
|
if (!rule || rule->nlabels < suffix.nlabels - 1)
|
2014-04-06 22:30:50 +02:00
|
|
|
return 0;
|
2014-03-20 22:43:04 +01:00
|
|
|
|
2014-03-24 17:29:56 +01:00
|
|
|
if (psl == &_builtin_psl)
|
|
|
|
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
|
|
|
|
else
|
|
|
|
rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));
|
|
|
|
|
2014-03-20 22:43:04 +01:00
|
|
|
if (rule) {
|
2014-05-12 12:20:59 +02:00
|
|
|
/* definitely a match, no matter if the found rule is a wildcard or not */
|
2014-04-06 22:30:50 +02:00
|
|
|
return 1;
|
2014-03-28 22:02:42 +01:00
|
|
|
} else if (suffix.nlabels == 1) {
|
2014-05-12 12:20:59 +02:00
|
|
|
/* unknown TLD, this is the prevailing '*' match */
|
2014-04-06 22:30:50 +02:00
|
|
|
return 1;
|
2014-03-20 22:43:04 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
label_bak = suffix.label;
|
|
|
|
length_bak = suffix.length;
|
|
|
|
|
|
|
|
if ((suffix.label = strchr(suffix.label, '.'))) {
|
|
|
|
suffix.label++;
|
|
|
|
suffix.length = strlen(suffix.label);
|
|
|
|
suffix.nlabels--;
|
|
|
|
|
2014-03-24 17:29:56 +01:00
|
|
|
if (psl == &_builtin_psl)
|
|
|
|
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
|
|
|
|
else
|
|
|
|
rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));
|
|
|
|
|
2014-03-20 22:43:04 +01:00
|
|
|
if (rule) {
|
|
|
|
if (rule->wildcard) {
|
2014-05-12 12:20:59 +02:00
|
|
|
/* now that we matched a wildcard, we have to check for an exception */
|
2014-03-20 22:43:04 +01:00
|
|
|
suffix.label = label_bak;
|
|
|
|
suffix.length = length_bak;
|
|
|
|
suffix.nlabels++;
|
|
|
|
|
2014-03-24 17:29:56 +01:00
|
|
|
if (psl == &_builtin_psl) {
|
|
|
|
if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare))
|
2014-05-12 12:20:59 +02:00
|
|
|
return 0; /* found an exception, so 'domain' is not a public suffix */
|
2014-03-24 17:29:56 +01:00
|
|
|
} else {
|
|
|
|
if (_vector_get(psl->suffix_exceptions, _vector_find(psl->suffix_exceptions, &suffix)) != 0)
|
2014-05-12 12:20:59 +02:00
|
|
|
return 0; /* found an exception, so 'domain' is not a public suffix */
|
2014-03-24 17:29:56 +01:00
|
|
|
}
|
2014-03-20 22:43:04 +01:00
|
|
|
|
2014-04-06 22:30:50 +02:00
|
|
|
return 1;
|
2014-03-20 22:43:04 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-04-06 22:30:50 +02:00
|
|
|
return 0;
|
2014-03-20 22:43:04 +01:00
|
|
|
}
|
|
|
|
|
2014-06-17 17:14:02 +02:00
|
|
|
/**
|
|
|
|
* psl_is_public_suffix:
|
|
|
|
* @psl: PSL context
|
|
|
|
* @domain: Domain string
|
|
|
|
*
|
|
|
|
* This function checks if @domain is a public suffix by the means of the
|
|
|
|
* [Mozilla Public Suffix List](http://publicsuffix.org).
|
|
|
|
*
|
|
|
|
* For cookie domain checking see psl_is_cookie_domain_acceptable().
|
|
|
|
*
|
2014-06-18 12:26:45 +02:00
|
|
|
* International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode).
|
|
|
|
* Other encodings result in unexpected behavior.
|
|
|
|
*
|
2014-06-17 17:14:02 +02:00
|
|
|
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
|
|
|
|
* psl_builtin().
|
|
|
|
*
|
|
|
|
* Returns: 1 if domain is a public suffix, 0 if not.
|
|
|
|
*
|
|
|
|
* Since: 0.1
|
|
|
|
*/
|
|
|
|
int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
{
	/* only consult the list when both a context and a domain are given */
	if (psl && domain)
		return _psl_is_public_suffix(psl, domain);

	/* otherwise report "public" */
	return 1;
}
|
|
|
|
|
2014-04-06 22:30:50 +02:00
|
|
|
/**
|
|
|
|
* psl_unregistrable_domain:
|
|
|
|
* @psl: PSL context
|
|
|
|
* @domain: Domain string
|
|
|
|
*
|
|
|
|
* This function finds the longest public suffix part of @domain by the means
|
|
|
|
* of the [Mozilla Public Suffix List](http://publicsuffix.org).
|
|
|
|
*
|
2014-06-18 12:26:45 +02:00
|
|
|
* International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode).
|
|
|
|
* Other encodings result in unexpected behavior.
|
|
|
|
*
|
2014-04-06 22:30:50 +02:00
|
|
|
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
|
|
|
|
* psl_builtin().
|
|
|
|
*
|
2014-04-11 16:30:20 +02:00
|
|
|
* Returns: Pointer to longest public suffix part of @domain or %NULL if @domain
|
|
|
|
* does not contain a public suffix (or if @psl is %NULL).
|
|
|
|
*
|
|
|
|
* Since: 0.1
|
2014-04-06 22:30:50 +02:00
|
|
|
*/
|
2014-03-26 22:27:31 +01:00
|
|
|
const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
{
	const char *dot;

	if (!psl || !domain)
		return NULL;

	/*
	 * We check from left to right to catch special PSL entries like 'forgot.his.name':
	 *   'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
	 */
	for (;;) {
		if (_psl_is_public_suffix(psl, domain))
			return domain;

		/* drop the leftmost label and try again */
		dot = strchr(domain, '.');
		if (!dot)
			return NULL; /* no labels left, prevents an endless loop */

		domain = dot + 1;
	}
}
|
|
|
|
|
2014-04-06 22:30:50 +02:00
|
|
|
/**
|
|
|
|
* psl_registrable_domain:
|
|
|
|
* @psl: PSL context
|
|
|
|
* @domain: Domain string
|
|
|
|
*
|
|
|
|
* This function finds the shortest private suffix part of @domain by the means
|
|
|
|
* of the [Mozilla Public Suffix List](http://publicsuffix.org).
|
|
|
|
*
|
2014-06-18 12:26:45 +02:00
|
|
|
* International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode).
|
|
|
|
* Other encodings result in unexpected behavior.
|
|
|
|
*
|
2014-04-06 22:30:50 +02:00
|
|
|
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
|
|
|
|
* psl_builtin().
|
|
|
|
*
|
2014-04-11 16:30:20 +02:00
|
|
|
* Returns: Pointer to shortest private suffix part of @domain or %NULL if @domain
|
|
|
|
* does not contain a private suffix (or if @psl is %NULL).
|
|
|
|
*
|
|
|
|
* Since: 0.1
|
2014-04-06 22:30:50 +02:00
|
|
|
*/
|
2014-03-26 22:27:31 +01:00
|
|
|
const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
{
	const char *candidate = NULL;
	const char *dot;

	if (!psl || !domain || *domain == '.')
		return NULL;

	/*
	 * We check from left to right to catch special PSL entries like 'forgot.his.name':
	 *   'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
	 */
	while (!_psl_is_public_suffix(psl, domain)) {
		dot = strchr(domain, '.');
		if (!dot)
			break; /* no labels left, prevents an endless loop */

		/* remember the last non-public name before moving one label to the right */
		candidate = domain;
		domain = dot + 1;
	}

	return candidate;
}
|
|
|
|
|
2014-06-17 17:14:02 +02:00
|
|
|
/* Returns 1 if every byte of 's' is in the range 1..127, else 0. */
static int _str_is_ascii(const char *s)
{
	const unsigned char *p = (const unsigned char *)s;

	/* stop at the terminating NUL or at the first byte with the high bit set */
	while (*p != 0 && *p < 128)
		p++;

	return *p == 0;
}
|
|
|
|
|
2014-06-29 22:56:33 +02:00
|
|
|
#if defined(WITH_LIBICU) || defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
/*
 * Add the ASCII (punycode) form 'punycode' of a rule to 'v'.
 * 'wildcard' is taken over from the rule the name was converted from.
 * Names that _suffix_init() rejects (e.g. too long for the label buffer)
 * and entries that cannot be added to the vector are silently skipped.
 */
static void _add_punycode_entry(_psl_vector_t *v, const char *punycode, unsigned char wildcard)
{
	_psl_entry_t suffix, *suffixp;

	if (_suffix_init(&suffix, punycode, strlen(punycode)) != 0)
		return; /* 'suffix' is not fully initialized in this case, do not add it */

	suffix.wildcard = wildcard;

	if ((suffixp = _vector_get(v, _vector_add(v, &suffix))))
		suffixp->label = suffixp->label_buf; /* set label to changed address */
}
#endif

#if defined(WITH_LIBICU)
/*
 * If the label of 'e' is not plain ASCII, convert it to its ASCII form
 * (IDNA2008 UTS#46) and add that form to 'v' as an additional entry.
 * Does nothing if 'idna' is NULL or if any conversion step fails.
 */
static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t *e)
{
	char lookupname[128] = "";
	UErrorCode status = 0;
	UIDNAInfo info = UIDNA_INFO_INITIALIZER;
	UChar utf16_dst[128], utf16_src[128];
	int32_t utf16_src_length, dst_length;

	if (!idna || _str_is_ascii(e->label_buf))
		return;

	u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, -1, &status);
	if (!U_SUCCESS(status))
		return; /* failed to convert UTF-8 to UTF-16 */

	dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status);
	if (!U_SUCCESS(status))
		return; /* failed to convert to ASCII */

	/* leave the last (zeroed) byte untouched so the result is always terminated */
	u_strToUTF8(lookupname, sizeof(lookupname) - 1, NULL, utf16_dst, dst_length, &status);
	if (!U_SUCCESS(status))
		return; /* failed to convert UTF-16 to UTF-8 */

	if (strcmp(e->label_buf, lookupname))
		_add_punycode_entry(v, lookupname, e->wildcard);
	/* else ignore */
}
#elif defined(WITH_LIBIDN2)
/*
 * If the label of 'e' is not plain ASCII, lowercase it, convert it to its
 * ASCII form with libidn2 and add that form to 'v' as an additional entry.
 * Does nothing if any step fails.
 */
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e)
{
	char *lookupname = NULL;
	uint8_t *lower, resbuf[256];
	size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */

	if (_str_is_ascii(e->label_buf))
		return;

	/* we need a conversion to lowercase */
	lower = u8_tolower((uint8_t *)e->label_buf, u8_strlen((uint8_t *)e->label_buf), 0, UNINORM_NFKC, resbuf, &len);
	if (!lower)
		return;

	/* u8_tolower() does not terminate the result string */
	if (lower == resbuf) {
		lower[len] = 0;
	} else {
		uint8_t *tmp = lower;

		lower = (uint8_t *)strndup((char *)lower, len);
		free(tmp);

		if (!lower)
			return; /* out of memory */
	}

	if (idn2_lookup_u8(lower, (uint8_t **)&lookupname, 0) == IDN2_OK) {
		if (strcmp(e->label_buf, lookupname))
			_add_punycode_entry(v, lookupname, e->wildcard);
		/* else ignore */

		/* the result was allocated by idn2_lookup_u8() and is ours to release */
		free(lookupname);
	}

	if (lower != resbuf)
		free(lower);
}
#elif defined(WITH_LIBIDN)
/*
 * If the label of 'e' is not plain ASCII, convert it to its ASCII form
 * with libidn and add that form to 'v' as an additional entry.
 * Does nothing if the conversion fails.
 */
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e)
{
	char *lookupname = NULL;

	if (_str_is_ascii(e->label_buf))
		return;

	/* idna_to_ascii_8z() automatically converts UTF-8 to lowercase */
	if (idna_to_ascii_8z(e->label_buf, &lookupname, IDNA_USE_STD3_ASCII_RULES) == IDNA_SUCCESS) {
		if (strcmp(e->label_buf, lookupname))
			_add_punycode_entry(v, lookupname, e->wildcard);
		/* else ignore */

		/* the result was allocated by idna_to_ascii_8z() and is ours to release */
		free(lookupname);
	}
}
#endif
|
2014-06-17 17:14:02 +02:00
|
|
|
|
2014-04-06 22:30:50 +02:00
|
|
|
/**
|
|
|
|
* psl_load_file:
|
|
|
|
* @fname: Name of PSL file
|
|
|
|
*
|
|
|
|
* This function loads the public suffixes file named @fname.
|
|
|
|
* To free the allocated resources, call psl_free().
|
|
|
|
*
|
2014-06-18 12:26:45 +02:00
|
|
|
* The suffixes are expected to be lowercase UTF-8 encoded if they are international.
|
2014-04-22 16:49:00 +02:00
|
|
|
*
|
2014-04-11 16:30:20 +02:00
|
|
|
* Returns: Pointer to a PSL context or %NULL on failure.
|
|
|
|
*
|
|
|
|
* Since: 0.1
|
2014-04-06 22:30:50 +02:00
|
|
|
*/
|
2014-03-20 22:43:04 +01:00
|
|
|
psl_ctx_t *psl_load_file(const char *fname)
{
	psl_ctx_t *ctx;
	FILE *stream;

	if (fname == NULL)
		return NULL;

	stream = fopen(fname, "r");
	if (stream == NULL)
		return NULL;

	/* the parsing itself is done by psl_load_fp(); we only own the stream */
	ctx = psl_load_fp(stream);
	fclose(stream);

	return ctx;
}
|
|
|
|
|
2014-04-06 22:30:50 +02:00
|
|
|
/**
|
|
|
|
* psl_load_fp:
|
|
|
|
* @fp: FILE pointer
|
|
|
|
*
|
|
|
|
* This function loads the public suffixes from a FILE pointer.
|
|
|
|
* To free the allocated resources, call psl_free().
|
|
|
|
*
|
2014-06-18 12:26:45 +02:00
|
|
|
* The suffixes are expected to be lowercase UTF-8 encoded if they are international.
|
2014-04-22 16:49:00 +02:00
|
|
|
*
|
2014-04-11 16:30:20 +02:00
|
|
|
* Returns: Pointer to a PSL context or %NULL on failure.
|
|
|
|
*
|
|
|
|
* Since: 0.1
|
2014-04-06 22:30:50 +02:00
|
|
|
*/
|
2014-03-22 14:28:55 +01:00
|
|
|
psl_ctx_t *psl_load_fp(FILE *fp)
|
2014-03-20 22:43:04 +01:00
|
|
|
{
|
|
|
|
psl_ctx_t *psl;
|
|
|
|
_psl_entry_t suffix, *suffixp;
|
2014-03-21 15:41:27 +01:00
|
|
|
char buf[256], *linep, *p;
|
2014-06-17 17:14:02 +02:00
|
|
|
#ifdef WITH_LIBICU
|
|
|
|
UIDNA *idna;
|
|
|
|
UErrorCode status = 0;
|
|
|
|
#endif
|
2014-03-20 22:43:04 +01:00
|
|
|
|
2014-03-22 14:28:55 +01:00
|
|
|
if (!fp)
|
2014-03-20 22:43:04 +01:00
|
|
|
return NULL;
|
|
|
|
|
2014-03-22 14:28:55 +01:00
|
|
|
if (!(psl = calloc(1, sizeof(psl_ctx_t))))
|
|
|
|
return NULL;
|
2014-03-20 22:43:04 +01:00
|
|
|
|
2014-06-17 17:14:02 +02:00
|
|
|
#ifdef WITH_LIBICU
|
|
|
|
idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status);
|
|
|
|
#endif
|
|
|
|
|
2014-05-12 12:20:59 +02:00
|
|
|
/*
|
|
|
|
* as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
|
|
|
* as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
|
|
|
*/
|
2014-08-22 17:44:48 +02:00
|
|
|
psl->suffixes = _vector_alloc(8*1024, _suffix_compare_array);
|
|
|
|
psl->suffix_exceptions = _vector_alloc(64, _suffix_compare_array);
|
2014-03-22 14:28:55 +01:00
|
|
|
|
|
|
|
while ((linep = fgets(buf, sizeof(buf), fp))) {
|
2014-05-12 12:20:59 +02:00
|
|
|
while (isspace(*linep)) linep++; /* ignore leading whitespace */
|
|
|
|
if (!*linep) continue; /* skip empty lines */
|
2014-03-22 14:28:55 +01:00
|
|
|
|
|
|
|
if (*linep == '/' && linep[1] == '/')
|
2014-05-12 12:20:59 +02:00
|
|
|
continue; /* skip comments */
|
2014-03-22 14:28:55 +01:00
|
|
|
|
2014-05-12 12:20:59 +02:00
|
|
|
/* parse suffix rule */
|
2014-03-22 14:28:55 +01:00
|
|
|
for (p = linep; *linep && !isspace(*linep);) linep++;
|
|
|
|
*linep = 0;
|
|
|
|
|
|
|
|
if (*p == '!') {
|
2014-05-12 12:20:59 +02:00
|
|
|
/* add to exceptions */
|
2014-06-17 17:14:02 +02:00
|
|
|
if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0) {
|
2014-03-29 18:12:45 +01:00
|
|
|
suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix));
|
2014-06-17 17:14:02 +02:00
|
|
|
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
2014-06-18 15:21:22 +02:00
|
|
|
#ifdef WITH_LIBICU
|
2014-06-17 17:14:02 +02:00
|
|
|
_add_punycode_if_needed(idna, psl->suffix_exceptions, suffixp);
|
2014-06-29 22:56:33 +02:00
|
|
|
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
|
|
|
|
_add_punycode_if_needed(psl->suffix_exceptions, suffixp);
|
2014-06-18 15:21:22 +02:00
|
|
|
#endif
|
2014-06-17 17:14:02 +02:00
|
|
|
}
|
2014-03-22 14:28:55 +01:00
|
|
|
} else {
|
2014-06-17 17:14:02 +02:00
|
|
|
/* add to suffixes */
|
|
|
|
if (_suffix_init(&suffix, p, linep - p) == 0) {
|
2014-03-29 18:12:45 +01:00
|
|
|
suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix));
|
2014-06-17 17:14:02 +02:00
|
|
|
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
2014-06-18 15:21:22 +02:00
|
|
|
#ifdef WITH_LIBICU
|
2014-06-17 17:14:02 +02:00
|
|
|
_add_punycode_if_needed(idna, psl->suffixes, suffixp);
|
2014-06-29 22:56:33 +02:00
|
|
|
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
|
2014-06-30 13:21:16 +02:00
|
|
|
_add_punycode_if_needed(psl->suffixes, suffixp);
|
2014-06-18 15:21:22 +02:00
|
|
|
#endif
|
2014-06-17 17:14:02 +02:00
|
|
|
}
|
2014-03-20 22:43:04 +01:00
|
|
|
}
|
2014-03-21 19:43:27 +01:00
|
|
|
}
|
2014-03-20 22:43:04 +01:00
|
|
|
|
2014-03-22 14:28:55 +01:00
|
|
|
_vector_sort(psl->suffix_exceptions);
|
|
|
|
_vector_sort(psl->suffixes);
|
|
|
|
|
2014-06-17 17:14:02 +02:00
|
|
|
#ifdef WITH_LIBICU
|
|
|
|
if (idna)
|
|
|
|
uidna_close(idna);
|
|
|
|
#endif
|
|
|
|
|
2014-03-20 22:43:04 +01:00
|
|
|
return psl;
|
|
|
|
}
|
|
|
|
|
2014-04-11 16:30:20 +02:00
|
|
|
/**
|
|
|
|
* psl_free:
|
|
|
|
* @psl: PSL context pointer
|
|
|
|
*
|
|
|
|
* This function frees the PSL context that has been retrieved via
|
|
|
|
* psl_load_fp() or psl_load_file().
|
|
|
|
*
|
|
|
|
* This function returns nothing. Calling it with %NULL or with the context returned by psl_builtin() does nothing.
|
|
|
|
*
|
|
|
|
* Since: 0.1
|
|
|
|
*/
|
2014-03-27 18:16:54 +01:00
|
|
|
void psl_free(psl_ctx_t *psl)
{
	/* nothing to do for NULL; the builtin context is static data and must not be freed */
	if (!psl || psl == &_builtin_psl)
		return;

	_vector_free(&psl->suffixes);
	_vector_free(&psl->suffix_exceptions);
	free(psl);
}
|
|
|
|
|
2014-04-11 16:30:20 +02:00
|
|
|
/**
|
|
|
|
* psl_builtin:
|
|
|
|
*
|
|
|
|
* This function returns the PSL context that has been generated and built in at compile-time.
|
|
|
|
* You don't have to free the returned context explicitly.
|
|
|
|
*
|
2014-04-22 16:49:00 +02:00
|
|
|
* The builtin data also contains punycode entries, one for each international domain name.
|
|
|
|
*
|
2014-04-11 16:30:20 +02:00
|
|
|
* If the generation of built-in data has been disabled during compilation, %NULL will be returned.
|
2014-04-22 16:49:00 +02:00
|
|
|
* So if using the builtin psl context, you can provide UTF-8 or punycode representations of domains to
|
|
|
|
* functions like psl_is_public_suffix().
|
2014-04-11 16:30:20 +02:00
|
|
|
*
|
|
|
|
* Returns: Pointer to the built in PSL data or NULL if this data is not available.
|
|
|
|
*
|
|
|
|
* Since: 0.1
|
|
|
|
*/
|
2014-04-06 22:30:50 +02:00
|
|
|
const psl_ctx_t *psl_builtin(void)
{
	/* stays NULL unless built-in data has been generated at compile time */
	const psl_ctx_t *builtin = NULL;

#if defined(BUILTIN_GENERATOR_LIBICU) || defined(BUILTIN_GENERATOR_LIBIDN2) || defined(BUILTIN_GENERATOR_LIBIDN)
	builtin = &_builtin_psl;
#endif

	return builtin;
}
|
|
|
|
|
2014-04-11 16:30:20 +02:00
|
|
|
/**
|
|
|
|
* psl_suffix_count:
|
|
|
|
* @psl: PSL context pointer
|
|
|
|
*
|
|
|
|
* This function returns number of public suffixes maintained by @psl.
|
|
|
|
* The number of exceptions within the Public Suffix List are not included.
|
|
|
|
*
|
|
|
|
* If the generation of built-in data has been disabled during compilation, 0 will be returned.
|
|
|
|
*
|
|
|
|
* Returns: Number of public suffixes entries in PSL context.
|
|
|
|
*
|
|
|
|
* Since: 0.1
|
|
|
|
*/
|
2014-03-21 19:26:55 +01:00
|
|
|
int psl_suffix_count(const psl_ctx_t *psl)
|
|
|
|
{
|
2014-03-24 17:29:56 +01:00
|
|
|
if (psl == &_builtin_psl)
|
|
|
|
return countof(suffixes);
|
2014-03-29 18:01:03 +01:00
|
|
|
else if (psl)
|
2014-03-24 17:29:56 +01:00
|
|
|
return _vector_size(psl->suffixes);
|
2014-03-29 18:58:24 +01:00
|
|
|
else
|
|
|
|
return 0;
|
2014-03-21 19:26:55 +01:00
|
|
|
}
|
2014-03-22 20:35:56 +01:00
|
|
|
|
2014-04-11 16:30:20 +02:00
|
|
|
/**
|
|
|
|
* psl_suffix_exception_count:
|
|
|
|
* @psl: PSL context pointer
|
|
|
|
*
|
|
|
|
* This function returns number of public suffix exceptions maintained by @psl.
|
|
|
|
*
|
|
|
|
* If the generation of built-in data has been disabled during compilation, 0 will be returned.
|
|
|
|
*
|
|
|
|
* Returns: Number of public suffix exceptions in PSL context.
|
|
|
|
*
|
|
|
|
* Since: 0.1
|
|
|
|
*/
|
2014-03-21 19:26:55 +01:00
|
|
|
int psl_suffix_exception_count(const psl_ctx_t *psl)
|
|
|
|
{
|
2014-03-24 17:29:56 +01:00
|
|
|
if (psl == &_builtin_psl)
|
|
|
|
return countof(suffix_exceptions);
|
2014-03-29 18:01:03 +01:00
|
|
|
else if (psl)
|
2014-03-24 17:29:56 +01:00
|
|
|
return _vector_size(psl->suffix_exceptions);
|
2014-03-29 18:58:24 +01:00
|
|
|
else
|
|
|
|
return 0;
|
2014-03-21 19:26:55 +01:00
|
|
|
}
|
|
|
|
|
2014-04-11 16:30:20 +02:00
|
|
|
/**
|
|
|
|
* psl_builtin_compile_time:
|
|
|
|
*
|
|
|
|
* This function returns the time when the Publix Suffix List has been compiled into C code (by psl2c).
|
|
|
|
*
|
|
|
|
* If the generation of built-in data has been disabled during compilation, 0 will be returned.
|
|
|
|
*
|
|
|
|
* Returns: time_t value or 0.
|
|
|
|
*
|
|
|
|
* Since: 0.1
|
|
|
|
*/
|
2014-03-24 17:29:56 +01:00
|
|
|
time_t psl_builtin_compile_time(void)
|
2014-03-20 22:43:04 +01:00
|
|
|
{
|
2014-03-24 17:29:56 +01:00
|
|
|
return _psl_compile_time;
|
|
|
|
}
|
|
|
|
|
2014-04-11 16:30:20 +02:00
|
|
|
/**
|
|
|
|
* psl_builtin_file_time:
|
|
|
|
*
|
|
|
|
* This function returns the mtime of the Publix Suffix List file that has been built in.
|
|
|
|
*
|
|
|
|
* If the generation of built-in data has been disabled during compilation, 0 will be returned.
|
|
|
|
*
|
|
|
|
* Returns: time_t value or 0.
|
|
|
|
*
|
|
|
|
* Since: 0.1
|
|
|
|
*/
|
2014-03-24 17:29:56 +01:00
|
|
|
time_t psl_builtin_file_time(void)
|
|
|
|
{
|
|
|
|
return _psl_file_time;
|
|
|
|
}
|
|
|
|
|
2014-04-11 16:30:20 +02:00
|
|
|
/**
|
|
|
|
* psl_builtin_sha1sum:
|
|
|
|
*
|
|
|
|
* This function returns the SHA1 checksum of the Publix Suffix List file that has been built in.
|
|
|
|
* The returned string is in lowercase hex encoding, e.g. "2af1e9e3044eda0678bb05949d7cca2f769901d8".
|
|
|
|
*
|
|
|
|
* If the generation of built-in data has been disabled during compilation, an empty string will be returned.
|
|
|
|
*
|
|
|
|
* Returns: String containing SHA1 checksum or an empty string.
|
|
|
|
*
|
|
|
|
* Since: 0.1
|
|
|
|
*/
|
2014-03-24 17:29:56 +01:00
|
|
|
const char *psl_builtin_sha1sum(void)
|
|
|
|
{
|
|
|
|
return _psl_sha1_checksum;
|
2014-03-20 22:43:04 +01:00
|
|
|
}
|
2014-04-16 10:52:35 +02:00
|
|
|
|
2014-04-17 12:31:06 +02:00
|
|
|
/**
|
|
|
|
* psl_builtin_filename:
|
|
|
|
*
|
|
|
|
* This function returns the file name of the Publix Suffix List file that has been built in.
|
|
|
|
*
|
|
|
|
* If the generation of built-in data has been disabled during compilation, an empty string will be returned.
|
|
|
|
*
|
|
|
|
* Returns: String containing the PSL file name or an empty string.
|
|
|
|
*
|
|
|
|
* Since: 0.1
|
|
|
|
*/
|
|
|
|
const char *psl_builtin_filename(void)
|
|
|
|
{
|
|
|
|
return _psl_filename;
|
|
|
|
}
|
|
|
|
|
2014-05-31 19:33:30 +02:00
|
|
|
/**
|
|
|
|
* psl_get_version:
|
|
|
|
*
|
|
|
|
* Get libpsl version.
|
|
|
|
*
|
|
|
|
* Returns: String containing version of libpsl.
|
|
|
|
*
|
|
|
|
* Since: 0.2.5
|
|
|
|
**/
|
2014-08-14 11:05:47 +02:00
|
|
|
const char *psl_get_version(void)
|
2014-05-31 19:33:30 +02:00
|
|
|
{
|
2014-06-17 17:14:02 +02:00
|
|
|
#ifdef WITH_LIBICU
|
2014-06-29 22:56:33 +02:00
|
|
|
return PACKAGE_VERSION " (+libicu/" U_ICU_VERSION ")";
|
|
|
|
#elif defined(WITH_LIBIDN2)
|
|
|
|
return PACKAGE_VERSION " (+libidn2/" IDN2_VERSION ")";
|
|
|
|
#elif defined(WITH_LIBIDN)
|
|
|
|
return PACKAGE_VERSION " (+libidn/" STRINGPREP_VERSION ")";
|
2014-06-17 17:14:02 +02:00
|
|
|
#else
|
2014-06-30 13:21:16 +02:00
|
|
|
return PACKAGE_VERSION " (no IDNA support)";
|
2014-06-17 17:14:02 +02:00
|
|
|
#endif
|
2014-05-31 19:33:30 +02:00
|
|
|
}
|
|
|
|
|
2014-08-19 17:46:36 +02:00
|
|
|
/*
 * Return 1 if hostname is a literal IPv4 or IPv6 address, 0 otherwise.
 *
 * inet_pton() returns 1 on success, 0 if the string is not a valid address
 * and -1 if the address family is not supported. Only 1 means "this is an
 * IP address", so the result is compared explicitly: a plain truth test
 * would treat -1 as a match and classify every hostname as an IP address
 * on systems lacking one of the address families.
 */
static int _isip(const char *hostname)
{
	struct in_addr addr;
	struct in6_addr addr6;

	return inet_pton(AF_INET, hostname, &addr) == 1
		|| inet_pton(AF_INET6, hostname, &addr6) == 1;
}
|
|
|
|
|
2014-04-16 10:52:35 +02:00
|
|
|
/**
|
|
|
|
* psl_is_cookie_domain_acceptable:
|
|
|
|
* @psl: PSL context pointer
|
|
|
|
* @hostname: The request hostname.
|
|
|
|
* @cookie_domain: The domain value from a cookie
|
|
|
|
*
|
|
|
|
* This helper function checks whether @cookie_domain is an acceptable cookie domain value for the request
|
|
|
|
* @hostname.
|
|
|
|
*
|
2014-06-18 12:26:45 +02:00
|
|
|
* For international domain names both, @hostname and @cookie_domain, have to be either in lowercase UTF-8
|
|
|
|
* or in ASCII form (punycode). Other encodings or mixing UTF-8 and punycode result in unexpected behavior.
|
|
|
|
*
|
2014-04-16 11:36:37 +02:00
|
|
|
* Examples:
|
|
|
|
* 1. Cookie domain 'example.com' would be acceptable for hostname 'www.example.com',
|
|
|
|
* but '.com' or 'com' would NOT be acceptable since 'com' is a public suffix.
|
|
|
|
*
|
|
|
|
* 2. Cookie domain 'his.name' would be acceptable for hostname 'remember.his.name',
|
|
|
|
* but NOT for 'forgot.his.name' since 'forgot.his.name' is a public suffix.
|
|
|
|
*
|
2014-04-16 10:52:35 +02:00
|
|
|
* Returns: 1 if acceptable, 0 if not acceptable.
|
|
|
|
*
|
|
|
|
* Since: 0.1
|
|
|
|
*/
|
|
|
|
int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain)
|
|
|
|
{
|
2014-04-16 11:36:37 +02:00
|
|
|
const char *p;
|
2014-04-16 10:52:35 +02:00
|
|
|
size_t hostname_length, cookie_domain_length;
|
|
|
|
|
|
|
|
if (!psl || !hostname || !cookie_domain)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
while (*cookie_domain == '.')
|
|
|
|
cookie_domain++;
|
|
|
|
|
|
|
|
if (!strcmp(hostname, cookie_domain))
|
2014-05-12 12:20:59 +02:00
|
|
|
return 1; /* an exact match is acceptable (and pretty common) */
|
2014-04-16 10:52:35 +02:00
|
|
|
|
2014-08-19 17:46:36 +02:00
|
|
|
if (_isip(hostname))
|
|
|
|
return 0; /* Hostname is an IP address and these must match fully (RFC 6265, 5.1.3) */
|
|
|
|
|
2014-04-16 10:52:35 +02:00
|
|
|
cookie_domain_length = strlen(cookie_domain);
|
|
|
|
hostname_length = strlen(hostname);
|
|
|
|
|
|
|
|
if (cookie_domain_length >= hostname_length)
|
2014-05-12 12:20:59 +02:00
|
|
|
return 0; /* cookie_domain is too long */
|
2014-04-16 10:52:35 +02:00
|
|
|
|
|
|
|
p = hostname + hostname_length - cookie_domain_length;
|
|
|
|
if (!strcmp(p, cookie_domain) && p[-1] == '.') {
|
2014-05-12 12:20:59 +02:00
|
|
|
/* OK, cookie_domain matches, but it must be longer than the longest public suffix in 'hostname' */
|
2014-04-16 10:52:35 +02:00
|
|
|
|
|
|
|
if (!(p = psl_unregistrable_domain(psl, hostname)))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
if (cookie_domain_length > strlen(p))
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2014-06-17 17:14:02 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* psl_str_to_utf8lower:
|
|
|
|
* @str: string to convert
|
|
|
|
* @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL
|
|
|
|
* @locale: locale of @str for to lowercase conversion, e.g. 'de' or %NULL
|
|
|
|
* @lower: return value containing the converted string
|
|
|
|
*
|
|
|
|
* This helper function converts a string to lowercase UTF-8 representation.
|
|
|
|
* Lowercase UTF-8 is needed as input to the domain checking functions.
|
|
|
|
*
|
2014-06-23 12:56:13 +02:00
|
|
|
* @lower is set to %NULL on error.
|
|
|
|
*
|
2014-06-17 17:14:02 +02:00
|
|
|
* The return value 'lower' must be freed after usage.
|
|
|
|
*
|
2014-06-20 12:36:51 +02:00
|
|
|
* Returns: psl_error_t value.
|
|
|
|
* PSL_SUCCESS: Success
|
|
|
|
* PSL_ERR_INVALID_ARG: @str is a %NULL value.
|
|
|
|
* PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding
|
|
|
|
* PSL_ERR_TO_UTF16: Failed to convert @str to unicode
|
|
|
|
* PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase
|
|
|
|
* PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8
|
2014-06-17 17:14:02 +02:00
|
|
|
*
|
|
|
|
* Since: 0.4
|
|
|
|
*/
|
2014-06-20 12:36:51 +02:00
|
|
|
psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower)
|
2014-06-17 17:14:02 +02:00
|
|
|
{
|
2014-06-20 12:36:51 +02:00
|
|
|
int ret = PSL_ERR_INVALID_ARG;
|
2014-06-17 17:14:02 +02:00
|
|
|
|
|
|
|
if (lower)
|
|
|
|
*lower = NULL;
|
|
|
|
|
|
|
|
if (!str)
|
2014-06-20 12:36:51 +02:00
|
|
|
return PSL_ERR_INVALID_ARG;
|
2014-06-18 12:26:45 +02:00
|
|
|
|
|
|
|
/* shortcut to avoid costly conversion */
|
|
|
|
if (_str_is_ascii(str)) {
|
2014-06-18 12:39:55 +02:00
|
|
|
if (lower) {
|
|
|
|
char *p;
|
|
|
|
|
2014-06-18 12:26:45 +02:00
|
|
|
*lower = strdup(str);
|
2014-06-18 12:39:55 +02:00
|
|
|
|
|
|
|
/* convert ASCII string to lowercase */
|
|
|
|
for (p = *lower; *p; p++)
|
|
|
|
if (isupper(*p))
|
|
|
|
*p = tolower(*p);
|
|
|
|
}
|
2014-06-20 12:36:51 +02:00
|
|
|
return PSL_SUCCESS;
|
2014-06-18 12:26:45 +02:00
|
|
|
}
|
2014-06-17 17:14:02 +02:00
|
|
|
|
|
|
|
#ifdef WITH_LIBICU
|
2014-06-18 16:27:29 +02:00
|
|
|
do {
|
2014-06-17 17:14:02 +02:00
|
|
|
size_t str_length = strlen(str);
|
|
|
|
UErrorCode status = 0;
|
|
|
|
UChar *utf16_dst, *utf16_lower;
|
|
|
|
int32_t utf16_dst_length;
|
|
|
|
char *utf8_lower;
|
|
|
|
UConverter *uconv;
|
|
|
|
|
|
|
|
/* C89 allocation */
|
|
|
|
utf16_dst = alloca(sizeof(UChar) * (str_length * 2 + 1));
|
|
|
|
utf16_lower = alloca(sizeof(UChar) * (str_length * 2 + 1));
|
|
|
|
utf8_lower = alloca(str_length * 2 + 1);
|
|
|
|
|
|
|
|
uconv = ucnv_open(encoding, &status);
|
|
|
|
if (U_SUCCESS(status)) {
|
|
|
|
utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, str_length * 2 + 1, str, str_length, &status);
|
|
|
|
ucnv_close(uconv);
|
|
|
|
|
|
|
|
if (U_SUCCESS(status)) {
|
|
|
|
int32_t utf16_lower_length = u_strToLower(utf16_lower, str_length * 2 + 1, utf16_dst, utf16_dst_length, locale, &status);
|
|
|
|
if (U_SUCCESS(status)) {
|
|
|
|
u_strToUTF8(utf8_lower, str_length * 8 + 1, NULL, utf16_lower, utf16_lower_length, &status);
|
|
|
|
if (U_SUCCESS(status)) {
|
|
|
|
if (lower)
|
|
|
|
*lower = strdup(utf8_lower);
|
2014-06-20 12:36:51 +02:00
|
|
|
ret = PSL_SUCCESS;
|
2014-06-17 17:14:02 +02:00
|
|
|
} else {
|
2014-06-20 12:36:51 +02:00
|
|
|
ret = PSL_ERR_TO_UTF8;
|
2014-06-18 12:26:45 +02:00
|
|
|
/* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */
|
2014-06-17 17:14:02 +02:00
|
|
|
}
|
|
|
|
} else {
|
2014-06-20 12:36:51 +02:00
|
|
|
ret = PSL_ERR_TO_LOWER;
|
2014-06-18 12:26:45 +02:00
|
|
|
/* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */
|
2014-06-17 17:14:02 +02:00
|
|
|
}
|
|
|
|
} else {
|
2014-06-20 12:36:51 +02:00
|
|
|
ret = PSL_ERR_TO_UTF16;
|
2014-06-18 12:26:45 +02:00
|
|
|
/* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */
|
2014-06-17 17:14:02 +02:00
|
|
|
}
|
|
|
|
} else {
|
2014-06-20 12:36:51 +02:00
|
|
|
ret = PSL_ERR_CONVERTER;
|
2014-06-18 12:26:45 +02:00
|
|
|
/* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */
|
2014-06-17 17:14:02 +02:00
|
|
|
}
|
2014-06-18 16:27:29 +02:00
|
|
|
} while (0);
|
2014-06-30 13:21:16 +02:00
|
|
|
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
|
|
|
|
do {
|
|
|
|
/* find out local charset encoding */
|
|
|
|
if (!encoding) {
|
|
|
|
encoding = nl_langinfo(CODESET);
|
|
|
|
|
|
|
|
if (!encoding || !*encoding)
|
|
|
|
encoding = "ASCII";
|
|
|
|
}
|
|
|
|
|
|
|
|
/* convert to UTF-8 */
|
|
|
|
if (strcasecmp(encoding, "utf-8")) {
|
|
|
|
iconv_t cd = iconv_open("utf-8", encoding);
|
|
|
|
|
|
|
|
if (cd != (iconv_t)-1) {
|
|
|
|
char *tmp = (char *)str; /* iconv won't change where str points to, but changes tmp itself */
|
|
|
|
size_t tmp_len = strlen(str);
|
|
|
|
size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len;
|
|
|
|
char *dst = malloc(dst_len + 1), *dst_tmp = dst;
|
|
|
|
|
|
|
|
if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1) {
|
|
|
|
uint8_t *resbuf = malloc(dst_len * 2 + 1);
|
|
|
|
size_t len = dst_len * 2; /* leave space for additional \0 byte */
|
|
|
|
|
|
|
|
if ((dst = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) {
|
|
|
|
/* u8_tolower() does not terminate the result string */
|
|
|
|
if (lower)
|
|
|
|
*lower = strndup((char *)dst, len);
|
|
|
|
} else {
|
|
|
|
ret = PSL_ERR_TO_LOWER;
|
|
|
|
/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
|
|
|
|
}
|
|
|
|
|
|
|
|
if (lower)
|
|
|
|
*lower = strndup(dst, dst_len - dst_len_tmp);
|
|
|
|
ret = PSL_SUCCESS;
|
|
|
|
} else {
|
|
|
|
ret = PSL_ERR_TO_UTF8;
|
|
|
|
/* fprintf(stderr, "Failed to convert '%s' string into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
|
|
|
|
}
|
|
|
|
|
|
|
|
free(dst);
|
|
|
|
iconv_close(cd);
|
|
|
|
} else {
|
|
|
|
ret = PSL_ERR_TO_UTF8;
|
|
|
|
/* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
ret = PSL_SUCCESS;
|
|
|
|
|
|
|
|
/* convert to lowercase */
|
|
|
|
if (ret == PSL_SUCCESS) {
|
|
|
|
uint8_t *dst, resbuf[256];
|
|
|
|
size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */
|
|
|
|
|
|
|
|
/* we need a conversion to lowercase */
|
|
|
|
if ((dst = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) {
|
|
|
|
/* u8_tolower() does not terminate the result string */
|
|
|
|
if (lower)
|
|
|
|
*lower = strndup((char *)dst, len);
|
|
|
|
} else {
|
|
|
|
ret = PSL_ERR_TO_LOWER;
|
|
|
|
/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} while (0);
|
2014-06-17 17:14:02 +02:00
|
|
|
#endif
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|