autoconf first version
This commit is contained in:
parent
6563469fc1
commit
dc05276515
15
ChangeLog
15
ChangeLog
|
@ -1,2 +1,17 @@
|
||||||
|
2014-03-20 gettextize <bug-gnu-gettext@gnu.org>
|
||||||
|
|
||||||
|
* m4/gettext.m4: New file, from gettext-0.18.3.
|
||||||
|
* m4/iconv.m4: New file, from gettext-0.18.3.
|
||||||
|
* m4/lib-ld.m4: New file, from gettext-0.18.3.
|
||||||
|
* m4/lib-link.m4: New file, from gettext-0.18.3.
|
||||||
|
* m4/lib-prefix.m4: New file, from gettext-0.18.3.
|
||||||
|
* m4/nls.m4: New file, from gettext-0.18.3.
|
||||||
|
* m4/po.m4: New file, from gettext-0.18.3.
|
||||||
|
* m4/progtest.m4: New file, from gettext-0.18.3.
|
||||||
|
* Makefile.am (SUBDIRS): Add po.
|
||||||
|
(ACLOCAL_AMFLAGS): Add -I m4.
|
||||||
|
(EXTRA_DIST): New variable.
|
||||||
|
* configure.ac (AC_CONFIG_FILES): Add po/Makefile.in.
|
||||||
|
|
||||||
2014-02-20 Tim Ruehsen <tim.ruehsen@gmx.de>
|
2014-02-20 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||||
* initial setup
|
* initial setup
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# got some hints from https://gitorious.org/openismus-playground/examplelib/source
|
# got some hints from https://gitorious.org/openismus-playground/examplelib/source
|
||||||
|
|
||||||
SUBDIRS = po include libpsl src examples tests
|
SUBDIRS = po include src tests
|
||||||
ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS}
|
ACLOCAL_AMFLAGS = -I m4 ${ACLOCAL_FLAGS}
|
||||||
|
|
||||||
# Enable GTK-Doc during make distcheck
|
# Enable GTK-Doc during make distcheck
|
||||||
#DISTCHECK_CONFIGURE_FLAGS = --enable-gtk-doc --enable-man
|
#DISTCHECK_CONFIGURE_FLAGS = --enable-gtk-doc --enable-man
|
||||||
|
@ -12,3 +12,5 @@ ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS}
|
||||||
## in which case the correct install location would be $(datadir)/pkgconfig.
|
## in which case the correct install location would be $(datadir)/pkgconfig.
|
||||||
pkgconfigdir = $(libdir)/pkgconfig
|
pkgconfigdir = $(libdir)/pkgconfig
|
||||||
pkgconfig_DATA = libpsl-$(LIBPSL_API_VERSION).pc
|
pkgconfig_DATA = libpsl-$(LIBPSL_API_VERSION).pc
|
||||||
|
|
||||||
|
EXTRA_DIST = config.rpath
|
||||||
|
|
|
@ -9,7 +9,7 @@ Building from git
|
||||||
|
|
||||||
Download project and prepare sources with
|
Download project and prepare sources with
|
||||||
|
|
||||||
git clone http://github.com/rockdaboot/mget
|
git clone http://github.com/rockdaboot/libpsl
|
||||||
./autogen.sh
|
./autogen.sh
|
||||||
./configure
|
./configure
|
||||||
make
|
make
|
||||||
|
|
|
@ -9,9 +9,15 @@ AM_INIT_AUTOMAKE([1.10 -Wall no-define])
|
||||||
AC_CONFIG_HEADERS([config.h])
|
AC_CONFIG_HEADERS([config.h])
|
||||||
AC_PROG_CXX
|
AC_PROG_CXX
|
||||||
LT_INIT([disable-static])
|
LT_INIT([disable-static])
|
||||||
C_CONFIG_MACRO_DIR([m4])
|
AC_CONFIG_MACRO_DIR([m4])
|
||||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||||
|
|
||||||
|
#
|
||||||
|
# Gettext
|
||||||
|
#
|
||||||
|
AM_GNU_GETTEXT([external],[need-ngettext])
|
||||||
|
AM_GNU_GETTEXT_VERSION([0.18.1])
|
||||||
|
|
||||||
# Define these substitutions here to keep all version information in one place.
|
# Define these substitutions here to keep all version information in one place.
|
||||||
# For information on how to properly maintain the library version information,
|
# For information on how to properly maintain the library version information,
|
||||||
# refer to the libtool manual, section "Updating library version information":
|
# refer to the libtool manual, section "Updating library version information":
|
||||||
|
@ -44,6 +50,7 @@ AC_CONFIG_FILES([Makefile
|
||||||
include/Makefile
|
include/Makefile
|
||||||
src/Makefile
|
src/Makefile
|
||||||
po/Makefile.in
|
po/Makefile.in
|
||||||
|
data/Makefile
|
||||||
tests/Makefile
|
tests/Makefile
|
||||||
libpsl-${LIBPSL_API_VERSION}.pc:libpsl.pc.in])
|
libpsl-${LIBPSL_API_VERSION}.pc:libpsl.pc.in])
|
||||||
AC_OUTPUT
|
AC_OUTPUT
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
filesdir = $(datadir)/@PACKAGE@
|
||||||
|
files_DATA = effective_tld_names.dat
|
||||||
|
EXTRA_DIST = $(files_DATA)
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,60 @@
|
||||||
|
/*
|
||||||
|
* Copyright(c) 2014 Tim Ruehsen
|
||||||
|
*
|
||||||
|
* This file is part of libpsl.
|
||||||
|
*
|
||||||
|
* Libpsl is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libpsl is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with libpsl. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Header file for libpsl library routines
|
||||||
|
*
|
||||||
|
* Changelog
|
||||||
|
* 20.03.2014 Tim Ruehsen created
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _LIBPSL_LIBPSL_H
|
||||||
|
#define _LIBPSL_LIBPSL_H
|
||||||
|
|
||||||
|
// Wrap the declarations in extern "C" when compiled as C++, so that C++
// callers link against the unmangled C symbols. Expands to nothing in C.
#if defined(__cplusplus)
#  define PSL_BEGIN_DECLS extern "C" {
#  define PSL_END_DECLS }
#else
#  define PSL_BEGIN_DECLS
#  define PSL_END_DECLS
#endif
|
||||||
|
|
||||||
|
// Message translation helpers.
// With NLS enabled, _() maps to gettext() and ngettext() comes from <libintl.h>.
// Without NLS, _() yields the untranslated string and ngettext() picks the
// singular form for N == 1 and the plural form otherwise (English rule).
#if ENABLE_NLS != 0
#  include <libintl.h>
#  define _(STRING) gettext(STRING)
#else
#  define _(STRING) STRING
#  define ngettext(STRING1,STRING2,N) ((N) == 1 ? (STRING1) : (STRING2))
#endif
|
||||||
|
|
||||||
|
PSL_BEGIN_DECLS
|
||||||
|
|
||||||
|
// Opaque library context holding a loaded Public Suffix List.
typedef struct _psl_ctx_st psl_ctx_t;

// Release the context referenced by *psl and reset *psl to NULL.
// Passing NULL, or a pointer to a NULL context, is a no-op.
void psl_free(psl_ctx_t **psl);

// Load the Public Suffix List rules from the file 'fname'.
// The returned context has to be released with psl_free().
psl_ctx_t *psl_load_file(const char *fname);

// Return 1 if 'domain' matches a rule of the loaded list (i.e. it is a
// public suffix), 0 otherwise. A single leading dot in 'domain' is ignored.
int psl_is_tld(const psl_ctx_t *psl, const char *domain);
|
||||||
|
|
||||||
|
PSL_END_DECLS
|
||||||
|
|
||||||
|
#endif /* _LIBPSL_LIBPSL_H */
|
|
@ -0,0 +1,11 @@
|
||||||
|
prefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
includedir=@includedir@

Name: @PACKAGE_NAME@
Description: Public Suffix List C library.
Version: @PACKAGE_VERSION@
URL: @PACKAGE_URL@
# The library is built as libpsl-<api version>, so the linker name passed to
# -l must not repeat the "lib" prefix.
Libs: -L${libdir} -lpsl-@LIBPSL_API_VERSION@
Cflags: -I${includedir}/libpsl-@LIBPSL_API_VERSION@ -I${libdir}/libpsl-@LIBPSL_API_VERSION@/include
|
|
@ -0,0 +1,53 @@
|
||||||
|
# Makefile variables for PO directory in any package using GNU gettext.
|
||||||
|
|
||||||
|
# Usually the message domain is the same as the package name.
|
||||||
|
DOMAIN = $(PACKAGE)
|
||||||
|
|
||||||
|
# These two variables depend on the location of this directory.
|
||||||
|
subdir = po
|
||||||
|
top_builddir = ..
|
||||||
|
|
||||||
|
# These options get passed to xgettext.
|
||||||
|
XGETTEXT_OPTIONS = --keyword=_ --keyword=N_
|
||||||
|
|
||||||
|
# This is the copyright holder that gets inserted into the header of the
|
||||||
|
# $(DOMAIN).pot file. Set this to the copyright holder of the surrounding
|
||||||
|
# package. (Note that the msgstr strings, extracted from the package's
|
||||||
|
# sources, belong to the copyright holder of the package.) Translators are
|
||||||
|
# expected to transfer the copyright for their translations to this person
|
||||||
|
# or entity, or to disclaim their copyright. The empty string stands for
|
||||||
|
# the public domain; in this case the translators are expected to disclaim
|
||||||
|
# their copyright.
|
||||||
|
COPYRIGHT_HOLDER = Tim Ruehsen
|
||||||
|
|
||||||
|
# This is the email address or URL to which the translators shall report
|
||||||
|
# bugs in the untranslated strings:
|
||||||
|
# - Strings which are not entire sentences, see the maintainer guidelines
|
||||||
|
# in the GNU gettext documentation, section 'Preparing Strings'.
|
||||||
|
# - Strings which use unclear terms or require additional context to be
|
||||||
|
# understood.
|
||||||
|
# - Strings which make invalid assumptions about notation of date, time or
|
||||||
|
# money.
|
||||||
|
# - Pluralisation problems.
|
||||||
|
# - Incorrect English spelling.
|
||||||
|
# - Incorrect formatting.
|
||||||
|
# It can be your email address, or a mailing list address where translators
|
||||||
|
# can write to without being subscribed, or the URL of a web page through
|
||||||
|
# which the translators can contact you.
|
||||||
|
MSGID_BUGS_ADDRESS = tim.ruehsen@gmx.de
|
||||||
|
|
||||||
|
# This is the list of locale categories, beyond LC_MESSAGES, for which the
|
||||||
|
# message catalogs shall be used. It is usually empty.
|
||||||
|
EXTRA_LOCALE_CATEGORIES =
|
||||||
|
|
||||||
|
# This tells whether the $(DOMAIN).pot file contains messages with an 'msgctxt'
|
||||||
|
# context. Possible values are "yes" and "no". Set this to yes if the
|
||||||
|
# package uses functions taking also a message context, like pgettext(), or
|
||||||
|
# if in $(XGETTEXT_OPTIONS) you define keywords with a context argument.
|
||||||
|
USE_MSGCTXT = no
|
||||||
|
|
||||||
|
# These options get passed to msgmerge.
|
||||||
|
# Useful options are in particular:
|
||||||
|
# --previous to keep previous msgids of translated messages,
|
||||||
|
# --quiet to reduce the verbosity.
|
||||||
|
MSGMERGE_OPTIONS =
|
|
@ -0,0 +1,2 @@
|
||||||
|
# List of source files which contain translatable strings.
|
||||||
|
src/psl.c
|
|
@ -0,0 +1,14 @@
|
||||||
|
lib_LTLIBRARIES = libpsl-@LIBPSL_API_VERSION@.la
|
||||||
|
libpsl_@LIBPSL_API_VERSION@_la_SOURCES = psl.c
|
||||||
|
|
||||||
|
libpsl_@LIBPSL_API_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include
|
||||||
|
|
||||||
|
# include ABI version information
|
||||||
|
libpsl_@LIBPSL_API_VERSION@_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
|
||||||
|
|
||||||
|
#bin_PROGRAMS = test_linking
|
||||||
|
#noinst_PROGRAMS = test_linking
|
||||||
|
#test_linking_SOURCES = test_linking.c
|
||||||
|
#test_linking_CPPFLAGS = -I$(top_srcdir)/include
|
||||||
|
#test_linking_LDADD = libpsl-@LIBPSL_API_VERSION@.la
|
||||||
|
#test_linking_LDFLAGS = -static
|
|
@ -0,0 +1,333 @@
|
||||||
|
/*
|
||||||
|
* Copyright(c) 2014 Tim Ruehsen
|
||||||
|
*
|
||||||
|
* This file is part of MGet.
|
||||||
|
*
|
||||||
|
* Mget is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Mget is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with Mget. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Public Suffix List routines (right now experimental)
|
||||||
|
*
|
||||||
|
* Changelog
|
||||||
|
* 19.03.2014 Tim Ruehsen created from libmget/cookie.c
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
|
||||||
|
#if HAVE_CONFIG_H
|
||||||
|
# include <config.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
|
#include <libpsl.h>
|
||||||
|
|
||||||
|
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
||||||
|
|
||||||
|
// One suffix rule, or a lookup key built from a domain name.
typedef struct {
	char label_buf[42];     // storage for the lowercased rule text
	const char *label;      // text used for comparisons (label_buf or caller's string)
	unsigned short length;  // length of the text in 'label'
	unsigned char nlabels;  // number of labels
	unsigned char wildcard; // this is a wildcard rule (e.g. *.sapporo.jp)
} _psl_entry_t;
|
||||||
|
|
||||||
|
// stripped-down version of the libmget vector routines
|
||||||
|
typedef struct {
|
||||||
|
int
|
||||||
|
(*cmp)(const _psl_entry_t *, const _psl_entry_t *); // comparison function
|
||||||
|
_psl_entry_t
|
||||||
|
**entry; // pointer to array of pointers to elements
|
||||||
|
int
|
||||||
|
max, // allocated elements
|
||||||
|
cur; // number of elements in use
|
||||||
|
} _psl_vector_t;
|
||||||
|
|
||||||
|
struct _psl_ctx_st {
|
||||||
|
_psl_vector_t
|
||||||
|
*suffixes,
|
||||||
|
*suffix_exceptions;
|
||||||
|
};
|
||||||
|
|
||||||
|
static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t *, const _psl_entry_t *))
|
||||||
|
{
|
||||||
|
_psl_vector_t *v;
|
||||||
|
|
||||||
|
if (!(v = calloc(1, sizeof(_psl_vector_t))))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (!(v->entry = malloc(max * sizeof(_psl_entry_t *)))) {
|
||||||
|
free(v);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
v->max = max;
|
||||||
|
v->cmp = cmp;
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Free all entries, the pointer array and the vector itself, then reset *v
// to NULL so the caller is not left with a dangling pointer.
// A NULL 'v' or a NULL '*v' is a no-op.
static void _vector_free(_psl_vector_t **v)
{
	if (!v || !*v)
		return;

	if ((*v)->entry) {
		int it;

		for (it = 0; it < (*v)->cur; it++)
			free((*v)->entry[it]);

		free((*v)->entry);
	}

	free(*v);
	*v = NULL;
}
|
||||||
|
|
||||||
|
static _psl_entry_t *_vector_get(const _psl_vector_t *v, int pos)
|
||||||
|
{
|
||||||
|
if (pos < 0 || !v || pos >= v->cur) return NULL;
|
||||||
|
|
||||||
|
return v->entry[pos];
|
||||||
|
}
|
||||||
|
|
||||||
|
// the entries must be sorted by
|
||||||
|
static int _vector_find(const _psl_vector_t *v, const _psl_entry_t *elem)
|
||||||
|
{
|
||||||
|
if (v) {
|
||||||
|
int l, r, m;
|
||||||
|
int res;
|
||||||
|
|
||||||
|
// binary search for element (exact match)
|
||||||
|
for (l = 0, r = v->cur - 1; l <= r;) {
|
||||||
|
m = (l + r) / 2;
|
||||||
|
if ((res = v->cmp(elem, v->entry[m])) > 0) l = m + 1;
|
||||||
|
else if (res < 0) r = m - 1;
|
||||||
|
else return m;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1; // not found
|
||||||
|
}
|
||||||
|
|
||||||
|
// Append a heap-allocated copy of '*elem' to the vector, growing the pointer
// array when it is full. Returns the index of the new entry, or -1 if 'v' or
// 'elem' is NULL or an allocation fails (the vector is left unchanged then).
static int _vector_add(_psl_vector_t *v, const _psl_entry_t *elem)
{
	_psl_entry_t *copy;

	if (!v || !elem)
		return -1;

	if (v->cur >= v->max) {
		// double the capacity; start at 16 so a capacity of 0 can still grow
		int newmax = v->max > 0 ? v->max * 2 : 16;
		_psl_entry_t **tmp = realloc(v->entry, newmax * sizeof(*tmp));

		if (!tmp)
			return -1; // v->entry is still valid, nothing leaked

		v->entry = tmp;
		v->max = newmax;
	}

	if (!(copy = malloc(sizeof(*copy))))
		return -1;

	memcpy(copy, elem, sizeof(*copy));
	v->entry[v->cur] = copy;

	return v->cur++;
}
|
||||||
|
|
||||||
|
static int _compare(const void *p1, const void *p2, void *v)
|
||||||
|
{
|
||||||
|
return ((_psl_vector_t *)v)->cmp(*((_psl_entry_t **)p1), *((_psl_entry_t **)p2));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort the entries in place with the vector's comparison function.
// Does nothing if 'v' is NULL or has no comparison function.
// Uses the GNU flavour of qsort_r() (comparator takes the context last).
static void _vector_sort(_psl_vector_t *v)
{
	if (v && v->cmp)
		qsort_r(v->entry, v->cur, sizeof(*v->entry), _compare, v); // element = one entry pointer
}
|
||||||
|
|
||||||
|
// With this sort order we can easily check whether a domain matches a rule (a match means a supercookie!)
|
||||||
|
|
||||||
|
static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
|
||||||
|
{
|
||||||
|
int n;
|
||||||
|
|
||||||
|
if ((n = s2->nlabels - s1->nlabels))
|
||||||
|
return n; // most labels first
|
||||||
|
|
||||||
|
if ((n=s1->length - s2->length))
|
||||||
|
return n; // shorter rules first
|
||||||
|
|
||||||
|
return strcmp(s1->label, s2->label);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill '*suffix' from the textual rule 'rule' of 'length' characters.
// The rule text is stored lowercased in label_buf; a leading "*." marks a
// wildcard rule and is not stored. Rules that are too long for label_buf or
// that use '*' in any other way are reported on stderr and rejected: the
// entry is then left empty with nlabels == 0.
static void _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
{
	const char *src = rule;
	char *dst = suffix->label_buf;

	// start from a fully defined, empty entry so that the failure paths
	// never leave uninitialized members behind
	suffix->label = suffix->label_buf;
	suffix->label_buf[0] = 0;
	suffix->length = 0;
	suffix->nlabels = 0;
	suffix->wildcard = 0;

	if (length >= sizeof(suffix->label_buf) - 1) {
		fprintf(stderr, _("Suffix rule too long (ignored): %s\n"), rule);
		return;
	}

	if (*src == '*') {
		if (src[1] != '.') {
			fprintf(stderr, _("Unsupported kind of rule (ignored): %s\n"), rule);
			return;
		}
		src += 2; // skip "*."
		length -= 2;
		suffix->wildcard = 1;
	}

	suffix->nlabels = 1;

	// copy at most 'length' characters; the check above guarantees they fit
	for (; length && *src; length--, src++) {
		if (*src == '.')
			suffix->nlabels++;
		// <ctype.h> functions need an unsigned char value (rules may contain UTF-8 bytes)
		*dst++ = (char)tolower((unsigned char)*src);
	}
	*dst = 0;

	suffix->length = (unsigned short)(dst - suffix->label_buf);
}
|
||||||
|
|
||||||
|
int psl_is_tld(const psl_ctx_t *psl, const char *domain)
|
||||||
|
{
|
||||||
|
_psl_entry_t suffix, *rule;
|
||||||
|
const char *p, *label_bak;
|
||||||
|
unsigned short length_bak;
|
||||||
|
|
||||||
|
// this function should be called without leading dots, just make shure
|
||||||
|
suffix.label = domain + (*domain == '.');
|
||||||
|
suffix.length = strlen(suffix.label);
|
||||||
|
suffix.wildcard = 0;
|
||||||
|
suffix.nlabels = 1;
|
||||||
|
|
||||||
|
for (p = suffix.label; *p; p++)
|
||||||
|
if (*p == '.')
|
||||||
|
suffix.nlabels++;
|
||||||
|
|
||||||
|
// if domain has enough labels, it won't match
|
||||||
|
rule = _vector_get(psl->suffixes, 0);
|
||||||
|
if (!rule || rule->nlabels < suffix.nlabels - 1)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));
|
||||||
|
if (rule) {
|
||||||
|
// definitely a match, no matter if the found rule is a wildcard or not
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
label_bak = suffix.label;
|
||||||
|
length_bak = suffix.length;
|
||||||
|
|
||||||
|
if ((suffix.label = strchr(suffix.label, '.'))) {
|
||||||
|
suffix.label++;
|
||||||
|
suffix.length = strlen(suffix.label);
|
||||||
|
suffix.nlabels--;
|
||||||
|
|
||||||
|
rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));
|
||||||
|
if (rule) {
|
||||||
|
if (rule->wildcard) {
|
||||||
|
// now that we matched a wildcard, we have to check for an exception
|
||||||
|
suffix.label = label_bak;
|
||||||
|
suffix.length = length_bak;
|
||||||
|
suffix.nlabels++;
|
||||||
|
|
||||||
|
if (_vector_get(psl->suffix_exceptions, _vector_find(psl->suffix_exceptions, &suffix)) != 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
psl_ctx_t *psl_load_file(const char *fname)
|
||||||
|
{
|
||||||
|
psl_ctx_t *psl;
|
||||||
|
_psl_entry_t suffix, *suffixp;
|
||||||
|
FILE *fp;
|
||||||
|
int nsuffixes = 0;
|
||||||
|
char *buf = NULL, *linep, *p;
|
||||||
|
size_t bufsize = 0;
|
||||||
|
ssize_t buflen;
|
||||||
|
|
||||||
|
if (!(psl = calloc(1, sizeof(psl_ctx_t))))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
// as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
||||||
|
// as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
||||||
|
psl->suffixes = _vector_alloc(8*1024, _suffix_compare);
|
||||||
|
psl->suffix_exceptions = _vector_alloc(64, _suffix_compare);
|
||||||
|
|
||||||
|
if ((fp = fopen(fname, "r"))) {
|
||||||
|
while ((buflen = getline(&buf, &bufsize, fp)) >= 0) {
|
||||||
|
linep = buf;
|
||||||
|
|
||||||
|
while (isspace(*linep)) linep++; // ignore leading whitespace
|
||||||
|
if (!*linep) continue; // skip empty lines
|
||||||
|
|
||||||
|
if (*linep == '/' && linep[1] == '/')
|
||||||
|
continue; // skip comments
|
||||||
|
|
||||||
|
// parse suffix rule
|
||||||
|
for (p = linep; *linep && !isspace(*linep);) linep++;
|
||||||
|
*linep = 0;
|
||||||
|
|
||||||
|
if (*p == '!') {
|
||||||
|
// add to exceptions
|
||||||
|
_suffix_init(&suffix, p + 1, linep - p - 1);
|
||||||
|
suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix));
|
||||||
|
} else {
|
||||||
|
_suffix_init(&suffix, p, linep - p);
|
||||||
|
suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (suffixp)
|
||||||
|
suffixp->label = suffixp->label_buf; // set label to changed address
|
||||||
|
|
||||||
|
nsuffixes++;;
|
||||||
|
}
|
||||||
|
|
||||||
|
free(buf);
|
||||||
|
fclose(fp);
|
||||||
|
|
||||||
|
_vector_sort(psl->suffix_exceptions);
|
||||||
|
_vector_sort(psl->suffixes);
|
||||||
|
|
||||||
|
printf("loaded %d (%d/%d) suffixes\n", nsuffixes, psl->suffixes->cur, psl->suffix_exceptions->cur);
|
||||||
|
|
||||||
|
} else
|
||||||
|
fprintf(stderr, _("Failed to open PSL file '%s'\n"), fname);
|
||||||
|
|
||||||
|
return psl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Release a context created by psl_load_file(): both rule vectors and the
// context itself are freed and *psl is reset to NULL.
// A NULL 'psl' or a NULL '*psl' is a no-op.
void psl_free(psl_ctx_t **psl)
{
	psl_ctx_t *ctx;

	if (!psl || !(ctx = *psl))
		return;

	_vector_free(&ctx->suffixes);
	_vector_free(&ctx->suffix_exceptions);
	free(ctx);

	*psl = NULL;
}
|
|
@ -2,31 +2,14 @@
|
||||||
DEFS = @DEFS@ -DDATADIR=\"$(top_srcdir)/data\" -DSRCDIR=\"$(srcdir)\"
|
DEFS = @DEFS@ -DDATADIR=\"$(top_srcdir)/data\" -DSRCDIR=\"$(srcdir)\"
|
||||||
AM_CPPFLAGS = -Wno-missing-field-initializers -I$(top_srcdir)/include
|
AM_CPPFLAGS = -Wno-missing-field-initializers -I$(top_srcdir)/include
|
||||||
AM_LDFLAGS = -static
|
AM_LDFLAGS = -static
|
||||||
LDADD = libtest.la ../src/libpsl-@LIBPSL_API_VERSION@.la
|
LDADD = ../src/libpsl-@LIBPSL_API_VERSION@.la
|
||||||
|
|
||||||
PSL_TESTS = test
|
PSL_TESTS = test-is-tld
|
||||||
|
|
||||||
check_PROGRAMS = $(PSL_TESTS)
|
check_PROGRAMS = $(PSL_TESTS)
|
||||||
|
|
||||||
test_SOURCES = test.c
|
test_is_tld_SOURCES = test-is-tld.c
|
||||||
test_LDADD = ../src/libpsl-@LIBPSL_API_VERSION@.la
|
test_is_tld_LDADD = ../src/libpsl-@LIBPSL_API_VERSION@.la
|
||||||
test_parse_html_LDADD = ../libmget/libmget-@LIBPSL_API_VERSION@.la
|
|
||||||
|
|
||||||
noinst_LTLIBRARIES = libtest.la
|
|
||||||
libtest_la_SOURCES = libtest.c
|
|
||||||
libtest_la_CPPFLAGS = -I$(top_srcdir)/tests -I$(top_srcdir)/include
|
|
||||||
#libtest_LDADD = libtest.o
|
|
||||||
|
|
||||||
EXTRA_DIST = libtest.h
|
|
||||||
dist-hook:
|
|
||||||
rm -f $(distdir)/files/elb_bibel.txt
|
|
||||||
# cp $(top_srcdir)/data/public_suffixes.txt $(distdir)/files/
|
|
||||||
# rm -rf `find $(distdir)/files -name CVS`
|
|
||||||
|
|
||||||
#dist-hook:
|
|
||||||
# mkdir $(distdir)/random
|
|
||||||
# cp -p $(srcdir)/random/a1 $(srcdir)/random/a2 $(distdir)/random
|
|
||||||
|
|
||||||
TESTS_ENVIRONMENT = TESTS_VALGRIND="@VALGRIND_ENVIRONMENT@"
|
TESTS_ENVIRONMENT = TESTS_VALGRIND="@VALGRIND_ENVIRONMENT@"
|
||||||
#TESTS = test $(PSL_TESTS)
|
|
||||||
TESTS = $(PSL_TESTS)
|
TESTS = $(PSL_TESTS)
|
||||||
|
|
|
@ -0,0 +1,105 @@
|
||||||
|
/*
|
||||||
|
* Copyright(c) 2014 Tim Ruehsen
|
||||||
|
*
|
||||||
|
* This file is part of MGet.
|
||||||
|
*
|
||||||
|
* Mget is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Mget is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with Mget. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Public Suffix List routines (right now experimental)
|
||||||
|
*
|
||||||
|
* Changelog
|
||||||
|
* 19.03.2014 Tim Ruehsen created from libmget/cookie.c
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if HAVE_CONFIG_H
|
||||||
|
# include <config.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include <libpsl.h>
|
||||||
|
|
||||||
|
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
||||||
|
|
||||||
|
// counters for passed and failed checks, reported by main()
static int ok, failed;
|
||||||
|
|
||||||
|
static void test_psl(void)
|
||||||
|
{
|
||||||
|
static const struct test_data {
|
||||||
|
const char
|
||||||
|
*domain;
|
||||||
|
int
|
||||||
|
result;
|
||||||
|
} test_data[] = {
|
||||||
|
{ "www.example.com", 0 },
|
||||||
|
{ "com.ar", 1 },
|
||||||
|
{ "www.com.ar", 0 },
|
||||||
|
{ "cc.ar.us", 1 },
|
||||||
|
{ ".cc.ar.us", 1 },
|
||||||
|
{ "www.cc.ar.us", 0 },
|
||||||
|
{ "www.ck", 0 }, // exception from *.ck
|
||||||
|
{ "abc.www.ck", 0 },
|
||||||
|
{ "xxx.ck", 1 },
|
||||||
|
{ "www.xxx.ck", 0 },
|
||||||
|
};
|
||||||
|
unsigned it;
|
||||||
|
psl_ctx_t *psl;
|
||||||
|
|
||||||
|
psl = psl_load_file(DATADIR "/effective_tld_names.dat");
|
||||||
|
|
||||||
|
for (it = 0; it < countof(test_data); it++) {
|
||||||
|
const struct test_data *t = &test_data[it];
|
||||||
|
int result = psl_is_tld(psl, t->domain);
|
||||||
|
|
||||||
|
if (result == t->result) {
|
||||||
|
ok++;
|
||||||
|
} else {
|
||||||
|
failed++;
|
||||||
|
printf("psl_is_tld(%s)=%d (expected %d)\n", t->domain, result, t->result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
psl_free(&psl);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, const char * const *argv)
|
||||||
|
{
|
||||||
|
// if VALGRIND testing is enabled, we have to call ourselves with valgrind checking
|
||||||
|
if (argc == 1) {
|
||||||
|
const char *valgrind = getenv("TESTS_VALGRIND");
|
||||||
|
|
||||||
|
if (valgrind && *valgrind) {
|
||||||
|
char cmd[strlen(valgrind)+strlen(argv[0])+32];
|
||||||
|
|
||||||
|
snprintf(cmd, sizeof(cmd), "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||||
|
return system(cmd) != 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test_psl();
|
||||||
|
|
||||||
|
if (failed) {
|
||||||
|
printf("Summary: %d out of %d tests failed\n", failed, ok + failed);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Summary: All %d tests passed\n", ok + failed);
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue