Merge branch 'master' into debian
Conflicts: .travis.yml (merged in favor of master)
This commit is contained in:
commit
b62b9285e5
|
@ -0,0 +1,9 @@
|
|||
;; emacs local configuration settings for libpsl source
|
||||
;; surmised by dkg on 2014-03-21 14:35:49-0400
|
||||
|
||||
((c-mode
|
||||
(indent-tabs-mode . t)
|
||||
(tab-width . 4)
|
||||
(c-basic-offset . 4)
|
||||
(c-file-style . "linux"))
|
||||
)
|
|
@ -0,0 +1,57 @@
|
|||
*~
|
||||
Makefile
|
||||
Makefile.in
|
||||
aclocal.m4
|
||||
autom4te.cache/
|
||||
compile
|
||||
config.guess
|
||||
config.h
|
||||
config.h.in
|
||||
config.log
|
||||
config.rpath
|
||||
config.status
|
||||
config.sub
|
||||
configure
|
||||
data/Makefile
|
||||
data/Makefile.in
|
||||
depcomp
|
||||
include/Makefile
|
||||
include/Makefile.in
|
||||
install-sh
|
||||
libpsl-*.pc
|
||||
libtool
|
||||
ltmain.sh
|
||||
m4/
|
||||
missing
|
||||
po/Makefile
|
||||
po/Makefile.in
|
||||
po/Makefile.in.in
|
||||
po/Makevars.template
|
||||
po/POTFILES
|
||||
po/Rules-quot
|
||||
po/boldquot.sed
|
||||
po/en@boldquot.header
|
||||
po/en@quot.header
|
||||
po/insert-header.sin
|
||||
po/psl.pot
|
||||
po/quot.sed
|
||||
po/remove-potcdate.sin
|
||||
po/remove-potcdate.sed
|
||||
po/stamp-po
|
||||
src/.deps/
|
||||
src/.libs/
|
||||
src/Makefile
|
||||
src/Makefile.in
|
||||
src/libpsl-*.la
|
||||
src/libpsl_*_la-psl.lo
|
||||
stamp-h1
|
||||
test-driver
|
||||
tests/.deps/
|
||||
tests/Makefile
|
||||
tests/Makefile.in
|
||||
tests/test-is-tld
|
||||
tests/test-is-tld.log
|
||||
tests/test-is-tld.o
|
||||
tests/test-is-tld.trs
|
||||
tests/test-suite.log
|
||||
psl-*.tar.gz
|
14
.travis.yml
14
.travis.yml
|
@ -1,4 +1,10 @@
|
|||
# blacklist
|
||||
branches:
|
||||
except:
|
||||
- debian
|
||||
language: c
|
||||
compiler:
|
||||
- gcc
|
||||
- clang
|
||||
# Change this to your needs
|
||||
script: ./autogen.sh && ./configure --enable-gtk-doc && make -j4 && make check -j4 && make distcheck
|
||||
before_install:
|
||||
- apt-cache search libicu | grep icu
|
||||
- sudo apt-get -qq update
|
||||
- sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu-dev
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
Authors of and contributors to libpsl.
|
||||
Thank you very much for spending your time!
|
||||
|
||||
Also many thanks to anyone who contributed ideas,
|
||||
took part in discussions or 'just' asked questions.
|
||||
|
||||
Please drop me a note if you feel you should have
|
||||
been mentioned here.
|
||||
|
||||
Tim Ruehsen (Implementation of libpsl)
|
||||
Daniel Kahn Gillmor (Discussion, Ideas, Organization)
|
|
@ -0,0 +1,17 @@
|
|||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,17 @@
|
|||
2014-03-20 gettextize <bug-gnu-gettext@gnu.org>
|
||||
|
||||
* m4/gettext.m4: New file, from gettext-0.18.3.
|
||||
* m4/iconv.m4: New file, from gettext-0.18.3.
|
||||
* m4/lib-ld.m4: New file, from gettext-0.18.3.
|
||||
* m4/lib-link.m4: New file, from gettext-0.18.3.
|
||||
* m4/lib-prefix.m4: New file, from gettext-0.18.3.
|
||||
* m4/nls.m4: New file, from gettext-0.18.3.
|
||||
* m4/po.m4: New file, from gettext-0.18.3.
|
||||
* m4/progtest.m4: New file, from gettext-0.18.3.
|
||||
* Makefile.am (SUBDIRS): Add po.
|
||||
(ACLOCAL_AMFLAGS): Add -I m4.
|
||||
(EXTRA_DIST): New variable.
|
||||
* configure.ac (AC_CONFIG_FILES): Add po/Makefile.in.
|
||||
|
||||
2014-02-20 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
* initial setup
|
|
@ -0,0 +1,17 @@
|
|||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,16 @@
|
|||
# got some hints from https://gitorious.org/openismus-playground/examplelib/source
|
||||
|
||||
SUBDIRS = po include src tools data docs/libpsl tests
|
||||
ACLOCAL_AMFLAGS = -I m4 ${ACLOCAL_FLAGS}
|
||||
|
||||
# Enable GTK-Doc during make distcheck
|
||||
DISTCHECK_CONFIGURE_FLAGS = --enable-gtk-doc --enable-man
|
||||
|
||||
## Install the generated pkg-config file (.pc) into the expected location for
|
||||
## architecture-dependent package configuration information. Occasionally,
|
||||
## pkg-config files are also used for architecture-independent data packages,
|
||||
## in which case the correct install location would be $(datadir)/pkgconfig.
|
||||
pkgconfigdir = $(libdir)/pkgconfig
|
||||
pkgconfig_DATA = libpsl.pc
|
||||
|
||||
EXTRA_DIST = config.rpath
|
|
@ -0,0 +1,26 @@
|
|||
Copyright (C) 2014 Tim Ruehsen
|
||||
|
||||
05.06.2014 Release V0.3.0
|
||||
* added support for libicu in psl2c (IDNA2008 UTS#46)
|
||||
this needs pkg-config and libicu-dev installed
|
||||
* added --version to psl utility
|
||||
|
||||
31.05.2014 Release V0.2.5
|
||||
* added psl_get_version()
|
||||
* removed version from library name
|
||||
|
||||
30.05.2014 Release V0.2.4
|
||||
* fixed psl_builtin() to return NULL if no built-in PSL data is available
|
||||
|
||||
27.05.2014 Release V0.2.3
|
||||
* changed API version to 0.2
|
||||
|
||||
26.05.2014 Release V0.2.2
|
||||
* changed code to C89
|
||||
* added a few test cases
|
||||
* build static library by default
|
||||
|
||||
25.04.2014 Hotfix release V0.2.1
|
||||
* updated to the latest Public Suffix List
|
||||
|
||||
25.04.2014 Initial release V0.2
|
|
@ -0,0 +1,103 @@
|
|||
[![Build Status](https://travis-ci.org/rockdaboot/libpsl.png?branch=master)](https://travis-ci.org/rockdaboot/libpsl)
|
||||
|
||||
libpsl - C library to handle the Public Suffix List
|
||||
===================================================
|
||||
|
||||
A "public suffix" is a domain name under which Internet users can directly register own names.
|
||||
|
||||
Browsers and other web clients can use it to
|
||||
|
||||
- avoid privacy-leaking "supercookies"
|
||||
- avoid privacy-leaking "super domain" certificates ([see post from Jeffry Walton](http://lists.gnu.org/archive/html/bug-wget/2014-03/msg00093.html))
|
||||
- highlight parts of the domain in a user interface
|
||||
- sort domain lists by site
|
||||
|
||||
Libpsl...
|
||||
|
||||
- has built-in PSL data for fast access
|
||||
- allows loading PSL data from files
|
||||
- checks if a given domain is a "public suffix"
|
||||
- provides immediate cookie domain verification
|
||||
- finds the longest public part of a given domain
|
||||
- finds the shortest private part of a given domain
|
||||
- works with international domains (UTF-8 and IDNA2008 Punycode)
|
||||
- is thread-safe
|
||||
- handles IDNA2008 UTS#46 (libicu is used by psl2c if installed)
|
||||
|
||||
Find more information about the Public Suffix List [here](http://publicsuffix.org/).
|
||||
|
||||
Download the Public Suffix List [here](https://hg.mozilla.org/mozilla-central/raw-file/tip/netwerk/dns/effective_tld_names.dat).
|
||||
|
||||
|
||||
API Documentation
|
||||
-----------------
|
||||
|
||||
You can find the current API documentation [here](https://rockdaboot.github.io/libpsl).
|
||||
|
||||
|
||||
Quick API example
|
||||
-----------------
|
||||
|
||||
#include <stdio.h>
|
||||
#include <libpsl.h>
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
const char *domain = "www.example.com";
|
||||
const char *cookie_domain = ".com";
|
||||
const psl_ctx_t *psl = psl_builtin();
|
||||
int is_public, is_acceptable;
|
||||
|
||||
is_public = psl_is_public_suffix(psl, domain);
|
||||
printf("%s %s a public suffix.\n", domain, is_public ? "is" : "is not");
|
||||
|
||||
is_acceptable = psl_is_cookie_domain_acceptable(psl, domain, cookie_domain);
|
||||
printf("cookie domain '%s' %s acceptable for domain '%s'.\n",
|
||||
cookie_domain, is_acceptable ? "is" : "is not", domain);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Command Line Tool
|
||||
-----------------
|
||||
|
||||
Libpsl comes with a tool 'psl' that gives you access to most of the
|
||||
library API via command line.
|
||||
|
||||
$ psl --help
|
||||
|
||||
prints the usage.
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
Libpsl is made available under the terms of the MIT license.<br>
|
||||
See the LICENSE file that accompanies this distribution for the full text of the license.
|
||||
|
||||
|
||||
Building from git
|
||||
-----------------
|
||||
|
||||
Download project and prepare sources with
|
||||
|
||||
git clone http://github.com/rockdaboot/libpsl
|
||||
./autogen.sh
|
||||
./configure
|
||||
make
|
||||
make check
|
||||
|
||||
|
||||
Mailing List
|
||||
------------
|
||||
|
||||
[Mailing List Archive](http://news.gmane.org/gmane.network.dns.libpsl.bugs)
|
||||
|
||||
[Mailing List](https://groups.google.com/forum/#!forum/libpsl-bugs)
|
||||
|
||||
To join the mailing list send an email to
|
||||
|
||||
<libpsl-bugs+subscribe@googlegroups.com>
|
||||
|
||||
and follow the instructions provided in the reply email.
|
||||
|
||||
Or click [join](https://groups.google.com/forum/#!forum/libpsl-bugs/join).
|
|
@ -0,0 +1,37 @@
|
|||
#!/bin/sh
# Bootstraps the autotools build system. 'set -e' is intentionally not used:
# the steps below tolerate or explicitly handle individual command failures.
|
||||
|
||||
if test -z `which autoreconf`; then
|
||||
echo "No 'autoreconf' found. You must install the autoconf package."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if test -z `which idn2`; then
|
||||
echo "No 'idn2' found. You must install the idn2 package."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# create m4 before gtkdocize
|
||||
mkdir m4 2>/dev/null
|
||||
|
||||
GTKDOCIZE=`which gtkdocize 2>/dev/null`
|
||||
if test -z $GTKDOCIZE; then
|
||||
echo "No gtk-doc support found. You can't build the docs."
|
||||
echo "EXTRA_DIST =" >gtk-doc.make
|
||||
echo "CLEANFILES =" >>gtk-doc.make
|
||||
else
|
||||
gtkdocize || exit $?
|
||||
fi
|
||||
|
||||
autoreconf --install --force --symlink || exit $?
|
||||
|
||||
echo
|
||||
echo "----------------------------------------------------------------"
|
||||
echo "Initialized build system. For a common configuration please run:"
|
||||
echo "----------------------------------------------------------------"
|
||||
echo
|
||||
if test -z $GTKDOCIZE; then
|
||||
echo "./configure"
|
||||
else
|
||||
echo "./configure --enable-gtk-doc"
|
||||
fi
|
||||
echo
|
|
@ -0,0 +1,145 @@
|
|||
|
||||
AC_INIT([libpsl], [0.3.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl])
|
||||
AC_PREREQ([2.59])
|
||||
AM_INIT_AUTOMAKE([1.10 -Wall no-define])
|
||||
|
||||
# Generate two configuration headers; one for building the library itself with
|
||||
# an autogenerated template, and a second one that will be installed alongside
|
||||
# the library.
|
||||
AC_CONFIG_HEADERS([config.h])
|
||||
AC_PROG_CXX
|
||||
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
|
||||
#LT_INIT([disable-static])
|
||||
LT_INIT
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||
|
||||
#
|
||||
# Gettext
|
||||
#
|
||||
AM_GNU_GETTEXT([external],[need-ngettext])
|
||||
AM_GNU_GETTEXT_VERSION([0.18.1])
|
||||
|
||||
#
|
||||
# check for gtk-doc
|
||||
#
|
||||
m4_ifdef([GTK_DOC_CHECK], [
|
||||
GTK_DOC_CHECK([1.15],[--flavour no-tmpl])
|
||||
],[
|
||||
AM_CONDITIONAL([ENABLE_GTK_DOC], false)
|
||||
])
|
||||
|
||||
#
|
||||
# enable creation of man pages
|
||||
#
|
||||
# Honor the user's choice: $enableval is "no" for --disable-man, "yes" for --enable-man.
AC_ARG_ENABLE(man,[AC_HELP_STRING([--enable-man],
|
||||
[generate man pages [default=no]])],enable_man=$enableval,enable_man=no)
|
||||
AS_IF([test "$enable_man" != no], [
|
||||
AC_PATH_PROG([XSLTPROC], [xsltproc])
|
||||
AS_IF([test -z "$XSLTPROC"], [
|
||||
AS_IF([test "$enable_man" = yes], [
|
||||
AC_MSG_ERROR([xsltproc is required for --enable-man])
|
||||
])
|
||||
enable_man=no
|
||||
])
|
||||
])
|
||||
AM_CONDITIONAL(ENABLE_MAN, test x$enable_man != xno)
|
||||
AC_MSG_CHECKING([whether to generate man pages])
|
||||
AS_IF([ test "$enable_man" != no ], [
|
||||
AC_MSG_RESULT([yes])
|
||||
], [
|
||||
AC_MSG_RESULT([no])
|
||||
])
|
||||
|
||||
|
||||
# Define these substitutions here to keep all version information in one place.
|
||||
# For information on how to properly maintain the library version information,
|
||||
# refer to the libtool manual, section "Updating library version information":
|
||||
# http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
|
||||
#
|
||||
# 1. Start with version information of ‘0:0:0’ for each libtool library.
|
||||
# 2. Update the version information only immediately before a public release of your software. More frequent updates are unnecessary, and only guarantee that the current interface number gets larger faster.
|
||||
# 3. If the library source code has changed at all since the last update, then increment revision (‘c:r:a’ becomes ‘c:r+1:a’).
|
||||
# 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0.
|
||||
# 5. If any interfaces have been added since the last public release, then increment age.
|
||||
# 6. If any interfaces have been removed or changed since the last public release, then set age to 0.
|
||||
AC_SUBST([LIBPSL_SO_VERSION], [1:0:1])
|
||||
AC_SUBST([LIBPSL_VERSION], $VERSION)
|
||||
|
||||
# Check for enable/disable builtin PSL data
|
||||
AC_ARG_ENABLE(builtin,
|
||||
AS_HELP_STRING([--disable-builtin], [do not compile PSL data into library]),
|
||||
[
|
||||
enable_builtin=no
|
||||
], [
|
||||
enable_builtin=yes
|
||||
AC_DEFINE([WITH_BUILTIN], [1], [compile PSL data into library])
|
||||
|
||||
PKG_CHECK_MODULES(LIBICU, [icu-uc],
|
||||
[AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])],
|
||||
[AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2'.))])
|
||||
])
|
||||
AM_CONDITIONAL([WITH_BUILTIN], [test $enable_builtin = yes])
|
||||
|
||||
# Check for valgrind
|
||||
ac_enable_valgrind=no
|
||||
AC_ARG_ENABLE(valgrind-tests,
|
||||
AS_HELP_STRING([--enable-valgrind-tests], [enable using Valgrind for tests]),
|
||||
[ac_enable_valgrind=$enableval], [ac_enable_valgrind=no])
|
||||
|
||||
if test "${ac_enable_valgrind}" = "yes" ; then
|
||||
AC_CHECK_PROG(HAVE_VALGRIND, valgrind, yes, no)
|
||||
if test "$HAVE_VALGRIND" = "yes" ; then
|
||||
VALGRIND_ENVIRONMENT="valgrind --error-exitcode=1 --leak-check=yes --show-reachable=yes --track-origins=yes"
|
||||
AC_SUBST(VALGRIND_ENVIRONMENT)
|
||||
TESTS_INFO="Test suite will be run under Valgrind"
|
||||
else
|
||||
TESTS_INFO="Valgrind not found"
|
||||
fi
|
||||
else
|
||||
TESTS_INFO="Valgrind testing not enabled"
|
||||
fi
|
||||
|
||||
# Check for custom PSL file
|
||||
AC_ARG_WITH(psl-file,
|
||||
AC_HELP_STRING([--with-psl-file=[PATH]],
|
||||
[path to PSL file]),
|
||||
PSL_FILE=$withval,
|
||||
PSL_FILE="\$(top_srcdir)/data/effective_tld_names.dat")
|
||||
AC_SUBST(PSL_FILE)
|
||||
|
||||
# Check for custom PSL test file
|
||||
AC_ARG_WITH(psl-testfile,
|
||||
AC_HELP_STRING([--with-psl-testfile=[PATH]],
|
||||
[path to PSL test file]),
|
||||
PSL_TESTFILE=$withval,
|
||||
PSL_TESTFILE="\$(top_srcdir)/data/test_psl.txt")
|
||||
AC_SUBST(PSL_TESTFILE)
|
||||
|
||||
# Override the template file name of the generated .pc file, so that there
|
||||
# is no need to rename the template file when the API version changes.
|
||||
AC_CONFIG_FILES([Makefile
|
||||
include/Makefile
|
||||
src/Makefile
|
||||
tools/Makefile
|
||||
po/Makefile.in
|
||||
docs/libpsl/Makefile
|
||||
docs/libpsl/version.xml
|
||||
data/Makefile
|
||||
tests/Makefile
|
||||
libpsl.pc:libpsl.pc.in])
|
||||
AC_OUTPUT
|
||||
|
||||
AC_MSG_NOTICE([Summary of build options:
|
||||
|
||||
Version: ${PACKAGE_VERSION}
|
||||
Host OS: ${host_os}
|
||||
Install prefix: ${prefix}
|
||||
Compiler: ${CC}
|
||||
CFlags: ${CFLAGS} ${CPPFLAGS}
|
||||
LDFlags: ${LDFLAGS}
|
||||
Builtin PSL: ${enable_builtin}
|
||||
PSL File: ${PSL_FILE}
|
||||
PSL Test File: ${PSL_TESTFILE}
|
||||
Tests: ${TESTS_INFO}
|
||||
])
|
|
@ -0,0 +1,3 @@
|
|||
filesdir = $(datadir)/@PACKAGE@
|
||||
files_DATA = effective_tld_names.dat test_psl.txt
|
||||
EXTRA_DIST = $(files_DATA)
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,98 @@
|
|||
// Any copyright is dedicated to the Public Domain.
|
||||
// http://creativecommons.org/publicdomain/zero/1.0/
|
||||
|
||||
// null input.
|
||||
checkPublicSuffix(null, null);
|
||||
// Mixed case.
|
||||
checkPublicSuffix('COM', null);
|
||||
checkPublicSuffix('example.COM', 'example.com');
|
||||
checkPublicSuffix('WwW.example.COM', 'example.com');
|
||||
// Leading dot.
|
||||
checkPublicSuffix('.com', null);
|
||||
checkPublicSuffix('.example', null);
|
||||
checkPublicSuffix('.example.com', null);
|
||||
checkPublicSuffix('.example.example', null);
|
||||
// Unlisted TLD.
|
||||
checkPublicSuffix('example', null);
|
||||
checkPublicSuffix('example.example', 'example.example');
|
||||
checkPublicSuffix('b.example.example', 'example.example');
|
||||
checkPublicSuffix('a.b.example.example', 'example.example');
|
||||
// Listed, but non-Internet, TLD.
|
||||
//checkPublicSuffix('local', null);
|
||||
//checkPublicSuffix('example.local', null);
|
||||
//checkPublicSuffix('b.example.local', null);
|
||||
//checkPublicSuffix('a.b.example.local', null);
|
||||
// TLD with only 1 rule.
|
||||
checkPublicSuffix('biz', null);
|
||||
checkPublicSuffix('domain.biz', 'domain.biz');
|
||||
checkPublicSuffix('b.domain.biz', 'domain.biz');
|
||||
checkPublicSuffix('a.b.domain.biz', 'domain.biz');
|
||||
// TLD with some 2-level rules.
|
||||
checkPublicSuffix('com', null);
|
||||
checkPublicSuffix('example.com', 'example.com');
|
||||
checkPublicSuffix('b.example.com', 'example.com');
|
||||
checkPublicSuffix('a.b.example.com', 'example.com');
|
||||
checkPublicSuffix('uk.com', null);
|
||||
checkPublicSuffix('example.uk.com', 'example.uk.com');
|
||||
checkPublicSuffix('b.example.uk.com', 'example.uk.com');
|
||||
checkPublicSuffix('a.b.example.uk.com', 'example.uk.com');
|
||||
checkPublicSuffix('test.ac', 'test.ac');
|
||||
// TLD with only 1 (wildcard) rule.
|
||||
checkPublicSuffix('cy', null);
|
||||
checkPublicSuffix('c.cy', null);
|
||||
checkPublicSuffix('b.c.cy', 'b.c.cy');
|
||||
checkPublicSuffix('a.b.c.cy', 'b.c.cy');
|
||||
// More complex TLD.
|
||||
checkPublicSuffix('jp', null);
|
||||
checkPublicSuffix('test.jp', 'test.jp');
|
||||
checkPublicSuffix('www.test.jp', 'test.jp');
|
||||
checkPublicSuffix('ac.jp', null);
|
||||
checkPublicSuffix('test.ac.jp', 'test.ac.jp');
|
||||
checkPublicSuffix('www.test.ac.jp', 'test.ac.jp');
|
||||
checkPublicSuffix('kyoto.jp', null);
|
||||
checkPublicSuffix('test.kyoto.jp', 'test.kyoto.jp');
|
||||
checkPublicSuffix('ide.kyoto.jp', null);
|
||||
checkPublicSuffix('b.ide.kyoto.jp', 'b.ide.kyoto.jp');
|
||||
checkPublicSuffix('a.b.ide.kyoto.jp', 'b.ide.kyoto.jp');
|
||||
checkPublicSuffix('c.kobe.jp', null);
|
||||
checkPublicSuffix('b.c.kobe.jp', 'b.c.kobe.jp');
|
||||
checkPublicSuffix('a.b.c.kobe.jp', 'b.c.kobe.jp');
|
||||
checkPublicSuffix('city.kobe.jp', 'city.kobe.jp');
|
||||
checkPublicSuffix('www.city.kobe.jp', 'city.kobe.jp');
|
||||
// TLD with a wildcard rule and exceptions.
|
||||
checkPublicSuffix('ck', null);
|
||||
checkPublicSuffix('test.ck', null);
|
||||
checkPublicSuffix('b.test.ck', 'b.test.ck');
|
||||
checkPublicSuffix('a.b.test.ck', 'b.test.ck');
|
||||
checkPublicSuffix('www.ck', 'www.ck');
|
||||
checkPublicSuffix('www.www.ck', 'www.ck');
|
||||
// US K12.
|
||||
checkPublicSuffix('us', null);
|
||||
checkPublicSuffix('test.us', 'test.us');
|
||||
checkPublicSuffix('www.test.us', 'test.us');
|
||||
checkPublicSuffix('ak.us', null);
|
||||
checkPublicSuffix('test.ak.us', 'test.ak.us');
|
||||
checkPublicSuffix('www.test.ak.us', 'test.ak.us');
|
||||
checkPublicSuffix('k12.ak.us', null);
|
||||
checkPublicSuffix('test.k12.ak.us', 'test.k12.ak.us');
|
||||
checkPublicSuffix('www.test.k12.ak.us', 'test.k12.ak.us');
|
||||
// IDN labels.
|
||||
checkPublicSuffix('食狮.com.cn', '食狮.com.cn');
|
||||
checkPublicSuffix('食狮.公司.cn', '食狮.公司.cn');
|
||||
checkPublicSuffix('www.食狮.公司.cn', '食狮.公司.cn');
|
||||
checkPublicSuffix('shishi.公司.cn', 'shishi.公司.cn');
|
||||
checkPublicSuffix('公司.cn', null);
|
||||
checkPublicSuffix('食狮.中国', '食狮.中国');
|
||||
checkPublicSuffix('www.食狮.中国', '食狮.中国');
|
||||
checkPublicSuffix('shishi.中国', 'shishi.中国');
|
||||
checkPublicSuffix('中国', null);
|
||||
// Same as above, but punycoded.
|
||||
checkPublicSuffix('xn--85x722f.com.cn', 'xn--85x722f.com.cn');
|
||||
checkPublicSuffix('xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn');
|
||||
checkPublicSuffix('www.xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn');
|
||||
checkPublicSuffix('shishi.xn--55qx5d.cn', 'shishi.xn--55qx5d.cn');
|
||||
checkPublicSuffix('xn--55qx5d.cn', null);
|
||||
checkPublicSuffix('xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s');
|
||||
checkPublicSuffix('www.xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s');
|
||||
checkPublicSuffix('shishi.xn--fiqs8s', 'shishi.xn--fiqs8s');
|
||||
checkPublicSuffix('xn--fiqs8s', null);
|
|
@ -0,0 +1,135 @@
|
|||
## Process this file with automake to produce Makefile.in
|
||||
|
||||
# We require automake 1.6 at least.
|
||||
AUTOMAKE_OPTIONS = 1.6
|
||||
|
||||
# This is a blank Makefile.am for using gtk-doc.
|
||||
# Copy this to your project's API docs directory and modify the variables to
|
||||
# suit your project. See the GTK+ Makefiles in gtk+/docs/reference for examples
|
||||
# of using the various options.
|
||||
|
||||
# The name of the module, e.g. 'glib'.
|
||||
DOC_MODULE=libpsl
|
||||
|
||||
# Uncomment for versioned docs and specify the version of the module, e.g. '2'.
|
||||
#DOC_MODULE_VERSION=2
|
||||
|
||||
|
||||
# The top-level SGML file. You can change this if you want to.
|
||||
DOC_MAIN_SGML_FILE=$(DOC_MODULE)-docs.sgml
|
||||
|
||||
# Directories containing the source code.
|
||||
# gtk-doc will search all .c and .h files beneath these paths
|
||||
# for inline comments documenting functions and macros.
|
||||
# e.g. DOC_SOURCE_DIR=$(top_srcdir)/gtk $(top_srcdir)/gdk
|
||||
DOC_SOURCE_DIR=$(top_srcdir)/src $(top_srcdir)/include
|
||||
|
||||
# Extra options to pass to gtkdoc-scangobj. Not normally needed.
|
||||
#SCANGOBJ_OPTIONS=--ignore-decorators="UNUSED_RESULT|CONST|PURE"
|
||||
|
||||
# Extra options to supply to gtkdoc-scan.
|
||||
# e.g. SCAN_OPTIONS=--deprecated-guards="GTK_DISABLE_DEPRECATED"
|
||||
SCAN_OPTIONS=--ignore-decorators="G_GNUC_PSL_UNUSED"
|
||||
|
||||
# Extra options to supply to gtkdoc-mkdb.
|
||||
# e.g. MKDB_OPTIONS=--xml-mode --output-format=xml
|
||||
MKDB_OPTIONS=--xml-mode --output-format=xml
|
||||
|
||||
# Extra options to supply to gtkdoc-mktmpl
|
||||
# e.g. MKTMPL_OPTIONS=--only-section-tmpl
|
||||
MKTMPL_OPTIONS=
|
||||
|
||||
# Extra options to supply to gtkdoc-mkhtml
|
||||
MKHTML_OPTIONS=
|
||||
|
||||
# Extra options to supply to gtkdoc-fixref. Not normally needed.
|
||||
# e.g. FIXXREF_OPTIONS=--extra-dir=../gdk-pixbuf/html --extra-dir=../gdk/html
|
||||
FIXXREF_OPTIONS=
|
||||
|
||||
# Used for dependencies. The docs will be rebuilt if any of these change.
|
||||
# e.g. HFILE_GLOB=$(top_srcdir)/gtk/*.h
|
||||
# e.g. CFILE_GLOB=$(top_srcdir)/gtk/*.c
|
||||
HFILE_GLOB=$(top_srcdir)/include/*.h
|
||||
CFILE_GLOB=$(top_srcdir)/src/*.c
|
||||
|
||||
# Extra header to include when scanning, which are not under DOC_SOURCE_DIR
|
||||
# e.g. EXTRA_HFILES=$(top_srcdir)/contrib/extra.h
|
||||
EXTRA_HFILES=
|
||||
|
||||
# Header files or dirs to ignore when scanning. Use base file/dir names
|
||||
# e.g. IGNORE_HFILES=gtkdebug.h gtkintl.h private_code
|
||||
IGNORE_HFILES=private.h
|
||||
|
||||
# Images to copy into HTML directory.
|
||||
# e.g. HTML_IMAGES=$(top_srcdir)/gtk/stock-icons/stock_about_24.png
|
||||
HTML_IMAGES=
|
||||
|
||||
# Extra SGML files that are included by $(DOC_MAIN_SGML_FILE).
|
||||
# e.g. content_files=running.sgml building.sgml changes-2.0.sgml
|
||||
content_files=
|
||||
|
||||
# SGML files where gtk-doc abbreviations (#GtkWidget) are expanded
|
||||
# These files must be listed here *and* in content_files
|
||||
# e.g. expand_content_files=running.sgml
|
||||
expand_content_files=
|
||||
|
||||
# CFLAGS and LDFLAGS for compiling gtkdoc-scangobj with your library.
|
||||
# Only needed if you are using gtkdoc-scangobj to dynamically query widget
|
||||
# signals and properties.
|
||||
# e.g. GTKDOC_CFLAGS=-I$(top_srcdir) -I$(top_builddir) $(GTK_DEBUG_FLAGS)
|
||||
# e.g. GTKDOC_LIBS=$(top_builddir)/gtk/$(gtktargetlib)
|
||||
GTKDOC_CFLAGS=
|
||||
GTKDOC_LIBS=
|
||||
|
||||
# This includes the standard gtk-doc make rules, copied by gtkdocize.
|
||||
include $(top_srcdir)/gtk-doc.make
|
||||
|
||||
# Other files to distribute
|
||||
# e.g. EXTRA_DIST += version.xml.in
|
||||
EXTRA_DIST +=
|
||||
|
||||
# Files not to distribute
|
||||
# for --rebuild-types in $(SCAN_OPTIONS), e.g. $(DOC_MODULE).types
|
||||
# for --rebuild-sections in $(SCAN_OPTIONS) e.g. $(DOC_MODULE)-sections.txt
|
||||
#DISTCLEANFILES +=
|
||||
|
||||
# Comment this out if you want 'make check' to test your doc status
|
||||
# and run some sanity checks
|
||||
if ENABLE_GTK_DOC
|
||||
TESTS_ENVIRONMENT = cd $(srcdir) && \
|
||||
DOC_MODULE=$(DOC_MODULE) DOC_MAIN_SGML_FILE=$(DOC_MAIN_SGML_FILE) \
|
||||
SRCDIR=$(abs_srcdir) BUILDDIR=$(abs_builddir)
|
||||
#TESTS = $(GTKDOC_CHECK)
|
||||
endif
|
||||
|
||||
-include $(top_srcdir)/git.mk
|
||||
|
||||
theMANS =
|
||||
man_MANS =
|
||||
|
||||
if ENABLE_GTK_DOC
|
||||
if ENABLE_MAN
|
||||
|
||||
theMANS += libpsl.3
|
||||
man_MANS += docs $(theMANS)
|
||||
|
||||
%.3:
|
||||
#.xml.3:
|
||||
@file=xml/`echo $@|cut -d'.' -f1`.xml; \
|
||||
@XSLTPROC@ -nonet http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl $$file
|
||||
|
||||
endif
|
||||
endif
|
||||
|
||||
BUILT_EXTRA_DIST = $(theMANS)
|
||||
EXTRA_DIST += $(theMANS)
|
||||
CLEANFILES ?=
|
||||
CLEANFILES += $(theMANS) libpsl-overrides.txt libpsl-decl.txt libpsl-decl-list.txt
|
||||
|
||||
clean-local:
|
||||
rm -rf xml html
|
||||
|
||||
dist-hook-local: all-local
|
||||
|
||||
libpsl-docs-clean: clean
|
||||
cd $(srcdir) && rm -rf xml html
|
|
@ -0,0 +1,41 @@
|
|||
<?xml version="1.0"?>
|
||||
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
|
||||
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd"
|
||||
[
|
||||
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
|
||||
<!ENTITY version SYSTEM "version.xml">
|
||||
]>
|
||||
<book id="index">
|
||||
<bookinfo>
|
||||
<title>Libpsl Reference Manual</title>
|
||||
<releaseinfo>
|
||||
for Libpsl &version;.
|
||||
The latest version of this documentation can be found on-line at
|
||||
<ulink role="online-location" url="http://github.com/rockdaboot/libpsl">GitHub</ulink>.
|
||||
</releaseinfo>
|
||||
</bookinfo>
|
||||
|
||||
<chapter id="libpsl">
|
||||
<title>Libpsl Overview</title>
|
||||
<para>
|
||||
Libpsl provides functions to work with the Mozilla Public Suffix List.
|
||||
</para>
|
||||
<xi:include href="xml/libpsl.xml"/>
|
||||
</chapter>
|
||||
<chapter id="object-tree">
|
||||
<title>Object Hierarchy</title>
|
||||
<xi:include href="xml/tree_index.sgml"/>
|
||||
</chapter>
|
||||
<index id="api-index-full">
|
||||
<title>API Index</title>
|
||||
<xi:include href="xml/api-index-full.xml"><xi:fallback /></xi:include>
|
||||
</index>
|
||||
<index id="deprecated-api-index" role="deprecated">
|
||||
<title>Index of deprecated API</title>
|
||||
<xi:include href="xml/api-index-deprecated.xml"><xi:fallback /></xi:include>
|
||||
</index>
|
||||
|
||||
<!--
|
||||
<xi:include href="xml/annotation-glossary.xml"><xi:fallback /></xi:include>
|
||||
-->
|
||||
</book>
|
|
@ -0,0 +1,20 @@
|
|||
<SECTION>
|
||||
<FILE>libpsl</FILE>
|
||||
<TITLE>Public Suffix List functions</TITLE>
|
||||
psl_ctx_t
|
||||
psl_load_file
|
||||
psl_load_fp
|
||||
psl_builtin
|
||||
psl_free
|
||||
psl_is_public_suffix
|
||||
psl_unregistrable_domain
|
||||
psl_registrable_domain
|
||||
psl_suffix_count
|
||||
psl_suffix_exception_count
|
||||
psl_builtin_compile_time
|
||||
psl_builtin_file_time
|
||||
psl_builtin_sha1sum
|
||||
psl_builtin_filename
|
||||
psl_is_cookie_domain_acceptable
|
||||
psl_get_version
|
||||
</SECTION>
|
|
@ -0,0 +1 @@
|
|||
@LIBPSL_VERSION@
|
|
@ -0,0 +1 @@
|
|||
include_HEADERS = libpsl.h
|
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Copyright(c) 2014 Tim Ruehsen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* This file is part of libpsl.
|
||||
*
|
||||
* Header file for libpsl library routines
|
||||
*
|
||||
* Changelog
|
||||
* 20.03.2014 Tim Ruehsen created
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _LIBPSL_LIBPSL_H
|
||||
#define _LIBPSL_LIBPSL_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
typedef struct _psl_ctx_st psl_ctx_t;
|
||||
|
||||
/* frees PSL context */
|
||||
void
|
||||
psl_free(psl_ctx_t *psl);
|
||||
/* loads PSL data from file */
|
||||
psl_ctx_t *
|
||||
psl_load_file(const char *fname);
|
||||
/* loads PSL data from FILE pointer */
|
||||
psl_ctx_t *
|
||||
psl_load_fp(FILE *fp);
|
||||
/* retrieves builtin PSL data */
|
||||
const psl_ctx_t *
|
||||
psl_builtin(void);
|
||||
/* checks whether domain is a public suffix or not */
|
||||
int
|
||||
psl_is_public_suffix(const psl_ctx_t *psl, const char *domain);
|
||||
/* checks whether cookie_domain is acceptable for hostname or not */
|
||||
int
|
||||
psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain);
|
||||
/* returns the longest unregistrable domain within 'domain' or NULL if none found */
|
||||
const char *
|
||||
psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain);
|
||||
/* returns the shortest possible registrable domain part or NULL if domain is not registrable at all */
|
||||
const char *
|
||||
psl_registrable_domain(const psl_ctx_t *psl, const char *domain);
|
||||
/* does not include exceptions */
|
||||
int
|
||||
psl_suffix_count(const psl_ctx_t *psl);
|
||||
/* just counts exceptions */
|
||||
int
|
||||
psl_suffix_exception_count(const psl_ctx_t *psl);
|
||||
/* returns compilation time */
|
||||
time_t
|
||||
psl_builtin_compile_time(void);
|
||||
/* returns mtime of PSL source file */
|
||||
time_t
|
||||
psl_builtin_file_time(void);
|
||||
/* returns SHA1 checksum (hex-encoded, lowercase) of PSL source file */
|
||||
const char *
|
||||
psl_builtin_sha1sum(void);
|
||||
/* returns file name of PSL source file */
|
||||
const char *
|
||||
psl_builtin_filename(void);
|
||||
/* returns library version */
|
||||
const char *
|
||||
psl_get_version(void);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _LIBPSL_LIBPSL_H */
|
|
@ -0,0 +1,11 @@
|
|||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
|
||||
Name: @PACKAGE_NAME@
|
||||
Description: Public Suffix List C library.
|
||||
Version: @PACKAGE_VERSION@
|
||||
URL: @PACKAGE_URL@
|
||||
Libs: -L${libdir} -llibpsl
|
||||
Cflags: -I${includedir}/libpsl -I${libdir}/libpsl/include
|
|
@ -0,0 +1,53 @@
|
|||
# Makefile variables for PO directory in any package using GNU gettext.
|
||||
|
||||
# Usually the message domain is the same as the package name.
|
||||
DOMAIN = $(PACKAGE)
|
||||
|
||||
# These two variables depend on the location of this directory.
|
||||
subdir = po
|
||||
top_builddir = ..
|
||||
|
||||
# These options get passed to xgettext.
|
||||
XGETTEXT_OPTIONS = --keyword=_ --keyword=N_
|
||||
|
||||
# This is the copyright holder that gets inserted into the header of the
|
||||
# $(DOMAIN).pot file. Set this to the copyright holder of the surrounding
|
||||
# package. (Note that the msgstr strings, extracted from the package's
|
||||
# sources, belong to the copyright holder of the package.) Translators are
|
||||
# expected to transfer the copyright for their translations to this person
|
||||
# or entity, or to disclaim their copyright. The empty string stands for
|
||||
# the public domain; in this case the translators are expected to disclaim
|
||||
# their copyright.
|
||||
COPYRIGHT_HOLDER = Tim Ruehsen
|
||||
|
||||
# This is the email address or URL to which the translators shall report
|
||||
# bugs in the untranslated strings:
|
||||
# - Strings which are not entire sentences, see the maintainer guidelines
|
||||
# in the GNU gettext documentation, section 'Preparing Strings'.
|
||||
# - Strings which use unclear terms or require additional context to be
|
||||
# understood.
|
||||
# - Strings which make invalid assumptions about notation of date, time or
|
||||
# money.
|
||||
# - Pluralisation problems.
|
||||
# - Incorrect English spelling.
|
||||
# - Incorrect formatting.
|
||||
# It can be your email address, or a mailing list address where translators
|
||||
# can write to without being subscribed, or the URL of a web page through
|
||||
# which the translators can contact you.
|
||||
MSGID_BUGS_ADDRESS = tim.ruehsen@gmx.de
|
||||
|
||||
# This is the list of locale categories, beyond LC_MESSAGES, for which the
|
||||
# message catalogs shall be used. It is usually empty.
|
||||
EXTRA_LOCALE_CATEGORIES =
|
||||
|
||||
# This tells whether the $(DOMAIN).pot file contains messages with an 'msgctxt'
|
||||
# context. Possible values are "yes" and "no". Set this to yes if the
|
||||
# package uses functions taking also a message context, like pgettext(), or
|
||||
# if in $(XGETTEXT_OPTIONS) you define keywords with a context argument.
|
||||
USE_MSGCTXT = no
|
||||
|
||||
# These options get passed to msgmerge.
|
||||
# Useful options are in particular:
|
||||
# --previous to keep previous msgids of translated messages,
|
||||
# --quiet to reduce the verbosity.
|
||||
MSGMERGE_OPTIONS =
|
|
@ -0,0 +1,2 @@
|
|||
# List of source files which contain translatable strings.
|
||||
src/psl.c
|
|
@ -0,0 +1,22 @@
|
|||
# suffixes.c must be created before psl.c is compiled
|
||||
BUILT_SOURCES = suffixes.c
|
||||
|
||||
# suffixes.c is a built source that must be cleaned
|
||||
CLEANFILES = suffixes.c
|
||||
|
||||
lib_LTLIBRARIES = libpsl.la
|
||||
|
||||
libpsl_la_SOURCES = psl.c
|
||||
libpsl_la_CPPFLAGS = -I$(top_srcdir)/include
|
||||
# include ABI version information
|
||||
libpsl_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
|
||||
|
||||
noinst_PROGRAMS = psl2c
|
||||
psl2c_SOURCES = psl2c.c
|
||||
psl2c_CPPFLAGS = -I$(top_srcdir)/include -D _GNU_SOURCE $(LIBICU_CFLAGS)
|
||||
psl2c_LDADD = $(LIBICU_LIBS)
|
||||
|
||||
# Build rule for suffixes.c
|
||||
# PSL_FILE can be set by ./configure --with-psl-file=[PATH]
|
||||
suffixes.c: $(PSL_FILE) psl2c$(EXEEXT)
|
||||
./psl2c$(EXEEXT) "$(PSL_FILE)" suffixes.c
|
|
@ -0,0 +1,743 @@
|
|||
/*
|
||||
* Copyright(c) 2014 Tim Ruehsen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* This file is part of libpsl.
|
||||
*
|
||||
* Public Suffix List routines
|
||||
*
|
||||
* Changelog
|
||||
* 19.03.2014 Tim Ruehsen created from libmget/cookie.c
|
||||
*
|
||||
*/
|
||||
|
||||
/* need _GNU_SOURCE for qsort_r() */
|
||||
#ifndef _GNU_SOURCE
|
||||
# define _GNU_SOURCE
|
||||
#endif
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#if ENABLE_NLS != 0
|
||||
# include <libintl.h>
|
||||
# define _(STRING) gettext(STRING)
|
||||
#else
|
||||
# define _(STRING) STRING
|
||||
# define ngettext(STRING1,STRING2,N) STRING2
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include <libpsl.h>
|
||||
|
||||
/**
|
||||
* SECTION:libpsl
|
||||
* @short_description: Public Suffix List library functions
|
||||
* @title: libpsl
|
||||
* @stability: unstable
|
||||
* @include: libpsl.h
|
||||
*
|
||||
* [Public Suffix List](http://publicsuffix.org/) library functions.
|
||||
*
|
||||
*/
|
||||
|
||||
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
||||
|
||||
/*
 * A single suffix rule.
 * NOTE(review): the generated suffixes.c presumably initializes these
 * fields positionally - confirm before reordering any of them.
 */
typedef struct {
	char label_buf[48];     /* inline storage for the rule text */
	const char *label;      /* pointer to the rule text */
	unsigned short length;  /* length of the rule text */
	unsigned char nlabels;  /* number of labels */
	unsigned char wildcard; /* this is a wildcard rule (e.g. *.sapporo.jp) */
} _psl_entry_t;
|
||||
|
||||
/* stripped down version libmget vector routines */
|
||||
typedef struct {
|
||||
int
|
||||
(*cmp)(const _psl_entry_t *, const _psl_entry_t *); /* comparison function */
|
||||
_psl_entry_t
|
||||
**entry; /* pointer to array of pointers to elements */
|
||||
int
|
||||
max, /* allocated elements */
|
||||
cur; /* number of elements in use */
|
||||
} _psl_vector_t;
|
||||
|
||||
struct _psl_ctx_st {
|
||||
_psl_vector_t
|
||||
*suffixes,
|
||||
*suffix_exceptions;
|
||||
};
|
||||
|
||||
/* include the PSL data compiled by 'psl2c' */
|
||||
#include "suffixes.c"
|
||||
|
||||
/* references to this PSL will result in lookups to built-in data */
|
||||
static const psl_ctx_t
|
||||
_builtin_psl;
|
||||
|
||||
static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t *, const _psl_entry_t *))
|
||||
{
|
||||
_psl_vector_t *v;
|
||||
|
||||
if (!(v = calloc(1, sizeof(_psl_vector_t))))
|
||||
return NULL;
|
||||
|
||||
if (!(v->entry = malloc(max * sizeof(_psl_entry_t *)))) {
|
||||
free(v);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
v->max = max;
|
||||
v->cmp = cmp;
|
||||
return v;
|
||||
}
|
||||
|
||||
/*
 * Releases every stored entry, the pointer array and the vector itself,
 * then resets the caller's pointer to NULL so it can neither be used nor
 * freed a second time.
 * A NULL 'v' or a NULL '*v' is a no-op.
 */
static void _vector_free(_psl_vector_t **v)
{
	_psl_vector_t *vec;
	int it;

	if (!v || !*v)
		return;

	vec = *v;

	if (vec->entry) {
		for (it = 0; it < vec->cur; it++)
			free(vec->entry[it]);

		free(vec->entry);
	}

	free(vec);
	*v = NULL; /* do not leave a dangling pointer behind */
}
|
||||
|
||||
static _psl_entry_t *_vector_get(const _psl_vector_t *v, int pos)
|
||||
{
|
||||
if (pos < 0 || !v || pos >= v->cur) return NULL;
|
||||
|
||||
return v->entry[pos];
|
||||
}
|
||||
|
||||
/* the entries must be sorted by */
|
||||
static int _vector_find(const _psl_vector_t *v, const _psl_entry_t *elem)
|
||||
{
|
||||
if (v) {
|
||||
int l, r, m;
|
||||
int res;
|
||||
|
||||
/* binary search for element (exact match) */
|
||||
for (l = 0, r = v->cur - 1; l <= r;) {
|
||||
m = (l + r) / 2;
|
||||
if ((res = v->cmp(elem, v->entry[m])) > 0) l = m + 1;
|
||||
else if (res < 0) r = m - 1;
|
||||
else return m;
|
||||
}
|
||||
}
|
||||
|
||||
return -1; /* not found */
|
||||
}
|
||||
|
||||
/*
 * Appends a heap-allocated copy of 'elem' to 'v', doubling the pointer array
 * when it is full.
 *
 * The copy is a plain memcpy(): a 'label' pointer that referred to
 * elem->label_buf still refers to the source object afterwards, so the
 * caller has to re-point it at the copy if needed.
 *
 * Returns the index of the new entry, or -1 if 'v' or 'elem' is NULL or if
 * memory could not be allocated (the vector is left unchanged in that case).
 */
static int _vector_add(_psl_vector_t *v, const _psl_entry_t *elem)
{
	_psl_entry_t *copy;

	if (!v || !elem)
		return -1;

	if (v->cur >= v->max) {
		/* a capacity of 0 would never grow by doubling */
		int newmax = v->max > 0 ? v->max * 2 : 16;
		_psl_entry_t **grown = realloc(v->entry, newmax * sizeof(*grown));

		if (!grown)
			return -1; /* v->entry is still valid and untouched */

		v->entry = grown;
		v->max = newmax;
	}

	if (!(copy = malloc(sizeof(*copy))))
		return -1;

	memcpy(copy, elem, sizeof(*copy));
	v->entry[v->cur] = copy;

	return v->cur++;
}
|
||||
|
||||
static int _compare(const void *p1, const void *p2, void *v)
|
||||
{
|
||||
return ((_psl_vector_t *)v)->cmp(*((_psl_entry_t **)p1), *((_psl_entry_t **)p2));
|
||||
}
|
||||
|
||||
/*
 * Sorts the entry pointers of 'v' with the vector's comparison function.
 * Does nothing if 'v' is NULL or has no comparison function.
 */
static void _vector_sort(_psl_vector_t *v)
{
	if (!v || !v->cmp)
		return;

	/* the array holds _psl_entry_t pointers, so take the size from the array itself */
	qsort_r(v->entry, v->cur, sizeof(*v->entry), _compare, v);
}
|
||||
|
||||
/* Returns the number of entries in use, or 0 for a NULL vector. */
static int _vector_size(_psl_vector_t *v)
{
	if (v == NULL)
		return 0;

	return v->cur;
}
|
||||
|
||||
/* by this kind of sorting, we can easily see if a domain matches or not */
|
||||
static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
|
||||
{
|
||||
int n;
|
||||
|
||||
if ((n = s2->nlabels - s1->nlabels))
|
||||
return n; /* most labels first */
|
||||
|
||||
if ((n = s1->length - s2->length))
|
||||
return n; /* shorter rules first */
|
||||
|
||||
return strcmp(s1->label, s2->label ? s2->label : s2->label_buf);
|
||||
}
|
||||
|
||||
/*
 * Initializes 'suffix' from the NUL-terminated rule text 'rule', which is
 * 'length' bytes long.
 *
 * A leading "*." marks a wildcard rule; it is not stored and not counted in
 * suffix->length. The remaining text is copied into suffix->label_buf with
 * tolower() applied to every byte, and suffix->label is pointed at it.
 *
 * Returns 0 on success,
 *        -1 if the rule does not fit into label_buf,
 *        -2 if the rule starts with '*' that is not followed by '.'.
 * On failure suffix->nlabels is set to 0.
 */
static int _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
{
	const char *src;
	char *dst, *end;

	suffix->label = suffix->label_buf;

	if (length >= sizeof(suffix->label_buf) - 1) {
		suffix->nlabels = 0;
		return -1; /* rule too long, ignored */
	}

	if (*rule == '*') {
		if (*++rule != '.') {
			suffix->nlabels = 0;
			return -2; /* unsupported kind of rule, ignored */
		}
		rule++;
		suffix->wildcard = 1;
		suffix->length = (unsigned short)(length - 2);
	} else {
		suffix->wildcard = 0;
		suffix->length = (unsigned short)length;
	}

	suffix->nlabels = 1;

	/* never write past the buffer, even if 'rule' is longer than 'length' claims */
	end = suffix->label_buf + sizeof(suffix->label_buf) - 1;

	for (dst = suffix->label_buf, src = rule; *src && dst < end; src++) {
		if (*src == '.')
			suffix->nlabels++;

		/* <ctype.h> functions need an unsigned char value; rules may contain bytes >= 0x80 */
		*dst++ = (char)tolower((unsigned char)*src);
	}
	*dst = 0;

	return 0;
}
|
||||
|
||||
/**
|
||||
* psl_is_public_suffix:
|
||||
* @psl: PSL context
|
||||
* @domain: Domain string
|
||||
*
|
||||
* This function checks if @domain is a public suffix by the means of the
|
||||
* [Mozilla Public Suffix List](http://publicsuffix.org).
|
||||
*
|
||||
* For cookie domain checking see psl_is_cookie_domain_acceptable().
|
||||
*
|
||||
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
|
||||
* psl_builtin().
|
||||
*
|
||||
* Returns: 1 if domain is a public suffix, 0 if not.
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
||||
{
|
||||
_psl_entry_t suffix, *rule;
|
||||
const char *p, *label_bak;
|
||||
unsigned short length_bak;
|
||||
|
||||
if (!psl || !domain)
|
||||
return 1;
|
||||
|
||||
/* this function should be called without leading dots, just make sure */
|
||||
suffix.label = domain + (*domain == '.');
|
||||
suffix.length = strlen(suffix.label);
|
||||
suffix.wildcard = 0;
|
||||
suffix.nlabels = 1;
|
||||
|
||||
for (p = suffix.label; *p; p++)
|
||||
if (*p == '.')
|
||||
suffix.nlabels++;
|
||||
|
||||
/* if domain has enough labels, it is public */
|
||||
if (psl == &_builtin_psl)
|
||||
rule = &suffixes[0];
|
||||
else
|
||||
rule = _vector_get(psl->suffixes, 0);
|
||||
|
||||
if (!rule || rule->nlabels < suffix.nlabels - 1)
|
||||
return 0;
|
||||
|
||||
if (psl == &_builtin_psl)
|
||||
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
|
||||
else
|
||||
rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));
|
||||
|
||||
if (rule) {
|
||||
/* definitely a match, no matter if the found rule is a wildcard or not */
|
||||
return 1;
|
||||
} else if (suffix.nlabels == 1) {
|
||||
/* unknown TLD, this is the prevailing '*' match */
|
||||
return 1;
|
||||
}
|
||||
|
||||
label_bak = suffix.label;
|
||||
length_bak = suffix.length;
|
||||
|
||||
if ((suffix.label = strchr(suffix.label, '.'))) {
|
||||
suffix.label++;
|
||||
suffix.length = strlen(suffix.label);
|
||||
suffix.nlabels--;
|
||||
|
||||
if (psl == &_builtin_psl)
|
||||
rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
|
||||
else
|
||||
rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));
|
||||
|
||||
if (rule) {
|
||||
if (rule->wildcard) {
|
||||
/* now that we matched a wildcard, we have to check for an exception */
|
||||
suffix.label = label_bak;
|
||||
suffix.length = length_bak;
|
||||
suffix.nlabels++;
|
||||
|
||||
if (psl == &_builtin_psl) {
|
||||
if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare))
|
||||
return 0; /* found an exception, so 'domain' is not a public suffix */
|
||||
} else {
|
||||
if (_vector_get(psl->suffix_exceptions, _vector_find(psl->suffix_exceptions, &suffix)) != 0)
|
||||
return 0; /* found an exception, so 'domain' is not a public suffix */
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_unregistrable_domain:
|
||||
* @psl: PSL context
|
||||
* @domain: Domain string
|
||||
*
|
||||
* This function finds the longest publix suffix part of @domain by the means
|
||||
* of the [Mozilla Public Suffix List](http://publicsuffix.org).
|
||||
*
|
||||
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
|
||||
* psl_builtin().
|
||||
*
|
||||
* Returns: Pointer to longest public suffix part of @domain or %NULL if @domain
|
||||
* does not contain a public suffix (or if @psl is %NULL).
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
|
||||
{
|
||||
if (!psl || !domain)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* We check from left to right to catch special PSL entries like 'forgot.his.name':
|
||||
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
|
||||
*/
|
||||
|
||||
while (!psl_is_public_suffix(psl, domain)) {
|
||||
if ((domain = strchr(domain, '.')))
|
||||
domain++;
|
||||
else
|
||||
break; /* prevent endless loop if psl_is_public_suffix() is broken. */
|
||||
}
|
||||
|
||||
return domain;
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_registrable_domain:
|
||||
* @psl: PSL context
|
||||
* @domain: Domain string
|
||||
*
|
||||
* This function finds the shortest private suffix part of @domain by the means
|
||||
* of the [Mozilla Public Suffix List](http://publicsuffix.org).
|
||||
*
|
||||
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
|
||||
* psl_builtin().
|
||||
*
|
||||
* Returns: Pointer to shortest private suffix part of @domain or %NULL if @domain
|
||||
* does not contain a private suffix (or if @psl is %NULL).
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
|
||||
{
|
||||
const char *p, *regdom = NULL;
|
||||
|
||||
if (!psl || !domain || *domain == '.')
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* We check from left to right to catch special PSL entries like 'forgot.his.name':
|
||||
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
|
||||
*/
|
||||
|
||||
while (!psl_is_public_suffix(psl, domain)) {
|
||||
if ((p = strchr(domain, '.'))) {
|
||||
regdom = domain;
|
||||
domain = p + 1;
|
||||
} else
|
||||
break; /* prevent endless loop if psl_is_public_suffix() is broken. */
|
||||
}
|
||||
|
||||
return regdom;
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_load_file:
|
||||
* @fname: Name of PSL file
|
||||
*
|
||||
* This function loads the public suffixes file named @fname.
|
||||
* To free the allocated resources, call psl_free().
|
||||
*
|
||||
* If you want to use punycode representations for functions like psl_is_public_suffix(),
|
||||
* these have to exist as entries within @fname. This is a design decision to not pull in
|
||||
* dependencies for UTF-8 case-handling and IDNA libraries.
|
||||
*
|
||||
* On the contrary, the builtin data already contains punycode entries.
|
||||
*
|
||||
* Have a look into psl2c.c for example code on how to convert UTF-8 to lowercase and to punycode.
|
||||
*
|
||||
* Returns: Pointer to a PSL context or %NULL on failure.
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
psl_ctx_t *psl_load_file(const char *fname)
|
||||
{
|
||||
FILE *fp;
|
||||
psl_ctx_t *psl = NULL;
|
||||
|
||||
if (!fname)
|
||||
return NULL;
|
||||
|
||||
if ((fp = fopen(fname, "r"))) {
|
||||
psl = psl_load_fp(fp);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
return psl;
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_load_fp:
|
||||
* @fp: FILE pointer
|
||||
*
|
||||
* This function loads the public suffixes from a FILE pointer.
|
||||
* To free the allocated resources, call psl_free().
|
||||
*
|
||||
* Have a look at psl_load_fp() for punycode considerations.
|
||||
*
|
||||
* Returns: Pointer to a PSL context or %NULL on failure.
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
psl_ctx_t *psl_load_fp(FILE *fp)
|
||||
{
|
||||
psl_ctx_t *psl;
|
||||
_psl_entry_t suffix, *suffixp;
|
||||
int nsuffixes = 0;
|
||||
char buf[256], *linep, *p;
|
||||
|
||||
if (!fp)
|
||||
return NULL;
|
||||
|
||||
if (!(psl = calloc(1, sizeof(psl_ctx_t))))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
||||
* as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
||||
*/
|
||||
psl->suffixes = _vector_alloc(8*1024, _suffix_compare);
|
||||
psl->suffix_exceptions = _vector_alloc(64, _suffix_compare);
|
||||
|
||||
while ((linep = fgets(buf, sizeof(buf), fp))) {
|
||||
while (isspace(*linep)) linep++; /* ignore leading whitespace */
|
||||
if (!*linep) continue; /* skip empty lines */
|
||||
|
||||
if (*linep == '/' && linep[1] == '/')
|
||||
continue; /* skip comments */
|
||||
|
||||
/* parse suffix rule */
|
||||
for (p = linep; *linep && !isspace(*linep);) linep++;
|
||||
*linep = 0;
|
||||
|
||||
if (*p == '!') {
|
||||
/* add to exceptions */
|
||||
if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0)
|
||||
suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix));
|
||||
else
|
||||
suffixp = NULL;
|
||||
} else {
|
||||
if (_suffix_init(&suffix, p, linep - p) == 0)
|
||||
suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix));
|
||||
else
|
||||
suffixp = NULL;
|
||||
}
|
||||
|
||||
if (suffixp)
|
||||
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
||||
|
||||
nsuffixes++;;
|
||||
}
|
||||
|
||||
_vector_sort(psl->suffix_exceptions);
|
||||
_vector_sort(psl->suffixes);
|
||||
|
||||
return psl;
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_load_free:
|
||||
* @psl: PSL context pointer
|
||||
*
|
||||
* This function frees the the PSL context that has been retrieved via
|
||||
* psl_load_fp() or psl_load_file().
|
||||
*
|
||||
* Returns: Pointer to a PSL context private or %NULL on failure.
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
void psl_free(psl_ctx_t *psl)
|
||||
{
|
||||
if (psl && psl != &_builtin_psl) {
|
||||
_vector_free(&psl->suffixes);
|
||||
_vector_free(&psl->suffix_exceptions);
|
||||
free(psl);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_builtin:
|
||||
*
|
||||
* This function returns the PSL context that has been generated and built in at compile-time.
|
||||
* You don't have to free the returned context explicitely.
|
||||
*
|
||||
* The builtin data also contains punycode entries, one for each international domain name.
|
||||
*
|
||||
* If the generation of built-in data has been disabled during compilation, %NULL will be returned.
|
||||
* So if using the builtin psl context, you can provide UTF-8 or punycode representations of domains to
|
||||
* functions like psl_is_public_suffix().
|
||||
*
|
||||
* Returns: Pointer to the built in PSL data or NULL if this data is not available.
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
const psl_ctx_t *psl_builtin(void)
|
||||
{
|
||||
#ifdef WITH_BUILTIN
|
||||
return &_builtin_psl;
|
||||
#else
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_suffix_count:
|
||||
* @psl: PSL context pointer
|
||||
*
|
||||
* This function returns number of public suffixes maintained by @psl.
|
||||
* The number of exceptions within the Public Suffix List are not included.
|
||||
*
|
||||
* If the generation of built-in data has been disabled during compilation, 0 will be returned.
|
||||
*
|
||||
* Returns: Number of public suffixes entries in PSL context.
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
int psl_suffix_count(const psl_ctx_t *psl)
|
||||
{
|
||||
if (psl == &_builtin_psl)
|
||||
return countof(suffixes);
|
||||
else if (psl)
|
||||
return _vector_size(psl->suffixes);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_suffix_exception_count:
|
||||
* @psl: PSL context pointer
|
||||
*
|
||||
* This function returns number of public suffix exceptions maintained by @psl.
|
||||
*
|
||||
* If the generation of built-in data has been disabled during compilation, 0 will be returned.
|
||||
*
|
||||
* Returns: Number of public suffix exceptions in PSL context.
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
int psl_suffix_exception_count(const psl_ctx_t *psl)
|
||||
{
|
||||
if (psl == &_builtin_psl)
|
||||
return countof(suffix_exceptions);
|
||||
else if (psl)
|
||||
return _vector_size(psl->suffix_exceptions);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_builtin_compile_time:
|
||||
*
|
||||
* This function returns the time when the Publix Suffix List has been compiled into C code (by psl2c).
|
||||
*
|
||||
* If the generation of built-in data has been disabled during compilation, 0 will be returned.
|
||||
*
|
||||
* Returns: time_t value or 0.
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
time_t psl_builtin_compile_time(void)
|
||||
{
|
||||
return _psl_compile_time;
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_builtin_file_time:
|
||||
*
|
||||
* This function returns the mtime of the Publix Suffix List file that has been built in.
|
||||
*
|
||||
* If the generation of built-in data has been disabled during compilation, 0 will be returned.
|
||||
*
|
||||
* Returns: time_t value or 0.
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
time_t psl_builtin_file_time(void)
|
||||
{
|
||||
return _psl_file_time;
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_builtin_sha1sum:
|
||||
*
|
||||
* This function returns the SHA1 checksum of the Publix Suffix List file that has been built in.
|
||||
* The returned string is in lowercase hex encoding, e.g. "2af1e9e3044eda0678bb05949d7cca2f769901d8".
|
||||
*
|
||||
* If the generation of built-in data has been disabled during compilation, an empty string will be returned.
|
||||
*
|
||||
* Returns: String containing SHA1 checksum or an empty string.
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
const char *psl_builtin_sha1sum(void)
|
||||
{
|
||||
return _psl_sha1_checksum;
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_builtin_filename:
|
||||
*
|
||||
* This function returns the file name of the Publix Suffix List file that has been built in.
|
||||
*
|
||||
* If the generation of built-in data has been disabled during compilation, an empty string will be returned.
|
||||
*
|
||||
* Returns: String containing the PSL file name or an empty string.
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
const char *psl_builtin_filename(void)
|
||||
{
|
||||
return _psl_filename;
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_get_version:
|
||||
*
|
||||
* Get libpsl version.
|
||||
*
|
||||
* Returns: String containing version of libpsl.
|
||||
*
|
||||
* Since: 0.2.5
|
||||
**/
|
||||
const char *psl_get_version (void)
|
||||
{
|
||||
return PACKAGE_VERSION;
|
||||
}
|
||||
|
||||
/**
|
||||
* psl_is_cookie_domain_acceptable:
|
||||
* @psl: PSL context pointer
|
||||
* @hostname: The request hostname.
|
||||
* @cookie_domain: The domain value from a cookie
|
||||
*
|
||||
* This helper function checks whether @cookie_domain is an acceptable cookie domain value for the request
|
||||
* @hostname.
|
||||
*
|
||||
* Examples:
|
||||
* 1. Cookie domain 'example.com' would be acceptable for hostname 'www.example.com',
|
||||
* but '.com' or 'com' would NOT be acceptable since 'com' is a public suffix.
|
||||
*
|
||||
* 2. Cookie domain 'his.name' would be acceptable for hostname 'remember.his.name',
|
||||
* but NOT for 'forgot.his.name' since 'forgot.his.name' is a public suffix.
|
||||
*
|
||||
* Returns: 1 if acceptable, 0 if not acceptable.
|
||||
*
|
||||
* Since: 0.1
|
||||
*/
|
||||
int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain)
|
||||
{
|
||||
const char *p;
|
||||
size_t hostname_length, cookie_domain_length;
|
||||
|
||||
if (!psl || !hostname || !cookie_domain)
|
||||
return 0;
|
||||
|
||||
while (*cookie_domain == '.')
|
||||
cookie_domain++;
|
||||
|
||||
if (!strcmp(hostname, cookie_domain))
|
||||
return 1; /* an exact match is acceptable (and pretty common) */
|
||||
|
||||
cookie_domain_length = strlen(cookie_domain);
|
||||
hostname_length = strlen(hostname);
|
||||
|
||||
if (cookie_domain_length >= hostname_length)
|
||||
return 0; /* cookie_domain is too long */
|
||||
|
||||
p = hostname + hostname_length - cookie_domain_length;
|
||||
if (!strcmp(p, cookie_domain) && p[-1] == '.') {
|
||||
/* OK, cookie_domain matches, but it must be longer than the longest public suffix in 'hostname' */
|
||||
|
||||
if (!(p = psl_unregistrable_domain(psl, hostname)))
|
||||
return 1;
|
||||
|
||||
if (cookie_domain_length > strlen(p))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,478 @@
|
|||
/*
|
||||
* Copyright(c) 2014 Tim Ruehsen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* This file is part of libpsl.
|
||||
*
|
||||
* Precompile Public Suffix List into a C source file
|
||||
*
|
||||
* Changelog
|
||||
* 22.03.2014 Tim Ruehsen created
|
||||
*
|
||||
*/
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <ctype.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
/*
|
||||
#ifdef WITH_LIBIDN2
|
||||
# include <idn2.h>
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifdef WITH_LIBICU
|
||||
# include <unicode/uversion.h>
|
||||
# include <unicode/ustring.h>
|
||||
# include <unicode/uidna.h>
|
||||
#endif
|
||||
|
||||
#ifdef WITH_BUILTIN
|
||||
|
||||
#include <libpsl.h>
|
||||
|
||||
/*
 * A single suffix rule.
 * NOTE(review): this layout duplicates the one in psl.c and presumably has
 * to stay identical to it - confirm before changing any field.
 */
typedef struct {
	char label_buf[48];     /* inline storage for the rule text */
	const char *label;      /* pointer to the rule text */
	unsigned short length;  /* length of the rule text */
	unsigned char nlabels;  /* number of labels */
	unsigned char wildcard; /* this is a wildcard rule (e.g. *.sapporo.jp) */
} _psl_entry_t;
|
||||
|
||||
/* stripped down version libmget vector routines */
|
||||
typedef struct {
|
||||
int
|
||||
(*cmp)(const _psl_entry_t *, const _psl_entry_t *); /* comparison function */
|
||||
_psl_entry_t
|
||||
**entry; /* pointer to array of pointers to elements */
|
||||
int
|
||||
max, /* allocated elements */
|
||||
cur; /* number of elements in use */
|
||||
} _psl_vector_t;
|
||||
|
||||
struct _psl_ctx_st {
|
||||
_psl_vector_t
|
||||
*suffixes,
|
||||
*suffix_exceptions;
|
||||
};
|
||||
|
||||
static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t *, const _psl_entry_t *))
|
||||
{
|
||||
_psl_vector_t *v;
|
||||
|
||||
if (!(v = calloc(1, sizeof(_psl_vector_t))))
|
||||
return NULL;
|
||||
|
||||
if (!(v->entry = malloc(max * sizeof(_psl_entry_t *)))) {
|
||||
free(v);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
v->max = max;
|
||||
v->cmp = cmp;
|
||||
return v;
|
||||
}
|
||||
|
||||
/*
 * Free all entries of *v, the pointer array and the vector itself.
 * *v is reset to NULL afterwards so that the caller is not left with a
 * dangling pointer (and a second call becomes a harmless no-op).
 * A NULL 'v' or NULL '*v' is accepted.
 */
static void _vector_free(_psl_vector_t **v)
{
	int it;

	if (!v || !*v)
		return;

	if ((*v)->entry) {
		for (it = 0; it < (*v)->cur; it++)
			free((*v)->entry[it]);

		free((*v)->entry);
	}

	free(*v);
	*v = NULL;
}
|
||||
|
||||
static _psl_entry_t *_vector_get(const _psl_vector_t *v, int pos)
|
||||
{
|
||||
if (pos < 0 || !v || pos >= v->cur) return NULL;
|
||||
|
||||
return v->entry[pos];
|
||||
}
|
||||
|
||||
/*
 * Append a heap-allocated copy of *elem to the vector.
 *
 * Returns the index of the new entry, or -1 if 'v' or 'elem' is NULL or
 * if memory could not be allocated. On failure the vector is unchanged.
 */
static int _vector_add(_psl_vector_t *v, const _psl_entry_t *elem)
{
	_psl_entry_t *copy;

	if (!v || !elem)
		return -1;

	if (v->max == v->cur) {
		/* grow via a temporary so the old array is not lost if realloc() fails */
		int newmax = v->max > 0 ? v->max * 2 : 1;
		_psl_entry_t **grown = realloc(v->entry, newmax * sizeof(_psl_entry_t *));

		if (!grown)
			return -1;

		v->entry = grown;
		v->max = newmax;
	}

	if (!(copy = malloc(sizeof(_psl_entry_t))))
		return -1;

	memcpy(copy, elem, sizeof(_psl_entry_t));

	v->entry[v->cur] = copy;
	return v->cur++;
}
|
||||
|
||||
static int _compare(const void *p1, const void *p2, void *v)
|
||||
{
|
||||
return ((_psl_vector_t *)v)->cmp(*((_psl_entry_t **)p1), *((_psl_entry_t **)p2));
|
||||
}
|
||||
|
||||
/*
 * Sort the entries of 'v' with its cmp callback.
 * Does nothing if 'v' is NULL or no comparison function was set.
 */
static void _vector_sort(_psl_vector_t *v)
{
	if (!v || !v->cmp)
		return;

	/* the array holds entry pointers, so take the element size from the array itself */
	qsort_r(v->entry, v->cur, sizeof(v->entry[0]), _compare, v);
}
|
||||
|
||||
/* by this kind of sorting, we can easily see if a domain matches or not (match = supercookie !) */
|
||||
|
||||
static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
|
||||
{
|
||||
int n;
|
||||
|
||||
if ((n = s2->nlabels - s1->nlabels))
|
||||
return n; /* most labels first */
|
||||
|
||||
if ((n = s1->length - s2->length))
|
||||
return n; /* shorter rules first */
|
||||
|
||||
return strcmp(s1->label, s2->label);
|
||||
}
|
||||
|
||||
/*
 * Fill *suffix from the textual rule 'rule' of 'length' bytes.
 *
 * A leading "*." marks a wildcard rule and is not stored in label_buf.
 * The rule text is stored lower-cased.
 *
 * A rule that is too long for label_buf, or that starts with '*' without
 * a following '.', is reported on stderr and rejected. A rejected entry
 * has nlabels == 0, an empty label_buf and length == 0, so callers can
 * detect it and it never carries uninitialized data.
 */
static void _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
{
	const char *src;
	char *dst, *end;

	/* start from a fully defined "rejected" state */
	suffix->label = suffix->label_buf;
	suffix->label_buf[0] = 0;
	suffix->length = 0;
	suffix->nlabels = 0;
	suffix->wildcard = 0;

	if (length >= sizeof(suffix->label_buf) - 1) {
		fprintf(stderr, "Suffix rule too long (%d, ignored): %s\n", (int) length, rule);
		return;
	}

	if (*rule == '*') {
		if (rule[1] != '.') {
			/* report the complete rule, including the leading '*' */
			fprintf(stderr, "Unsupported kind of rule (ignored): %s\n", rule);
			return;
		}
		rule += 2; /* skip "*." */
		suffix->wildcard = 1;
		suffix->length = (unsigned short) (length - 2);
	} else {
		suffix->length = (unsigned short) length;
	}

	suffix->nlabels = 1;

	/* never write past label_buf, even if 'length' understates strlen(rule) */
	end = suffix->label_buf + sizeof(suffix->label_buf) - 1;

	for (dst = suffix->label_buf, src = rule; *src && dst < end;) {
		if (*src == '.')
			suffix->nlabels++;
		/* <ctype.h> functions need an unsigned char value; rules may hold UTF-8 bytes */
		*dst++ = (char) tolower((unsigned char) *src++);
	}
	*dst = 0;
}
|
||||
|
||||
/*
 * Load Public Suffix List rules from the file 'fname'.
 *
 * Returns whatever psl_load_fp() returns for the opened file, or NULL if
 * 'fname' is NULL or the file cannot be opened for reading.
 */
psl_ctx_t *psl_load_file(const char *fname)
{
	FILE *fp;
	psl_ctx_t *psl;

	if (!fname)
		return NULL;

	if (!(fp = fopen(fname, "r")))
		return NULL;

	psl = psl_load_fp(fp);
	fclose(fp);

	return psl;
}
|
||||
|
||||
psl_ctx_t *psl_load_fp(FILE *fp)
|
||||
{
|
||||
psl_ctx_t *psl;
|
||||
_psl_entry_t suffix, *suffixp;
|
||||
int nsuffixes = 0;
|
||||
char buf[256], *linep, *p;
|
||||
|
||||
if (!fp)
|
||||
return NULL;
|
||||
|
||||
if (!(psl = calloc(1, sizeof(psl_ctx_t))))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
||||
* as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
||||
*/
|
||||
psl->suffixes = _vector_alloc(8*1024, _suffix_compare);
|
||||
psl->suffix_exceptions = _vector_alloc(64, _suffix_compare);
|
||||
|
||||
while ((linep = fgets(buf, sizeof(buf), fp))) {
|
||||
while (isspace(*linep)) linep++; /* ignore leading whitespace */
|
||||
if (!*linep) continue; /* skip empty lines */
|
||||
|
||||
if (*linep == '/' && linep[1] == '/')
|
||||
continue; /* skip comments */
|
||||
|
||||
/* parse suffix rule */
|
||||
for (p = linep; *linep && !isspace(*linep);) linep++;
|
||||
*linep = 0;
|
||||
|
||||
if (*p == '!') {
|
||||
/* add to exceptions */
|
||||
_suffix_init(&suffix, p + 1, linep - p - 1);
|
||||
suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix));
|
||||
} else {
|
||||
_suffix_init(&suffix, p, linep - p);
|
||||
suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix));
|
||||
}
|
||||
|
||||
if (suffixp)
|
||||
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
||||
|
||||
nsuffixes++;;
|
||||
}
|
||||
|
||||
_vector_sort(psl->suffix_exceptions);
|
||||
_vector_sort(psl->suffixes);
|
||||
|
||||
return psl;
|
||||
}
|
||||
|
||||
static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *varname)
|
||||
{
|
||||
int it;
|
||||
|
||||
#ifdef WITH_LIBICU
|
||||
do {
|
||||
UVersionInfo version_info;
|
||||
char version[U_MAX_VERSION_STRING_LENGTH];
|
||||
|
||||
u_getVersion(version_info);
|
||||
u_versionToString(version_info, version);
|
||||
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version);
|
||||
} while (0);
|
||||
#else
|
||||
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with idn2) */\n");
|
||||
#endif
|
||||
|
||||
fprintf(fpout, "static _psl_entry_t %s[] = {\n", varname);
|
||||
|
||||
for (it = 0; it < v->cur; it++) {
|
||||
_psl_entry_t *e = _vector_get(v, it);
|
||||
|
||||
fprintf(fpout, "\t{ \"%s\", NULL, %hd, %d, %d },\n",
|
||||
e->label_buf, e->length, (int) e->nlabels, (int) e->wildcard);
|
||||
}
|
||||
|
||||
fprintf(fpout, "};\n");
|
||||
}
|
||||
|
||||
/*
 * Release a context created by psl_load_fp() / psl_load_file():
 * both rule vectors and the context itself. NULL is accepted.
 */
void psl_free(psl_ctx_t *psl)
{
	if (psl == NULL)
		return;

	_vector_free(&psl->suffixes);
	_vector_free(&psl->suffix_exceptions);
	free(psl);
}
|
||||
|
||||
/*
 * Return 1 if 's' contains at least one byte outside the 7-bit ASCII
 * range (i.e. it needs punycode conversion), 0 otherwise.
 *
 * The bytes are examined as unsigned char so that the result does not
 * depend on whether plain 'char' is signed on the target platform.
 */
static int _str_needs_encoding(const char *s)
{
	const unsigned char *p;

	for (p = (const unsigned char *) s; *p; p++) {
		if (*p >= 0x80)
			return 1;
	}

	return 0;
}
|
||||
|
||||
static void _add_punycode_if_needed(_psl_vector_t *v)
|
||||
{
|
||||
int it, n;
|
||||
|
||||
/* do not use 'it < v->cur' since v->cur is changed by _vector_add() ! */
|
||||
for (it = 0, n = v->cur; it < n; it++) {
|
||||
_psl_entry_t *e = _vector_get(v, it);
|
||||
|
||||
if (_str_needs_encoding(e->label_buf)) {
|
||||
_psl_entry_t suffix, *suffixp;
|
||||
char lookupname[64] = "";
|
||||
|
||||
/* the following lines will have GPL3+ license issues */
|
||||
/* char *asc = NULL;
|
||||
int rc;
|
||||
|
||||
if ((rc = idn2_lookup_u8((uint8_t *)e->label_buf, (uint8_t **)&asc, 0)) == IDN2_OK) {
|
||||
// fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, asc);
|
||||
_suffix_init(&suffix, asc, strlen(asc));
|
||||
suffix.wildcard = e->wildcard;
|
||||
suffixp = _vector_get(v, _vector_add(v, &suffix));
|
||||
suffixp->label = suffixp->e_label_buf; // set label to changed address
|
||||
} else
|
||||
fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc));
|
||||
*/
|
||||
|
||||
#ifdef WITH_LIBICU
|
||||
UIDNA *idna;
|
||||
UErrorCode status = 0;
|
||||
|
||||
/* IDNA2003 punycode conversion */
|
||||
/* destLen = uidna_toASCII(e->label_buf, (int32_t) strlen(e->label_buf), lookupname, (int32_t) sizeof(lookupname),
|
||||
UIDNA_DEFAULT, NULL, &status);
|
||||
*/
|
||||
|
||||
/* IDNA2008 UTS#46 punycode conversion */
|
||||
if ((idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status))) {
|
||||
UChar utf16_dst[64], utf16_src[64];
|
||||
int32_t utf16_src_length;
|
||||
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
|
||||
|
||||
u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, (int32_t) strlen(e->label_buf), &status);
|
||||
if (U_SUCCESS(status)) {
|
||||
int32_t dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status);
|
||||
if (U_SUCCESS(status)) {
|
||||
u_strToUTF8(lookupname, (int32_t) sizeof(lookupname), NULL, utf16_dst, dst_length, &status);
|
||||
if (U_SUCCESS(status)) {
|
||||
if (strcmp(e->label_buf, lookupname)) {
|
||||
/* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */
|
||||
_suffix_init(&suffix, lookupname, strlen(lookupname));
|
||||
suffix.wildcard = e->wildcard;
|
||||
suffixp = _vector_get(v, _vector_add(v, &suffix));
|
||||
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
||||
} /* else ignore */
|
||||
} else
|
||||
fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status);
|
||||
} else
|
||||
fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status);
|
||||
} else
|
||||
fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status);
|
||||
|
||||
uidna_close(idna);
|
||||
} else
|
||||
fprintf(stderr, "Failed to get UTS46 IDNA handle\n");
|
||||
|
||||
#else
|
||||
/* this is much slower than the libidn2 API but should have no license issues */
|
||||
FILE *pp;
|
||||
char cmd[16 + sizeof(e->label_buf)];
|
||||
snprintf(cmd, sizeof(cmd), "idn2 '%s'", e->label_buf);
|
||||
if ((pp = popen(cmd, "r"))) {
|
||||
if (fscanf(pp, "%63s", lookupname) >= 1 && strcmp(e->label_buf, lookupname)) {
|
||||
/* fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, lookupname); */
|
||||
_suffix_init(&suffix, lookupname, strlen(lookupname));
|
||||
suffix.wildcard = e->wildcard;
|
||||
suffixp = _vector_get(v, _vector_add(v, &suffix));
|
||||
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
||||
}
|
||||
pclose(pp);
|
||||
} else
|
||||
fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
_vector_sort(v);
|
||||
}
|
||||
#endif /* WITH_BUILTIN */
|
||||
|
||||
/*
 * psl2c <infile> <outfile>
 *
 * WITH_BUILTIN: load the Public Suffix List <infile>, add punycode
 * variants and write the rules as C arrays to <outfile>, followed by the
 * modification time of <infile>, the current time, the SHA-1 checksum of
 * <infile> (as printed by the 'sha1sum' command, empty if unavailable)
 * and the name of <infile>.
 * Without WITH_BUILTIN: write empty placeholders for the same symbols.
 *
 * Exit status: 0 on success, 1 on wrong usage, 2 if <infile> cannot be
 * loaded, 3 if <outfile> cannot be opened, 4 if closing <outfile> fails.
 */
int main(int argc, const char **argv)
{
	FILE *fpout;
#ifdef WITH_BUILTIN
	psl_ctx_t *psl;
#endif
	int ret = 0;

	if (argc != 3) {
		fprintf(stderr, "Usage: psl2c <infile> <outfile>\n");
		fprintf(stderr, "  <infile>  is the 'effective_tld_names.dat' (aka Public Suffix List)\n");
		fprintf(stderr, "  <outfile> is the the C filename to be generated from <infile>\n");
		return 1;
	}

#ifdef WITH_BUILTIN
	if (!(psl = psl_load_file(argv[1])))
		return 2;

	if ((fpout = fopen(argv[2], "w"))) {
		FILE *pp;
		struct stat st;
		size_t cmdsize = 16 + strlen(argv[1]);
		char *cmd = malloc(cmdsize), checksum[64] = "";

		_add_punycode_if_needed(psl->suffixes);
		_add_punycode_if_needed(psl->suffix_exceptions);

		_print_psl_entries(fpout, psl->suffixes, "suffixes");
		_print_psl_entries(fpout, psl->suffix_exceptions, "suffix_exceptions");

		/* NOTE(review): argv[1] is handed to the shell unquoted; paths with spaces or metacharacters will not work */
		if (cmd) {
			snprintf(cmd, cmdsize, "sha1sum %s", argv[1]);
			if ((pp = popen(cmd, "r"))) {
				if (fscanf(pp, "%63[0-9a-zA-Z]", checksum) < 1)
					*checksum = 0;
				pclose(pp);
			}
			free(cmd);
		}

		if (stat(argv[1], &st) != 0)
			st.st_mtime = 0;

		/* time_t has no printf conversion of its own, so convert explicitly */
		fprintf(fpout, "static time_t _psl_file_time = %lu;\n", (unsigned long) st.st_mtime);
		fprintf(fpout, "static time_t _psl_compile_time = %lu;\n", (unsigned long) time(NULL));
		fprintf(fpout, "static const char _psl_sha1_checksum[] = \"%s\";\n", checksum);
		/* the file name, not the checksum, belongs here */
		fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[1]);

		if (fclose(fpout) != 0)
			ret = 4;
	} else {
		fprintf(stderr, "Failed to write open '%s'\n", argv[2]);
		ret = 3;
	}

	psl_free(psl);
#else
	if ((fpout = fopen(argv[2], "w"))) {
		fprintf(fpout, "static _psl_entry_t suffixes[0];\n");
		fprintf(fpout, "static _psl_entry_t suffix_exceptions[0];\n");
		fprintf(fpout, "static time_t _psl_file_time;\n");
		fprintf(fpout, "static time_t _psl_compile_time;\n");
		fprintf(fpout, "static const char _psl_sha1_checksum[] = \"\";\n");
		fprintf(fpout, "static const char _psl_filename[] = \"\";\n");

		if (fclose(fpout) != 0)
			ret = 4;
	} else {
		fprintf(stderr, "Failed to write open '%s'\n", argv[2]);
		ret = 3;
	}

#endif /* WITH_BUILTIN */

	return ret;
}
|
|
@ -0,0 +1,22 @@
|
|||
DEFS = @DEFS@ -DDATADIR=\"$(top_srcdir)/data\" -DSRCDIR=\"$(srcdir)\" -DPSL_FILE=\"$(PSL_FILE)\" -DPSL_TESTFILE=\"$(PSL_TESTFILE)\"
|
||||
AM_CPPFLAGS = -I$(top_srcdir)/include
|
||||
LDADD = ../src/libpsl.la
|
||||
|
||||
if WITH_BUILTIN
|
||||
|
||||
PSL_TESTS = test-is-public test-is-public-builtin test-is-public-all test-registrable-domain \
|
||||
test-is-cookie-domain-acceptable
|
||||
|
||||
else
|
||||
|
||||
# ./configure'd with '--disable-builtin'
|
||||
# Do not call test-is-public-builtin here: it does not make sense.
|
||||
# Do not call test-registrable-domain here: it would fail due to missing punycode entries in PSL file.
|
||||
PSL_TESTS = test-is-public test-is-public-all test-is-cookie-domain-acceptable
|
||||
|
||||
endif
|
||||
|
||||
check_PROGRAMS = $(PSL_TESTS)
|
||||
|
||||
TESTS_ENVIRONMENT = TESTS_VALGRIND="@VALGRIND_ENVIRONMENT@"
|
||||
TESTS = $(PSL_TESTS)
|
|
@ -0,0 +1,119 @@
|
|||
/*
|
||||
* Copyright(c) 2014 Tim Ruehsen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* This file is part of the test suite of libpsl.
|
||||
*
|
||||
 * Test case for psl_is_cookie_domain_acceptable()
|
||||
*
|
||||
* Changelog
|
||||
* 15.04.2014 Tim Ruehsen created from libmget/cookie.c
|
||||
*
|
||||
*/
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <alloca.h>
|
||||
|
||||
#include <libpsl.h>
|
||||
|
||||
/* number of elements in the array 'a' (only valid for real arrays, not pointers) */
#define countof(a) (sizeof(a)/sizeof((a)[0]))

static int ok;     /* checks whose result matched the expectation */
static int failed; /* checks whose result did not match */
|
||||
|
||||
static void test_psl(void)
|
||||
{
|
||||
static const struct test_data {
|
||||
const char
|
||||
*request_domain,
|
||||
*cookie_domain;
|
||||
int
|
||||
result;
|
||||
} test_data[] = {
|
||||
{ "www.dkg.forgot.his.name", "www.dkg.forgot.his.name", 1 },
|
||||
{ "www.dkg.forgot.his.name", "dkg.forgot.his.name", 1 },
|
||||
{ "www.dkg.forgot.his.name", "forgot.his.name", 0 },
|
||||
{ "www.dkg.forgot.his.name", "his.name", 0 },
|
||||
{ "www.dkg.forgot.his.name", "name", 0 },
|
||||
{ "www.his.name", "www.his.name", 1 },
|
||||
{ "www.his.name", "his.name", 1 },
|
||||
{ "www.his.name", "name", 0 },
|
||||
{ "www.example.com", "www.example.com", 1 },
|
||||
{ "www.example.com", "example.com", 1 },
|
||||
{ "www.example.com", "com", 0 }, /* not accepted by normalization (PSL rule 'com') */
|
||||
{ "www.example.com", "example.org", 0 },
|
||||
{ "www.sa.gov.au", "sa.gov.au", 0 }, /* not accepted by normalization (PSL rule '*.ar') */
|
||||
{ "www.educ.ar", "educ.ar", 1 }, /* PSL exception rule '!educ.ar' */
|
||||
};
|
||||
unsigned it;
|
||||
psl_ctx_t *psl;
|
||||
|
||||
psl = psl_load_file(PSL_FILE);
|
||||
|
||||
printf("loaded %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));
|
||||
|
||||
for (it = 0; it < countof(test_data); it++) {
|
||||
const struct test_data *t = &test_data[it];
|
||||
int result = psl_is_cookie_domain_acceptable(psl, t->request_domain, t->cookie_domain);
|
||||
|
||||
if (result == t->result) {
|
||||
ok++;
|
||||
} else {
|
||||
failed++;
|
||||
printf("psl_is_cookie_domain_acceptable(%s, %s)=%d (expected %d)\n",
|
||||
t->request_domain, t->cookie_domain, result, t->result);
|
||||
}
|
||||
}
|
||||
|
||||
psl_free(psl);
|
||||
}
|
||||
|
||||
int main(int argc, const char * const *argv)
|
||||
{
|
||||
/* if VALGRIND testing is enabled, we have to call ourselves with valgrind checking */
|
||||
if (argc == 1) {
|
||||
const char *valgrind = getenv("TESTS_VALGRIND");
|
||||
|
||||
if (valgrind && *valgrind) {
|
||||
size_t cmdsize = strlen(valgrind) + strlen(argv[0]) + 32;
|
||||
char *cmd = alloca(cmdsize);
|
||||
|
||||
snprintf(cmd, cmdsize, "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
return system(cmd) != 0;
|
||||
}
|
||||
}
|
||||
|
||||
test_psl();
|
||||
|
||||
if (failed) {
|
||||
printf("Summary: %d out of %d tests failed\n", failed, ok + failed);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("Summary: All %d tests passed\n", ok + failed);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,140 @@
|
|||
/*
|
||||
* Copyright(c) 2014 Tim Ruehsen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* This file is part of the test suite of libpsl.
|
||||
*
|
||||
* Test psl_is_public_suffix() for all entries in effective_tld_names.dat
|
||||
*
|
||||
* Changelog
|
||||
* 19.03.2014 Tim Ruehsen created
|
||||
*
|
||||
*/
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <alloca.h>
|
||||
|
||||
#include <libpsl.h>
|
||||
|
||||
static int ok;     /* checks whose result matched the expectation */
static int failed; /* checks whose result did not match */
|
||||
|
||||
static void test_psl(void)
|
||||
{
|
||||
FILE *fp;
|
||||
psl_ctx_t *psl;
|
||||
int result;
|
||||
char buf[256], domain[64], *linep, *p;
|
||||
|
||||
psl = psl_load_file(PSL_FILE); /* PSL_FILE can be set by ./configure --with-psl-file=[PATH] */
|
||||
|
||||
printf("loaded %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));
|
||||
|
||||
if ((fp = fopen(PSL_FILE, "r"))) {
|
||||
while ((linep = fgets(buf, sizeof(buf), fp))) {
|
||||
while (isspace(*linep)) linep++; /* ignore leading whitespace */
|
||||
if (!*linep) continue; /* skip empty lines */
|
||||
|
||||
if (*linep == '/' && linep[1] == '/')
|
||||
continue; /* skip comments */
|
||||
|
||||
/* parse suffix rule */
|
||||
for (p = linep; *linep && !isspace(*linep);) linep++;
|
||||
*linep = 0;
|
||||
|
||||
if (*p == '!') { /* an exception to a wildcard, e.g. !www.ck (wildcard is *.ck) */
|
||||
if ((result = psl_is_public_suffix(psl, p + 1))) {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix(%s)=%d (expected 0)\n", p, result);
|
||||
} else ok++;
|
||||
|
||||
if (!(result = psl_is_public_suffix(psl, strchr(p, '.') + 1))) {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", strchr(p, '.') + 1, result);
|
||||
} else ok++;
|
||||
}
|
||||
else if (*p == '*') { /* a wildcard, e.g. *.ck */
|
||||
if (!(result = psl_is_public_suffix(psl, p + 1))) {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", p + 1, result);
|
||||
} else ok++;
|
||||
|
||||
*p = 'x';
|
||||
if (!(result = psl_is_public_suffix(psl, p))) {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", p, result);
|
||||
} else ok++;
|
||||
}
|
||||
else {
|
||||
if (!(result = psl_is_public_suffix(psl, p))) {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", p, result);
|
||||
} else ok++;
|
||||
|
||||
snprintf(domain, sizeof(domain), "xxxx.%s", p);
|
||||
if ((result = psl_is_public_suffix(psl, domain))) {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix(%s)=%d (expected 0)\n", domain, result);
|
||||
} else ok++;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
} else {
|
||||
printf("Failed to open %s\n", PSL_FILE);
|
||||
failed++;
|
||||
}
|
||||
|
||||
psl_free(psl);
|
||||
}
|
||||
|
||||
int main(int argc, const char * const *argv)
|
||||
{
|
||||
/* if VALGRIND testing is enabled, we have to call ourselves with valgrind checking */
|
||||
if (argc == 1) {
|
||||
const char *valgrind = getenv("TESTS_VALGRIND");
|
||||
|
||||
if (valgrind && *valgrind) {
|
||||
size_t cmdsize = strlen(valgrind) + strlen(argv[0]) + 32;
|
||||
char *cmd = alloca(cmdsize);
|
||||
|
||||
snprintf(cmd, cmdsize, "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
return system(cmd) != 0;
|
||||
}
|
||||
}
|
||||
|
||||
test_psl();
|
||||
|
||||
if (failed) {
|
||||
printf("Summary: %d out of %d tests failed\n", failed, ok + failed);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("Summary: All %d tests passed\n", ok + failed);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,139 @@
|
|||
/*
|
||||
* Copyright(c) 2014 Tim Ruehsen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* This file is part of the test suite of libpsl.
|
||||
*
|
||||
* Test case for psl built-in functions
|
||||
*
|
||||
* Changelog
|
||||
* 19.03.2014 Tim Ruehsen created from libmget/cookie.c
|
||||
*
|
||||
*/
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <alloca.h>
|
||||
|
||||
#include <libpsl.h>
|
||||
|
||||
/* number of elements in the array 'a' (only valid for real arrays, not pointers) */
#define countof(a) (sizeof(a)/sizeof((a)[0]))

static int ok;     /* checks whose result matched the expectation */
static int failed; /* checks whose result did not match */
|
||||
|
||||
static void test_psl(void)
|
||||
{
|
||||
/* punycode generation: idn 商标 */
|
||||
/* octal code generation: echo -n "商标" | od -b */
|
||||
static const struct test_data {
|
||||
const char
|
||||
*domain;
|
||||
int
|
||||
result;
|
||||
} test_data[] = {
|
||||
{ "www.example.com", 0 },
|
||||
{ "com.ar", 1 },
|
||||
{ "www.com.ar", 0 },
|
||||
{ "cc.ar.us", 1 },
|
||||
{ ".cc.ar.us", 1 },
|
||||
{ "www.cc.ar.us", 0 },
|
||||
{ "www.ck", 0 }, /* exception from *.ck */
|
||||
{ "abc.www.ck", 0 },
|
||||
{ "xxx.ck", 1 },
|
||||
{ "www.xxx.ck", 0 },
|
||||
{ "\345\225\206\346\240\207", 1 }, /* xn--czr694b oder 商标 */
|
||||
{ "www.\345\225\206\346\240\207", 0 },
|
||||
{ "xn--czr694b", 1 },
|
||||
{ "www.xn--czr694b", 0 },
|
||||
/* some special test follow ('name' and 'forgot.his.name' are public, but e.g. his.name is not) */
|
||||
{ "name", 1 },
|
||||
{ ".name", 1 },
|
||||
{ "his.name", 0 },
|
||||
{ ".his.name", 0 },
|
||||
{ "forgot.his.name", 1 },
|
||||
{ ".forgot.his.name", 1 },
|
||||
{ "whoever.his.name", 0 },
|
||||
{ "whoever.forgot.his.name", 0 },
|
||||
{ ".", 1 }, /* special case */
|
||||
{ "", 1 }, /* special case */
|
||||
{ NULL, 1 }, /* special case */
|
||||
{ "adfhoweirh", 1 }, /* unknown TLD */
|
||||
};
|
||||
unsigned it;
|
||||
const psl_ctx_t *psl;
|
||||
|
||||
psl = psl_builtin();
|
||||
|
||||
printf("have %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));
|
||||
|
||||
for (it = 0; it < countof(test_data); it++) {
|
||||
const struct test_data *t = &test_data[it];
|
||||
int result = psl_is_public_suffix(psl, t->domain);
|
||||
|
||||
if (result == t->result) {
|
||||
ok++;
|
||||
} else {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix(%s)=%d (expected %d)\n", t->domain, result, t->result);
|
||||
}
|
||||
}
|
||||
|
||||
printf("psl_builtin_compile_time()=%ld\n", psl_builtin_compile_time());
|
||||
psl_builtin_compile_time() == 0 ? failed++ : ok++;
|
||||
|
||||
printf("psl_builtin_file_time()=%ld\n", psl_builtin_file_time());
|
||||
psl_builtin_file_time() == 0 ? failed++ : ok++;
|
||||
|
||||
printf("psl_builtin_sha1sum()=%s\n", psl_builtin_sha1sum());
|
||||
*psl_builtin_sha1sum() == 0 ? failed++ : ok++;
|
||||
}
|
||||
|
||||
int main(int argc, const char * const *argv)
|
||||
{
|
||||
/* if VALGRIND testing is enabled, we have to call ourselves with valgrind checking */
|
||||
if (argc == 1) {
|
||||
const char *valgrind = getenv("TESTS_VALGRIND");
|
||||
|
||||
if (valgrind && *valgrind) {
|
||||
size_t cmdsize = strlen(valgrind) + strlen(argv[0]) + 32;
|
||||
char *cmd = alloca(cmdsize);
|
||||
|
||||
snprintf(cmd, cmdsize, "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
return system(cmd) != 0;
|
||||
}
|
||||
}
|
||||
|
||||
test_psl();
|
||||
|
||||
if (failed) {
|
||||
printf("Summary: %d out of %d tests failed\n", failed, ok + failed);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("Summary: All %d tests passed\n", ok + failed);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,130 @@
|
|||
/*
|
||||
* Copyright(c) 2014 Tim Ruehsen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* This file is part of the test suite of libpsl.
|
||||
*
|
||||
* Test case for psl_load_file(), psl_is_public_suffix(), psl_free()
|
||||
*
|
||||
* Changelog
|
||||
* 19.03.2014 Tim Ruehsen created from libmget/cookie.c
|
||||
*
|
||||
*/
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <alloca.h>
|
||||
|
||||
#include <libpsl.h>
|
||||
|
||||
/* number of elements in the array 'a' (only valid for real arrays, not pointers) */
#define countof(a) (sizeof(a)/sizeof((a)[0]))

static int ok;     /* checks whose result matched the expectation */
static int failed; /* checks whose result did not match */
|
||||
|
||||
static void test_psl(void)
|
||||
{
|
||||
/* punycode generation: idn 商标 */
|
||||
/* octal code generation: echo -n "商标" | od -b */
|
||||
static const struct test_data {
|
||||
const char
|
||||
*domain;
|
||||
int
|
||||
result;
|
||||
} test_data[] = {
|
||||
{ "www.example.com", 0 },
|
||||
{ "com.ar", 1 },
|
||||
{ "www.com.ar", 0 },
|
||||
{ "cc.ar.us", 1 },
|
||||
{ ".cc.ar.us", 1 },
|
||||
{ "www.cc.ar.us", 0 },
|
||||
{ "www.ck", 0 }, /* exception from *.ck */
|
||||
{ "abc.www.ck", 0 },
|
||||
{ "xxx.ck", 1 },
|
||||
{ "www.xxx.ck", 0 },
|
||||
{ "\345\225\206\346\240\207", 1 }, /* xn--czr694b oder 商标 */
|
||||
{ "www.\345\225\206\346\240\207", 0 },
|
||||
/* some special test follow ('name' and 'forgot.his.name' are public, but e.g. his.name is not) */
|
||||
{ "name", 1 },
|
||||
{ ".name", 1 },
|
||||
{ "his.name", 0 },
|
||||
{ ".his.name", 0 },
|
||||
{ "forgot.his.name", 1 },
|
||||
{ ".forgot.his.name", 1 },
|
||||
{ "whoever.his.name", 0 },
|
||||
{ "whoever.forgot.his.name", 0 },
|
||||
{ ".", 1 }, /* special case */
|
||||
{ "", 1 }, /* special case */
|
||||
{ NULL, 1 }, /* special case */
|
||||
{ "adfhoweirh", 1 }, /* unknown TLD */
|
||||
};
|
||||
unsigned it;
|
||||
psl_ctx_t *psl;
|
||||
|
||||
psl = psl_load_file(PSL_FILE);
|
||||
|
||||
printf("loaded %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));
|
||||
|
||||
for (it = 0; it < countof(test_data); it++) {
|
||||
const struct test_data *t = &test_data[it];
|
||||
int result = psl_is_public_suffix(psl, t->domain);
|
||||
|
||||
if (result == t->result) {
|
||||
ok++;
|
||||
} else {
|
||||
failed++;
|
||||
printf("psl_is_public_suffix(%s)=%d (expected %d)\n", t->domain, result, t->result);
|
||||
}
|
||||
}
|
||||
|
||||
psl_free(psl);
|
||||
}
|
||||
|
||||
int main(int argc, const char * const *argv)
|
||||
{
|
||||
/* if VALGRIND testing is enabled, we have to call ourselves with valgrind checking */
|
||||
if (argc == 1) {
|
||||
const char *valgrind = getenv("TESTS_VALGRIND");
|
||||
|
||||
if (valgrind && *valgrind) {
|
||||
size_t cmdsize = strlen(valgrind) + strlen(argv[0]) + 32;
|
||||
char *cmd = alloca(cmdsize);
|
||||
|
||||
snprintf(cmd, cmdsize, "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
return system(cmd) != 0;
|
||||
}
|
||||
}
|
||||
|
||||
test_psl();
|
||||
|
||||
if (failed) {
|
||||
printf("Summary: %d out of %d tests failed\n", failed, ok + failed);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("Summary: All %d tests passed\n", ok + failed);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,171 @@
|
|||
/*
|
||||
* Copyright(c) 2014 Tim Ruehsen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* This file is part of the test suite of libpsl.
|
||||
*
|
||||
* Test psl_registered_domain() for all entries in test_psl.dat
|
||||
*
|
||||
* Changelog
|
||||
* 26.03.2014 Tim Ruehsen created
|
||||
*
|
||||
*/
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <alloca.h>
|
||||
|
||||
#include <libpsl.h>
|
||||
|
||||
static int
|
||||
ok,
|
||||
failed;
|
||||
|
||||
static void test(const psl_ctx_t *psl, const char *domain, const char *expected_result)
|
||||
{
|
||||
const char *result;
|
||||
char lookupname[128];
|
||||
|
||||
/* check if there might be some utf-8 characters */
|
||||
if (domain) {
|
||||
int utf8;
|
||||
const char *p;
|
||||
|
||||
for (p = domain, utf8 = 0; *p && !utf8; p++)
|
||||
if (*p < 0)
|
||||
utf8 = 1;
|
||||
|
||||
/* if we found utf-8, make sure to convert domain correctly to lowercase */
|
||||
/* does it work, if we are not in a utf-8 env ? */
|
||||
if (utf8) {
|
||||
FILE *pp;
|
||||
size_t cmdsize = 48 + strlen(domain);
|
||||
char *cmd = alloca(cmdsize);
|
||||
|
||||
snprintf(cmd, cmdsize, "echo -n '%s' | sed -e 's/./\\L\\0/g'", domain);
|
||||
if ((pp = popen(cmd, "r"))) {
|
||||
if (fscanf(pp, "%127s", lookupname) >= 1)
|
||||
domain = lookupname;
|
||||
pclose(pp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result = psl_registrable_domain(psl, domain);
|
||||
|
||||
if ((result && expected_result && !strcmp(result, expected_result)) || (!result && !expected_result)) {
|
||||
ok++;
|
||||
} else {
|
||||
failed++;
|
||||
printf("psl_registrable_domain(%s)=%s (expected %s)\n",
|
||||
domain, result ? result : "NULL", expected_result ? expected_result : "NULL");
|
||||
}
|
||||
}
|
||||
|
||||
static void test_psl(void)
|
||||
{
|
||||
FILE *fp;
|
||||
const psl_ctx_t *psl;
|
||||
char buf[256], domain[128], expected_regdom[128], *p;
|
||||
|
||||
psl = psl_builtin();
|
||||
|
||||
printf("have %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));
|
||||
|
||||
/* special check with NULL values */
|
||||
test(NULL, NULL, NULL);
|
||||
|
||||
/* special check with NULL psl context */
|
||||
test(NULL, "www.example.com", NULL);
|
||||
|
||||
/* special check with NULL psl context and TLD */
|
||||
test(NULL, "com", NULL);
|
||||
|
||||
/* Norwegian with uppercase oe */
|
||||
test(psl, "www.\303\230yer.no", "www.\303\270yer.no");
|
||||
|
||||
/* Norwegian with lowercase oe */
|
||||
test(psl, "www.\303\270yer.no", "www.\303\270yer.no");
|
||||
|
||||
/* special check with NULL psl context and TLD */
|
||||
test(psl, "whoever.forgot.his.name", "whoever.forgot.his.name");
|
||||
|
||||
/* special check with NULL psl context and TLD */
|
||||
test(psl, "forgot.his.name", NULL);
|
||||
|
||||
/* special check with NULL psl context and TLD */
|
||||
test(psl, "his.name", "his.name");
|
||||
|
||||
if ((fp = fopen(PSL_TESTFILE, "r"))) {
|
||||
while ((fgets(buf, sizeof(buf), fp))) {
|
||||
if (sscanf(buf, " checkPublicSuffix('%127[^']' , '%127[^']", domain, expected_regdom) != 2) {
|
||||
if (sscanf(buf, " checkPublicSuffix('%127[^']' , %127[nul]", domain, expected_regdom) != 2)
|
||||
continue;
|
||||
}
|
||||
|
||||
/* we have to lowercase the domain - the PSL API just takes lowercase */
|
||||
for (p = domain; *p; p++)
|
||||
if (*p > 0 && isupper(*p))
|
||||
*p = tolower(*p);
|
||||
|
||||
if (!strcmp(expected_regdom, "null"))
|
||||
test(psl, domain, NULL);
|
||||
else
|
||||
test(psl, domain, expected_regdom);
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
} else {
|
||||
printf("Failed to open %s\n", PSL_TESTFILE);
|
||||
failed++;
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, const char * const *argv)
|
||||
{
|
||||
/* if VALGRIND testing is enabled, we have to call ourselves with valgrind checking */
|
||||
if (argc == 1) {
|
||||
const char *valgrind = getenv("TESTS_VALGRIND");
|
||||
|
||||
if (valgrind && *valgrind) {
|
||||
size_t cmdsize = strlen(valgrind) + strlen(argv[0]) + 32;
|
||||
char *cmd = alloca(cmdsize);
|
||||
|
||||
snprintf(cmd, cmdsize, "TESTS_VALGRIND="" %s %s", valgrind, argv[0]);
|
||||
return system(cmd) != 0;
|
||||
}
|
||||
}
|
||||
|
||||
test_psl();
|
||||
|
||||
if (failed) {
|
||||
printf("Summary: %d out of %d tests failed\n", failed, ok + failed);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("Summary: All %d tests passed\n", ok + failed);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
bin_PROGRAMS = psl
|
||||
|
||||
AM_CPPFLAGS = -I$(top_srcdir)/include
|
||||
AM_LDFLAGS = -static
|
||||
LDADD = ../src/libpsl.la
|
|
@ -0,0 +1,176 @@
|
|||
/*
|
||||
* Copyright(c) 2014 Tim Ruehsen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* This file is part of libpsl.
|
||||
*
|
||||
* Using the libpsl functions via command line
|
||||
*
|
||||
* Changelog
|
||||
* 11.04.2014 Tim Ruehsen created
|
||||
*
|
||||
*/
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <libpsl.h>
|
||||
|
||||
/*
 * Print the command line help and terminate the process.
 *
 * err - exit status passed to exit(). When it is 0 (help was explicitly
 *       requested) the text goes to stdout, otherwise to stderr.
 *
 * Does not return.
 */
static void usage(int err)
{
	FILE *out = err ? stderr : stdout;

	fputs(
		"\n"
		"Usage: psl [options] <domains...>\n"
		"\n"
		"Options:\n"
		" --help show this help text\n"
		" --version show library version information\n"
		" --use-builtin-data use the builtin PSL data. [default]\n"
		" --load-psl-file <filename> load PSL data from file.\n"
		" --is-public-suffix check if domains are public suffixes or not. [default]\n"
		" --is-cookie-domain-acceptable <cookie-domain>\n"
		" check if cookie-domain is acceptable for domains.\n"
		" --print-unreg-domain print the longest public suffix part\n"
		" --print-reg-domain print the shortest private suffix part\n"
		" --print-info print info about library builtin data\n"
		"\n",
		out);

	exit(err);
}
|
||||
|
||||
/*
 * Format a timestamp as local time in the form
 * "<weekday>, <day> <month> <year> <HH>:<MM>:<SS> <zone>".
 *
 * The layout follows the RFC 2822 date style, except that the zone is
 * printed with strftime()'s %Z (zone name or abbreviation) instead of a
 * numeric offset.
 *
 * Returns a pointer to a static buffer that is overwritten by the next call,
 * or the string "(unknown)" if the time cannot be converted or formatted.
 */
static const char *time2str(time_t t)
{
	static char buf[64];
	const struct tm *tp = localtime(&t);

	/*
	 * localtime() returns NULL if the time is not representable, and
	 * strftime() returns 0 (leaving buf indeterminate) if the result
	 * does not fit - never hand out the buffer in either case.
	 */
	if (!tp || !strftime(buf, sizeof(buf), "%a, %d %b %Y %H:%M:%S %Z", tp))
		return "(unknown)";

	return buf;
}
|
||||
|
||||
int main(int argc, const char *const *argv)
|
||||
{
|
||||
int mode = 1;
|
||||
const char *const *arg, *psl_file = NULL, *cookie_domain = NULL;
|
||||
psl_ctx_t *psl = (psl_ctx_t *) psl_builtin();
|
||||
|
||||
for (arg = argv + 1; arg < argv + argc; arg++) {
|
||||
if (!strncmp(*arg, "--", 2)) {
|
||||
if (!strcmp(*arg, "--is-public-suffix"))
|
||||
mode = 1;
|
||||
else if (!strcmp(*arg, "--print-unreg-domain"))
|
||||
mode = 2;
|
||||
else if (!strcmp(*arg, "--print-reg-domain"))
|
||||
mode = 3;
|
||||
else if (!strcmp(*arg, "--print-info"))
|
||||
mode = 99;
|
||||
else if (!strcmp(*arg, "--is-cookie-domain-acceptable") && arg < argv + argc - 1) {
|
||||
mode = 4;
|
||||
cookie_domain = *(++arg);
|
||||
}
|
||||
else if (!strcmp(*arg, "--use-builtin-data")) {
|
||||
psl_free(psl);
|
||||
if (psl_file) {
|
||||
fprintf(stderr, "Dropped data from %s\n", psl_file);
|
||||
psl_file = NULL;
|
||||
}
|
||||
if (!(psl = (psl_ctx_t *) psl_builtin()))
|
||||
printf("No builtin PSL data available\n");
|
||||
}
|
||||
else if (!strcmp(*arg, "--load-psl-file") && arg < argv + argc - 1) {
|
||||
psl_free(psl);
|
||||
if (psl_file) {
|
||||
fprintf(stderr, "Dropped data from %s\n", psl_file);
|
||||
psl_file = NULL;
|
||||
}
|
||||
if (!(psl = psl_load_file(psl_file = *(++arg)))) {
|
||||
fprintf(stderr, "Failed to load PSL data from %s\n", psl_file);
|
||||
psl_file = NULL;
|
||||
}
|
||||
}
|
||||
else if (!strcmp(*arg, "--help")) {
|
||||
usage(0);
|
||||
}
|
||||
else if (!strcmp(*arg, "--version")) {
|
||||
printf("psl %s\n", PACKAGE_VERSION);
|
||||
printf("libpsl %s\n", psl_get_version());
|
||||
printf("\n");
|
||||
printf("Copyright (C) 2014 Tim Ruehsen\n");
|
||||
printf("License: MIT\n");
|
||||
exit(0);
|
||||
}
|
||||
else if (!strcmp(*arg, "--")) {
|
||||
arg++;
|
||||
break;
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "Unknown option '%s'\n", *arg);
|
||||
usage(1);
|
||||
}
|
||||
} else
|
||||
break;
|
||||
}
|
||||
|
||||
if (!psl && mode != 99) {
|
||||
printf("No PSL data available - aborting\n");
|
||||
exit(2);
|
||||
}
|
||||
|
||||
if (mode == 1) {
|
||||
for (; arg < argv + argc; arg++)
|
||||
printf("%s: %d\n", *arg, psl_is_public_suffix(psl, *arg));
|
||||
}
|
||||
else if (mode == 2) {
|
||||
for (; arg < argv + argc; arg++)
|
||||
printf("%s: %s\n", *arg, psl_unregistrable_domain(psl, *arg));
|
||||
}
|
||||
else if (mode == 3) {
|
||||
for (; arg < argv + argc; arg++)
|
||||
printf("%s: %s\n", *arg, psl_registrable_domain(psl, *arg));
|
||||
}
|
||||
else if (mode == 4) {
|
||||
for (; arg < argv + argc; arg++)
|
||||
printf("%s: %d\n", *arg, psl_is_cookie_domain_acceptable(psl, *arg, cookie_domain));
|
||||
}
|
||||
else if (mode == 99) {
|
||||
if (psl && psl != psl_builtin()) {
|
||||
printf("suffixes: %d\n", psl_suffix_count(psl));
|
||||
printf("exceptions: %d\n", psl_suffix_exception_count(psl));
|
||||
}
|
||||
|
||||
psl_free(psl);
|
||||
psl = (psl_ctx_t *) psl_builtin();
|
||||
|
||||
if (psl) {
|
||||
printf("builtin suffixes: %d\n", psl_suffix_count(psl));
|
||||
printf("builtin exceptions: %d\n", psl_suffix_exception_count(psl));
|
||||
printf("builtin filename: %s\n", psl_builtin_filename());
|
||||
printf("builtin compile time: %ld (%s)\n", psl_builtin_compile_time(), time2str(psl_builtin_compile_time()));
|
||||
printf("builtin file time: %ld (%s)\n", psl_builtin_file_time(), time2str(psl_builtin_file_time()));
|
||||
printf("builtin SHA1 file hash: %s\n", psl_builtin_sha1sum());
|
||||
} else
|
||||
printf("No builtin PSL data available\n");
|
||||
}
|
||||
|
||||
psl_free(psl);
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue