Release v0.12.0
This commit is contained in:
commit
c69a18ff9b
52
.travis.yml
52
.travis.yml
|
@ -1,29 +1,41 @@
|
||||||
|
sudo: false
|
||||||
|
|
||||||
language: c
|
language: c
|
||||||
|
|
||||||
compiler:
|
compiler:
|
||||||
- gcc
|
- gcc
|
||||||
- clang
|
- clang
|
||||||
# Change this to your needs
|
|
||||||
|
env:
|
||||||
|
- RUNTIME=libicu
|
||||||
|
- RUNTIME=libidn2
|
||||||
|
- RUNTIME=libidn
|
||||||
|
- RUNTIME=no
|
||||||
|
|
||||||
|
addons:
|
||||||
|
apt:
|
||||||
|
packages:
|
||||||
|
- automake
|
||||||
|
- autoconf
|
||||||
|
- autopoint
|
||||||
|
- libtool
|
||||||
|
- gtk-doc-tools
|
||||||
|
- gettext
|
||||||
|
- libidn11
|
||||||
|
- libidn11-dev
|
||||||
|
- libidn2-0
|
||||||
|
- libidn2-0-dev
|
||||||
|
- libicu48
|
||||||
|
- libicu-dev
|
||||||
|
- libunistring0
|
||||||
|
- libunistring-dev
|
||||||
|
|
||||||
script:
|
script:
|
||||||
- ./autogen.sh
|
- ./autogen.sh
|
||||||
- ./configure && make -j4 && make check -j4
|
- ./configure && make -j4 && make check -j4
|
||||||
- ./configure --enable-runtime=libicu --enable-builtin=libicu && make clean && make -j4 && make check -j4
|
- ./configure --enable-runtime=$RUNTIME --enable-builtin=libicu && make clean && make -j4 && make check -j4
|
||||||
- ./configure --enable-runtime=libicu --enable-builtin=libidn2 && make clean && make -j4 && make check -j4
|
- ./configure --enable-runtime=$RUNTIME --enable-builtin=libidn2 && make clean && make -j4 && make check -j4
|
||||||
- ./configure --enable-runtime=libicu --enable-builtin=libidn && make clean && make -j4 && make check -j4
|
- ./configure --enable-runtime=$RUNTIME --enable-builtin=libidn && make clean && make -j4 && make check -j4
|
||||||
- ./configure --enable-runtime=libicu --disable-builtin && make clean && make -j4 && make check -j4
|
- ./configure --enable-runtime=$RUNTIME --disable-builtin && make clean && make -j4 && make check -j4
|
||||||
- ./configure --enable-runtime=libidn2 --enable-builtin=libicu && make clean && make -j4 && make check -j4
|
|
||||||
- ./configure --enable-runtime=libidn2 --enable-builtin=libidn2 && make clean && make -j4 && make check -j4
|
|
||||||
- ./configure --enable-runtime=libidn2 --enable-builtin=libidn && make clean && make -j4 && make check -j4
|
|
||||||
- ./configure --enable-runtime=libidn2 --disable-builtin && make clean && make -j4 && make check -j4
|
|
||||||
- ./configure --enable-runtime=libidn --enable-builtin=libicu && make clean && make -j4 && make check -j4
|
|
||||||
- ./configure --enable-runtime=libidn --enable-builtin=libidn2 && make clean && make -j4 && make check -j4
|
|
||||||
- ./configure --enable-runtime=libidn --enable-builtin=libidn && make clean && make -j4 && make check -j4
|
|
||||||
- ./configure --enable-runtime=libidn --disable-builtin && make clean && make -j4 && make check -j4
|
|
||||||
- ./configure --disable-runtime --enable-builtin=libicu && make clean && make -j4 && make check -j4
|
|
||||||
- ./configure --disable-runtime --enable-builtin=libidn2 && make clean && make -j4 && make check -j4
|
|
||||||
- ./configure --disable-runtime --enable-builtin=libidn && make clean && make -j4 && make check -j4
|
|
||||||
- ./configure --disable-runtime --disable-builtin && make clean && make -j4 && make check -j4
|
|
||||||
- ./configure --enable-gtk-doc && make -j4 && make check -j4
|
- ./configure --enable-gtk-doc && make -j4 && make check -j4
|
||||||
- make distcheck
|
- make distcheck
|
||||||
before_install:
|
|
||||||
- sudo apt-get -qq update
|
|
||||||
- sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext libidn11 libidn11-dev libidn2-0 libidn2-0-dev libicu48 libicu-dev libunistring0 libunistring-dev
|
|
||||||
|
|
1
AUTHORS
1
AUTHORS
|
@ -15,3 +15,4 @@ Dagobert Michelsen (Fixed Solaris building)
|
||||||
Christopher Meng (Fedora building)
|
Christopher Meng (Fedora building)
|
||||||
Jakub Čajka
|
Jakub Čajka
|
||||||
Giuseppe Scrivano
|
Giuseppe Scrivano
|
||||||
|
Ryan Sleevi (Discussion, Requested DAFSA format and ICANN/PRIVATE support)
|
||||||
|
|
|
@ -14,4 +14,8 @@ ACLOCAL_AMFLAGS = -I m4 ${ACLOCAL_FLAGS}
|
||||||
pkgconfigdir = $(libdir)/pkgconfig
|
pkgconfigdir = $(libdir)/pkgconfig
|
||||||
pkgconfig_DATA = libpsl.pc
|
pkgconfig_DATA = libpsl.pc
|
||||||
|
|
||||||
EXTRA_DIST = config.rpath LICENSE $(PSL_FILE) list/tests/test_psl.txt
|
EXTRA_DIST = config.rpath LICENSE
|
||||||
|
dist-hook:
|
||||||
|
mkdir -p $(distdir)/list/tests
|
||||||
|
cp -p $(PSL_FILE) $(distdir)/list
|
||||||
|
cp -p $(PSL_TESTFILE) $(distdir)/list/tests
|
||||||
|
|
17
NEWS
17
NEWS
|
@ -1,10 +1,23 @@
|
||||||
Copyright (C) 2014-2015 Tim Rühsen
|
Copyright (C) 2014-2016 Tim Rühsen
|
||||||
|
|
||||||
|
02.01.2016 Release V0.12.0
|
||||||
|
* Load DAFSA binaries via psl_load_file() via auto-detection
|
||||||
|
* Add more tests
|
||||||
|
* Remove psl_builtin_compile_time()
|
||||||
|
* Compile PSL into DAFSA using make_dafsa.py
|
||||||
|
* Avoid libicu dependency with --enable-runtime=no
|
||||||
|
* Test on new Travis-CI build farm
|
||||||
|
* Use DAFSA format for builtin PSL data
|
||||||
|
* Add function psl_is_public_suffix2()
|
||||||
|
* Fix psl_builtin_outdated()
|
||||||
|
* Fix several bugs
|
||||||
|
* Cleanup code
|
||||||
|
|
||||||
23.09.2015 Release V0.11.0
|
23.09.2015 Release V0.11.0
|
||||||
* Add new function psl_check_version_number()
|
* Add new function psl_check_version_number()
|
||||||
* Add version defines to include file
|
* Add version defines to include file
|
||||||
|
|
||||||
19.09.2015 Release V0.10.0
|
19.09.2015 Release V0.10.0
|
||||||
* Code simplified
|
* Code simplified
|
||||||
* Less data entries, faster lookups
|
* Less data entries, faster lookups
|
||||||
* Add new function psl_suffix_wildcard_count()
|
* Add new function psl_suffix_wildcard_count()
|
||||||
|
|
|
@ -14,7 +14,7 @@ Browsers and other web clients can use it to
|
||||||
|
|
||||||
Libpsl...
|
Libpsl...
|
||||||
|
|
||||||
- has built-in PSL data for fast access
|
- has built-in PSL data for fast access (DAWG/DAFSA reduces size from 180kB to ~32kB)
|
||||||
- allows to load PSL data from files
|
- allows to load PSL data from files
|
||||||
- checks if a given domain is a "public suffix"
|
- checks if a given domain is a "public suffix"
|
||||||
- provides immediate cookie domain verification
|
- provides immediate cookie domain verification
|
||||||
|
@ -28,6 +28,8 @@ Find more information about the Public Suffix List [here](http://publicsuffix.or
|
||||||
|
|
||||||
Download the Public Suffix List [here](https://hg.mozilla.org/mozilla-central/raw-file/tip/netwerk/dns/effective_tld_names.dat).
|
Download the Public Suffix List [here](https://hg.mozilla.org/mozilla-central/raw-file/tip/netwerk/dns/effective_tld_names.dat).
|
||||||
|
|
||||||
|
The DAFSA code has been taken from [Chromium Project](https://code.google.com/p/chromium/).
|
||||||
|
|
||||||
|
|
||||||
API Documentation
|
API Documentation
|
||||||
-----------------
|
-----------------
|
||||||
|
@ -74,6 +76,8 @@ License
|
||||||
Libpsl is made available under the terms of the MIT license.<br>
|
Libpsl is made available under the terms of the MIT license.<br>
|
||||||
See the LICENSE file that accompanies this distribution for the full text of the license.
|
See the LICENSE file that accompanies this distribution for the full text of the license.
|
||||||
|
|
||||||
|
src/make_dafsa.py and src/lookup_string_in_fixed_set.c are licensed under the terms written in
|
||||||
|
src/LICENSE.chromium.
|
||||||
|
|
||||||
Building from git
|
Building from git
|
||||||
-----------------
|
-----------------
|
||||||
|
|
12
autogen.sh
12
autogen.sh
|
@ -1,21 +1,21 @@
|
||||||
#!/bin/sh -e
|
#!/bin/sh
|
||||||
|
|
||||||
AUTORECONF=$(which autoreconf 2>/dev/null || true)
|
AUTORECONF=$(which autoreconf 2>/dev/null)
|
||||||
if test $? -ne 0; then
|
if test $? -ne 0; then
|
||||||
echo "No 'autoreconf' found. You must install the autoconf package."
|
echo "No 'autoreconf' found. You must install the autoconf package."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
GIT=$(which git 2>/dev/null || true)
|
GIT=$(which git 2>/dev/null)
|
||||||
if test $? -ne 0; then
|
if test $? -ne 0; then
|
||||||
echo "No 'git' found. You must install the git package."
|
echo "No 'git' found. You must install the git package."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# create m4 before gtkdocize
|
# create m4 before gtkdocize
|
||||||
mkdir m4 2>/dev/null || true
|
mkdir -p m4 2>/dev/null
|
||||||
|
|
||||||
GTKDOCIZE=$(which gtkdocize 2>/dev/null || true)
|
GTKDOCIZE=$(which gtkdocize 2>/dev/null)
|
||||||
if test $? -ne 0; then
|
if test $? -ne 0; then
|
||||||
echo "No gtk-doc support found. You can't build the docs."
|
echo "No gtk-doc support found. You can't build the docs."
|
||||||
# rm because gtk-doc.make might be a link to a protected file
|
# rm because gtk-doc.make might be a link to a protected file
|
||||||
|
@ -24,7 +24,7 @@ if test $? -ne 0; then
|
||||||
echo "CLEANFILES =" >>gtk-doc.make
|
echo "CLEANFILES =" >>gtk-doc.make
|
||||||
GTKDOCIZE=""
|
GTKDOCIZE=""
|
||||||
else
|
else
|
||||||
$GTKDOCIZE || exit $?
|
$GTKDOCIZE
|
||||||
fi
|
fi
|
||||||
|
|
||||||
$GIT submodule init
|
$GIT submodule init
|
||||||
|
|
17
configure.ac
17
configure.ac
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
AC_INIT([libpsl], [0.11.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl])
|
AC_INIT([libpsl], [0.12.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl])
|
||||||
AC_PREREQ([2.59])
|
AC_PREREQ([2.59])
|
||||||
AM_INIT_AUTOMAKE([1.10 -Wall no-define foreign])
|
AM_INIT_AUTOMAKE([1.10 -Wall no-define foreign])
|
||||||
|
|
||||||
|
@ -20,9 +20,9 @@ AC_C_INLINE
|
||||||
#
|
#
|
||||||
# Generate version defines for include file
|
# Generate version defines for include file
|
||||||
#
|
#
|
||||||
AC_SUBST([LIBPSL_VERSION_MAJOR], [`echo -n $VERSION|cut -d'.' -f1`])
|
AC_SUBST([LIBPSL_VERSION_MAJOR], [`echo $VERSION|cut -d'.' -f1`])
|
||||||
AC_SUBST([LIBPSL_VERSION_MINOR], [`echo -n $VERSION|cut -d'.' -f2`])
|
AC_SUBST([LIBPSL_VERSION_MINOR], [`echo $VERSION|cut -d'.' -f2`])
|
||||||
AC_SUBST([LIBPSL_VERSION_PATCH], [`echo -n $VERSION|cut -d'.' -f3`])
|
AC_SUBST([LIBPSL_VERSION_PATCH], [`echo $VERSION|cut -d'.' -f3`])
|
||||||
AC_SUBST([LIBPSL_VERSION_NUMBER], [`printf '0x%02x%02x%02x' $LIBPSL_VERSION_MAJOR $LIBPSL_VERSION_MINOR $LIBPSL_VERSION_PATCH`])
|
AC_SUBST([LIBPSL_VERSION_NUMBER], [`printf '0x%02x%02x%02x' $LIBPSL_VERSION_MAJOR $LIBPSL_VERSION_MINOR $LIBPSL_VERSION_PATCH`])
|
||||||
AC_CONFIG_FILES([include/libpsl.h])
|
AC_CONFIG_FILES([include/libpsl.h])
|
||||||
|
|
||||||
|
@ -85,7 +85,7 @@ PKG_PROG_PKG_CONFIG
|
||||||
# 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0.
|
# 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0.
|
||||||
# 5. If any interfaces have been added since the last public release, then increment age.
|
# 5. If any interfaces have been added since the last public release, then increment age.
|
||||||
# 6. If any existing interfaces have been removed or changed since the last public release, then set age to 0.
|
# 6. If any existing interfaces have been removed or changed since the last public release, then set age to 0.
|
||||||
AC_SUBST([LIBPSL_SO_VERSION], [4:0:4])
|
AC_SUBST([LIBPSL_SO_VERSION], [5:0:0])
|
||||||
AC_SUBST([LIBPSL_VERSION], $VERSION)
|
AC_SUBST([LIBPSL_VERSION], $VERSION)
|
||||||
|
|
||||||
# Check for enable/disable builtin PSL data
|
# Check for enable/disable builtin PSL data
|
||||||
|
@ -154,8 +154,10 @@ if test "$enable_runtime" = "libicu" -o "$enable_builtin" = "libicu"; then
|
||||||
# using AC_SEARCH_LIBS also don't work since functions have the library version appended
|
# using AC_SEARCH_LIBS also don't work since functions have the library version appended
|
||||||
PKG_CHECK_MODULES([LIBICU], [icu-uc], [
|
PKG_CHECK_MODULES([LIBICU], [icu-uc], [
|
||||||
HAVE_LIBICU=yes
|
HAVE_LIBICU=yes
|
||||||
|
if test "$enable_runtime" = "libicu"; then
|
||||||
LIBS="$LIBICU_LIBS $LIBS"
|
LIBS="$LIBICU_LIBS $LIBS"
|
||||||
CFLAGS="$LIBICU_CFLAGS $CFLAGS"
|
CFLAGS="$LIBICU_CFLAGS $CFLAGS"
|
||||||
|
fi
|
||||||
], [
|
], [
|
||||||
OLDLIBS=$LIBS
|
OLDLIBS=$LIBS
|
||||||
LIBS="-licuuc $LIBS"
|
LIBS="-licuuc $LIBS"
|
||||||
|
@ -216,6 +218,9 @@ elif test -n "$NEEDS_NSL" ; then
|
||||||
LIBS="$LIBS -lnsl"
|
LIBS="$LIBS -lnsl"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Check for clock_gettime() used for performance measurement
|
||||||
|
AC_SEARCH_LIBS(clock_gettime, rt)
|
||||||
|
|
||||||
# Check for valgrind
|
# Check for valgrind
|
||||||
ac_enable_valgrind=no
|
ac_enable_valgrind=no
|
||||||
AC_ARG_ENABLE(valgrind-tests,
|
AC_ARG_ENABLE(valgrind-tests,
|
||||||
|
@ -252,7 +257,7 @@ AC_SUBST(PSL_TESTFILE)
|
||||||
|
|
||||||
# check for alloca / alloca.h
|
# check for alloca / alloca.h
|
||||||
AC_FUNC_ALLOCA
|
AC_FUNC_ALLOCA
|
||||||
AC_CHECK_FUNCS([strndup])
|
AC_CHECK_FUNCS([strndup clock_gettime])
|
||||||
|
|
||||||
# Override the template file name of the generated .pc file, so that there
|
# Override the template file name of the generated .pc file, so that there
|
||||||
# is no need to rename the template file when the API version changes.
|
# is no need to rename the template file when the API version changes.
|
||||||
|
|
|
@ -14,7 +14,7 @@ make distclean > /dev/null || true
|
||||||
|
|
||||||
# We define _GNU_SOURCE to avoid warnings with missing prototypes.
|
# We define _GNU_SOURCE to avoid warnings with missing prototypes.
|
||||||
# C89 does not know snprintf, strdup, strndup, popen, pclose
|
# C89 does not know snprintf, strdup, strndup, popen, pclose
|
||||||
CFLAGS="-std=c89 -pedantic -O2 -g -Wall -Wextra -Wstrict-prototypes -Wold-style-definition -Wwrite-strings -Wshadow -Wformat -Wformat-security -Wunreachable-code -Wstrict-prototypes -Wmissing-prototypes -Wold-style-definition -D_GNU_SOURCE"
|
CFLAGS="-std=gnu89 -pedantic -O2 -g -Wall -Wextra -Wstrict-prototypes -Wold-style-definition -Wwrite-strings -Wshadow -Wformat -Wformat-security -Wunreachable-code -Wstrict-prototypes -Wmissing-prototypes -Wold-style-definition"
|
||||||
|
|
||||||
CACHEFILE=$PWD/config_check.cache
|
CACHEFILE=$PWD/config_check.cache
|
||||||
|
|
||||||
|
@ -40,7 +40,8 @@ for CC in gcc clang; do
|
||||||
for options in \
|
for options in \
|
||||||
"--enable-runtime=libicu --enable-builtin=libicu" \
|
"--enable-runtime=libicu --enable-builtin=libicu" \
|
||||||
"--enable-runtime=libidn2 --enable-builtin=libidn2" \
|
"--enable-runtime=libidn2 --enable-builtin=libidn2" \
|
||||||
"--enable-runtime=libidn --enable-builtin=libidn"; do
|
"--enable-runtime=libidn --enable-builtin=libidn" \
|
||||||
|
"--disable-runtime --enable-builtin=libicu"; do
|
||||||
export DISTCHECK_CONFIGURE_FLAGS="-C --cache-file=$CACHEFILE $options"
|
export DISTCHECK_CONFIGURE_FLAGS="-C --cache-file=$CACHEFILE $options"
|
||||||
echo
|
echo
|
||||||
echo " *** ./configure $DISTCHECK_CONFIGURE_FLAGS"
|
echo " *** ./configure $DISTCHECK_CONFIGURE_FLAGS"
|
||||||
|
|
|
@ -6,6 +6,9 @@ PSL_VERSION_MAJOR
|
||||||
PSL_VERSION_MINOR
|
PSL_VERSION_MINOR
|
||||||
PSL_VERSION_NUMBER
|
PSL_VERSION_NUMBER
|
||||||
PSL_VERSION_PATCH
|
PSL_VERSION_PATCH
|
||||||
|
PSL_TYPE_ICANN
|
||||||
|
PSL_TYPE_PRIVATE
|
||||||
|
PSL_TYPE_ANY
|
||||||
psl_error_t
|
psl_error_t
|
||||||
psl_ctx_t
|
psl_ctx_t
|
||||||
psl_load_file
|
psl_load_file
|
||||||
|
@ -13,12 +16,12 @@ psl_load_fp
|
||||||
psl_builtin
|
psl_builtin
|
||||||
psl_free
|
psl_free
|
||||||
psl_is_public_suffix
|
psl_is_public_suffix
|
||||||
|
psl_is_public_suffix2
|
||||||
psl_unregistrable_domain
|
psl_unregistrable_domain
|
||||||
psl_registrable_domain
|
psl_registrable_domain
|
||||||
psl_suffix_count
|
psl_suffix_count
|
||||||
psl_suffix_exception_count
|
psl_suffix_exception_count
|
||||||
psl_suffix_wildcard_count
|
psl_suffix_wildcard_count
|
||||||
psl_builtin_compile_time
|
|
||||||
psl_builtin_file_time
|
psl_builtin_file_time
|
||||||
psl_builtin_sha1sum
|
psl_builtin_sha1sum
|
||||||
psl_builtin_filename
|
psl_builtin_filename
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright(c) 2014-2015 Tim Ruehsen
|
* Copyright(c) 2014-2016 Tim Ruehsen
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
@ -44,6 +44,11 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* types for psl_is_public_suffix2() */
|
||||||
|
#define PSL_TYPE_ICANN (1<<0)
|
||||||
|
#define PSL_TYPE_PRIVATE (1<<1)
|
||||||
|
#define PSL_TYPE_ANY (PSL_TYPE_ICANN | PSL_TYPE_PRIVATE)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* psl_error_t:
|
* psl_error_t:
|
||||||
* @PSL_SUCCESS: Successful return.
|
* @PSL_SUCCESS: Successful return.
|
||||||
|
@ -71,57 +76,75 @@ typedef struct _psl_ctx_st psl_ctx_t;
|
||||||
/* frees PSL context */
|
/* frees PSL context */
|
||||||
void
|
void
|
||||||
psl_free(psl_ctx_t *psl);
|
psl_free(psl_ctx_t *psl);
|
||||||
|
|
||||||
/* loads PSL data from file */
|
/* loads PSL data from file */
|
||||||
psl_ctx_t *
|
psl_ctx_t *
|
||||||
psl_load_file(const char *fname);
|
psl_load_file(const char *fname);
|
||||||
|
|
||||||
/* loads PSL data from FILE pointer */
|
/* loads PSL data from FILE pointer */
|
||||||
psl_ctx_t *
|
psl_ctx_t *
|
||||||
psl_load_fp(FILE *fp);
|
psl_load_fp(FILE *fp);
|
||||||
|
|
||||||
/* retrieves builtin PSL data */
|
/* retrieves builtin PSL data */
|
||||||
const psl_ctx_t *
|
const psl_ctx_t *
|
||||||
psl_builtin(void);
|
psl_builtin(void);
|
||||||
|
|
||||||
/* checks whether domain is a public suffix or not */
|
/* checks whether domain is a public suffix or not */
|
||||||
int
|
int
|
||||||
psl_is_public_suffix(const psl_ctx_t *psl, const char *domain);
|
psl_is_public_suffix(const psl_ctx_t *psl, const char *domain);
|
||||||
|
|
||||||
|
/* checks whether domain is a public suffix regarding the type or not */
|
||||||
|
int
|
||||||
|
psl_is_public_suffix2(const psl_ctx_t *psl, const char *domain, int type);
|
||||||
|
|
||||||
/* checks whether cookie_domain is acceptable for domain or not */
|
/* checks whether cookie_domain is acceptable for domain or not */
|
||||||
int
|
int
|
||||||
psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain);
|
psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain);
|
||||||
|
|
||||||
/* returns the longest not registrable domain within 'domain' or NULL if none found */
|
/* returns the longest not registrable domain within 'domain' or NULL if none found */
|
||||||
const char *
|
const char *
|
||||||
psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain);
|
psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain);
|
||||||
|
|
||||||
/* returns the shortest possible registrable domain part or NULL if domain is not registrable at all */
|
/* returns the shortest possible registrable domain part or NULL if domain is not registrable at all */
|
||||||
const char *
|
const char *
|
||||||
psl_registrable_domain(const psl_ctx_t *psl, const char *domain);
|
psl_registrable_domain(const psl_ctx_t *psl, const char *domain);
|
||||||
|
|
||||||
/* convert a string into lowercase UTF-8 */
|
/* convert a string into lowercase UTF-8 */
|
||||||
psl_error_t
|
psl_error_t
|
||||||
psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower);
|
psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower);
|
||||||
|
|
||||||
/* does not include exceptions */
|
/* does not include exceptions */
|
||||||
int
|
int
|
||||||
psl_suffix_count(const psl_ctx_t *psl);
|
psl_suffix_count(const psl_ctx_t *psl);
|
||||||
|
|
||||||
/* just counts exceptions */
|
/* just counts exceptions */
|
||||||
int
|
int
|
||||||
psl_suffix_exception_count(const psl_ctx_t *psl);
|
psl_suffix_exception_count(const psl_ctx_t *psl);
|
||||||
|
|
||||||
/* just counts wildcards */
|
/* just counts wildcards */
|
||||||
int
|
int
|
||||||
psl_suffix_wildcard_count(const psl_ctx_t *psl);
|
psl_suffix_wildcard_count(const psl_ctx_t *psl);
|
||||||
/* returns compilation time */
|
|
||||||
time_t
|
|
||||||
psl_builtin_compile_time(void);
|
|
||||||
/* returns mtime of PSL source file */
|
/* returns mtime of PSL source file */
|
||||||
time_t
|
time_t
|
||||||
psl_builtin_file_time(void);
|
psl_builtin_file_time(void);
|
||||||
|
|
||||||
/* returns SHA1 checksum (hex-encoded, lowercase) of PSL source file */
|
/* returns SHA1 checksum (hex-encoded, lowercase) of PSL source file */
|
||||||
const char *
|
const char *
|
||||||
psl_builtin_sha1sum(void);
|
psl_builtin_sha1sum(void);
|
||||||
|
|
||||||
/* returns file name of PSL source file */
|
/* returns file name of PSL source file */
|
||||||
const char *
|
const char *
|
||||||
psl_builtin_filename(void);
|
psl_builtin_filename(void);
|
||||||
|
|
||||||
/* returns library version string */
|
/* returns library version string */
|
||||||
const char *
|
const char *
|
||||||
psl_get_version(void);
|
psl_get_version(void);
|
||||||
|
|
||||||
/* checks library version number */
|
/* checks library version number */
|
||||||
int
|
int
|
||||||
psl_check_version_number(int version);
|
psl_check_version_number(int version);
|
||||||
|
|
||||||
/* returns whether the built-in data is outdated or not */
|
/* returns whether the built-in data is outdated or not */
|
||||||
int
|
int
|
||||||
psl_builtin_outdated(void);
|
psl_builtin_outdated(void);
|
||||||
|
|
2
list
2
list
|
@ -1 +1 @@
|
||||||
Subproject commit 2930bb4a5256279e0f7ba44cf9d174fc93ecb732
|
Subproject commit 1f3ad51171235aafe423435606e869f0161582e4
|
|
@ -0,0 +1,30 @@
|
||||||
|
* The following License is for the source code files
|
||||||
|
make_dafsa.py and lookup_string_in_fixed_set.c.
|
||||||
|
|
||||||
|
// Copyright 2015 The Chromium Authors. All rights reserved.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// * Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
// * Redistributions in binary form must reproduce the above
|
||||||
|
// copyright notice, this list of conditions and the following disclaimer
|
||||||
|
// in the documentation and/or other materials provided with the
|
||||||
|
// distribution.
|
||||||
|
// * Neither the name of Google Inc. nor the names of its
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -1,12 +1,12 @@
|
||||||
# suffixes.c must be created before psl.c is compiled
|
# suffixes_dafsa.c must be created before psl.c is compiled
|
||||||
BUILT_SOURCES = suffixes.c
|
BUILT_SOURCES = suffixes_dafsa.c
|
||||||
|
|
||||||
# suffixes.c is a built source that must be cleaned
|
# suffixes_dafsa.c is a built source that must be cleaned
|
||||||
CLEANFILES = suffixes.c
|
CLEANFILES = suffixes_dafsa.c
|
||||||
|
|
||||||
lib_LTLIBRARIES = libpsl.la
|
lib_LTLIBRARIES = libpsl.la
|
||||||
|
|
||||||
libpsl_la_SOURCES = psl.c
|
libpsl_la_SOURCES = psl.c lookup_string_in_fixed_set.c
|
||||||
libpsl_la_CPPFLAGS = -I$(top_srcdir)/include
|
libpsl_la_CPPFLAGS = -I$(top_srcdir)/include
|
||||||
# include ABI version information
|
# include ABI version information
|
||||||
libpsl_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
|
libpsl_la_LDFLAGS = -version-info $(LIBPSL_SO_VERSION)
|
||||||
|
@ -21,8 +21,8 @@ if WITH_LIBIDN
|
||||||
endif
|
endif
|
||||||
|
|
||||||
noinst_PROGRAMS = psl2c
|
noinst_PROGRAMS = psl2c
|
||||||
psl2c_SOURCES = psl2c.c
|
psl2c_SOURCES = psl2c.c lookup_string_in_fixed_set.c
|
||||||
psl2c_CPPFLAGS = -I$(top_srcdir)/include
|
psl2c_CPPFLAGS = -I$(top_srcdir)/include -DMAKE_DAFSA=\"$(top_srcdir)/src/make_dafsa.py\"
|
||||||
if BUILTIN_GENERATOR_LIBICU
|
if BUILTIN_GENERATOR_LIBICU
|
||||||
psl2c_LDADD = -licuuc
|
psl2c_LDADD = -licuuc
|
||||||
endif
|
endif
|
||||||
|
@ -33,7 +33,9 @@ if BUILTIN_GENERATOR_LIBIDN
|
||||||
psl2c_LDADD = @LTLIBICONV@ -lidn -lunistring
|
psl2c_LDADD = @LTLIBICONV@ -lidn -lunistring
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# Build rule for suffix.c
|
# Build rule for suffixes_dafsa.c
|
||||||
# PSL_FILE can be set by ./configure --with-psl-file=[PATH]
|
# PSL_FILE can be set by ./configure --with-psl-file=[PATH]
|
||||||
suffixes.c: $(PSL_FILE) psl2c$(EXEEXT)
|
suffixes_dafsa.c: $(PSL_FILE) psl2c$(EXEEXT)
|
||||||
./psl2c$(EXEEXT) "$(PSL_FILE)" suffixes.c
|
./psl2c$(EXEEXT) "$(PSL_FILE)" suffixes_dafsa.c
|
||||||
|
|
||||||
|
EXTRA_DIST = make_dafsa.py LICENSE.chromium
|
||||||
|
|
|
@ -0,0 +1,204 @@
|
||||||
|
/* Copyright 2015 The Chromium Authors. All rights reserved.
|
||||||
|
* Use of this source code is governed by a BSD-style license that can be
|
||||||
|
* found in the LICENSE.chromium file.
|
||||||
|
*
|
||||||
|
* Converted to C89 2015 by Tim Rühsen
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
|
||||||
|
# define _GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
|
||||||
|
#else
|
||||||
|
# define _GCC_VERSION_AT_LEAST(major, minor) 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if _GCC_VERSION_AT_LEAST(4,0)
|
||||||
|
# define _HIDDEN __attribute__ ((visibility ("hidden")))
|
||||||
|
#else
|
||||||
|
# define _HIDDEN
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Make the calling function return 0 unless a < b.
 * Both arguments are parenthesised and the statement is wrapped in
 * do { } while (0) so that the macro expands to exactly one statement and
 * can safely be used as the body of an if/else.
 */
#define CHECK_LT(a, b) do { if ((a) >= (b)) return 0; } while (0)
|
||||||
|
|
||||||
|
/*
 * Read the next offset from the offset list at *pos.
 *
 * On success the decoded value is added to *offset. *pos is then advanced
 * past the bytes that were consumed, or set to end if the first byte of the
 * encoded offset has its high bit (0x80) set, so that the following call
 * returns 0.
 *
 * Returns 1 if an offset could be read, 0 otherwise (*pos equals end, or
 * fewer than three bytes remain before end).
 */

static int GetNextOffset(const unsigned char** pos,
	const unsigned char* end,
	const unsigned char** offset)
{
	const unsigned char* cur = *pos;
	size_t bytes_consumed;

	if (cur == end)
		return 0;

	/* When reading an offset the byte array must always contain at least
	 * three more bytes to consume. First the offset to read, then a node
	 * to skip over and finally a destination node. No object can be smaller
	 * than one byte.
	 * The remaining length is compared instead of computing cur + 2, because
	 * that pointer would lie more than one element past the end of the array
	 * (undefined behavior) when only one byte is left. */
	if (end - cur < 3)
		return 0;

	switch (cur[0] & 0x60) {
	case 0x60: /* Read three byte offset */
		*offset += ((cur[0] & 0x1F) << 16) | (cur[1] << 8) | cur[2];
		bytes_consumed = 3;
		break;
	case 0x40: /* Read two byte offset */
		*offset += ((cur[0] & 0x1F) << 8) | cur[1];
		bytes_consumed = 2;
		break;
	default: /* Read one byte offset */
		*offset += cur[0] & 0x3F;
		bytes_consumed = 1;
		break;
	}

	if ((cur[0] & 0x80) != 0) {
		/* high bit set: stop reading offsets from this list */
		*pos = end;
	} else {
		*pos = cur + bytes_consumed;
	}

	return 1;
}
|
||||||
|
|
||||||
|
/*
 * Tell whether the byte at offset has the end-of-label flag (bit 0x80) set.
 * Returns 0 if offset does not lie below end.
 */

static int IsEOL(const unsigned char* offset, const unsigned char* end)
{
	if (offset >= end)
		return 0;

	return (*offset & 0x80) ? 1 : 0;
}
|
||||||
|
|
||||||
|
/*
 * Compare the byte at offset with the first character of key.
 * This variant is for characters that are not the last one in a label.
 * Returns 1 on equality, 0 otherwise or if offset does not lie below end.
 */

static int IsMatch(const unsigned char* offset,
	const unsigned char* end,
	const char* key)
{
	return offset < end && *offset == *key;
}
|
||||||
|
|
||||||
|
/*
 * Compare the byte at offset with the first character of key.
 * This variant is for the last character in a label, which is stored in the
 * graph with bit 0x80 set.
 *
 * Returns 1 on a match, 0 otherwise or if offset does not lie below end.
 * A key byte that already has bit 0x80 set never matches. This is what the
 * comparison yields where plain char is signed; handling it explicitly
 * gives the same result where plain char is unsigned, instead of letting
 * such a byte compare equal to a flagged node byte.
 */

static int IsEndCharMatch(const unsigned char* offset,
	const unsigned char* end,
	const char* key)
{
	unsigned char c;

	if (offset >= end)
		return 0;

	c = (unsigned char) *key;
	if ((c & 0x80) != 0)
		return 0; /* only 7-bit characters can be stored as end characters */

	return *offset == (c | 0x80);
}
|
||||||
|
|
||||||
|
/*
 * Read the return value stored at offset.
 * A byte is a return value node when its top three bits are 100; the value
 * is taken from the low four bits and stored in *return_value.
 * Returns 1 if a return value could be read, 0 otherwise (*return_value is
 * left untouched in that case).
 */

static int GetReturnValue(const unsigned char* offset,
	const unsigned char* end,
	int* return_value)
{
	if (offset >= end)
		return 0;

	if ((*offset & 0xE0) != 0x80)
		return 0;

	*return_value = *offset & 0x0F;
	return 1;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Looks up the string |key| with length |key_length| in a fixed set of
|
||||||
|
* strings. The set of strings must be known at compile time. It is converted to
|
||||||
|
* a graph structure named a DAFSA (Deterministic Acyclic Finite State
|
||||||
|
* Automaton) by the script make_dafsa.py during compilation. This permits
|
||||||
|
* efficient (in time and space) lookup. The graph generated by make_dafsa.py
|
||||||
|
* takes the form of a constant byte array which should be supplied via the
|
||||||
|
* |graph| and |length| parameters. The return value is kDafsaNotFound,
|
||||||
|
* kDafsaFound, or a bitmap consisting of one or more of kDafsaExceptionRule,
|
||||||
|
* kDafsaWildcardRule and kDafsaPrivateRule ORed together.
|
||||||
|
*
|
||||||
|
* Lookup a domain key in a byte array generated by make_dafsa.py.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* prototype to skip warning with -Wmissing-prototypes */
|
||||||
|
int _HIDDEN LookupStringInFixedSet(const unsigned char*, size_t,const char*, size_t);
|
||||||
|
|
||||||
|
/*
 * Walk the byte-encoded graph |graph| (|length| bytes) looking for |key|
 * (|key_length| bytes). Returns the value decoded by GetReturnValue() when
 * the whole key has been consumed at a return value byte, and -1 otherwise.
 */
int _HIDDEN LookupStringInFixedSet(const unsigned char* graph,
                                   size_t length,
                                   const char* key,
                                   size_t key_length)
{
	const unsigned char* pos = graph;
	const unsigned char* end = graph + length;
	const unsigned char* offset = pos;
	const char* key_end = key + key_length;

	while (GetNextOffset(&pos, end, &offset)) {
		/*
		 * A child node has one of these shapes:
		 *   char <char>+ end_char offsets
		 *   char <char>+ return value
		 *   char end_char offsets
		 *   char return value
		 *   end_char offsets
		 *   return_value
		 */
		int matched_prefix = 0;

		if (key != key_end && !IsEOL(offset, end)) {
			/* Leading <char> differs: this child cannot hold the key. */
			if (!IsMatch(offset, end, key))
				continue;
			matched_prefix = 1;

			/*
			 * Step over the matched <char>, then over every further
			 * <char> of the label. Once the first one matched, a later
			 * mismatch means the key is not in the set.
			 */
			for (++offset, ++key;
			     !IsEOL(offset, end) && key != key_end;
			     ++key, ++offset) {
				if (!IsMatch(offset, end, key))
					return -1;
			}
		}

		/* What remains at |offset| is "end_char offsets" or "return_value". */
		if (key == key_end) {
			int value;

			if (GetReturnValue(offset, end, &value))
				return value;
		} else if (IsEndCharMatch(offset, end, key)) {
			++key;
			pos = ++offset; /* Dive into child */
			continue;
		}

		/*
		 * No match in this child. If part of its label was consumed the
		 * failure is final, otherwise move on to the next sibling.
		 */
		if (matched_prefix)
			return -1;
	}

	return -1; /* No match */
}
|
|
@ -0,0 +1,588 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# Copyright 2014 The Chromium Authors. All rights reserved.
|
||||||
|
# Use of this source code is governed by a BSD-style license that can be
|
||||||
|
# found in the LICENSE.chromium file.
|
||||||
|
|
||||||
|
"""
|
||||||
|
A Deterministic acyclic finite state automaton (DAFSA) is a compact
|
||||||
|
representation of an unordered word list (dictionary).
|
||||||
|
|
||||||
|
http://en.wikipedia.org/wiki/Deterministic_acyclic_finite_state_automaton
|
||||||
|
|
||||||
|
This python program converts a list of strings to a byte array in C++.
|
||||||
|
This python program fetches strings and return values from a gperf file
|
||||||
|
and generates a C++ file with a byte array representing graph that can be
|
||||||
|
used as a memory efficient replacement for the perfect hash table.
|
||||||
|
|
||||||
|
The input strings are assumed to consist of printable 7-bit ASCII characters
|
||||||
|
and the return values are assumed to be one digit integers.
|
||||||
|
|
||||||
|
In this program a DAFSA is a diamond shaped graph starting at a common
|
||||||
|
source node and ending at a common sink node. All internal nodes contain
|
||||||
|
a label and each word is represented by the labels in one path from
|
||||||
|
the source node to the sink node.
|
||||||
|
|
||||||
|
The following Python representation is used for nodes:
|
||||||
|
|
||||||
|
Source node: [ children ]
|
||||||
|
Internal node: (label, [ children ])
|
||||||
|
Sink node: None
|
||||||
|
|
||||||
|
The graph is first compressed by prefixes like a trie. In the next step
|
||||||
|
suffixes are compressed so that the graph gets diamond shaped. Finally
|
||||||
|
one to one linked nodes are replaced by nodes with the labels joined.
|
||||||
|
|
||||||
|
The order of the operations is crucial since lookups will be performed
|
||||||
|
starting from the source with no backtracking. Thus a node must have at
|
||||||
|
most one child with a label starting by the same character. The output
|
||||||
|
is also arranged so that all jumps are to increasing addresses, thus forward
|
||||||
|
in memory.
|
||||||
|
|
||||||
|
The generated output has suffix free decoding so that the sign of leading
|
||||||
|
bits in a link (a reference to a child node) indicate if it has a size of one,
|
||||||
|
two or three bytes and if it is the last outgoing link from the actual node.
|
||||||
|
A node label is terminated by a byte with the leading bit set.
|
||||||
|
|
||||||
|
The generated byte array can be described by the following BNF:
|
||||||
|
|
||||||
|
<byte> ::= < 8-bit value in range [0x00-0xFF] >
|
||||||
|
|
||||||
|
<char> ::= < printable 7-bit ASCII character, byte in range [0x20-0x7F] >
|
||||||
|
<end_char> ::= < char + 0x80, byte in range [0xA0-0xFF] >
|
||||||
|
<return value> ::= < value + 0x80, byte in range [0x80-0x8F] >
|
||||||
|
|
||||||
|
<offset1> ::= < byte in range [0x00-0x3F] >
|
||||||
|
<offset2> ::= < byte in range [0x40-0x5F] >
|
||||||
|
<offset3> ::= < byte in range [0x60-0x7F] >
|
||||||
|
|
||||||
|
<end_offset1> ::= < byte in range [0x80-0xBF] >
|
||||||
|
<end_offset2> ::= < byte in range [0xC0-0xDF] >
|
||||||
|
<end_offset3> ::= < byte in range [0xE0-0xFF] >
|
||||||
|
|
||||||
|
<prefix> ::= <char>
|
||||||
|
|
||||||
|
<label> ::= <end_char>
|
||||||
|
| <char> <label>
|
||||||
|
|
||||||
|
<end_label> ::= <return_value>
|
||||||
|
| <char> <end_label>
|
||||||
|
|
||||||
|
<offset> ::= <offset1>
|
||||||
|
| <offset2> <byte>
|
||||||
|
| <offset3> <byte> <byte>
|
||||||
|
|
||||||
|
<end_offset> ::= <end_offset1>
|
||||||
|
| <end_offset2> <byte>
|
||||||
|
| <end_offset3> <byte> <byte>
|
||||||
|
|
||||||
|
<offsets> ::= <end_offset>
|
||||||
|
| <offset> <offsets>
|
||||||
|
|
||||||
|
<source> ::= <offsets>
|
||||||
|
|
||||||
|
<node> ::= <label> <offsets>
|
||||||
|
| <prefix> <node>
|
||||||
|
| <end_label>
|
||||||
|
|
||||||
|
<dafsa> ::= <source>
|
||||||
|
| <dafsa> <node>
|
||||||
|
|
||||||
|
Decoding:
|
||||||
|
|
||||||
|
<char> -> printable 7-bit ASCII character
|
||||||
|
<end_char> & 0x7F -> printable 7-bit ASCII character
|
||||||
|
<return value> & 0x0F -> integer
|
||||||
|
<offset1> & 0x3F -> integer
|
||||||
|
((<offset2> & 0x1F) << 8) + <byte> -> integer
|
||||||
|
((<offset3> & 0x1F) << 16) + (<byte> << 8) + <byte> -> integer
|
||||||
|
|
||||||
|
end_offset1, end_offset2 and end_offset3 are decoded the same way as offset1,
|
||||||
|
offset2 and offset3 respectively.
|
||||||
|
|
||||||
|
The first offset in a list of offsets is the distance in bytes between the
|
||||||
|
offset itself and the first child node. Subsequent offsets are the distance
|
||||||
|
between previous child node and next child node. Thus each offset links a node
|
||||||
|
to a child node. The distance is always counted between start addresses, i.e.
|
||||||
|
first byte in decoded offset or first byte in child node.
|
||||||
|
|
||||||
|
Example 1:
|
||||||
|
|
||||||
|
%%
|
||||||
|
aa, 1
|
||||||
|
a, 2
|
||||||
|
%%
|
||||||
|
|
||||||
|
The input is first parsed to a list of words:
|
||||||
|
["aa1", "a2"]
|
||||||
|
|
||||||
|
A fully expanded graph is created from the words:
|
||||||
|
source = [node1, node4]
|
||||||
|
node1 = ("a", [node2])
|
||||||
|
node2 = ("a", [node3])
|
||||||
|
node3 = ("\x01", [sink])
|
||||||
|
node4 = ("a", [node5])
|
||||||
|
node5 = ("\x02", [sink])
|
||||||
|
sink = None
|
||||||
|
|
||||||
|
Compression results in the following graph:
|
||||||
|
source = [node1]
|
||||||
|
node1 = ("a", [node2, node3])
|
||||||
|
node2 = ("\x02", [sink])
|
||||||
|
node3 = ("a\x01", [sink])
|
||||||
|
sink = None
|
||||||
|
|
||||||
|
A C++ representation of the compressed graph is generated:
|
||||||
|
|
||||||
|
const unsigned char dafsa[7] = {
|
||||||
|
0x81, 0xE1, 0x02, 0x81, 0x82, 0x61, 0x81,
|
||||||
|
};
|
||||||
|
|
||||||
|
The bytes in the generated array have the following meaning:
|
||||||
|
|
||||||
|
0: 0x81 <end_offset1> child at position 0 + (0x81 & 0x3F) -> jump to 1
|
||||||
|
|
||||||
|
1: 0xE1 <end_char> label character (0xE1 & 0x7F) -> match "a"
|
||||||
|
2: 0x02 <offset1> child at position 2 + (0x02 & 0x3F) -> jump to 4
|
||||||
|
|
||||||
|
3: 0x81 <end_offset1> child at position 4 + (0x81 & 0x3F) -> jump to 5
|
||||||
|
4: 0x82 <return_value> 0x82 & 0x0F -> return 2
|
||||||
|
|
||||||
|
5: 0x61 <char> label character 0x61 -> match "a"
|
||||||
|
6: 0x81 <return_value> 0x81 & 0x0F -> return 1
|
||||||
|
|
||||||
|
Example 2:
|
||||||
|
|
||||||
|
%%
|
||||||
|
aa, 1
|
||||||
|
bbb, 2
|
||||||
|
baa, 1
|
||||||
|
%%
|
||||||
|
|
||||||
|
The input is first parsed to a list of words:
|
||||||
|
["aa1", "bbb2", "baa1"]
|
||||||
|
|
||||||
|
Compression results in the following graph:
|
||||||
|
source = [node1, node2]
|
||||||
|
node1 = ("b", [node2, node3])
|
||||||
|
node2 = ("aa\x01", [sink])
|
||||||
|
node3 = ("bb\x02", [sink])
|
||||||
|
sink = None
|
||||||
|
|
||||||
|
A C++ representation of the compressed graph is generated:
|
||||||
|
|
||||||
|
const unsigned char dafsa[11] = {
|
||||||
|
0x02, 0x83, 0xE2, 0x02, 0x83, 0x61, 0x61, 0x81, 0x62, 0x62, 0x82,
|
||||||
|
};
|
||||||
|
|
||||||
|
The bytes in the generated array have the following meaning:
|
||||||
|
|
||||||
|
0: 0x02 <offset1> child at position 0 + (0x02 & 0x3F) -> jump to 2
|
||||||
|
1: 0x83 <end_offset1> child at position 2 + (0x83 & 0x3F) -> jump to 5
|
||||||
|
|
||||||
|
2: 0xE2 <end_char> label character (0xE2 & 0x7F) -> match "b"
|
||||||
|
3: 0x02 <offset1> child at position 3 + (0x02 & 0x3F) -> jump to 5
|
||||||
|
4: 0x83 <end_offset1> child at position 5 + (0x83 & 0x3F) -> jump to 8
|
||||||
|
|
||||||
|
5: 0x61 <char> label character 0x61 -> match "a"
|
||||||
|
6: 0x61 <char> label character 0x61 -> match "a"
|
||||||
|
7: 0x81 <return_value> 0x81 & 0x0F -> return 1
|
||||||
|
|
||||||
|
8: 0x62 <char> label character 0x62 -> match "b"
|
||||||
|
9: 0x62 <char> label character 0x62 -> match "b"
|
||||||
|
10: 0x82 <return_value> 0x82 & 0x0F -> return 2
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
class InputError(Exception):
  """Raised when the input data cannot be turned into a DAFSA."""
|
||||||
|
|
||||||
|
|
||||||
|
def to_dafsa(words):
  """Builds a fully expanded DAFSA from a word list; returns the source node.

  Every word becomes its own chain of single-character nodes. The last
  character of a word is read as a hexadecimal digit and stored as the
  character with that value (low four bits), linked to the sink (None).

  Raises InputError if the word list is empty or a character is outside the
  printable 7-bit ASCII range.
  """
  if not words:
    raise InputError('The domain list must not be empty')

  def build_chain(text):
    """Turns one word into a chain of nested (label, [child]) nodes."""
    first, rest = text[0], text[1:]
    if not (0x1F < ord(first) < 0x80):
      raise InputError('Domain names must be printable 7-bit ASCII')
    if not rest:
      # Final character: the return value digit, linked to the sink.
      return chr(int(first, 16) & 0x0F), [None]
    return first, [build_chain(rest)]

  return [build_chain(word) for word in words]
|
||||||
|
|
||||||
|
|
||||||
|
def to_words(node):
  """Lists every word spelled by a path from |node| to the sink.

  The sink (a falsy node) yields the single empty word.
  """
  if not node:
    return ['']
  label = node[0]
  words = []
  for child in node[1]:
    for suffix in to_words(child):
      words.append(label + suffix)
  return words
|
||||||
|
|
||||||
|
|
||||||
|
def reverse(dafsa):
  """Builds the reversed DAFSA: the old sink becomes the new source.

  Each old node gets exactly one new node carrying the reversed label; the
  new node's children are the (new) parents of the old node. Old source
  children end up pointing at the new sink (None).
  """
  new_source = []
  mirror = {}  # id(old node) -> new node

  def visit(node, parent):
    """Depth-first walk that records |parent| as a child of the mirror node."""
    if not node:
      # Reached the old sink: its parent starts a path in the new graph.
      new_source.append(parent)
      return
    key = id(node)
    mirrored = mirror.get(key)
    if mirrored is not None:
      # Already visited: only register the additional parent.
      mirrored[1].append(parent)
      return
    mirrored = (node[0][::-1], [parent])
    mirror[key] = mirrored
    for child in node[1]:
      visit(child, mirrored)

  for root in dafsa:
    visit(root, None)
  return new_source
|
||||||
|
|
||||||
|
|
||||||
|
def join_labels(dafsa):
  """Builds a new DAFSA in which one-to-one linked nodes are fused.

  A node with a single child that has no other parent is replaced by one node
  whose label is the concatenation of both labels.
  """
  # The sink starts with a count of 2 so that it is never fused into a parent.
  refs = {id(None): 2}
  merged = {id(None): None}

  def count_parents(node):
    """Counts incoming references of |node| and everything below it."""
    key = id(node)
    if key in refs:
      refs[key] += 1
      return
    refs[key] = 1
    for child in node[1]:
      count_parents(child)

  def merge(node):
    """Returns the (possibly fused) replacement for |node|."""
    key = id(node)
    if key not in merged:
      kids = [merge(child) for child in node[1]]
      if len(kids) == 1 and refs[id(node[1][0])] == 1:
        only = kids[0]
        merged[key] = (node[0] + only[0], only[1])
      else:
        merged[key] = (node[0], kids)
    return merged[key]

  for root in dafsa:
    count_parents(root)
  return [merge(root) for root in dafsa]
|
||||||
|
|
||||||
|
|
||||||
|
def join_suffixes(dafsa):
  """Builds a new DAFSA in which nodes spelling the same set of words towards
  the sink are replaced by a single shared node.
  """
  # The set holding only the empty word identifies the sink.
  canonical = {frozenset(('',)): None}

  def unify(node):
    """Returns the shared node for |node|'s word set, creating it on first
    sight. The graph is walked depth first.
    """
    signature = frozenset(to_words(node))
    if signature in canonical:
      return canonical[signature]
    shared = (node[0], [unify(child) for child in node[1]])
    canonical[signature] = shared
    return shared

  return [unify(root) for root in dafsa]
|
||||||
|
|
||||||
|
|
||||||
|
def top_sort(dafsa):
  """Returns the internal nodes of |dafsa| in topological order (a node comes
  before all of its children). The sink is not part of the result.
  """
  pending = {}  # id(node) -> number of not yet emitted references

  def count_incoming(node):
    """Counts references to |node| and, on first visit, to its descendants."""
    if not node:
      return
    key = id(node)
    if key in pending:
      pending[key] += 1
      return
    pending[key] = 1
    for child in node[1]:
      count_incoming(child)

  for root in dafsa:
    count_incoming(root)

  # The reference from the source itself does not count.
  for root in dafsa:
    pending[id(root)] -= 1

  ready = [root for root in dafsa if pending[id(root)] == 0]
  ordered = []

  while ready:
    current = ready.pop()
    assert pending[id(current)] == 0
    ordered.append(current)
    for child in current[1]:
      if not child:
        continue
      pending[id(child)] -= 1
      if pending[id(child)] == 0:
        ready.append(child)
  return ordered
|
||||||
|
|
||||||
|
|
||||||
|
def encode_links(children, offsets, current):
|
||||||
|
"""Encodes a list of children as one, two or three byte offsets."""
|
||||||
|
if not children[0]:
|
||||||
|
# This is an <end_label> node and no links follow such nodes
|
||||||
|
assert len(children) == 1
|
||||||
|
return []
|
||||||
|
guess = 3 * len(children)
|
||||||
|
assert children
|
||||||
|
children = sorted(children, key=lambda x: -offsets[id(x)])
|
||||||
|
while True:
|
||||||
|
offset = current + guess
|
||||||
|
buf = []
|
||||||
|
for child in children:
|
||||||
|
last = len(buf)
|
||||||
|
distance = offset - offsets[id(child)]
|
||||||
|
assert distance > 0 and distance < (1 << 21)
|
||||||
|
|
||||||
|
if distance < (1 << 6):
|
||||||
|
# A 6-bit offset: "s0xxxxxx"
|
||||||
|
buf.append(distance)
|
||||||
|
elif distance < (1 << 13):
|
||||||
|
# A 13-bit offset: "s10xxxxxxxxxxxxx"
|
||||||
|
buf.append(0x40 | (distance >> 8))
|
||||||
|
buf.append(distance & 0xFF)
|
||||||
|
else:
|
||||||
|
# A 21-bit offset: "s11xxxxxxxxxxxxxxxxxxxxx"
|
||||||
|
buf.append(0x60 | (distance >> 16))
|
||||||
|
buf.append((distance >> 8) & 0xFF)
|
||||||
|
buf.append(distance & 0xFF)
|
||||||
|
# Distance in first link is relative to following record.
|
||||||
|
# Distance in other links are relative to previous link.
|
||||||
|
offset -= distance
|
||||||
|
if len(buf) == guess:
|
||||||
|
break
|
||||||
|
guess = len(buf)
|
||||||
|
# Set most significant bit to mark end of links in this node.
|
||||||
|
buf[last] |= (1 << 7)
|
||||||
|
buf.reverse()
|
||||||
|
return buf
|
||||||
|
|
||||||
|
|
||||||
|
def encode_prefix(label):
  """Encodes a node label as a reversed list of byte values, without setting
  the high bit on any of them.

  Used when a node has exactly one child and that child follows immediately,
  so no jump is needed and the label simply prefixes the child's label.
  """
  assert label
  return [ord(char) for char in label[::-1]]
|
||||||
|
|
||||||
|
|
||||||
|
def encode_label(label):
  """Encodes a node label as a reversed list of byte values in which the byte
  for the label's last character has its most significant bit set.
  """
  encoded = encode_prefix(label)
  # The list is reversed, so index 0 is the last character of the label;
  # the high bit marks the end of the label.
  encoded[0] = encoded[0] | 0x80
  return encoded
|
||||||
|
|
||||||
|
|
||||||
|
def encode(dafsa):
  """Encodes a DAFSA as a list of byte values.

  Nodes are emitted children first into a reversed stream, which is flipped
  at the very end so that the source's links come first.
  """
  stream = []
  positions = {}  # id(node) -> position of the node in the reversed stream

  for node in reversed(top_sort(dafsa)):
    label, children = node[0], node[1]
    only_child_is_adjacent = (
        len(children) == 1 and children[0] and
        positions[id(children[0])] == len(stream))
    if only_child_is_adjacent:
      # The single child was emitted just before: no link needed.
      stream.extend(encode_prefix(label))
    else:
      stream.extend(encode_links(children, positions, len(stream)))
      stream.extend(encode_label(label))
    positions[id(node)] = len(stream)

  stream.extend(encode_links(dafsa, positions, len(stream)))
  stream.reverse()
  return stream
|
||||||
|
|
||||||
|
|
||||||
|
def to_cxx(data):
  """Renders a list of encoded byte values as C/C++ source text defining the
  array kDafsa, twelve values per line.
  """
  header = ('/* This file is generated. DO NOT EDIT!\n\n'
            'The byte array encodes effective tld names. See make_dafsa.py for'
            ' documentation.'
            '*/\n\n')
  declaration = 'static const unsigned char kDafsa[%s] = {\n' % len(data)
  rows = []
  for start in range(0, len(data), 12):
    values = ', '.join('0x%02x' % byte for byte in data[start:start + 12])
    rows.append(' ' + values + ',\n')
  return header + declaration + ''.join(rows) + '};\n'
|
||||||
|
|
||||||
|
|
||||||
|
def words_to_whatever(words, converter):
  """Builds a compressed, encoded DAFSA from a word list and returns whatever
  |converter| makes of the resulting list of byte values.
  """
  graph = to_dafsa(words)
  # Suffix-join the reversed graph, flip it back and suffix-join again, then
  # fuse one-to-one linked nodes.
  graph = join_suffixes(reverse(graph))
  graph = join_suffixes(reverse(graph))
  graph = join_labels(graph)
  return converter(encode(graph))
|
||||||
|
|
||||||
|
|
||||||
|
def words_to_cxx(words):
  """Returns C/C++ source text for the DAFSA built from a word list."""
  cxx_text = words_to_whatever(words, to_cxx)
  return cxx_text
|
||||||
|
|
||||||
|
|
||||||
|
def words_to_binary(words):
  """Returns the DAFSA built from a word list as a bytearray."""
  raw_bytes = words_to_whatever(words, bytearray)
  return raw_bytes
|
||||||
|
|
||||||
|
|
||||||
|
def parse_psl2c(infile):
  """Parses lines of the form "domainname, <digit>" as written by psl2c.

  Returns one word per line: the domain name directly followed by the digit.
  Raises InputError on the first line that does not have that form.
  """
  stripped = [raw.strip() for raw in infile]

  words = []
  for entry in stripped:
    name, separator, digit = entry[:-3], entry[-3:-1], entry[-1:]
    if separator != ', ':
      raise InputError('Expected "domainname, <digit>", found "%s"' % entry)
    # Technically the DAFSA format could support return values in range [0-31],
    # but the values below are the only with a defined meaning.
    if digit not in '0123456789ABCDEF':
      raise InputError('Expected value to be one of {0,1,2,4,5}, found "%s"' %
                       digit)
    words.append(name + digit)
  return words
|
||||||
|
|
||||||
|
|
||||||
|
def parse_psl(infile):
  """Parses a Public Suffix List file into DAFSA input words.

  |infile| is an iterable of text lines. Blank lines and "//" comments are
  skipped; the "===BEGIN/END ICANN|PRIVATE DOMAINS===" comments select the
  section flag that is attached to the rules that follow.

  Rule handling:
    !rule   exception rule, the '!' is dropped
    *.rule  wildcard rule, the '*.' is dropped; other '*' forms are reported
            and ignored
    rule    plain rule; rules without a '.' are skipped because the implicit
            '*' rule already covers plain TLDs

  Each rule is converted to its IDNA (punycode) form. A rule whose converted
  name was already seen is reported and ignored, so the first occurrence wins.

  Returns a list of words, each the converted name directly followed by one
  uppercase hex digit holding the low four flag bits.
  """
  PSL_FLAG_EXCEPTION = (1<<0)
  PSL_FLAG_WILDCARD = (1<<1)
  PSL_FLAG_ICANN = (1<<2) # entry of ICANN section
  PSL_FLAG_PRIVATE = (1<<3) # entry of PRIVATE section
  PSL_FLAG_PLAIN = (1<<4) # just used for PSL syntax checking

  def to_ascii(rule):
    """Returns the IDNA form of |rule| as the interpreter's native str."""
    if isinstance(rule, bytes):
      # Python 2 str (or bytes input): interpret as UTF-8 first.
      rule = rule.decode('utf-8')
    encoded = rule.encode('idna')
    if not isinstance(encoded, str):
      # Python 3: the codec returns bytes; the result is pure ASCII.
      encoded = encoded.decode('ascii')
    return encoded

  psl = {}
  section = 0

  for line in infile:
    line = line.strip()
    if not line:
      continue

    if line.startswith("//"):
      if section == 0:
        if "===BEGIN ICANN DOMAINS===" in line:
          section = PSL_FLAG_ICANN
        elif "===BEGIN PRIVATE DOMAINS===" in line:
          section = PSL_FLAG_PRIVATE
      elif section == PSL_FLAG_ICANN and "===END ICANN DOMAINS===" in line:
        section = 0
      elif section == PSL_FLAG_PRIVATE and "===END PRIVATE DOMAINS===" in line:
        section = 0
      continue # skip comments

    if line[0] == '!':
      flags = PSL_FLAG_EXCEPTION | section
      line = line[1:]
    elif line[0] == '*':
      # Slicing instead of indexing so that a bare '*' is reported as
      # unsupported rather than raising IndexError.
      if line[1:2] != '.':
        print('Unsupported kind of rule (ignored): %s' % line)
        continue
      flags = PSL_FLAG_WILDCARD | PSL_FLAG_PLAIN | section
      line = line[2:]
    else:
      if '.' not in line:
        continue # we do not need an explicit plain TLD rule, already covered by implicit '*' rule
      flags = PSL_FLAG_PLAIN | section

    line = to_ascii(line)

    if line in psl:
      # Found existing entry:
      #   Combination of exception and plain rule is ambiguous
      #     !foo.bar
      #     foo.bar
      #
      #   Allowed:
      #     !foo.bar + *.foo.bar
      #     foo.bar + *.foo.bar
      #
      # The arguments must be passed as one tuple; formatting with |line|
      # alone raises TypeError.
      print('Found %s/%X (now %X)' % (line, psl[line], flags))
      continue

    psl[line] = flags

  return [domain + "%X" % (flags & 0x0F) for (domain, flags) in psl.items()]
|
||||||
|
|
||||||
|
|
||||||
|
def usage():
  """Prints the command line help and exits with status 1."""
  help_lines = (
      'usage: %s [options] infile outfile' % sys.argv[0],
      ' --input-format=psl2c infile has been generated by libpsl/psl2c utility (default)',
      ' --input-format=psl infile is a Public Suffix List file',
      ' --output-format=cxx Write DAFSA as C/C++ code',
      ' --output-format=binary Write DAFSA binary data',
  )
  for text in help_lines:
    print(text)
  exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
  """Convert PSL file into C or binary DAFSA file.

  The last two command line arguments are the input file ('-' for stdin) and
  the output file; everything before them must be a known option. Returns the
  process exit status.
  """
  if len(sys.argv) < 3:
    usage()

  converter = words_to_cxx
  parser = parse_psl2c

  parsers = {'psl': parse_psl, 'psl2c': parse_psl2c}
  converters = {'binary': words_to_binary, 'cxx': words_to_cxx}

  for arg in sys.argv[1:-2]:
    if arg.startswith('--input-format='):
      value = arg[15:].lower()
      if value not in parsers:
        print("Unknown input format '%s'" % value)
        return 1
      parser = parsers[value]
    elif arg.startswith('--output-format='):
      value = arg[16:].lower()
      if value not in converters:
        print("Unknown output format '%s'" % value)
        return 1
      converter = converters[value]
    else:
      usage()

  in_name, out_name = sys.argv[-2], sys.argv[-1]
  if in_name == '-':
    with open(out_name, 'w') as outfile:
      outfile.write(converter(parser(sys.stdin)))
  else:
    with open(in_name, 'r') as infile, open(out_name, 'w') as outfile:
      outfile.write(converter(parser(infile)))

  return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
  # Hand main()'s return value to the interpreter as the exit status.
  exit_status = main()
  sys.exit(exit_status)
|
200
src/psl2c.c
200
src/psl2c.c
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright(c) 2014-2015 Tim Ruehsen
|
* Copyright(c) 2014-2016 Tim Ruehsen
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
@ -45,8 +45,6 @@
|
||||||
# define _GENERATE_BUILTIN_DATA
|
# define _GENERATE_BUILTIN_DATA
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef _GENERATE_BUILTIN_DATA
|
|
||||||
|
|
||||||
#include <libpsl.h>
|
#include <libpsl.h>
|
||||||
|
|
||||||
/* here we include the library source code to have access to internal functions and data structures */
|
/* here we include the library source code to have access to internal functions and data structures */
|
||||||
|
@ -54,6 +52,8 @@
|
||||||
# include "psl.c"
|
# include "psl.c"
|
||||||
#undef _LIBPSL_INCLUDED_BY_PSL2C
|
#undef _LIBPSL_INCLUDED_BY_PSL2C
|
||||||
|
|
||||||
|
#ifdef _GENERATE_BUILTIN_DATA
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
static int _check_psl(const psl_ctx_t *psl)
|
static int _check_psl(const psl_ctx_t *psl)
|
||||||
{
|
{
|
||||||
|
@ -128,8 +128,9 @@ static int _check_psl(const psl_ctx_t *psl)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *varname)
|
static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v)
|
||||||
{
|
{
|
||||||
|
FILE *fp;
|
||||||
int it;
|
int it;
|
||||||
|
|
||||||
#ifdef BUILTIN_GENERATOR_LIBICU
|
#ifdef BUILTIN_GENERATOR_LIBICU
|
||||||
|
@ -146,139 +147,166 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *
|
||||||
#elif defined(BUILTIN_GENERATOR_LIBIDN)
|
#elif defined(BUILTIN_GENERATOR_LIBIDN)
|
||||||
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn/%s) */\n", stringprep_check_version(NULL));
|
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn/%s) */\n", stringprep_check_version(NULL));
|
||||||
#else
|
#else
|
||||||
fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n");
|
fprintf(fpout, "/* automatically generated by psl2c (punycode generated internally) */\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
fprintf(fpout, "static _psl_entry_t %s[] = {\n", varname);
|
if ((fp = fopen("in.tmp", "w"))) {
|
||||||
|
|
||||||
for (it = 0; it < v->cur; it++) {
|
for (it = 0; it < v->cur; it++) {
|
||||||
_psl_entry_t *e = _vector_get(v, it);
|
_psl_entry_t *e = _vector_get(v, it);
|
||||||
|
unsigned char *s = (unsigned char *)e->label_buf;
|
||||||
|
|
||||||
fprintf(fpout, "\t{ \"%s\", NULL, %hd, %d, %d },\n",
|
/* search for non-ASCII label and skip it */
|
||||||
e->label_buf, e->length, (int) e->nlabels, (int) e->flags);
|
while (*s && *s < 128) s++;
|
||||||
|
if (*s) continue;
|
||||||
|
|
||||||
|
fprintf(fp, "%s, %X\n", e->label_buf, (int) (e->flags & 0x0F));
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(fpout, "};\n");
|
fclose(fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((it = system(MAKE_DAFSA " in.tmp out.tmp")))
|
||||||
|
fprintf(stderr, "Failed to execute " MAKE_DAFSA "\n");
|
||||||
|
|
||||||
|
if ((fp = fopen("out.tmp", "r"))) {
|
||||||
|
char buf[256];
|
||||||
|
|
||||||
|
while (fgets(buf, sizeof(buf), fp))
|
||||||
|
fputs(buf, fpout);
|
||||||
|
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
unlink("in.tmp");
|
||||||
|
unlink("out.tmp");
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
#if !defined(WITH_LIBICU) && !defined(WITH_IDN2)
|
|
||||||
static int _str_needs_encoding(const char *s)
|
|
||||||
{
|
|
||||||
while (*s && *((unsigned char *)s) < 128) s++;
|
|
||||||
|
|
||||||
return !!*s;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void _add_punycode_if_needed(_psl_vector_t *v)
|
|
||||||
{
|
|
||||||
int it, n;
|
|
||||||
|
|
||||||
/* do not use 'it < v->cur' since v->cur is changed by _vector_add() ! */
|
|
||||||
for (it = 0, n = v->cur; it < n; it++) {
|
|
||||||
_psl_entry_t *e = _vector_get(v, it);
|
|
||||||
|
|
||||||
if (_str_needs_encoding(e->label_buf)) {
|
|
||||||
_psl_entry_t suffix, *suffixp;
|
|
||||||
char lookupname[64] = "";
|
|
||||||
|
|
||||||
/* this is much slower than the libidn2 API but should have no license issues */
|
|
||||||
FILE *pp;
|
|
||||||
char cmd[16 + sizeof(e->label_buf)];
|
|
||||||
snprintf(cmd, sizeof(cmd), "idn2 '%s'", e->label_buf);
|
|
||||||
if ((pp = popen(cmd, "r"))) {
|
|
||||||
if (fscanf(pp, "%63s", lookupname) >= 1 && strcmp(e->label_buf, lookupname)) {
|
|
||||||
/* fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, lookupname); */
|
|
||||||
_suffix_init(&suffix, lookupname, strlen(lookupname));
|
|
||||||
suffix.wildcard = e->wildcard;
|
|
||||||
suffixp = _vector_get(v, _vector_add(v, &suffix));
|
|
||||||
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
|
||||||
}
|
|
||||||
pclose(pp);
|
|
||||||
} else
|
|
||||||
fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
_vector_sort(v);
|
|
||||||
}
|
|
||||||
#endif /* !defined(WITH_LIBICU) && !defined(WITH_IDN2) */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* _GENERATE_BUILTIN_DATA */
|
#endif /* _GENERATE_BUILTIN_DATA */
|
||||||
|
|
||||||
|
static int _print_psl_entries_dafsa_binary(const char *fname, const _psl_vector_t *v)
|
||||||
|
{
|
||||||
|
FILE *fp;
|
||||||
|
int ret = 0, it, rc;
|
||||||
|
char cmd[256];
|
||||||
|
|
||||||
|
if ((fp = fopen("in.tmp", "w"))) {
|
||||||
|
for (it = 0; it < v->cur; it++) {
|
||||||
|
_psl_entry_t *e = _vector_get(v, it);
|
||||||
|
unsigned char *s = (unsigned char *)e->label_buf;
|
||||||
|
|
||||||
|
/* search for non-ASCII label and skip it */
|
||||||
|
while (*s && *s < 128) s++;
|
||||||
|
if (*s) continue;
|
||||||
|
|
||||||
|
fprintf(fp, "%s, %X\n", e->label_buf, (int) (e->flags & 0x0F));
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(fp);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "Failed to write open 'in.tmp'\n");
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
snprintf(cmd, sizeof(cmd), MAKE_DAFSA " --binary in.tmp %s", fname);
|
||||||
|
if ((rc = system(cmd))) {
|
||||||
|
fprintf(stderr, "Failed to execute '%s' (%d)\n", cmd, rc);
|
||||||
|
ret = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
unlink("in.tmp");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Print the command line help to stderr and terminate with exit status 1. */
static void usage(void)
{
	fprintf(stderr, "Usage: psl2c [--binary] <infile> <outfile>\n");
	fprintf(stderr, " <infile> is the 'public_suffix_list.dat', lowercase UTF-8 encoded\n");
	fprintf(stderr, " <outfile> is the filename to be generated from <infile>\n");
	fprintf(stderr, " --binary Generate binary DAFSA output (default: C code for psl.c)\n");
	exit(1);
}
|
||||||
|
|
||||||
int main(int argc, const char **argv)
|
int main(int argc, const char **argv)
|
||||||
{
|
{
|
||||||
FILE *fpout;
|
FILE *fpout;
|
||||||
#ifdef _GENERATE_BUILTIN_DATA
|
|
||||||
psl_ctx_t *psl;
|
psl_ctx_t *psl;
|
||||||
#endif
|
int ret = 0, argpos = 1, binary = 0;
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
if (argc != 3) {
|
if (argc < 3)
|
||||||
fprintf(stderr, "Usage: psl2c <infile> <outfile>\n");
|
usage();
|
||||||
fprintf(stderr, " <infile> is the 'public_suffix_list.dat', lowercase UTF-8 encoded\n");
|
|
||||||
fprintf(stderr, " <outfile> is the the C filename to be generated from <infile>\n");
|
if (strcmp(argv[argpos], "--binary") == 0) {
|
||||||
return 1;
|
argpos++;
|
||||||
|
binary = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (argc - argpos != 2)
|
||||||
|
usage();
|
||||||
|
|
||||||
|
if (binary) {
|
||||||
|
if (!(psl = psl_load_file(argv[argpos])))
|
||||||
|
return 2;
|
||||||
|
|
||||||
|
ret = _print_psl_entries_dafsa_binary(argv[argpos + 1], psl->suffixes);
|
||||||
|
|
||||||
|
psl_free(psl);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _GENERATE_BUILTIN_DATA
|
#ifdef _GENERATE_BUILTIN_DATA
|
||||||
if (!(psl = psl_load_file(argv[1])))
|
if (!(psl = psl_load_file(argv[argpos])))
|
||||||
return 2;
|
return 2;
|
||||||
|
|
||||||
/* look for ambigious or double entries */
|
/* look for ambiguous or double entries */
|
||||||
/* if (_check_psl(psl)) {
|
/* if (_check_psl(psl)) {
|
||||||
psl_free(psl);
|
psl_free(psl);
|
||||||
return 5;
|
return 5;
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
if ((fpout = fopen(argv[2], "w"))) {
|
if ((fpout = fopen(argv[argpos + 1], "w"))) {
|
||||||
FILE *pp;
|
FILE *pp;
|
||||||
struct stat st;
|
struct stat st;
|
||||||
size_t cmdsize = 16 + strlen(argv[1]);
|
size_t cmdsize = 16 + strlen(argv[argpos]);
|
||||||
char *cmd = alloca(cmdsize), checksum[64] = "";
|
char *cmd = alloca(cmdsize), checksum[64] = "";
|
||||||
const char *source_date_epoch = NULL;
|
char *abs_srcfile;
|
||||||
|
|
||||||
#if 0
|
_print_psl_entries_dafsa(fpout, psl->suffixes);
|
||||||
/* include library code did not generate punycode, so let's do it for the builtin data */
|
|
||||||
_add_punycode_if_needed(psl->suffixes);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
_print_psl_entries(fpout, psl->suffixes, "suffixes");
|
snprintf(cmd, cmdsize, "sha1sum %s", argv[argpos]);
|
||||||
|
|
||||||
snprintf(cmd, cmdsize, "sha1sum %s", argv[1]);
|
|
||||||
if ((pp = popen(cmd, "r"))) {
|
if ((pp = popen(cmd, "r"))) {
|
||||||
if (fscanf(pp, "%63[0-9a-zA-Z]", checksum) < 1)
|
if (fscanf(pp, "%63[0-9a-zA-Z]", checksum) < 1)
|
||||||
*checksum = 0;
|
*checksum = 0;
|
||||||
pclose(pp);
|
pclose(pp);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stat(argv[1], &st) != 0)
|
if (stat(argv[argpos], &st) != 0)
|
||||||
st.st_mtime = 0;
|
st.st_mtime = 0;
|
||||||
fprintf(fpout, "static time_t _psl_file_time = %lu;\n", st.st_mtime);
|
fprintf(fpout, "static time_t _psl_file_time = %lu;\n", st.st_mtime);
|
||||||
if ((source_date_epoch = getenv("SOURCE_DATE_EPOCH")))
|
|
||||||
fprintf(fpout, "static time_t _psl_compile_time = %lu;\n", atol(source_date_epoch));
|
|
||||||
else
|
|
||||||
fprintf(fpout, "static time_t _psl_compile_time = %lu;\n", time(NULL));
|
|
||||||
fprintf(fpout, "static int _psl_nsuffixes = %d;\n", psl->nsuffixes);
|
fprintf(fpout, "static int _psl_nsuffixes = %d;\n", psl->nsuffixes);
|
||||||
fprintf(fpout, "static int _psl_nexceptions = %d;\n", psl->nexceptions);
|
fprintf(fpout, "static int _psl_nexceptions = %d;\n", psl->nexceptions);
|
||||||
fprintf(fpout, "static int _psl_nwildcards = %d;\n", psl->nwildcards);
|
fprintf(fpout, "static int _psl_nwildcards = %d;\n", psl->nwildcards);
|
||||||
fprintf(fpout, "static const char _psl_sha1_checksum[] = \"%s\";\n", checksum);
|
fprintf(fpout, "static const char _psl_sha1_checksum[] = \"%s\";\n", checksum);
|
||||||
fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[1]);
|
|
||||||
|
/* We need an absolute path here, else psl_builtin_outdated() won't work reliably */
|
||||||
|
/* Caveat: symbolic links are resolved by realpath() */
|
||||||
|
if ((abs_srcfile = realpath(argv[argpos], NULL))) {
|
||||||
|
fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", abs_srcfile);
|
||||||
|
free(abs_srcfile);
|
||||||
|
} else
|
||||||
|
fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[argpos]);
|
||||||
|
|
||||||
if (fclose(fpout) != 0)
|
if (fclose(fpout) != 0)
|
||||||
ret = 4;
|
ret = 4;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Failed to write open '%s'\n", argv[2]);
|
fprintf(stderr, "Failed to write open '%s'\n", argv[argpos + 1]);
|
||||||
ret = 3;
|
ret = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
psl_free(psl);
|
psl_free(psl);
|
||||||
#else
|
#else
|
||||||
if ((fpout = fopen(argv[2], "w"))) {
|
if ((fpout = fopen(argv[argpos + 1], "w"))) {
|
||||||
fprintf(fpout, "static _psl_entry_t suffixes[1];\n");
|
fprintf(fpout, "static const unsigned char kDafsa[1];\n");
|
||||||
fprintf(fpout, "static time_t _psl_file_time;\n");
|
fprintf(fpout, "static time_t _psl_file_time;\n");
|
||||||
fprintf(fpout, "static time_t _psl_compile_time;\n");
|
|
||||||
fprintf(fpout, "static int _psl_nsuffixes = 0;\n");
|
fprintf(fpout, "static int _psl_nsuffixes = 0;\n");
|
||||||
fprintf(fpout, "static int _psl_nexceptions = 0;\n");
|
fprintf(fpout, "static int _psl_nexceptions = 0;\n");
|
||||||
fprintf(fpout, "static int _psl_nwildcards = 0;\n");
|
fprintf(fpout, "static int _psl_nwildcards = 0;\n");
|
||||||
|
@ -288,7 +316,7 @@ int main(int argc, const char **argv)
|
||||||
if (fclose(fpout) != 0)
|
if (fclose(fpout) != 0)
|
||||||
ret = 4;
|
ret = 4;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Failed to write open '%s'\n", argv[2]);
|
fprintf(stderr, "Failed to write open '%s'\n", argv[argpos + 1]);
|
||||||
ret = 3;
|
ret = 3;
|
||||||
}
|
}
|
||||||
#endif /* GENERATE_BUILTIN_DATA */
|
#endif /* GENERATE_BUILTIN_DATA */
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
DEFS = @DEFS@ -DDATADIR=\"$(top_srcdir)/data\" -DSRCDIR=\"$(srcdir)\" -DPSL_FILE=\"$(PSL_FILE)\" -DPSL_TESTFILE=\"$(PSL_TESTFILE)\"
|
DEFS = @DEFS@ -DSRCDIR=\"$(srcdir)\" -DPSL_FILE=\"$(PSL_FILE)\" -DPSL_TESTFILE=\"$(PSL_TESTFILE)\"
|
||||||
AM_CPPFLAGS = -I$(top_srcdir)/include
|
AM_CPPFLAGS = -I$(top_srcdir)/include
|
||||||
LDADD = ../src/libpsl.la
|
LDADD = ../src/libpsl.la
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright(c) 2014-2015 Tim Ruehsen
|
* Copyright(c) 2014-2016 Tim Ruehsen
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
@ -45,68 +45,140 @@
|
||||||
static int
|
static int
|
||||||
ok,
|
ok,
|
||||||
failed;
|
failed;
|
||||||
|
#ifdef HAVE_CLOCK_GETTIME
|
||||||
|
struct timespec ts1, ts2;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Locale-independent whitespace test: returns 1 for space, tab, CR and LF, else 0. */
static inline int _isspace_ascii(const char c)
{
	switch (c) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
|
||||||
|
|
||||||
|
static void test_psl_entry(const psl_ctx_t *psl, const char *domain, int type)
|
||||||
|
{
|
||||||
|
int result;
|
||||||
|
|
||||||
|
if (*domain == '!') { /* an exception to a wildcard, e.g. !www.ck (wildcard is *.ck) */
|
||||||
|
if ((result = psl_is_public_suffix(psl, domain + 1))) {
|
||||||
|
failed++;
|
||||||
|
printf("psl_is_public_suffix(%s)=%d (expected 0)\n", domain, result);
|
||||||
|
} else ok++;
|
||||||
|
|
||||||
|
if ((domain = strchr(domain, '.'))) {
|
||||||
|
if (!(result = psl_is_public_suffix(psl, domain + 1))) {
|
||||||
|
failed++;
|
||||||
|
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", domain + 1, result);
|
||||||
|
} else ok++;
|
||||||
|
}
|
||||||
|
} else if (*domain == '*') { /* a wildcard, e.g. *.ck or *.platform.sh */
|
||||||
|
char *xdomain;
|
||||||
|
size_t len;
|
||||||
|
|
||||||
|
if (!(result = psl_is_public_suffix(psl, domain + 1))) {
|
||||||
|
failed++;
|
||||||
|
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", domain + 1, result);
|
||||||
|
} else ok++;
|
||||||
|
|
||||||
|
len = strlen(domain);
|
||||||
|
xdomain = alloca(len + 1);
|
||||||
|
memcpy(xdomain, domain, len + 1);
|
||||||
|
*xdomain = 'x';
|
||||||
|
if (!(result = psl_is_public_suffix(psl, domain))) {
|
||||||
|
failed++;
|
||||||
|
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", domain, result);
|
||||||
|
} else ok++;
|
||||||
|
} else {
|
||||||
|
if (!(result = psl_is_public_suffix(psl, domain))) {
|
||||||
|
failed++;
|
||||||
|
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", domain, result);
|
||||||
|
} else ok++;
|
||||||
|
|
||||||
|
if (!(strchr(domain, '.'))) {
|
||||||
|
/* TLDs are always expected to be Publix Suffixes */
|
||||||
|
if (!(result = psl_is_public_suffix2(psl, domain, PSL_TYPE_PRIVATE))) {
|
||||||
|
failed++;
|
||||||
|
printf("psl_is_public_suffix2(%s, PSL_TYPE_PRIVATE)=%d (expected 1)\n", domain, result);
|
||||||
|
} else ok++;
|
||||||
|
|
||||||
|
if (!(result = psl_is_public_suffix2(psl, domain, PSL_TYPE_ICANN))) {
|
||||||
|
failed++;
|
||||||
|
printf("psl_is_public_suffix2(%s, PSL_TYPE_ICANN)=%d (expected 0)\n", domain, result);
|
||||||
|
} else ok++;
|
||||||
|
} else if (type == PSL_TYPE_PRIVATE) {
|
||||||
|
if (!(result = psl_is_public_suffix2(psl, domain, PSL_TYPE_PRIVATE))) {
|
||||||
|
failed++;
|
||||||
|
printf("psl_is_public_suffix2(%s, PSL_TYPE_PRIVATE)=%d (expected 1)\n", domain, result);
|
||||||
|
} else ok++;
|
||||||
|
|
||||||
|
if ((result = psl_is_public_suffix2(psl, domain, PSL_TYPE_ICANN))) {
|
||||||
|
failed++;
|
||||||
|
printf("psl_is_public_suffix2(%s, PSL_TYPE_ICANN)=%d (expected 0)\n", domain, result);
|
||||||
|
} else ok++;
|
||||||
|
} else if (type == PSL_TYPE_ICANN) {
|
||||||
|
if (!(result = psl_is_public_suffix2(psl, domain, PSL_TYPE_ICANN))) {
|
||||||
|
failed++;
|
||||||
|
printf("psl_is_public_suffix2(%s, PSL_TYPE_ICANN)=%d (expected 1)\n", domain, result);
|
||||||
|
} else ok++;
|
||||||
|
|
||||||
|
if ((result = psl_is_public_suffix2(psl, domain, PSL_TYPE_PRIVATE))) {
|
||||||
|
failed++;
|
||||||
|
printf("psl_is_public_suffix2(%s, PSL_TYPE_PRIVATE)=%d (expected 0)\n", domain, result);
|
||||||
|
} else ok++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void test_psl(void)
|
static void test_psl(void)
|
||||||
{
|
{
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
psl_ctx_t *psl;
|
psl_ctx_t *psl;
|
||||||
int result;
|
const psl_ctx_t *psl2;
|
||||||
|
int type = 0;
|
||||||
char buf[256], *linep, *p;
|
char buf[256], *linep, *p;
|
||||||
|
|
||||||
psl = psl_load_file(PSL_FILE); /* PSL_FILE can be set by ./configure --with-psl-file=[PATH] */
|
psl = psl_load_file(PSL_FILE); /* PSL_FILE can be set by ./configure --with-psl-file=[PATH] */
|
||||||
|
|
||||||
printf("loaded %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));
|
printf("loaded %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));
|
||||||
|
|
||||||
|
psl2 = psl_builtin();
|
||||||
|
printf("builtin PSL has %d suffixes and %d exceptions\n", psl_suffix_count(psl2), psl_suffix_exception_count(psl2));
|
||||||
|
|
||||||
if ((fp = fopen(PSL_FILE, "r"))) {
|
if ((fp = fopen(PSL_FILE, "r"))) {
|
||||||
|
#ifdef HAVE_CLOCK_GETTIME
|
||||||
|
clock_gettime(CLOCK_REALTIME, &ts1);
|
||||||
|
#endif
|
||||||
|
|
||||||
while ((linep = fgets(buf, sizeof(buf), fp))) {
|
while ((linep = fgets(buf, sizeof(buf), fp))) {
|
||||||
while (_isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
|
while (_isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
|
||||||
if (!*linep) continue; /* skip empty lines */
|
if (!*linep) continue; /* skip empty lines */
|
||||||
|
|
||||||
if (*linep == '/' && linep[1] == '/')
|
if (*linep == '/' && linep[1] == '/') {
|
||||||
|
if (!type) {
|
||||||
|
if (strstr(linep + 2, "===BEGIN ICANN DOMAINS==="))
|
||||||
|
type = PSL_TYPE_ICANN;
|
||||||
|
else if (!type && strstr(linep + 2, "===BEGIN PRIVATE DOMAINS==="))
|
||||||
|
type = PSL_TYPE_PRIVATE;
|
||||||
|
}
|
||||||
|
else if (type == PSL_TYPE_ICANN && strstr(linep + 2, "===END ICANN DOMAINS==="))
|
||||||
|
type = 0;
|
||||||
|
else if (type == PSL_TYPE_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS==="))
|
||||||
|
type = 0;
|
||||||
|
|
||||||
continue; /* skip comments */
|
continue; /* skip comments */
|
||||||
|
}
|
||||||
|
|
||||||
/* parse suffix rule */
|
/* parse suffix rule */
|
||||||
for (p = linep; *linep && !_isspace_ascii(*linep);) linep++;
|
for (p = linep; *linep && !_isspace_ascii(*linep);) linep++;
|
||||||
*linep = 0;
|
*linep = 0;
|
||||||
|
|
||||||
if (*p == '!') { /* an exception to a wildcard, e.g. !www.ck (wildcard is *.ck) */
|
test_psl_entry(psl, p, type);
|
||||||
if ((result = psl_is_public_suffix(psl, p + 1))) {
|
|
||||||
failed++;
|
|
||||||
printf("psl_is_public_suffix(%s)=%d (expected 0)\n", p, result);
|
|
||||||
} else ok++;
|
|
||||||
|
|
||||||
if ((p = strchr(p, '.'))) {
|
if (psl2)
|
||||||
if (!(result = psl_is_public_suffix(psl, p + 1))) {
|
test_psl_entry(psl2, p, type);
|
||||||
failed++;
|
|
||||||
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", p + 1, result);
|
|
||||||
} else ok++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (*p == '*') { /* a wildcard, e.g. *.ck */
|
|
||||||
if (!(result = psl_is_public_suffix(psl, p + 1))) {
|
|
||||||
failed++;
|
|
||||||
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", p + 1, result);
|
|
||||||
} else ok++;
|
|
||||||
|
|
||||||
*p = 'x';
|
|
||||||
if (!(result = psl_is_public_suffix(psl, p))) {
|
|
||||||
failed++;
|
|
||||||
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", p, result);
|
|
||||||
} else ok++;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (!(result = psl_is_public_suffix(psl, p))) {
|
|
||||||
failed++;
|
|
||||||
printf("psl_is_public_suffix(%s)=%d (expected 1)\n", p, result);
|
|
||||||
} else ok++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_CLOCK_GETTIME
|
||||||
|
clock_gettime(CLOCK_REALTIME, &ts2);
|
||||||
|
#endif
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
} else {
|
} else {
|
||||||
printf("Failed to open %s\n", PSL_FILE);
|
printf("Failed to open %s\n", PSL_FILE);
|
||||||
|
@ -114,10 +186,15 @@ static void test_psl(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
psl_free(psl);
|
psl_free(psl);
|
||||||
|
psl_free((psl_ctx_t *)psl2);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, const char * const *argv)
|
int main(int argc, const char * const *argv)
|
||||||
{
|
{
|
||||||
|
#ifdef HAVE_CLOCK_GETTIME
|
||||||
|
long ns;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* if VALGRIND testing is enabled, we have to call ourselves with valgrind checking */
|
/* if VALGRIND testing is enabled, we have to call ourselves with valgrind checking */
|
||||||
if (argc == 1) {
|
if (argc == 1) {
|
||||||
const char *valgrind = getenv("TESTS_VALGRIND");
|
const char *valgrind = getenv("TESTS_VALGRIND");
|
||||||
|
@ -138,6 +215,21 @@ int main(int argc, const char * const *argv)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("Summary: All %d tests passed\n", ok + failed);
|
#ifdef HAVE_CLOCK_GETTIME
|
||||||
|
if (ts1.tv_sec == ts2.tv_sec)
|
||||||
|
ns = ts2.tv_nsec - ts1.tv_nsec;
|
||||||
|
else if (ts1.tv_sec == ts2.tv_sec - 1)
|
||||||
|
ns = 1000000000L - (ts2.tv_nsec - ts1.tv_nsec);
|
||||||
|
else
|
||||||
|
ns = 0; /* let's assume something is wrong and skip outputting measured time */
|
||||||
|
|
||||||
|
if (ns)
|
||||||
|
printf("Summary: All %d tests passed in %ld.%06ld ms\n", ok, ns / 1000000, ns % 1000000000);
|
||||||
|
else
|
||||||
|
printf("Summary: All %d tests passed\n", ok);
|
||||||
|
#else
|
||||||
|
printf("Summary: All %d tests passed\n", ok);
|
||||||
|
#endif
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright(c) 2014-2015 Tim Ruehsen
|
* Copyright(c) 2014-2016 Tim Ruehsen
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
@ -80,6 +80,10 @@ static void test_psl(void)
|
||||||
{ ".forgot.his.name", 1 },
|
{ ".forgot.his.name", 1 },
|
||||||
{ "whoever.his.name", 0 },
|
{ "whoever.his.name", 0 },
|
||||||
{ "whoever.forgot.his.name", 0 },
|
{ "whoever.forgot.his.name", 0 },
|
||||||
|
{ "whatever.platform.sh", 1 },
|
||||||
|
{ ".platform.sh", 1 },
|
||||||
|
{ "whatever.yokohama.jp", 1 },
|
||||||
|
{ ".yokohama.jp", 1 },
|
||||||
{ ".", 1 }, /* special case */
|
{ ".", 1 }, /* special case */
|
||||||
{ "", 1 }, /* special case */
|
{ "", 1 }, /* special case */
|
||||||
{ NULL, 1 }, /* special case */
|
{ NULL, 1 }, /* special case */
|
||||||
|
@ -104,9 +108,6 @@ static void test_psl(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("psl_builtin_compile_time()=%ld\n", psl_builtin_compile_time());
|
|
||||||
psl_builtin_compile_time() == 0 ? failed++ : ok++;
|
|
||||||
|
|
||||||
printf("psl_builtin_file_time()=%ld\n", psl_builtin_file_time());
|
printf("psl_builtin_file_time()=%ld\n", psl_builtin_file_time());
|
||||||
psl_builtin_file_time() == 0 ? failed++ : ok++;
|
psl_builtin_file_time() == 0 ? failed++ : ok++;
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright(c) 2014-2015 Tim Ruehsen
|
* Copyright(c) 2014-2016 Tim Ruehsen
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
@ -117,7 +117,7 @@ int main(int argc, const char *const *argv)
|
||||||
usage(0, stdout);
|
usage(0, stdout);
|
||||||
}
|
}
|
||||||
else if (!strcmp(*arg, "--version")) {
|
else if (!strcmp(*arg, "--version")) {
|
||||||
printf("psl %s\n", PACKAGE_VERSION);
|
printf("psl %s (0x%06x)\n", PACKAGE_VERSION, psl_check_version_number(0));
|
||||||
printf("libpsl %s\n", psl_get_version());
|
printf("libpsl %s\n", psl_get_version());
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("Copyright (C) 2014-2015 Tim Ruehsen\n");
|
printf("Copyright (C) 2014-2015 Tim Ruehsen\n");
|
||||||
|
@ -211,9 +211,9 @@ int main(int argc, const char *const *argv)
|
||||||
printf("builtin exceptions: %d\n", psl_suffix_exception_count(psl));
|
printf("builtin exceptions: %d\n", psl_suffix_exception_count(psl));
|
||||||
printf("builtin wildcards: %d\n", psl_suffix_wildcard_count(psl));
|
printf("builtin wildcards: %d\n", psl_suffix_wildcard_count(psl));
|
||||||
printf("builtin filename: %s\n", psl_builtin_filename());
|
printf("builtin filename: %s\n", psl_builtin_filename());
|
||||||
printf("builtin compile time: %ld (%s)\n", psl_builtin_compile_time(), time2str(psl_builtin_compile_time()));
|
|
||||||
printf("builtin file time: %ld (%s)\n", psl_builtin_file_time(), time2str(psl_builtin_file_time()));
|
printf("builtin file time: %ld (%s)\n", psl_builtin_file_time(), time2str(psl_builtin_file_time()));
|
||||||
printf("builtin SHA1 file hash: %s\n", psl_builtin_sha1sum());
|
printf("builtin SHA1 file hash: %s\n", psl_builtin_sha1sum());
|
||||||
|
printf("builtin outdated: %d\n", psl_builtin_outdated());
|
||||||
} else
|
} else
|
||||||
printf("No builtin PSL data available\n");
|
printf("No builtin PSL data available\n");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue