Merge tag 'libpsl-0.14.0' into debian
This commit is contained in:
commit
540271f496
8
NEWS
8
NEWS
|
@ -1,5 +1,13 @@
|
||||||
Copyright (C) 2014-2016 Tim Rühsen
|
Copyright (C) 2014-2016 Tim Rühsen
|
||||||
|
|
||||||
|
30.07.2016 Release V0.14.0
|
||||||
|
* Remove unneeded libraries from tools/psl link step
|
||||||
|
* Use https instead of http where possible
|
||||||
|
* Add man page for tools/psl
|
||||||
|
* Add header magic to DAFSA files
|
||||||
|
* Rename make_dafsa.py to psl-make-dafsa
|
||||||
|
* Add man page for psl-make-dafsa
|
||||||
|
|
||||||
02.03.2016 Release V0.13.0
|
02.03.2016 Release V0.13.0
|
||||||
* Use tests.txt as PSL test file by default
|
* Use tests.txt as PSL test file by default
|
||||||
* Slightly shorter DAFSA array when sorting input
|
* Slightly shorter DAFSA array when sorting input
|
||||||
|
|
22
README.md
22
README.md
|
@ -8,7 +8,7 @@ A "public suffix" is a domain name under which Internet users can directly regis
|
||||||
Browsers and other web clients can use it to
|
Browsers and other web clients can use it to
|
||||||
|
|
||||||
- avoid privacy-leaking "supercookies"
|
- avoid privacy-leaking "supercookies"
|
||||||
- avoid privacy-leaking "super domain" certificates ([see post from Jeffry Walton](http://lists.gnu.org/archive/html/bug-wget/2014-03/msg00093.html))
|
- avoid privacy-leaking "super domain" certificates ([see post from Jeffrey Walton](https://lists.gnu.org/archive/html/bug-wget/2014-03/msg00093.html))
|
||||||
- domain highlighting parts of the domain in a user interface
|
- domain highlighting parts of the domain in a user interface
|
||||||
- sorting domain lists by site
|
- sorting domain lists by site
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ Libpsl...
|
||||||
- is thread-safe
|
- is thread-safe
|
||||||
- handles IDNA2008 UTS#46 (libicu is used by psl2c if installed)
|
- handles IDNA2008 UTS#46 (libicu is used by psl2c if installed)
|
||||||
|
|
||||||
Find more information about the Publix Suffix List [here](http://publicsuffix.org/).
|
Find more information about the Public Suffix List [here](https://publicsuffix.org/).
|
||||||
|
|
||||||
Download the Public Suffix List [here](https://hg.mozilla.org/mozilla-central/raw-file/tip/netwerk/dns/effective_tld_names.dat).
|
Download the Public Suffix List [here](https://hg.mozilla.org/mozilla-central/raw-file/tip/netwerk/dns/effective_tld_names.dat).
|
||||||
|
|
||||||
|
@ -70,13 +70,27 @@ library API via command line.
|
||||||
|
|
||||||
prints the usage.
|
prints the usage.
|
||||||
|
|
||||||
|
Convert PSL into DAFSA
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
The [DAFSA](https://en.wikipedia.org/wiki/Deterministic_acyclic_finite_state_automaton) format is a compressed
|
||||||
|
representation of strings. Here we use it to reduce the whole PSL to about 32k in size.
|
||||||
|
|
||||||
|
Generate `psl.dafsa` from `list/public_suffix_list.dat`
|
||||||
|
|
||||||
|
$ src/psl-make-dafsa --output-format=binary --input-format=psl list/public_suffix_list.dat psl.dafsa
|
||||||
|
|
||||||
|
Test the result (example)
|
||||||
|
|
||||||
|
$ tools/psl --load-psl-file psl.dafsa aeroclub.aero
|
||||||
|
|
||||||
License
|
License
|
||||||
-------
|
-------
|
||||||
|
|
||||||
Libpsl is made available under the terms of the MIT license.<br>
|
Libpsl is made available under the terms of the MIT license.<br>
|
||||||
See the LICENSE file that accompanies this distribution for the full text of the license.
|
See the LICENSE file that accompanies this distribution for the full text of the license.
|
||||||
|
|
||||||
src/make_dafsa.py and src/lookup_string_in_fixed_set.c are licensed under the term written in
|
src/psl-make-dafsa and src/lookup_string_in_fixed_set.c are licensed under the terms written in
|
||||||
src/LICENSE.chromium.
|
src/LICENSE.chromium.
|
||||||
|
|
||||||
Building from git
|
Building from git
|
||||||
|
@ -86,7 +100,7 @@ You should have python2.7+ installed.
|
||||||
|
|
||||||
Download project and prepare sources with
|
Download project and prepare sources with
|
||||||
|
|
||||||
git clone http://github.com/rockdaboot/libpsl
|
git clone https://github.com/rockdaboot/libpsl
|
||||||
./autogen.sh
|
./autogen.sh
|
||||||
./configure
|
./configure
|
||||||
make
|
make
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
AC_INIT([libpsl], [0.13.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl])
|
AC_INIT([libpsl], [0.14.0], [tim.ruehsen@gmx.de], [libpsl], [https://github.com/rockdaboot/libpsl])
|
||||||
AC_PREREQ([2.59])
|
AC_PREREQ([2.59])
|
||||||
AM_INIT_AUTOMAKE([1.10 -Wall no-define foreign])
|
AM_INIT_AUTOMAKE([1.10 -Wall no-define foreign])
|
||||||
|
|
||||||
|
@ -72,7 +72,7 @@ AS_IF([ test "$enable_man" != no ], [
|
||||||
AC_MSG_RESULT([no])
|
AC_MSG_RESULT([no])
|
||||||
])
|
])
|
||||||
|
|
||||||
# src/make_dafsa.py needs python 2.7+
|
# src/psl-make-dafsa needs python 2.7+
|
||||||
AM_PATH_PYTHON([2.7])
|
AM_PATH_PYTHON([2.7])
|
||||||
|
|
||||||
PKG_PROG_PKG_CONFIG
|
PKG_PROG_PKG_CONFIG
|
||||||
|
@ -80,7 +80,7 @@ PKG_PROG_PKG_CONFIG
|
||||||
# Define these substitutions here to keep all version information in one place.
|
# Define these substitutions here to keep all version information in one place.
|
||||||
# For information on how to properly maintain the library version information,
|
# For information on how to properly maintain the library version information,
|
||||||
# refer to the libtool manual, section "Updating library version information":
|
# refer to the libtool manual, section "Updating library version information":
|
||||||
# http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
|
# https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
|
||||||
#
|
#
|
||||||
# 1. Start with version information of ‘0:0:0’ for each libtool library.
|
# 1. Start with version information of ‘0:0:0’ for each libtool library.
|
||||||
# 2. Update the version information only immediately before a public release of your software. More frequent updates are unnecessary, and only guarantee that the current interface number gets larger faster.
|
# 2. Update the version information only immediately before a public release of your software. More frequent updates are unnecessary, and only guarantee that the current interface number gets larger faster.
|
||||||
|
@ -88,7 +88,7 @@ PKG_PROG_PKG_CONFIG
|
||||||
# 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0.
|
# 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0.
|
||||||
# 5. If any interfaces have been added since the last public release, then increment age.
|
# 5. If any interfaces have been added since the last public release, then increment age.
|
||||||
# 6. If any existing interfaces have been removed or changed since the last public release, then set age to 0.
|
# 6. If any existing interfaces have been removed or changed since the last public release, then set age to 0.
|
||||||
AC_SUBST([LIBPSL_SO_VERSION], [5:0:0])
|
AC_SUBST([LIBPSL_SO_VERSION], [5:1:0])
|
||||||
AC_SUBST([LIBPSL_VERSION], $VERSION)
|
AC_SUBST([LIBPSL_VERSION], $VERSION)
|
||||||
|
|
||||||
# Check for enable/disable builtin PSL data
|
# Check for enable/disable builtin PSL data
|
||||||
|
@ -158,7 +158,6 @@ if test "$enable_runtime" = "libicu" -o "$enable_builtin" = "libicu"; then
|
||||||
PKG_CHECK_MODULES([LIBICU], [icu-uc], [
|
PKG_CHECK_MODULES([LIBICU], [icu-uc], [
|
||||||
HAVE_LIBICU=yes
|
HAVE_LIBICU=yes
|
||||||
if test "$enable_runtime" = "libicu"; then
|
if test "$enable_runtime" = "libicu"; then
|
||||||
LIBS="$LIBICU_LIBS $LIBS"
|
|
||||||
CFLAGS="$LIBICU_CFLAGS $CFLAGS"
|
CFLAGS="$LIBICU_CFLAGS $CFLAGS"
|
||||||
fi
|
fi
|
||||||
], [
|
], [
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
<releaseinfo>
|
<releaseinfo>
|
||||||
for Libpsl &version;.
|
for Libpsl &version;.
|
||||||
The latest version of this documentation can be found on-line at
|
The latest version of this documentation can be found on-line at
|
||||||
<ulink role="online-location" url="http://github.com/rockdaboot/libpsl">GitHub</ulink>.
|
<ulink role="online-location" url="https://github.com/rockdaboot/libpsl">GitHub</ulink>.
|
||||||
</releaseinfo>
|
</releaseinfo>
|
||||||
</bookinfo>
|
</bookinfo>
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,7 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* types for psl_is_publix_suffix2() */
|
/* types for psl_is_public_suffix2() */
|
||||||
#define PSL_TYPE_ICANN (1<<0)
|
#define PSL_TYPE_ICANN (1<<0)
|
||||||
#define PSL_TYPE_PRIVATE (1<<1)
|
#define PSL_TYPE_PRIVATE (1<<1)
|
||||||
#define PSL_TYPE_ANY (PSL_TYPE_ICANN | PSL_TYPE_PRIVATE)
|
#define PSL_TYPE_ANY (PSL_TYPE_ICANN | PSL_TYPE_PRIVATE)
|
||||||
|
|
2
list
2
list
|
@ -1 +1 @@
|
||||||
Subproject commit e2f2f4bfe2ae57651afb7268bb9a0b53da5eb8cf
|
Subproject commit 1df90f84db1a041991a48e46e786705f7161ab4c
|
|
@ -1,5 +1,5 @@
|
||||||
* The following License is for the source code files
|
* The following License is for the source code files
|
||||||
make_dafsa.py and lookup_string_in_fixed_set.c.
|
psl-make-dafsa and lookup_string_in_fixed_set.c.
|
||||||
|
|
||||||
// Copyright 2015 The Chromium Authors. All rights reserved.
|
// Copyright 2015 The Chromium Authors. All rights reserved.
|
||||||
//
|
//
|
||||||
|
|
|
@ -22,7 +22,7 @@ endif
|
||||||
|
|
||||||
noinst_PROGRAMS = psl2c
|
noinst_PROGRAMS = psl2c
|
||||||
psl2c_SOURCES = psl2c.c lookup_string_in_fixed_set.c
|
psl2c_SOURCES = psl2c.c lookup_string_in_fixed_set.c
|
||||||
psl2c_CPPFLAGS = -I$(top_srcdir)/include -DMAKE_DAFSA=\"$(top_srcdir)/src/make_dafsa.py\"
|
psl2c_CPPFLAGS = -I$(top_srcdir)/include -DMAKE_DAFSA=\"$(top_srcdir)/src/psl-make-dafsa\"
|
||||||
if BUILTIN_GENERATOR_LIBICU
|
if BUILTIN_GENERATOR_LIBICU
|
||||||
psl2c_LDADD = -licuuc
|
psl2c_LDADD = -licuuc
|
||||||
endif
|
endif
|
||||||
|
@ -38,4 +38,4 @@ endif
|
||||||
suffixes_dafsa.c: $(PSL_FILE) psl2c$(EXEEXT)
|
suffixes_dafsa.c: $(PSL_FILE) psl2c$(EXEEXT)
|
||||||
./psl2c$(EXEEXT) "$(PSL_FILE)" suffixes_dafsa.c
|
./psl2c$(EXEEXT) "$(PSL_FILE)" suffixes_dafsa.c
|
||||||
|
|
||||||
EXTRA_DIST = make_dafsa.py LICENSE.chromium
|
EXTRA_DIST = psl-make-dafsa LICENSE.chromium
|
||||||
|
|
|
@ -118,14 +118,14 @@ static int GetReturnValue(const unsigned char* offset,
|
||||||
* Looks up the string |key| with length |key_length| in a fixed set of
|
* Looks up the string |key| with length |key_length| in a fixed set of
|
||||||
* strings. The set of strings must be known at compile time. It is converted to
|
* strings. The set of strings must be known at compile time. It is converted to
|
||||||
* a graph structure named a DAFSA (Deterministic Acyclic Finite State
|
* a graph structure named a DAFSA (Deterministic Acyclic Finite State
|
||||||
* Automaton) by the script make_dafsa.py during compilation. This permits
|
* Automaton) by the script psl-make-dafsa during compilation. This permits
|
||||||
* efficient (in time and space) lookup. The graph generated by make_dafsa.py
|
* efficient (in time and space) lookup. The graph generated by psl-make-dafsa
|
||||||
* takes the form of a constant byte array which should be supplied via the
|
* takes the form of a constant byte array which should be supplied via the
|
||||||
* |graph| and |length| parameters. The return value is kDafsaNotFound,
|
* |graph| and |length| parameters. The return value is kDafsaNotFound,
|
||||||
* kDafsaFound, or a bitmap consisting of one or more of kDafsaExceptionRule,
|
* kDafsaFound, or a bitmap consisting of one or more of kDafsaExceptionRule,
|
||||||
* kDafsaWildcardRule and kDafsaPrivateRule ORed together.
|
* kDafsaWildcardRule and kDafsaPrivateRule ORed together.
|
||||||
*
|
*
|
||||||
* Lookup a domain key in a byte array generated by make_dafsa.py.
|
* Lookup a domain key in a byte array generated by psl-make-dafsa.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* prototype to skip warning with -Wmissing-prototypes */
|
/* prototype to skip warning with -Wmissing-prototypes */
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python2
|
||||||
# Copyright 2014 The Chromium Authors. All rights reserved.
|
# Copyright 2014 The Chromium Authors. All rights reserved.
|
||||||
# Use of this source code is governed by a BSD-style license that can be
|
# Use of this source code is governed by a BSD-style license that can be
|
||||||
# found in the LICENSE.chromium file.
|
# found in the LICENSE.chromium file.
|
||||||
|
@ -7,7 +7,7 @@
|
||||||
A Deterministic acyclic finite state automaton (DAFSA) is a compact
|
A Deterministic acyclic finite state automaton (DAFSA) is a compact
|
||||||
representation of an unordered word list (dictionary).
|
representation of an unordered word list (dictionary).
|
||||||
|
|
||||||
http://en.wikipedia.org/wiki/Deterministic_acyclic_finite_state_automaton
|
https://en.wikipedia.org/wiki/Deterministic_acyclic_finite_state_automaton
|
||||||
|
|
||||||
This python program converts a list of strings to a byte array in C++.
|
This python program converts a list of strings to a byte array in C++.
|
||||||
This python program fetches strings and return values from a gperf file
|
This python program fetches strings and return values from a gperf file
|
||||||
|
@ -418,7 +418,7 @@ def encode(dafsa):
|
||||||
def to_cxx(data):
|
def to_cxx(data):
|
||||||
"""Generates C++ code from a list of encoded bytes."""
|
"""Generates C++ code from a list of encoded bytes."""
|
||||||
text = '/* This file is generated. DO NOT EDIT!\n\n'
|
text = '/* This file is generated. DO NOT EDIT!\n\n'
|
||||||
text += 'The byte array encodes effective tld names. See make_dafsa.py for'
|
text += 'The byte array encodes effective tld names. See psl-make-dafsa source for'
|
||||||
text += ' documentation.'
|
text += ' documentation.'
|
||||||
text += '*/\n\n'
|
text += '*/\n\n'
|
||||||
text += 'static const unsigned char kDafsa[%s] = {\n' % len(data)
|
text += 'static const unsigned char kDafsa[%s] = {\n' % len(data)
|
||||||
|
@ -445,7 +445,7 @@ def words_to_cxx(words):
|
||||||
|
|
||||||
def words_to_binary(words):
|
def words_to_binary(words):
|
||||||
"""Generates binary DAFSA data from a word list"""
|
"""Generates binary DAFSA data from a word list"""
|
||||||
return words_to_whatever(words, bytearray)
|
return b'.DAFSA@PSL_0 \n' + words_to_whatever(words, bytearray)
|
||||||
|
|
||||||
|
|
||||||
def parse_psl2c(infile):
|
def parse_psl2c(infile):
|
|
@ -0,0 +1,36 @@
|
||||||
|
.TH PSL-MAKE-DAFSA "1" "July 2016" "psl 0.13.0" "User Commands"
|
||||||
|
.SH NAME
|
||||||
|
psl-make-dafsa \- generate a compact and optimized DAFSA from a Public Suffix List
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.B psl-make-dafsa
|
||||||
|
[\fI\,options\/\fR] \fIinfile\fR \fIoutfile\fR
|
||||||
|
.SH DESCRIPTION
|
||||||
|
\fBpsl-make-dafsa\fR produces C/C++ code or an
|
||||||
|
architecture-independent binary object that represents a Deterministic
|
||||||
|
Acyclic Finite State Automaton (DAFSA) from a textual representation
|
||||||
|
of a Public Suffix List. Input and output files must be specified on
|
||||||
|
the command line.
|
||||||
|
|
||||||
|
This compact representation enables optimized queries of the list,
|
||||||
|
saving both time and space when compared to searches of human-readable
|
||||||
|
representations.
|
||||||
|
.SH OPTIONS
|
||||||
|
The format of the data read and written by \fBpsl-make-dafsa\fR
|
||||||
|
depends on options passed to it.
|
||||||
|
.br
|
||||||
|
.TP
|
||||||
|
\fB\-\-input\-format=\fR[\fIpsl2c\fR|\fIpsl\fR]
|
||||||
|
\fBpsl2c\fR: (default) input is C code generated by libpsl/psl2c
|
||||||
|
.br
|
||||||
|
\fBpsl\fR: input is standard textual Public Suffix List file
|
||||||
|
.TP
|
||||||
|
\fB\-\-output\-format=\fR[\fIcxx\fR|\fIbinary\fR]
|
||||||
|
\fBcxx\fR: (default) output is C/C++ code
|
||||||
|
.br
|
||||||
|
\fBbinary\fR: output is an architecture-independent binary format
|
||||||
|
.SH SEE ALSO
|
||||||
|
.IR https://publicsuffix.org/ ", " https://github.com/rockdaboot/libpsl
|
||||||
|
.SH COPYRIGHT
|
||||||
|
\fBpsl-make-dafsa\fR was originally part of the Chromium project, and
|
||||||
|
has been modified by Tim Ruehsen and Daniel Kahn Gillmor. The code
|
||||||
|
and its documentation are governed by a BSD-style license.
|
43
src/psl.c
43
src/psl.c
|
@ -133,7 +133,7 @@ static char *strndup(const char *s, size_t n)
|
||||||
* @stability: Stable
|
* @stability: Stable
|
||||||
* @include: libpsl.h
|
* @include: libpsl.h
|
||||||
*
|
*
|
||||||
* [Public Suffix List](http://publicsuffix.org/) library functions.
|
* [Public Suffix List](https://publicsuffix.org/) library functions.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -628,9 +628,9 @@ static int _str_is_ascii(const char *s)
|
||||||
* The function checks for a valid UTF-8 character sequence before
|
* The function checks for a valid UTF-8 character sequence before
|
||||||
* passing it to idna_to_ascii_8z().
|
* passing it to idna_to_ascii_8z().
|
||||||
*
|
*
|
||||||
* [1] http://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
|
* [1] https://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
|
||||||
* [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
|
* [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
|
||||||
* [3] http://curl.haxx.se/mail/lib-2015-06/0143.html
|
* [3] https://curl.haxx.se/mail/lib-2015-06/0143.html
|
||||||
*/
|
*/
|
||||||
static int _utf8_is_valid(const char *utf8)
|
static int _utf8_is_valid(const char *utf8)
|
||||||
{
|
{
|
||||||
|
@ -930,7 +930,7 @@ suffix_yes:
|
||||||
* @domain: Domain string
|
* @domain: Domain string
|
||||||
*
|
*
|
||||||
* This function checks if @domain is a public suffix by the means of the
|
* This function checks if @domain is a public suffix by the means of the
|
||||||
* [Mozilla Public Suffix List](http://publicsuffix.org).
|
* [Mozilla Public Suffix List](https://publicsuffix.org).
|
||||||
*
|
*
|
||||||
* For cookie domain checking see psl_is_cookie_domain_acceptable().
|
* For cookie domain checking see psl_is_cookie_domain_acceptable().
|
||||||
*
|
*
|
||||||
|
@ -959,7 +959,7 @@ int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
||||||
* @type: Domain type
|
* @type: Domain type
|
||||||
*
|
*
|
||||||
* This function checks if @domain is a public suffix by the means of the
|
* This function checks if @domain is a public suffix by the means of the
|
||||||
* [Mozilla Public Suffix List](http://publicsuffix.org).
|
* [Mozilla Public Suffix List](https://publicsuffix.org).
|
||||||
*
|
*
|
||||||
* @type specifies the PSL section where to perform the lookup. Valid values are
|
* @type specifies the PSL section where to perform the lookup. Valid values are
|
||||||
* %PSL_TYPE_PRIVATE, %PSL_TYPE_ICANN and %PSL_TYPE_ANY.
|
* %PSL_TYPE_PRIVATE, %PSL_TYPE_ICANN and %PSL_TYPE_ANY.
|
||||||
|
@ -987,8 +987,8 @@ int psl_is_public_suffix2(const psl_ctx_t *psl, const char *domain, int type)
|
||||||
* @psl: PSL context
|
* @psl: PSL context
|
||||||
* @domain: Domain string
|
* @domain: Domain string
|
||||||
*
|
*
|
||||||
* This function finds the longest publix suffix part of @domain by the means
|
* This function finds the longest public suffix part of @domain by the means
|
||||||
* of the [Mozilla Public Suffix List](http://publicsuffix.org).
|
* of the [Mozilla Public Suffix List](https://publicsuffix.org).
|
||||||
*
|
*
|
||||||
* International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode).
|
* International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode).
|
||||||
* Other encodings result in unexpected behavior.
|
* Other encodings result in unexpected behavior.
|
||||||
|
@ -1027,7 +1027,7 @@ const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
|
||||||
* @domain: Domain string
|
* @domain: Domain string
|
||||||
*
|
*
|
||||||
* This function finds the shortest private suffix part of @domain by the means
|
* This function finds the shortest private suffix part of @domain by the means
|
||||||
* of the [Mozilla Public Suffix List](http://publicsuffix.org).
|
* of the [Mozilla Public Suffix List](https://publicsuffix.org).
|
||||||
*
|
*
|
||||||
* International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode).
|
* International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode).
|
||||||
* Other encodings result in unexpected behavior.
|
* Other encodings result in unexpected behavior.
|
||||||
|
@ -1110,8 +1110,7 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
||||||
psl_ctx_t *psl;
|
psl_ctx_t *psl;
|
||||||
_psl_entry_t suffix, *suffixp;
|
_psl_entry_t suffix, *suffixp;
|
||||||
char buf[256], *linep, *p;
|
char buf[256], *linep, *p;
|
||||||
size_t n;
|
int type = 0, is_dafsa;
|
||||||
int type = 0;
|
|
||||||
_psl_idna_t *idna;
|
_psl_idna_t *idna;
|
||||||
|
|
||||||
if (!fp)
|
if (!fp)
|
||||||
|
@ -1121,14 +1120,18 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
/* read first line to allow ASCII / DAFSA detection */
|
/* read first line to allow ASCII / DAFSA detection */
|
||||||
if ((n = fread(buf, 1, sizeof(buf) - 1, fp)) < 1)
|
if (!(linep = fgets(buf, sizeof(buf) - 1, fp)))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
buf[n] = 0;
|
is_dafsa = strlen(buf) == 16 && !strncmp(buf, ".DAFSA@PSL_", 11);
|
||||||
|
|
||||||
if (!strstr(buf, "This Source Code Form is subject to")) {
|
if (is_dafsa) {
|
||||||
void *m;
|
void *m;
|
||||||
size_t size = 65536, len = n;
|
size_t size = 65536, n, len = 0;
|
||||||
|
int version = atoi(buf + 11);
|
||||||
|
|
||||||
|
if (version != 0)
|
||||||
|
goto fail;
|
||||||
|
|
||||||
if (!(psl->dafsa = malloc(size)))
|
if (!(psl->dafsa = malloc(size)))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
@ -1148,20 +1151,20 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
||||||
if ((m = realloc(psl->dafsa, len)))
|
if ((m = realloc(psl->dafsa, len)))
|
||||||
psl->dafsa = m;
|
psl->dafsa = m;
|
||||||
|
|
||||||
|
psl->dafsa_size = len;
|
||||||
|
|
||||||
return psl;
|
return psl;
|
||||||
}
|
}
|
||||||
|
|
||||||
rewind(fp);
|
|
||||||
|
|
||||||
idna = _psl_idna_open();
|
idna = _psl_idna_open();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
* as of 02.11.2012, the list at https://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
||||||
* as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
* as of 19.02.2014, the list at https://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
||||||
*/
|
*/
|
||||||
psl->suffixes = _vector_alloc(8*1024, _suffix_compare_array);
|
psl->suffixes = _vector_alloc(8*1024, _suffix_compare_array);
|
||||||
|
|
||||||
while ((linep = fgets(buf, sizeof(buf), fp))) {
|
do {
|
||||||
while (_isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
|
while (_isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
|
||||||
if (!*linep) continue; /* skip empty lines */
|
if (!*linep) continue; /* skip empty lines */
|
||||||
|
|
||||||
|
@ -1232,7 +1235,7 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
||||||
|
|
||||||
_add_punycode_if_needed(idna, psl->suffixes, suffixp);
|
_add_punycode_if_needed(idna, psl->suffixes, suffixp);
|
||||||
}
|
}
|
||||||
}
|
} while ((linep = fgets(buf, sizeof(buf), fp)));
|
||||||
|
|
||||||
_vector_sort(psl->suffixes);
|
_vector_sort(psl->suffixes);
|
||||||
|
|
||||||
|
|
|
@ -206,7 +206,7 @@ static int _print_psl_entries_dafsa_binary(const char *fname, const _psl_vector_
|
||||||
return 3;
|
return 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
snprintf(cmd, sizeof(cmd), MAKE_DAFSA " --binary in.tmp %s", fname);
|
snprintf(cmd, sizeof(cmd), MAKE_DAFSA " --output-format=binary in.tmp %s", fname);
|
||||||
if ((rc = system(cmd))) {
|
if ((rc = system(cmd))) {
|
||||||
fprintf(stderr, "Failed to execute '%s' (%d)\n", cmd, rc);
|
fprintf(stderr, "Failed to execute '%s' (%d)\n", cmd, rc);
|
||||||
ret = 2;
|
ret = 2;
|
||||||
|
|
|
@ -0,0 +1,63 @@
|
||||||
|
.TH PSL "1" "July 2016" "psl 0.13.0" "User Commands"
|
||||||
|
.SH NAME
|
||||||
|
psl \- Explore the Public Suffix List
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.B psl
|
||||||
|
[\fI\,options\/\fR] \fI\,<domains\/\fR...\fI\,>\/\fR
|
||||||
|
.SH DESCRIPTION
|
||||||
|
`psl' explores the Public Suffix List. It takes a list of domains on
|
||||||
|
the command line, or if no domains are present on the command line, it
|
||||||
|
reads one domain per line from standard input. It prints its results
|
||||||
|
to standard output, with each line containing one domain followed by a
|
||||||
|
colon, followed by the relevant information for that domain.
|
||||||
|
.SH MODES
|
||||||
|
The information printed per domain changes based on the selected mode.
|
||||||
|
.br
|
||||||
|
Available modes are:
|
||||||
|
.TP
|
||||||
|
\fB\-\-is\-public\-suffix\fR
|
||||||
|
check if domains are public suffixes.
|
||||||
|
.br
|
||||||
|
Returned data: 1 if the \fIdomain\fR is a public suffix, 0 otherwise.
|
||||||
|
[default]
|
||||||
|
.TP
|
||||||
|
\fB\-\-is\-cookie\-domain\-acceptable\fR <cookie\-domain>
|
||||||
|
check if cookie\-domain is acceptable for domains.
|
||||||
|
.br
|
||||||
|
Returned data: 1 if \fIcookie-domain\fR is acceptable for the
|
||||||
|
\fIdomain\fR, 0 otherwise.
|
||||||
|
.TP
|
||||||
|
\fB\-\-print\-unreg\-domain\fR
|
||||||
|
Returned data: the longest public suffix part for each \fIdomain\fR.
|
||||||
|
.TP
|
||||||
|
\fB\-\-print\-reg\-domain\fR
|
||||||
|
Returned data: the shortest private suffix part for each \fIdomain\fR.
|
||||||
|
.SH VERSION INFORMATION
|
||||||
|
`psl' can instead be used to report information about the version of
|
||||||
|
the library and its built-in Public Suffix data:
|
||||||
|
.TP
|
||||||
|
\fB\-\-version\fR
|
||||||
|
show library version information
|
||||||
|
.TP
|
||||||
|
\fB\-\-print\-info\fR
|
||||||
|
print info about library builtin data
|
||||||
|
.SH PUBLIC SUFFIX DATA
|
||||||
|
By default, `psl' will use built-in Public Suffix data.
|
||||||
|
.br
|
||||||
|
You can also direct it to use a different file:
|
||||||
|
.TP
|
||||||
|
\fB\-\-use\-builtin\-data\fR
|
||||||
|
use the builtin PSL data [default]
|
||||||
|
.TP
|
||||||
|
\fB\-\-load\-psl\-file\fR <filename>
|
||||||
|
load PSL data from file
|
||||||
|
.SH SEE ALSO
|
||||||
|
https://publicsuffix.org/
|
||||||
|
https://github.com/rockdaboot/libpsl
|
||||||
|
.SH COPYRIGHT
|
||||||
|
libpsl and `psl' are copyright \(co 2014\-2016 Tim Ruehsen under an
|
||||||
|
MIT-style License.
|
||||||
|
.br
|
||||||
|
This documentation was written by Daniel Kahn Gillmor for the Debian
|
||||||
|
project, but may be used by others under the same license as libpsl
|
||||||
|
itself.
|
|
@ -50,7 +50,7 @@ static void usage(int err, FILE* f)
|
||||||
fprintf(f, " --is-public-suffix check if domains are public suffixes [default]\n");
|
fprintf(f, " --is-public-suffix check if domains are public suffixes [default]\n");
|
||||||
fprintf(f, " --is-cookie-domain-acceptable <cookie-domain>\n");
|
fprintf(f, " --is-cookie-domain-acceptable <cookie-domain>\n");
|
||||||
fprintf(f, " check if cookie-domain is acceptable for domains\n");
|
fprintf(f, " check if cookie-domain is acceptable for domains\n");
|
||||||
fprintf(f, " --print-unreg-domain print the longest publix suffix part\n");
|
fprintf(f, " --print-unreg-domain print the longest public suffix part\n");
|
||||||
fprintf(f, " --print-reg-domain print the shortest private suffix part\n");
|
fprintf(f, " --print-reg-domain print the shortest private suffix part\n");
|
||||||
fprintf(f, " --print-info print info about library builtin data\n");
|
fprintf(f, " --print-info print info about library builtin data\n");
|
||||||
fprintf(f, "\n");
|
fprintf(f, "\n");
|
||||||
|
|
Loading…
Reference in New Issue