Merge pull request #52 from dkg/dafsa-reorg
rename src/make_dafsa.py to src/psl-make-dafsa, add documentation
This commit is contained in:
commit
583cf6ae56
|
@ -78,7 +78,7 @@ representation of strings. Here we use it to reduce the whole PSL to about 32k i
|
||||||
|
|
||||||
Generate `psl.dafsa` from `list/public_suffix_list.dat`
|
Generate `psl.dafsa` from `list/public_suffix_list.dat`
|
||||||
|
|
||||||
$ src/make_dafsa.py --output-format=binary --input-format=psl list/public_suffix_list.dat psl.dafsa
|
$ src/psl-make-dafsa --output-format=binary --input-format=psl list/public_suffix_list.dat psl.dafsa
|
||||||
|
|
||||||
Test the result (example)
|
Test the result (example)
|
||||||
|
|
||||||
|
@ -90,7 +90,7 @@ License
|
||||||
Libpsl is made available under the terms of the MIT license.<br>
|
Libpsl is made available under the terms of the MIT license.<br>
|
||||||
See the LICENSE file that accompanies this distribution for the full text of the license.
|
See the LICENSE file that accompanies this distribution for the full text of the license.
|
||||||
|
|
||||||
src/make_dafsa.py and src/lookup_string_in_fixed_set.c are licensed under the term written in
|
src/psl-make-dafsa and src/lookup_string_in_fixed_set.c are licensed under the terms written in
|
||||||
src/LICENSE.chromium.
|
src/LICENSE.chromium.
|
||||||
|
|
||||||
Building from git
|
Building from git
|
||||||
|
|
|
@ -72,7 +72,7 @@ AS_IF([ test "$enable_man" != no ], [
|
||||||
AC_MSG_RESULT([no])
|
AC_MSG_RESULT([no])
|
||||||
])
|
])
|
||||||
|
|
||||||
# src/make_dafsa.py needs python 2.7+
|
# src/psl-make-dafsa needs python 2.7+
|
||||||
AM_PATH_PYTHON([2.7])
|
AM_PATH_PYTHON([2.7])
|
||||||
|
|
||||||
PKG_PROG_PKG_CONFIG
|
PKG_PROG_PKG_CONFIG
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
* The following License is for the source code files
|
* The following License is for the source code files
|
||||||
make_dafsa.py and lookup_string_in_fixed_set.c.
|
psl-make-dafsa and lookup_string_in_fixed_set.c.
|
||||||
|
|
||||||
// Copyright 2015 The Chromium Authors. All rights reserved.
|
// Copyright 2015 The Chromium Authors. All rights reserved.
|
||||||
//
|
//
|
||||||
|
|
|
@ -22,7 +22,7 @@ endif
|
||||||
|
|
||||||
noinst_PROGRAMS = psl2c
|
noinst_PROGRAMS = psl2c
|
||||||
psl2c_SOURCES = psl2c.c lookup_string_in_fixed_set.c
|
psl2c_SOURCES = psl2c.c lookup_string_in_fixed_set.c
|
||||||
psl2c_CPPFLAGS = -I$(top_srcdir)/include -DMAKE_DAFSA=\"$(top_srcdir)/src/make_dafsa.py\"
|
psl2c_CPPFLAGS = -I$(top_srcdir)/include -DMAKE_DAFSA=\"$(top_srcdir)/src/psl-make-dafsa\"
|
||||||
if BUILTIN_GENERATOR_LIBICU
|
if BUILTIN_GENERATOR_LIBICU
|
||||||
psl2c_LDADD = -licuuc
|
psl2c_LDADD = -licuuc
|
||||||
endif
|
endif
|
||||||
|
@ -38,4 +38,4 @@ endif
|
||||||
suffixes_dafsa.c: $(PSL_FILE) psl2c$(EXEEXT)
|
suffixes_dafsa.c: $(PSL_FILE) psl2c$(EXEEXT)
|
||||||
./psl2c$(EXEEXT) "$(PSL_FILE)" suffixes_dafsa.c
|
./psl2c$(EXEEXT) "$(PSL_FILE)" suffixes_dafsa.c
|
||||||
|
|
||||||
EXTRA_DIST = make_dafsa.py LICENSE.chromium
|
EXTRA_DIST = psl-make-dafsa LICENSE.chromium
|
||||||
|
|
|
@ -118,14 +118,14 @@ static int GetReturnValue(const unsigned char* offset,
|
||||||
* Looks up the string |key| with length |key_length| in a fixed set of
|
* Looks up the string |key| with length |key_length| in a fixed set of
|
||||||
* strings. The set of strings must be known at compile time. It is converted to
|
* strings. The set of strings must be known at compile time. It is converted to
|
||||||
* a graph structure named a DAFSA (Deterministic Acyclic Finite State
|
* a graph structure named a DAFSA (Deterministic Acyclic Finite State
|
||||||
* Automaton) by the script make_dafsa.py during compilation. This permits
|
* Automaton) by the script psl-make-dafsa during compilation. This permits
|
||||||
* efficient (in time and space) lookup. The graph generated by make_dafsa.py
|
* efficient (in time and space) lookup. The graph generated by psl-make-dafsa
|
||||||
* takes the form of a constant byte array which should be supplied via the
|
* takes the form of a constant byte array which should be supplied via the
|
||||||
* |graph| and |length| parameters. The return value is kDafsaNotFound,
|
* |graph| and |length| parameters. The return value is kDafsaNotFound,
|
||||||
* kDafsaFound, or a bitmap consisting of one or more of kDafsaExceptionRule,
|
* kDafsaFound, or a bitmap consisting of one or more of kDafsaExceptionRule,
|
||||||
* kDafsaWildcardRule and kDafsaPrivateRule ORed together.
|
* kDafsaWildcardRule and kDafsaPrivateRule ORed together.
|
||||||
*
|
*
|
||||||
* Lookup a domain key in a byte array generated by make_dafsa.py.
|
* Look up a domain key in a byte array generated by psl-make-dafsa.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* prototype to skip warning with -Wmissing-prototypes */
|
/* prototype to skip warning with -Wmissing-prototypes */
|
||||||
|
|
|
@ -418,7 +418,7 @@ def encode(dafsa):
|
||||||
def to_cxx(data):
|
def to_cxx(data):
|
||||||
"""Generates C++ code from a list of encoded bytes."""
|
"""Generates C++ code from a list of encoded bytes."""
|
||||||
text = '/* This file is generated. DO NOT EDIT!\n\n'
|
text = '/* This file is generated. DO NOT EDIT!\n\n'
|
||||||
text += 'The byte array encodes effective tld names. See make_dafsa.py for'
|
text += 'The byte array encodes effective tld names. See psl-make-dafsa source for'
|
||||||
text += ' documentation.'
|
text += ' documentation.'
|
||||||
text += '*/\n\n'
|
text += '*/\n\n'
|
||||||
text += 'static const unsigned char kDafsa[%s] = {\n' % len(data)
|
text += 'static const unsigned char kDafsa[%s] = {\n' % len(data)
|
|
@ -0,0 +1,36 @@
|
||||||
|
.TH PSL "1" "July 2016" "psl 0.13.0" "User Commands"
|
||||||
|
.SH NAME
|
||||||
|
psl-make-dafsa \- generate a compact and optimized DAFSA from a Public Suffix List
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.B psl-make-dafsa
|
||||||
|
[\fI\,options\/\fR] \fIinfile\fR \fIoutfile\fR
|
||||||
|
.SH DESCRIPTION
|
||||||
|
\fBpsl-make-dafsa\fR produces C/C++ code or an
|
||||||
|
architecture-independent binary object that represents a Deterministic
|
||||||
|
Acyclic Finite State Automaton (DAFSA) from a textual representation
|
||||||
|
of a Public Suffix List. Input and output files must be specified on
|
||||||
|
the command line.
|
||||||
|
|
||||||
|
This compact representation enables optimized queries of the list,
|
||||||
|
saving both time and space when compared to searches of human-readable
|
||||||
|
representations.
|
||||||
|
.SH OPTIONS
|
||||||
|
The format of the data read and written by \fBpsl-make-dafsa\fR
|
||||||
|
depends on options passed to it.
|
||||||
|
.br
|
||||||
|
.TP
|
||||||
|
\fB\-\-input\-format=\fR[\fIpsl2c\fR|\fIpsl\fR]
|
||||||
|
\fBpsl2c\fR: (default) input is C code generated by libpsl/psl2c
|
||||||
|
.br
|
||||||
|
\fBpsl\fR: input is a standard textual Public Suffix List file
|
||||||
|
.TP
|
||||||
|
\fB\-\-output\-format=\fR[\fIcxx\fR|\fIbinary\fR]
|
||||||
|
\fBcxx\fR: (default) output is C/C++ code
|
||||||
|
.br
|
||||||
|
\fBbinary\fR: output is an architecture-independent binary format
|
||||||
|
.SH SEE ALSO
|
||||||
|
.IR https://publicsuffix.org/ ", " https://github.com/rockdaboot/libpsl
|
||||||
|
.SH COPYRIGHT
|
||||||
|
\fBpsl-make-dafsa\fR was originally part of the Chromium project, and
|
||||||
|
has been modified by Tim Ruehsen and Daniel Kahn Gillmor. The code
|
||||||
|
and its documentation are governed by a BSD-style license.
|
Loading…
Reference in New Issue