Merge branch 'master' into debian
This commit is contained in:
commit
df2e65b9d2
11
.travis.yml
11
.travis.yml
|
@ -3,8 +3,15 @@ compiler:
|
||||||
- gcc
|
- gcc
|
||||||
- clang
|
- clang
|
||||||
# Change this to your needs
|
# Change this to your needs
|
||||||
script: ./autogen.sh && ./configure --enable-gtk-doc && make -j4 && make check -j4 && make distcheck
|
script:
|
||||||
|
- ./autogen.sh
|
||||||
|
- ./configure && make -j4 && make check -j4
|
||||||
|
- ./configure --without-libicu && make clean && make -j4 && make check -j4
|
||||||
|
- ./configure --disable-builtin && make clean && make -j4 && make check -j4
|
||||||
|
- ./configure --disable-builtin --without-libicu && make clean && make -j4 && make check -j4
|
||||||
|
- ./configure --enable-gtk-doc && make -j4 && make check -j4
|
||||||
|
- make distcheck
|
||||||
before_install:
|
before_install:
|
||||||
- apt-cache search libicu | grep icu
|
- apt-cache search libicu | grep icu
|
||||||
- sudo apt-get -qq update
|
- sudo apt-get -qq update
|
||||||
- sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu-dev
|
- sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu48 libicu-dev
|
||||||
|
|
4
AUTHORS
4
AUTHORS
|
@ -8,4 +8,6 @@ Please drop me a note if you feel you should have
|
||||||
been mentioned here.
|
been mentioned here.
|
||||||
|
|
||||||
Tim Ruehsen (Implementation of libpsl)
|
Tim Ruehsen (Implementation of libpsl)
|
||||||
Daniel Kahn Gillmor (Discussion, Ideas, Organization)
|
Daniel Kahn Gillmor (Discussion, Ideas, Organization, Code)
|
||||||
|
Daniel Stenberg (Discussion, Ideas)
|
||||||
|
Darshit Shah (Patching Wget to work with libpsl)
|
||||||
|
|
9
NEWS
9
NEWS
|
@ -1,5 +1,14 @@
|
||||||
Copyright (C) 2014 Tim Ruehsen
|
Copyright (C) 2014 Tim Ruehsen
|
||||||
|
|
||||||
|
23.06.2014 Release V0.4.0
|
||||||
|
* depend on libicu for punycode, utf-8 and lowercase conversions
|
||||||
|
* added function psl_str_to_utf8lower()
|
||||||
|
* fixed locale issues
|
||||||
|
* introducing psl_error_t for error codes + defines
|
||||||
|
* removed redundant code from psl2c.c
|
||||||
|
* updated docs
|
||||||
|
* psl utility reads from stdin if no argument specified
|
||||||
|
|
||||||
10.06.2014 Release V0.3.1
|
10.06.2014 Release V0.3.1
|
||||||
* link psl utility dynamically
|
* link psl utility dynamically
|
||||||
* fix output of psl_filename()
|
* fix output of psl_filename()
|
||||||
|
|
47
configure.ac
47
configure.ac
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
AC_INIT([libpsl], [0.3.1], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl])
|
AC_INIT([libpsl], [0.4.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl])
|
||||||
AC_PREREQ([2.59])
|
AC_PREREQ([2.59])
|
||||||
AM_INIT_AUTOMAKE([1.10 -Wall no-define])
|
AM_INIT_AUTOMAKE([1.10 -Wall no-define])
|
||||||
|
|
||||||
|
@ -62,10 +62,35 @@ AS_IF([ test "$enable_man" != no ], [
|
||||||
# 3. If the library source code has changed at all since the last update, then increment revision (‘c:r:a’ becomes ‘c:r+1:a’).
|
# 3. If the library source code has changed at all since the last update, then increment revision (‘c:r:a’ becomes ‘c:r+1:a’).
|
||||||
# 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0.
|
# 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0.
|
||||||
# 5. If any interfaces have been added since the last public release, then increment age.
|
# 5. If any interfaces have been added since the last public release, then increment age.
|
||||||
# 6. If any interfaces have been removed or changed since the last public release, then set age to 0.
|
# 6. If any existing interfaces have been removed or changed since the last public release, then set age to 0.
|
||||||
AC_SUBST([LIBPSL_SO_VERSION], [1:1:1])
|
AC_SUBST([LIBPSL_SO_VERSION], [2:0:2])
|
||||||
AC_SUBST([LIBPSL_VERSION], $VERSION)
|
AC_SUBST([LIBPSL_VERSION], $VERSION)
|
||||||
|
|
||||||
|
# Check for libicu
|
||||||
|
HAVE_LIBICU=no
|
||||||
|
AC_ARG_WITH(libicu,
|
||||||
|
AC_HELP_STRING([--without-libicu], [build libpsl without IDNA/Punycode support]),
|
||||||
|
[],
|
||||||
|
[
|
||||||
|
# using pkg-config won't work on older systems like Ubuntu 12.04 LTS Server Edition 64bit
|
||||||
|
OLDLIBS=$LIBS
|
||||||
|
LIBS="-licuuc $LIBS"
|
||||||
|
AC_MSG_CHECKING([for ICU unicode library])
|
||||||
|
AC_LINK_IFELSE(
|
||||||
|
[AC_LANG_PROGRAM(
|
||||||
|
[[#include <unicode/ustring.h>]],
|
||||||
|
[[u_strToUTF8(NULL, 0, NULL, NULL, 0, NULL);]])],
|
||||||
|
[HAVE_LIBICU=yes; AC_MSG_RESULT([yes]) AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])],
|
||||||
|
[LIBS=$OLDLIBS; AC_MSG_ERROR([no working ICU unicode library was found])])
|
||||||
|
|
||||||
|
# AC_SEARCH_LIBS(uidna_close, icuuc,
|
||||||
|
# [HAVE_LIBICU=yes; AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])],
|
||||||
|
# [AC_MSG_ERROR(*** libicu was not found. Aborting.)],
|
||||||
|
# -licudata )
|
||||||
|
# PKG_CHECK_MODULES(LIBICU, [icu-uc],
|
||||||
|
# [HAVE_LIBICU=yes; AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])])
|
||||||
|
])
|
||||||
|
|
||||||
# Check for enable/disable builtin PSL data
|
# Check for enable/disable builtin PSL data
|
||||||
AC_ARG_ENABLE(builtin,
|
AC_ARG_ENABLE(builtin,
|
||||||
AS_HELP_STRING([--disable-builtin], [do not compile PSL data into library]),
|
AS_HELP_STRING([--disable-builtin], [do not compile PSL data into library]),
|
||||||
|
@ -74,10 +99,11 @@ AC_ARG_ENABLE(builtin,
|
||||||
], [
|
], [
|
||||||
enable_builtin=yes
|
enable_builtin=yes
|
||||||
AC_DEFINE([WITH_BUILTIN], [1], [compile PSL data into library])
|
AC_DEFINE([WITH_BUILTIN], [1], [compile PSL data into library])
|
||||||
|
AS_IF([test $HAVE_LIBICU != yes],
|
||||||
PKG_CHECK_MODULES(LIBICU, [icu-uc],
|
[
|
||||||
[AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])],
|
# Check for idn2 fallback to generate punycode
|
||||||
[AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2'.))])
|
AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2' as fallback.))
|
||||||
|
])
|
||||||
])
|
])
|
||||||
AM_CONDITIONAL([WITH_BUILTIN], [test $enable_builtin = yes])
|
AM_CONDITIONAL([WITH_BUILTIN], [test $enable_builtin = yes])
|
||||||
|
|
||||||
|
@ -102,16 +128,14 @@ fi
|
||||||
|
|
||||||
# Check for custom PSL file
|
# Check for custom PSL file
|
||||||
AC_ARG_WITH(psl-file,
|
AC_ARG_WITH(psl-file,
|
||||||
AC_HELP_STRING([--with-psl-file=[PATH]],
|
AC_HELP_STRING([--with-psl-file=[PATH]], [path to PSL file]),
|
||||||
[path to PSL file]),
|
|
||||||
PSL_FILE=$withval,
|
PSL_FILE=$withval,
|
||||||
PSL_FILE="\$(top_srcdir)/data/effective_tld_names.dat")
|
PSL_FILE="\$(top_srcdir)/data/effective_tld_names.dat")
|
||||||
AC_SUBST(PSL_FILE)
|
AC_SUBST(PSL_FILE)
|
||||||
|
|
||||||
# Check for custom PSL test file
|
# Check for custom PSL test file
|
||||||
AC_ARG_WITH(psl-testfile,
|
AC_ARG_WITH(psl-testfile,
|
||||||
AC_HELP_STRING([--with-psl-testfile=[PATH]],
|
AC_HELP_STRING([--with-psl-testfile=[PATH]], [path to PSL test file]),
|
||||||
[path to PSL test file]),
|
|
||||||
PSL_TESTFILE=$withval,
|
PSL_TESTFILE=$withval,
|
||||||
PSL_TESTFILE="\$(top_srcdir)/data/test_psl.txt")
|
PSL_TESTFILE="\$(top_srcdir)/data/test_psl.txt")
|
||||||
AC_SUBST(PSL_TESTFILE)
|
AC_SUBST(PSL_TESTFILE)
|
||||||
|
@ -138,6 +162,7 @@ AC_MSG_NOTICE([Summary of build options:
|
||||||
Compiler: ${CC}
|
Compiler: ${CC}
|
||||||
CFlags: ${CFLAGS} ${CPPFLAGS}
|
CFlags: ${CFLAGS} ${CPPFLAGS}
|
||||||
LDFlags: ${LDFLAGS}
|
LDFlags: ${LDFLAGS}
|
||||||
|
ICU: ${HAVE_LIBICU}
|
||||||
Builtin PSL: ${enable_builtin}
|
Builtin PSL: ${enable_builtin}
|
||||||
PSL File: ${PSL_FILE}
|
PSL File: ${PSL_FILE}
|
||||||
PSL Test File: ${PSL_TESTFILE}
|
PSL Test File: ${PSL_TESTFILE}
|
||||||
|
|
|
@ -180,6 +180,7 @@ ar
|
||||||
com.ar
|
com.ar
|
||||||
edu.ar
|
edu.ar
|
||||||
gob.ar
|
gob.ar
|
||||||
|
gov.ar
|
||||||
int.ar
|
int.ar
|
||||||
mil.ar
|
mil.ar
|
||||||
net.ar
|
net.ar
|
||||||
|
@ -222,7 +223,6 @@ edu.au
|
||||||
gov.au
|
gov.au
|
||||||
asn.au
|
asn.au
|
||||||
id.au
|
id.au
|
||||||
csiro.au
|
|
||||||
// Historic 2LDs (closed to new registration, but sites still exist)
|
// Historic 2LDs (closed to new registration, but sites still exist)
|
||||||
info.au
|
info.au
|
||||||
conf.au
|
conf.au
|
||||||
|
@ -245,7 +245,7 @@ sa.edu.au
|
||||||
tas.edu.au
|
tas.edu.au
|
||||||
vic.edu.au
|
vic.edu.au
|
||||||
wa.edu.au
|
wa.edu.au
|
||||||
act.gov.au
|
// act.gov.au Bug 984824 - Removed at request of Greg Tankard
|
||||||
// nsw.gov.au Bug 547985 - Removed at request of <Shae.Donelan@services.nsw.gov.au>
|
// nsw.gov.au Bug 547985 - Removed at request of <Shae.Donelan@services.nsw.gov.au>
|
||||||
// nt.gov.au Bug 940478 - Removed at request of Greg Connors <Greg.Connors@nt.gov.au>
|
// nt.gov.au Bug 940478 - Removed at request of Greg Connors <Greg.Connors@nt.gov.au>
|
||||||
qld.gov.au
|
qld.gov.au
|
||||||
|
@ -292,6 +292,7 @@ rs.ba
|
||||||
// bb : http://en.wikipedia.org/wiki/.bb
|
// bb : http://en.wikipedia.org/wiki/.bb
|
||||||
bb
|
bb
|
||||||
biz.bb
|
biz.bb
|
||||||
|
co.bb
|
||||||
com.bb
|
com.bb
|
||||||
edu.bb
|
edu.bb
|
||||||
gov.bb
|
gov.bb
|
||||||
|
@ -299,6 +300,7 @@ info.bb
|
||||||
net.bb
|
net.bb
|
||||||
org.bb
|
org.bb
|
||||||
store.bb
|
store.bb
|
||||||
|
tv.bb
|
||||||
|
|
||||||
// bd : http://en.wikipedia.org/wiki/.bd
|
// bd : http://en.wikipedia.org/wiki/.bd
|
||||||
*.bd
|
*.bd
|
||||||
|
@ -596,9 +598,12 @@ gob.cl
|
||||||
co.cl
|
co.cl
|
||||||
mil.cl
|
mil.cl
|
||||||
|
|
||||||
// cm : http://en.wikipedia.org/wiki/.cm
|
// cm : http://en.wikipedia.org/wiki/.cm plus bug 981927
|
||||||
cm
|
cm
|
||||||
|
co.cm
|
||||||
|
com.cm
|
||||||
gov.cm
|
gov.cm
|
||||||
|
net.cm
|
||||||
|
|
||||||
// cn : http://en.wikipedia.org/wiki/.cn
|
// cn : http://en.wikipedia.org/wiki/.cn
|
||||||
// Submitted by registry <tanyaling@cnnic.cn> 2008-06-11
|
// Submitted by registry <tanyaling@cnnic.cn> 2008-06-11
|
||||||
|
@ -5146,7 +5151,24 @@ com.nr
|
||||||
nu
|
nu
|
||||||
|
|
||||||
// nz : http://en.wikipedia.org/wiki/.nz
|
// nz : http://en.wikipedia.org/wiki/.nz
|
||||||
*.nz
|
// Confirmed by registry <jay@nzrs.net.nz> 2014-05-19
|
||||||
|
nz
|
||||||
|
ac.nz
|
||||||
|
co.nz
|
||||||
|
cri.nz
|
||||||
|
geek.nz
|
||||||
|
gen.nz
|
||||||
|
govt.nz
|
||||||
|
health.nz
|
||||||
|
iwi.nz
|
||||||
|
kiwi.nz
|
||||||
|
maori.nz
|
||||||
|
mil.nz
|
||||||
|
māori.nz
|
||||||
|
net.nz
|
||||||
|
org.nz
|
||||||
|
parliament.nz
|
||||||
|
school.nz
|
||||||
|
|
||||||
// om : http://en.wikipedia.org/wiki/.om
|
// om : http://en.wikipedia.org/wiki/.om
|
||||||
om
|
om
|
||||||
|
@ -5613,7 +5635,6 @@ oryol.ru
|
||||||
palana.ru
|
palana.ru
|
||||||
penza.ru
|
penza.ru
|
||||||
perm.ru
|
perm.ru
|
||||||
pskov.ru
|
|
||||||
ptz.ru
|
ptz.ru
|
||||||
rnd.ru
|
rnd.ru
|
||||||
ryazan.ru
|
ryazan.ru
|
||||||
|
@ -6150,19 +6171,19 @@ com.ug
|
||||||
org.ug
|
org.ug
|
||||||
|
|
||||||
// uk : http://en.wikipedia.org/wiki/.uk
|
// uk : http://en.wikipedia.org/wiki/.uk
|
||||||
// Submitted by registry <noc@nominet.org.uk> 2012-10-02
|
// Submitted by registry <Michael.Daly@nominet.org.uk>
|
||||||
// and tweaked by us pending further consultation.
|
uk
|
||||||
*.uk
|
ac.uk
|
||||||
|
co.uk
|
||||||
|
gov.uk
|
||||||
|
ltd.uk
|
||||||
|
me.uk
|
||||||
|
net.uk
|
||||||
|
nhs.uk
|
||||||
|
org.uk
|
||||||
|
plc.uk
|
||||||
|
police.uk
|
||||||
*.sch.uk
|
*.sch.uk
|
||||||
!bl.uk
|
|
||||||
!british-library.uk
|
|
||||||
!jet.uk
|
|
||||||
!mod.uk
|
|
||||||
!national-library-scotland.uk
|
|
||||||
!nel.uk
|
|
||||||
!nic.uk
|
|
||||||
!nls.uk
|
|
||||||
!parliament.uk
|
|
||||||
|
|
||||||
// us : http://en.wikipedia.org/wiki/.us
|
// us : http://en.wikipedia.org/wiki/.us
|
||||||
us
|
us
|
||||||
|
@ -6440,16 +6461,24 @@ edu.vc
|
||||||
|
|
||||||
// ve : https://registro.nic.ve/
|
// ve : https://registro.nic.ve/
|
||||||
// Confirmed by registry 2012-10-04
|
// Confirmed by registry 2012-10-04
|
||||||
|
// Updated 2014-05-20 - Bug 940478
|
||||||
ve
|
ve
|
||||||
|
arts.ve
|
||||||
co.ve
|
co.ve
|
||||||
com.ve
|
com.ve
|
||||||
e12.ve
|
e12.ve
|
||||||
edu.ve
|
edu.ve
|
||||||
|
firm.ve
|
||||||
|
gob.ve
|
||||||
gov.ve
|
gov.ve
|
||||||
info.ve
|
info.ve
|
||||||
|
int.ve
|
||||||
mil.ve
|
mil.ve
|
||||||
net.ve
|
net.ve
|
||||||
org.ve
|
org.ve
|
||||||
|
rec.ve
|
||||||
|
store.ve
|
||||||
|
tec.ve
|
||||||
web.ve
|
web.ve
|
||||||
|
|
||||||
// vg : http://en.wikipedia.org/wiki/.vg
|
// vg : http://en.wikipedia.org/wiki/.vg
|
||||||
|
@ -6482,8 +6511,12 @@ pro.vn
|
||||||
health.vn
|
health.vn
|
||||||
|
|
||||||
// vu : http://en.wikipedia.org/wiki/.vu
|
// vu : http://en.wikipedia.org/wiki/.vu
|
||||||
// list of 2nd level tlds ?
|
// http://www.vunic.vu/
|
||||||
vu
|
vu
|
||||||
|
com.vu
|
||||||
|
edu.vu
|
||||||
|
net.vu
|
||||||
|
org.vu
|
||||||
|
|
||||||
// wf : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf
|
// wf : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf
|
||||||
wf
|
wf
|
||||||
|
@ -6609,7 +6642,14 @@ yt
|
||||||
فلسطين
|
فلسطين
|
||||||
|
|
||||||
// xn--90a3ac ("srb" Cyrillic) : RS
|
// xn--90a3ac ("srb" Cyrillic) : RS
|
||||||
|
// http://www.rnids.rs/en/the-.срб-domain
|
||||||
срб
|
срб
|
||||||
|
пр.срб
|
||||||
|
орг.срб
|
||||||
|
обр.срб
|
||||||
|
од.срб
|
||||||
|
упр.срб
|
||||||
|
ак.срб
|
||||||
|
|
||||||
// xn--p1ai ("rf" Russian-Cyrillic) : RU
|
// xn--p1ai ("rf" Russian-Cyrillic) : RU
|
||||||
// http://www.cctld.ru/en/docs/rulesrf.php
|
// http://www.cctld.ru/en/docs/rulesrf.php
|
||||||
|
@ -7654,6 +7694,299 @@ sca
|
||||||
// reise : 2014-03-13 dotreise GmbH
|
// reise : 2014-03-13 dotreise GmbH
|
||||||
reise
|
reise
|
||||||
|
|
||||||
|
// accountants : 2014-03-20 Knob Town, LLC
|
||||||
|
accountants
|
||||||
|
|
||||||
|
// clinic : 2014-03-20 Goose Park, LLC
|
||||||
|
clinic
|
||||||
|
|
||||||
|
// versicherung : 2014-03-20 dotversicherung-registry GmbH
|
||||||
|
versicherung
|
||||||
|
|
||||||
|
// top : 2014-03-20 Jiangsu Bangning Science & Technology Co.,Ltd.
|
||||||
|
top
|
||||||
|
|
||||||
|
// furniture : 2014-03-20 Lone Fields, LLC
|
||||||
|
furniture
|
||||||
|
|
||||||
|
// dental : 2014-03-20 Tin Birch, LLC
|
||||||
|
dental
|
||||||
|
|
||||||
|
// fund : 2014-03-20 John Castle, LLC
|
||||||
|
fund
|
||||||
|
|
||||||
|
// creditcard : 2014-03-20 Binky Frostbite, LLC
|
||||||
|
creditcard
|
||||||
|
|
||||||
|
// insure : 2014-03-20 Pioneer Willow, LLC
|
||||||
|
insure
|
||||||
|
|
||||||
|
// audio : 2014-03-20 Uniregistry, Corp.
|
||||||
|
audio
|
||||||
|
|
||||||
|
// claims : 2014-03-20 Black Corner, LLC
|
||||||
|
claims
|
||||||
|
|
||||||
|
// loans : 2014-03-20 June Woods, LLC
|
||||||
|
loans
|
||||||
|
|
||||||
|
// auction : 2014-03-20 Sand Galley, LLC
|
||||||
|
auction
|
||||||
|
|
||||||
|
// attorney : 2014-03-20 Victor North, LLC
|
||||||
|
attorney
|
||||||
|
|
||||||
|
// finance : 2014-03-20 Cotton Cypress, LLC
|
||||||
|
finance
|
||||||
|
|
||||||
|
// investments : 2014-03-20 Holly Glen, LLC
|
||||||
|
investments
|
||||||
|
|
||||||
|
// juegos : 2014-03-20 Uniregistry, Corp.
|
||||||
|
juegos
|
||||||
|
|
||||||
|
// dentist : 2014-03-20 Outer Lake, LLC
|
||||||
|
dentist
|
||||||
|
|
||||||
|
// lds : 2014-03-20 IRI Domain Management, LLC
|
||||||
|
lds
|
||||||
|
|
||||||
|
// lawyer : 2014-03-20 Atomic Station, LLC
|
||||||
|
lawyer
|
||||||
|
|
||||||
|
// surgery : 2014-03-20 Tin Avenue, LLC
|
||||||
|
surgery
|
||||||
|
|
||||||
|
// gratis : 2014-03-20 Pioneer Tigers, LLC
|
||||||
|
gratis
|
||||||
|
|
||||||
|
// software : 2014-03-20 Over Birch, LLC
|
||||||
|
software
|
||||||
|
|
||||||
|
// mortgage : 2014-03-20 Outer Gardens, LLC
|
||||||
|
mortgage
|
||||||
|
|
||||||
|
// republican : 2014-03-20 United TLD Holdco Ltd.
|
||||||
|
republican
|
||||||
|
|
||||||
|
// credit : 2014-03-20 Snow Shadow, LLC
|
||||||
|
credit
|
||||||
|
|
||||||
|
// tax : 2014-03-20 Storm Orchard, LLC
|
||||||
|
tax
|
||||||
|
|
||||||
|
// africa : 2014-03-24 ZA Central Registry NPC trading as Registry.Africa
|
||||||
|
africa
|
||||||
|
|
||||||
|
// joburg : 2014-03-24 ZA Central Registry NPC trading as ZA Central Registry
|
||||||
|
joburg
|
||||||
|
|
||||||
|
// durban : 2014-03-24 ZA Central Registry NPC trading as ZA Central Registry
|
||||||
|
durban
|
||||||
|
|
||||||
|
// capetown : 2014-03-24 ZA Central Registry NPC trading as ZA Central Registry
|
||||||
|
capetown
|
||||||
|
|
||||||
|
// sap : 2014-03-27 SAP AG
|
||||||
|
sap
|
||||||
|
|
||||||
|
// datsun : 2014-03-27 NISSAN MOTOR CO., LTD.
|
||||||
|
datsun
|
||||||
|
|
||||||
|
// infiniti : 2014-03-27 NISSAN MOTOR CO., LTD.
|
||||||
|
infiniti
|
||||||
|
|
||||||
|
// firmdale : 2014-03-27 Firmdale Holdings Limited
|
||||||
|
firmdale
|
||||||
|
|
||||||
|
// organic : 2014-03-27 Afilias Limited
|
||||||
|
organic
|
||||||
|
|
||||||
|
// nissan : 2014-03-27 NISSAN MOTOR CO., LTD.
|
||||||
|
nissan
|
||||||
|
|
||||||
|
// website : 2014-04-03 DotWebsite Inc.
|
||||||
|
website
|
||||||
|
|
||||||
|
// space : 2014-04-03 DotSpace Inc.
|
||||||
|
space
|
||||||
|
|
||||||
|
// schmidt : 2014-04-03 SALM S.A.S.
|
||||||
|
schmidt
|
||||||
|
|
||||||
|
// cuisinella : 2014-04-03 SALM S.A.S.
|
||||||
|
cuisinella
|
||||||
|
|
||||||
|
// samsung : 2014-04-03 SAMSUNG SDS CO., LTD
|
||||||
|
samsung
|
||||||
|
|
||||||
|
// crs : 2014-04-03 Federated Co operatives Limited
|
||||||
|
crs
|
||||||
|
|
||||||
|
// doosan : 2014-04-03 Doosan Corporation
|
||||||
|
doosan
|
||||||
|
|
||||||
|
// press : 2014-04-03 DotPress Inc.
|
||||||
|
press
|
||||||
|
|
||||||
|
// emerck : 2014-04-03 Merck KGaA
|
||||||
|
emerck
|
||||||
|
|
||||||
|
// erni : 2014-04-03 ERNI Group Holding AG
|
||||||
|
erni
|
||||||
|
|
||||||
|
// direct : 2014-04-10 Half Trail, LLC
|
||||||
|
direct
|
||||||
|
|
||||||
|
// yandex : 2014-04-10 YANDEX, LLC
|
||||||
|
yandex
|
||||||
|
|
||||||
|
// lotto : 2014-04-10 Afilias Limited
|
||||||
|
lotto
|
||||||
|
|
||||||
|
// toshiba : 2014-04-10 TOSHIBA Corporation
|
||||||
|
toshiba
|
||||||
|
|
||||||
|
// bauhaus : 2014-04-17 Werkhaus GmbH
|
||||||
|
bauhaus
|
||||||
|
|
||||||
|
// host : 2014-04-17 DotHost Inc.
|
||||||
|
host
|
||||||
|
|
||||||
|
// ltda : 2014-04-17 DOMAIN ROBOT SERVICOS DE HOSPEDAGEM NA INTERNET LTDA
|
||||||
|
ltda
|
||||||
|
|
||||||
|
// global : 2014-04-17 Dot GLOBAL AS
|
||||||
|
global
|
||||||
|
|
||||||
|
// abogado : 2014-04-24 Top Level Domain Holdings Limited
|
||||||
|
abogado
|
||||||
|
|
||||||
|
// place : 2014-04-24 Snow Galley, LLC
|
||||||
|
place
|
||||||
|
|
||||||
|
// tirol : 2014-04-24 punkt Tirol GmbH
|
||||||
|
tirol
|
||||||
|
|
||||||
|
// gmx : 2014-04-24 1&1 Mail & Media GmbH
|
||||||
|
gmx
|
||||||
|
|
||||||
|
// tatar : 2014-04-24 Limited Liability Company "Coordination Center of Regional Domain of Tatarstan Republic"
|
||||||
|
tatar
|
||||||
|
|
||||||
|
// scholarships : 2014-04-24 Scholarships.com, LLC
|
||||||
|
scholarships
|
||||||
|
|
||||||
|
// eurovision : 2014-04-24 European Broadcasting Union (EBU)
|
||||||
|
eurovision
|
||||||
|
|
||||||
|
// wedding : 2014-04-24 Top Level Domain Holdings Limited
|
||||||
|
wedding
|
||||||
|
|
||||||
|
// active : 2014-05-01 The Active Network, Inc
|
||||||
|
active
|
||||||
|
|
||||||
|
// madrid : 2014-05-01 Comunidad de Madrid
|
||||||
|
madrid
|
||||||
|
|
||||||
|
// youtube : 2014-05-01 Charleston Road Registry Inc.
|
||||||
|
youtube
|
||||||
|
|
||||||
|
// sharp : 2014-05-01 Sharp Corporation
|
||||||
|
sharp
|
||||||
|
|
||||||
|
// uol : 2014-05-01 UBN INTERNET LTDA.
|
||||||
|
uol
|
||||||
|
|
||||||
|
// physio : 2014-05-01 PhysBiz Pty Ltd
|
||||||
|
physio
|
||||||
|
|
||||||
|
// gmail : 2014-05-01 Charleston Road Registry Inc.
|
||||||
|
gmail
|
||||||
|
|
||||||
|
// channel : 2014-05-08 Charleston Road Registry Inc.
|
||||||
|
channel
|
||||||
|
|
||||||
|
// fly : 2014-05-08 Charleston Road Registry Inc.
|
||||||
|
fly
|
||||||
|
|
||||||
|
// zip : 2014-05-08 Charleston Road Registry Inc.
|
||||||
|
zip
|
||||||
|
|
||||||
|
// esq : 2014-05-08 Charleston Road Registry Inc.
|
||||||
|
esq
|
||||||
|
|
||||||
|
// rsvp : 2014-05-08 Charleston Road Registry Inc.
|
||||||
|
rsvp
|
||||||
|
|
||||||
|
// wales : 2014-05-08 Nominet UK
|
||||||
|
wales
|
||||||
|
|
||||||
|
// cymru : 2014-05-08 Nominet UK
|
||||||
|
cymru
|
||||||
|
|
||||||
|
// green : 2014-05-08 Afilias Limited
|
||||||
|
green
|
||||||
|
|
||||||
|
// lgbt : 2014-05-08 Afilias Limited
|
||||||
|
lgbt
|
||||||
|
|
||||||
|
// xn--hxt814e : 2014-05-15 Zodiac Libra Limited
|
||||||
|
网店
|
||||||
|
|
||||||
|
// cancerresearch : 2014-05-15 Australian Cancer Research Foundation
|
||||||
|
cancerresearch
|
||||||
|
|
||||||
|
// everbank : 2014-05-15 EverBank
|
||||||
|
everbank
|
||||||
|
|
||||||
|
// frl : 2014-05-15 FRLregistry B.V.
|
||||||
|
frl
|
||||||
|
|
||||||
|
// property : 2014-05-22 Uniregistry, Corp.
|
||||||
|
property
|
||||||
|
|
||||||
|
// forsale : 2014-05-22 Sea Oaks, LLC
|
||||||
|
forsale
|
||||||
|
|
||||||
|
// seat : 2014-05-22 SEAT, S.A. (Sociedad Unipersonal)
|
||||||
|
seat
|
||||||
|
|
||||||
|
// deals : 2014-05-22 Sand Sunset, LLC
|
||||||
|
deals
|
||||||
|
|
||||||
|
// nra : 2014-05-22 NRA Holdings Company, INC.
|
||||||
|
nra
|
||||||
|
|
||||||
|
// xn--fjq720a : 2014-05-22 Will Bloom, LLC
|
||||||
|
娱乐
|
||||||
|
|
||||||
|
// realtor : 2014-05-29 Real Estate Domains LLC
|
||||||
|
realtor
|
||||||
|
|
||||||
|
// bnpparibas : 2014-05-29 BNP Paribas
|
||||||
|
bnpparibas
|
||||||
|
|
||||||
|
// melbourne : 2014-05-29 The Crown in right of the State of Victoria, represented by its Department of State Development, Business and Innovation
|
||||||
|
melbourne
|
||||||
|
|
||||||
|
// hosting : 2014-05-29 Uniregistry, Corp.
|
||||||
|
hosting
|
||||||
|
|
||||||
|
// yoga : 2014-05-29 Top Level Domain Holdings Limited
|
||||||
|
yoga
|
||||||
|
|
||||||
|
// city : 2014-05-29 Snow Sky, LLC
|
||||||
|
city
|
||||||
|
|
||||||
|
// bond : 2014-06-05 Bond University Limited
|
||||||
|
bond
|
||||||
|
|
||||||
|
// click : 2014-06-05 Uniregistry, Corp.
|
||||||
|
click
|
||||||
|
|
||||||
|
// cern : 2014-06-05 European Organization for Nuclear Research ("CERN")
|
||||||
|
cern
|
||||||
|
|
||||||
// ===END ICANN DOMAINS===
|
// ===END ICANN DOMAINS===
|
||||||
// ===BEGIN PRIVATE DOMAINS===
|
// ===BEGIN PRIVATE DOMAINS===
|
||||||
|
@ -7663,20 +7996,22 @@ reise
|
||||||
cloudfront.net
|
cloudfront.net
|
||||||
|
|
||||||
// Amazon Elastic Compute Cloud: https://aws.amazon.com/ec2/
|
// Amazon Elastic Compute Cloud: https://aws.amazon.com/ec2/
|
||||||
// Submitted by Osman Surkatty <osmans@amazon.com> 2013-04-02
|
// Submitted by Osman Surkatty <osmans@amazon.com> 2014-05-20
|
||||||
compute.amazonaws.com
|
|
||||||
us-east-1.amazonaws.com
|
|
||||||
compute-1.amazonaws.com
|
|
||||||
z-1.compute-1.amazonaws.com
|
|
||||||
z-2.compute-1.amazonaws.com
|
|
||||||
ap-northeast-1.compute.amazonaws.com
|
ap-northeast-1.compute.amazonaws.com
|
||||||
ap-southeast-1.compute.amazonaws.com
|
ap-southeast-1.compute.amazonaws.com
|
||||||
ap-southeast-2.compute.amazonaws.com
|
ap-southeast-2.compute.amazonaws.com
|
||||||
|
cn-north-1.compute.amazonaws.cn
|
||||||
|
compute.amazonaws.cn
|
||||||
|
compute.amazonaws.com
|
||||||
|
compute-1.amazonaws.com
|
||||||
eu-west-1.compute.amazonaws.com
|
eu-west-1.compute.amazonaws.com
|
||||||
sa-east-1.compute.amazonaws.com
|
sa-east-1.compute.amazonaws.com
|
||||||
|
us-east-1.amazonaws.com
|
||||||
us-gov-west-1.compute.amazonaws.com
|
us-gov-west-1.compute.amazonaws.com
|
||||||
us-west-1.compute.amazonaws.com
|
us-west-1.compute.amazonaws.com
|
||||||
us-west-2.compute.amazonaws.com
|
us-west-2.compute.amazonaws.com
|
||||||
|
z-1.compute-1.amazonaws.com
|
||||||
|
z-2.compute-1.amazonaws.com
|
||||||
|
|
||||||
// Amazon Elastic Beanstalk : https://aws.amazon.com/elasticbeanstalk/
|
// Amazon Elastic Beanstalk : https://aws.amazon.com/elasticbeanstalk/
|
||||||
// Submitted by Adam Stein <astein@amazon.com> 2013-04-02
|
// Submitted by Adam Stein <astein@amazon.com> 2013-04-02
|
||||||
|
@ -7719,6 +8054,7 @@ ar.com
|
||||||
br.com
|
br.com
|
||||||
cn.com
|
cn.com
|
||||||
com.de
|
com.de
|
||||||
|
com.se
|
||||||
de.com
|
de.com
|
||||||
eu.com
|
eu.com
|
||||||
gb.com
|
gb.com
|
||||||
|
@ -8074,6 +8410,10 @@ global.ssl.fastly.net
|
||||||
a.prod.fastly.net
|
a.prod.fastly.net
|
||||||
global.prod.fastly.net
|
global.prod.fastly.net
|
||||||
|
|
||||||
|
// Firebase, Inc.
|
||||||
|
// Submitted by Chris Raynor <chris@firebase.com> 2014-01-21
|
||||||
|
firebaseapp.com
|
||||||
|
|
||||||
// GitHub, Inc.
|
// GitHub, Inc.
|
||||||
// Submitted by Ben Toews <btoews@github.com> 2014-02-06
|
// Submitted by Ben Toews <btoews@github.com> 2014-02-06
|
||||||
github.io
|
github.io
|
||||||
|
@ -8153,10 +8493,18 @@ azurewebsites.net
|
||||||
azure-mobile.net
|
azure-mobile.net
|
||||||
cloudapp.net
|
cloudapp.net
|
||||||
|
|
||||||
|
// NFSN, Inc. : https://www.NearlyFreeSpeech.NET/
|
||||||
|
// Submitted by Jeff Wheelhouse <support@nearlyfreespeech.net> 2014-02-02
|
||||||
|
nfshost.com
|
||||||
|
|
||||||
// NYC.mn : http://www.information.nyc.mn
|
// NYC.mn : http://www.information.nyc.mn
|
||||||
// Submitted by Matthew Brown <mattbrown@nyc.mn> 2013-03-11
|
// Submitted by Matthew Brown <mattbrown@nyc.mn> 2013-03-11
|
||||||
nyc.mn
|
nyc.mn
|
||||||
|
|
||||||
|
// One Fold Media : http://www.onefoldmedia.com/
|
||||||
|
// Submitted by Eddie Jones <eddie@onefoldmedia.com> 2014-06-10
|
||||||
|
nid.io
|
||||||
|
|
||||||
// Opera Software, A.S.A.
|
// Opera Software, A.S.A.
|
||||||
// Submitted by Yngve Pettersen <yngve@opera.com> 2009-11-26
|
// Submitted by Yngve Pettersen <yngve@opera.com> 2009-11-26
|
||||||
operaunite.com
|
operaunite.com
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
<SECTION>
|
<SECTION>
|
||||||
<FILE>libpsl</FILE>
|
<FILE>libpsl</FILE>
|
||||||
<TITLE>Public Suffix List functions</TITLE>
|
<TITLE>Public Suffix List functions</TITLE>
|
||||||
|
psl_error_t
|
||||||
psl_ctx_t
|
psl_ctx_t
|
||||||
psl_load_file
|
psl_load_file
|
||||||
psl_load_fp
|
psl_load_fp
|
||||||
|
@ -17,4 +18,5 @@ psl_builtin_sha1sum
|
||||||
psl_builtin_filename
|
psl_builtin_filename
|
||||||
psl_is_cookie_domain_acceptable
|
psl_is_cookie_domain_acceptable
|
||||||
psl_get_version
|
psl_get_version
|
||||||
|
psl_str_to_utf8lower
|
||||||
</SECTION>
|
</SECTION>
|
||||||
|
|
|
@ -38,6 +38,27 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* psl_error_t:
|
||||||
|
* @PSL_SUCCESS: Successful return.
|
||||||
|
* @PSL_ERR_INVALID_ARG: Invalid argument.
|
||||||
|
* @PSL_ERR_CONVERTER: Failed to open libicu utf-16 converter
|
||||||
|
* @PSL_ERR_TO_UTF16: Failed to convert to utf-16.
|
||||||
|
* @PSL_ERR_TO_LOWER: Failed to convert utf-16 to lowercase.
|
||||||
|
* @PSL_ERR_TO_UTF8: Failed to convert utf-16 to utf-8.
|
||||||
|
*
|
||||||
|
* Return codes for PSL functions.
|
||||||
|
* Negative return codes mean failure.
|
||||||
|
* Positive values are reserved for non-error return codes.
|
||||||
|
*/
|
||||||
|
typedef enum {
|
||||||
|
PSL_SUCCESS = 0,
|
||||||
|
PSL_ERR_INVALID_ARG = -1,
|
||||||
|
PSL_ERR_CONVERTER = -2, /* failed to open libicu utf-16 converter */
|
||||||
|
PSL_ERR_TO_UTF16 = -3, /* failed to convert to utf-16 */
|
||||||
|
PSL_ERR_TO_LOWER = -4, /* failed to convert utf-16 to lowercase */
|
||||||
|
PSL_ERR_TO_UTF8 = -5 /* failed to convert utf-16 to utf-8 */
|
||||||
|
} psl_error_t;
|
||||||
|
|
||||||
typedef struct _psl_ctx_st psl_ctx_t;
|
typedef struct _psl_ctx_st psl_ctx_t;
|
||||||
|
|
||||||
|
@ -65,6 +86,9 @@ const char *
|
||||||
/* returns the shortest possible registrable domain part or NULL if domain is not registrable at all */
|
/* returns the shortest possible registrable domain part or NULL if domain is not registrable at all */
|
||||||
const char *
|
const char *
|
||||||
psl_registrable_domain(const psl_ctx_t *psl, const char *domain);
|
psl_registrable_domain(const psl_ctx_t *psl, const char *domain);
|
||||||
|
/* convert a string into lowercase UTF-8 */
|
||||||
|
int
|
||||||
|
psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower);
|
||||||
/* does not include exceptions */
|
/* does not include exceptions */
|
||||||
int
|
int
|
||||||
psl_suffix_count(const psl_ctx_t *psl);
|
psl_suffix_count(const psl_ctx_t *psl);
|
||||||
|
|
286
src/psl.c
286
src/psl.c
|
@ -49,9 +49,20 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
#include <alloca.h>
|
||||||
|
|
||||||
|
#ifdef WITH_LIBICU
|
||||||
|
# include <unicode/uversion.h>
|
||||||
|
# include <unicode/ustring.h>
|
||||||
|
# include <unicode/uidna.h>
|
||||||
|
# include <unicode/ucnv.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <libpsl.h>
|
#include <libpsl.h>
|
||||||
|
|
||||||
|
/* number of elements within an array */
|
||||||
|
#define countof(a) (sizeof(a)/sizeof(*(a)))
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SECTION:libpsl
|
* SECTION:libpsl
|
||||||
* @short_description: Public Suffix List library functions
|
* @short_description: Public Suffix List library functions
|
||||||
|
@ -95,7 +106,17 @@ struct _psl_ctx_st {
|
||||||
};
|
};
|
||||||
|
|
||||||
/* include the PSL data compiled by 'psl2c' */
|
/* include the PSL data compiled by 'psl2c' */
|
||||||
#include "suffixes.c"
|
#ifndef _LIBPSL_INCLUDED_BY_PSL2C
|
||||||
|
# include "suffixes.c"
|
||||||
|
#else
|
||||||
|
/* if this source file is included by psl2c.c, provide empty builtin data */
|
||||||
|
static _psl_entry_t suffixes[1];
|
||||||
|
static _psl_entry_t suffix_exceptions[1];
|
||||||
|
static time_t _psl_file_time;
|
||||||
|
static time_t _psl_compile_time;
|
||||||
|
static const char _psl_sha1_checksum[] = "";
|
||||||
|
static const char _psl_filename[] = "";
|
||||||
|
#endif
|
||||||
|
|
||||||
/* references to this PSL will result in lookups to built-in data */
|
/* references to this PSL will result in lookups to built-in data */
|
||||||
static const psl_ctx_t
|
static const psl_ctx_t
|
||||||
|
@ -239,39 +260,19 @@ static int _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
|
||||||
for (dst = suffix->label_buf, src = rule; *src;) {
|
for (dst = suffix->label_buf, src = rule; *src;) {
|
||||||
if (*src == '.')
|
if (*src == '.')
|
||||||
suffix->nlabels++;
|
suffix->nlabels++;
|
||||||
*dst++ = tolower(*src++);
|
*dst++ = *src++;
|
||||||
}
|
}
|
||||||
*dst = 0;
|
*dst = 0;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
||||||
* psl_is_public_suffix:
|
|
||||||
* @psl: PSL context
|
|
||||||
* @domain: Domain string
|
|
||||||
*
|
|
||||||
* This function checks if @domain is a public suffix by the means of the
|
|
||||||
* [Mozilla Public Suffix List](http://publicsuffix.org).
|
|
||||||
*
|
|
||||||
* For cookie domain checking see psl_is_cookie_domain_acceptable().
|
|
||||||
*
|
|
||||||
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
|
|
||||||
* psl_builtin().
|
|
||||||
*
|
|
||||||
* Returns: 1 if domain is a public suffix, 0 if not.
|
|
||||||
*
|
|
||||||
* Since: 0.1
|
|
||||||
*/
|
|
||||||
int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
|
||||||
{
|
{
|
||||||
_psl_entry_t suffix, *rule;
|
_psl_entry_t suffix, *rule;
|
||||||
const char *p, *label_bak;
|
const char *p, *label_bak;
|
||||||
unsigned short length_bak;
|
unsigned short length_bak;
|
||||||
|
|
||||||
if (!psl || !domain)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
/* this function should be called without leading dots, just make sure */
|
/* this function should be called without leading dots, just make sure */
|
||||||
suffix.label = domain + (*domain == '.');
|
suffix.label = domain + (*domain == '.');
|
||||||
suffix.length = strlen(suffix.label);
|
suffix.length = strlen(suffix.label);
|
||||||
|
@ -340,6 +341,34 @@ int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* psl_is_public_suffix:
|
||||||
|
* @psl: PSL context
|
||||||
|
* @domain: Domain string
|
||||||
|
*
|
||||||
|
* This function checks if @domain is a public suffix by the means of the
|
||||||
|
* [Mozilla Public Suffix List](http://publicsuffix.org).
|
||||||
|
*
|
||||||
|
* For cookie domain checking see psl_is_cookie_domain_acceptable().
|
||||||
|
*
|
||||||
|
* International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode).
|
||||||
|
* Other encodings result in unexpected behavior.
|
||||||
|
*
|
||||||
|
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
|
||||||
|
* psl_builtin().
|
||||||
|
*
|
||||||
|
* Returns: 1 if domain is a public suffix, 0 if not.
|
||||||
|
*
|
||||||
|
* Since: 0.1
|
||||||
|
*/
|
||||||
|
int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
||||||
|
{
|
||||||
|
if (!psl || !domain)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return _psl_is_public_suffix(psl, domain);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* psl_unregistrable_domain:
|
* psl_unregistrable_domain:
|
||||||
* @psl: PSL context
|
* @psl: PSL context
|
||||||
|
@ -348,6 +377,9 @@ int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
|
||||||
* This function finds the longest public suffix part of @domain by the means
|
* This function finds the longest public suffix part of @domain by the means
|
||||||
* of the [Mozilla Public Suffix List](http://publicsuffix.org).
|
* of the [Mozilla Public Suffix List](http://publicsuffix.org).
|
||||||
*
|
*
|
||||||
|
* International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode).
|
||||||
|
* Other encodings result in unexpected behavior.
|
||||||
|
*
|
||||||
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
|
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
|
||||||
* psl_builtin().
|
* psl_builtin().
|
||||||
*
|
*
|
||||||
|
@ -366,7 +398,7 @@ const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
|
||||||
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
|
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
while (!psl_is_public_suffix(psl, domain)) {
|
while (!_psl_is_public_suffix(psl, domain)) {
|
||||||
if ((domain = strchr(domain, '.')))
|
if ((domain = strchr(domain, '.')))
|
||||||
domain++;
|
domain++;
|
||||||
else
|
else
|
||||||
|
@ -384,6 +416,9 @@ const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
|
||||||
* This function finds the shortest private suffix part of @domain by the means
|
* This function finds the shortest private suffix part of @domain by the means
|
||||||
* of the [Mozilla Public Suffix List](http://publicsuffix.org).
|
* of the [Mozilla Public Suffix List](http://publicsuffix.org).
|
||||||
*
|
*
|
||||||
|
* International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode).
|
||||||
|
* Other encodings result in unexpected behavior.
|
||||||
|
*
|
||||||
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
|
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
|
||||||
* psl_builtin().
|
* psl_builtin().
|
||||||
*
|
*
|
||||||
|
@ -404,7 +439,7 @@ const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
|
||||||
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
|
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
while (!psl_is_public_suffix(psl, domain)) {
|
while (!_psl_is_public_suffix(psl, domain)) {
|
||||||
if ((p = strchr(domain, '.'))) {
|
if ((p = strchr(domain, '.'))) {
|
||||||
regdom = domain;
|
regdom = domain;
|
||||||
domain = p + 1;
|
domain = p + 1;
|
||||||
|
@ -415,6 +450,51 @@ const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
|
||||||
return regdom;
|
return regdom;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Returns 1 if the NUL-terminated string s consists solely of 7-bit ASCII
 * bytes (the empty string counts as ASCII), 0 otherwise.
 *
 * The high bit is tested explicitly rather than comparing '*s > 0': plain
 * char is unsigned on some platforms, where every non-NUL byte would compare
 * greater than zero and UTF-8 input would be reported as ASCII.
 */
static int _str_is_ascii(const char *s)
{
	while (*s && !(*s & 0x80))
		s++;

	return !*s;
}
|
||||||
|
|
||||||
|
#ifdef WITH_LIBICU
|
||||||
|
/*
 * If the label of suffix entry e contains non-ASCII bytes, convert it to its
 * ASCII (punycode) form using IDNA2008 UTS#46 and append that form to vector
 * v as an additional entry carrying the same wildcard flag.
 *
 * Nothing is added when e is already ASCII, when idna is NULL, when any
 * conversion step fails, when the converted name equals the original, or
 * when the converted name is rejected by _suffix_init().
 */
static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t *e)
{
	_psl_entry_t suffix, *suffixp;
	char lookupname[128] = "";
	UErrorCode status = 0;
	UIDNAInfo info = UIDNA_INFO_INITIALIZER;
	UChar utf16_dst[128], utf16_src[128];
	int32_t utf16_src_length, dst_length;

	if (!idna || !e || _str_is_ascii(e->label_buf))
		return;

	/* UTF-8 -> UTF-16 */
	u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, -1, &status);
	if (!U_SUCCESS(status))
		return; /* fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); */

	/* IDNA2008 UTS#46 punycode conversion */
	dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status);
	if (!U_SUCCESS(status))
		return; /* fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status); */

	/*
	 * UTF-16 -> UTF-8. The capacity leaves out the last byte, so the
	 * zero-initialized buffer stays NUL-terminated even if ICU fills it
	 * completely (which ICU reports as a warning, not as a failure).
	 */
	u_strToUTF8(lookupname, sizeof(lookupname) - 1, NULL, utf16_dst, dst_length, &status);
	if (!U_SUCCESS(status))
		return; /* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */

	if (!strcmp(e->label_buf, lookupname))
		return; /* conversion changed nothing, ignore */

	/* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */

	/* do not add entries that _suffix_init() rejected, their fields are not set up */
	if (_suffix_init(&suffix, lookupname, strlen(lookupname)) != 0)
		return;

	suffix.wildcard = e->wildcard;

	/* _vector_get() yields NULL if the entry could not be added */
	if ((suffixp = _vector_get(v, _vector_add(v, &suffix))))
		suffixp->label = suffixp->label_buf; /* set label to changed address */
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* psl_load_file:
|
* psl_load_file:
|
||||||
* @fname: Name of PSL file
|
* @fname: Name of PSL file
|
||||||
|
@ -422,13 +502,7 @@ const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
|
||||||
* This function loads the public suffixes file named @fname.
|
* This function loads the public suffixes file named @fname.
|
||||||
* To free the allocated resources, call psl_free().
|
* To free the allocated resources, call psl_free().
|
||||||
*
|
*
|
||||||
* If you want to use punycode representations for functions like psl_is_public_suffix(),
|
* The suffixes are expected to be lowercase UTF-8 encoded if they are international.
|
||||||
* these have to exist as entries within @fname. This is a design decision to not pull in
|
|
||||||
* dependencies for UTF-8 case-handling and IDNA libraries.
|
|
||||||
*
|
|
||||||
* On the contrary, the builtin data already contains punycode entries.
|
|
||||||
*
|
|
||||||
* Have a look into psl2c.c for example code on how to convert UTF-8 to lowercase and to punycode.
|
|
||||||
*
|
*
|
||||||
* Returns: Pointer to a PSL context or %NULL on failure.
|
* Returns: Pointer to a PSL context or %NULL on failure.
|
||||||
*
|
*
|
||||||
|
@ -457,7 +531,7 @@ psl_ctx_t *psl_load_file(const char *fname)
|
||||||
* This function loads the public suffixes from a FILE pointer.
|
* This function loads the public suffixes from a FILE pointer.
|
||||||
* To free the allocated resources, call psl_free().
|
* To free the allocated resources, call psl_free().
|
||||||
*
|
*
|
||||||
* Have a look at psl_load_file() for punycode considerations.
|
* The suffixes are expected to be lowercase UTF-8 encoded if they are international.
|
||||||
*
|
*
|
||||||
* Returns: Pointer to a PSL context or %NULL on failure.
|
* Returns: Pointer to a PSL context or %NULL on failure.
|
||||||
*
|
*
|
||||||
|
@ -467,8 +541,11 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
||||||
{
|
{
|
||||||
psl_ctx_t *psl;
|
psl_ctx_t *psl;
|
||||||
_psl_entry_t suffix, *suffixp;
|
_psl_entry_t suffix, *suffixp;
|
||||||
int nsuffixes = 0;
|
|
||||||
char buf[256], *linep, *p;
|
char buf[256], *linep, *p;
|
||||||
|
#ifdef WITH_LIBICU
|
||||||
|
UIDNA *idna;
|
||||||
|
UErrorCode status = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
if (!fp)
|
if (!fp)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -476,6 +553,10 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
||||||
if (!(psl = calloc(1, sizeof(psl_ctx_t))))
|
if (!(psl = calloc(1, sizeof(psl_ctx_t))))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
#ifdef WITH_LIBICU
|
||||||
|
idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status);
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
* as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
||||||
* as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
* as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
||||||
|
@ -496,26 +577,33 @@ psl_ctx_t *psl_load_fp(FILE *fp)
|
||||||
|
|
||||||
if (*p == '!') {
|
if (*p == '!') {
|
||||||
/* add to exceptions */
|
/* add to exceptions */
|
||||||
if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0)
|
if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0) {
|
||||||
suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix));
|
suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix));
|
||||||
else
|
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
||||||
suffixp = NULL;
|
#ifdef WITH_LIBICU
|
||||||
|
_add_punycode_if_needed(idna, psl->suffix_exceptions, suffixp);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if (_suffix_init(&suffix, p, linep - p) == 0)
|
/* add to suffixes */
|
||||||
|
if (_suffix_init(&suffix, p, linep - p) == 0) {
|
||||||
suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix));
|
suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix));
|
||||||
else
|
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
||||||
suffixp = NULL;
|
#ifdef WITH_LIBICU
|
||||||
|
_add_punycode_if_needed(idna, psl->suffixes, suffixp);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (suffixp)
|
|
||||||
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
|
||||||
|
|
||||||
nsuffixes++;;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_vector_sort(psl->suffix_exceptions);
|
_vector_sort(psl->suffix_exceptions);
|
||||||
_vector_sort(psl->suffixes);
|
_vector_sort(psl->suffixes);
|
||||||
|
|
||||||
|
#ifdef WITH_LIBICU
|
||||||
|
if (idna)
|
||||||
|
uidna_close(idna);
|
||||||
|
#endif
|
||||||
|
|
||||||
return psl;
|
return psl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -685,7 +773,13 @@ const char *psl_builtin_filename(void)
|
||||||
**/
|
**/
|
||||||
const char *psl_get_version (void)
|
const char *psl_get_version (void)
|
||||||
{
|
{
|
||||||
return PACKAGE_VERSION;
|
return PACKAGE_VERSION
|
||||||
|
#ifdef WITH_LIBICU
|
||||||
|
" (+libicu/" U_ICU_VERSION ")"
|
||||||
|
#else
|
||||||
|
" (limited IDNA support)"
|
||||||
|
#endif
|
||||||
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -697,6 +791,9 @@ const char *psl_get_version (void)
|
||||||
* This helper function checks whether @cookie_domain is an acceptable cookie domain value for the request
|
* This helper function checks whether @cookie_domain is an acceptable cookie domain value for the request
|
||||||
* @hostname.
|
* @hostname.
|
||||||
*
|
*
|
||||||
|
* For international domain names both, @hostname and @cookie_domain, have to be either in lowercase UTF-8
|
||||||
|
* or in ASCII form (punycode). Other encodings or mixing UTF-8 and punycode result in unexpected behavior.
|
||||||
|
*
|
||||||
* Examples:
|
* Examples:
|
||||||
* 1. Cookie domain 'example.com' would be acceptable for hostname 'www.example.com',
|
* 1. Cookie domain 'example.com' would be acceptable for hostname 'www.example.com',
|
||||||
* but '.com' or 'com' would NOT be acceptable since 'com' is a public suffix.
|
* but '.com' or 'com' would NOT be acceptable since 'com' is a public suffix.
|
||||||
|
@ -741,3 +838,100 @@ int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname,
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* psl_str_to_utf8lower:
|
||||||
|
* @str: string to convert
|
||||||
|
* @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL
|
||||||
|
* @locale: locale of @str for to lowercase conversion, e.g. 'de' or %NULL
|
||||||
|
* @lower: return value containing the converted string
|
||||||
|
*
|
||||||
|
* This helper function converts a string to lowercase UTF-8 representation.
|
||||||
|
* Lowercase UTF-8 is needed as input to the domain checking functions.
|
||||||
|
*
|
||||||
|
* @lower is %NULL on error.
|
||||||
|
* The return value 'lower' must be freed after usage.
|
||||||
|
*
|
||||||
|
* Returns: psl_error_t value.
|
||||||
|
* PSL_SUCCESS: Success
|
||||||
|
* PSL_ERR_INVALID_ARG: @str is a %NULL value.
|
||||||
|
* PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding
|
||||||
|
* PSL_ERR_TO_UTF16: Failed to convert @str to unicode
|
||||||
|
* PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase
|
||||||
|
* PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8
|
||||||
|
*
|
||||||
|
* Since: 0.4
|
||||||
|
*/
|
||||||
|
psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower)
|
||||||
|
{
|
||||||
|
int ret = PSL_ERR_INVALID_ARG;
|
||||||
|
|
||||||
|
if (lower)
|
||||||
|
*lower = NULL;
|
||||||
|
|
||||||
|
if (!str)
|
||||||
|
return PSL_ERR_INVALID_ARG;
|
||||||
|
|
||||||
|
/* shortcut to avoid costly conversion */
|
||||||
|
if (_str_is_ascii(str)) {
|
||||||
|
if (lower) {
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
*lower = strdup(str);
|
||||||
|
|
||||||
|
/* convert ASCII string to lowercase */
|
||||||
|
for (p = *lower; *p; p++)
|
||||||
|
if (isupper(*p))
|
||||||
|
*p = tolower(*p);
|
||||||
|
}
|
||||||
|
return PSL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef WITH_LIBICU
|
||||||
|
do {
|
||||||
|
size_t str_length = strlen(str);
|
||||||
|
UErrorCode status = 0;
|
||||||
|
UChar *utf16_dst, *utf16_lower;
|
||||||
|
int32_t utf16_dst_length;
|
||||||
|
char *utf8_lower;
|
||||||
|
UConverter *uconv;
|
||||||
|
|
||||||
|
/* C89 allocation */
|
||||||
|
utf16_dst = alloca(sizeof(UChar) * (str_length * 2 + 1));
|
||||||
|
utf16_lower = alloca(sizeof(UChar) * (str_length * 2 + 1));
|
||||||
|
utf8_lower = alloca(str_length * 2 + 1);
|
||||||
|
|
||||||
|
uconv = ucnv_open(encoding, &status);
|
||||||
|
if (U_SUCCESS(status)) {
|
||||||
|
utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, str_length * 2 + 1, str, str_length, &status);
|
||||||
|
ucnv_close(uconv);
|
||||||
|
|
||||||
|
if (U_SUCCESS(status)) {
|
||||||
|
int32_t utf16_lower_length = u_strToLower(utf16_lower, str_length * 2 + 1, utf16_dst, utf16_dst_length, locale, &status);
|
||||||
|
if (U_SUCCESS(status)) {
|
||||||
|
u_strToUTF8(utf8_lower, str_length * 8 + 1, NULL, utf16_lower, utf16_lower_length, &status);
|
||||||
|
if (U_SUCCESS(status)) {
|
||||||
|
if (lower)
|
||||||
|
*lower = strdup(utf8_lower);
|
||||||
|
ret = PSL_SUCCESS;
|
||||||
|
} else {
|
||||||
|
ret = PSL_ERR_TO_UTF8;
|
||||||
|
/* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ret = PSL_ERR_TO_LOWER;
|
||||||
|
/* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ret = PSL_ERR_TO_UTF16;
|
||||||
|
/* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ret = PSL_ERR_CONVERTER;
|
||||||
|
/* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */
|
||||||
|
}
|
||||||
|
} while (0);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
310
src/psl2c.c
310
src/psl2c.c
|
@ -39,235 +39,14 @@
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
|
||||||
/*
|
|
||||||
#ifdef WITH_LIBIDN2
|
|
||||||
# include <idn2.h>
|
|
||||||
#endif
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifdef WITH_LIBICU
|
|
||||||
# include <unicode/uversion.h>
|
|
||||||
# include <unicode/ustring.h>
|
|
||||||
# include <unicode/uidna.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef WITH_BUILTIN
|
#ifdef WITH_BUILTIN
|
||||||
|
|
||||||
#include <libpsl.h>
|
#include <libpsl.h>
|
||||||
|
|
||||||
typedef struct {
|
/* here we include the library source code to have access to internal functions and data structures */
|
||||||
char
|
#define _LIBPSL_INCLUDED_BY_PSL2C
|
||||||
label_buf[48];
|
# include "psl.c"
|
||||||
const char *
|
#undef _LIBPSL_INCLUDED_BY_PSL2C
|
||||||
label;
|
|
||||||
unsigned short
|
|
||||||
length;
|
|
||||||
unsigned char
|
|
||||||
nlabels, /* number of labels */
|
|
||||||
wildcard; /* this is a wildcard rule (e.g. *.sapporo.jp) */
|
|
||||||
} _psl_entry_t;
|
|
||||||
|
|
||||||
/* stripped down version libmget vector routines */
|
|
||||||
typedef struct {
|
|
||||||
int
|
|
||||||
(*cmp)(const _psl_entry_t *, const _psl_entry_t *); /* comparison function */
|
|
||||||
_psl_entry_t
|
|
||||||
**entry; /* pointer to array of pointers to elements */
|
|
||||||
int
|
|
||||||
max, /* allocated elements */
|
|
||||||
cur; /* number of elements in use */
|
|
||||||
} _psl_vector_t;
|
|
||||||
|
|
||||||
struct _psl_ctx_st {
|
|
||||||
_psl_vector_t
|
|
||||||
*suffixes,
|
|
||||||
*suffix_exceptions;
|
|
||||||
};
|
|
||||||
|
|
||||||
static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t *, const _psl_entry_t *))
|
|
||||||
{
|
|
||||||
_psl_vector_t *v;
|
|
||||||
|
|
||||||
if (!(v = calloc(1, sizeof(_psl_vector_t))))
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
if (!(v->entry = malloc(max * sizeof(_psl_entry_t *)))) {
|
|
||||||
free(v);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
v->max = max;
|
|
||||||
v->cmp = cmp;
|
|
||||||
return v;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Release every entry of *v, the pointer array and the vector itself, then
 * reset *v to NULL so the caller is not left with a dangling pointer.
 * A NULL v or a NULL *v is a no-op.
 */
static void _vector_free(_psl_vector_t **v)
{
	if (v && *v) {
		if ((*v)->entry) {
			int it;

			for (it = 0; it < (*v)->cur; it++)
				free((*v)->entry[it]);

			free((*v)->entry);
		}
		free(*v);
		*v = NULL;
	}
}
|
|
||||||
|
|
||||||
static _psl_entry_t *_vector_get(const _psl_vector_t *v, int pos)
|
|
||||||
{
|
|
||||||
if (pos < 0 || !v || pos >= v->cur) return NULL;
|
|
||||||
|
|
||||||
return v->entry[pos];
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Append a heap-allocated copy of *elem to v, growing the pointer array when
 * it is full.
 *
 * Returns the index of the new entry, or -1 if v or elem is NULL or memory
 * could not be allocated. On failure the vector is left unchanged.
 */
static int _vector_add(_psl_vector_t *v, const _psl_entry_t *elem)
{
	_psl_entry_t *elemp;

	if (!v || !elem)
		return -1;

	if (v->max == v->cur) {
		/* double the capacity; start at 1 so a zero-sized vector can still grow */
		int newmax = v->max > 0 ? v->max * 2 : 1;
		_psl_entry_t **grown = realloc(v->entry, newmax * sizeof(_psl_entry_t *));

		if (!grown)
			return -1; /* v->entry is still valid and untouched */

		v->entry = grown;
		v->max = newmax;
	}

	if (!(elemp = malloc(sizeof(_psl_entry_t))))
		return -1;

	memcpy(elemp, elem, sizeof(_psl_entry_t));

	v->entry[v->cur++] = elemp;
	return v->cur - 1;
}
|
|
||||||
|
|
||||||
static int _compare(const void *p1, const void *p2, void *v)
|
|
||||||
{
|
|
||||||
return ((_psl_vector_t *)v)->cmp(*((_psl_entry_t **)p1), *((_psl_entry_t **)p2));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Sort the entries of v in place using its cmp callback.
 * Does nothing if v is NULL or has no comparison function.
 */
static void _vector_sort(_psl_vector_t *v)
{
	/* the element size is that of one slot of the entry array (a _psl_entry_t pointer) */
	if (v && v->cmp)
		qsort_r(v->entry, v->cur, sizeof(v->entry[0]), _compare, v);
}
|
|
||||||
|
|
||||||
/* by this kind of sorting, we can easily see if a domain matches or not (match = supercookie !) */
|
|
||||||
|
|
||||||
static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
|
|
||||||
{
|
|
||||||
int n;
|
|
||||||
|
|
||||||
if ((n = s2->nlabels - s1->nlabels))
|
|
||||||
return n; /* most labels first */
|
|
||||||
|
|
||||||
if ((n = s1->length - s2->length))
|
|
||||||
return n; /* shorter rules first */
|
|
||||||
|
|
||||||
return strcmp(s1->label, s2->label);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Fill in suffix from the rule text (length bytes, NUL-terminated).
 *
 * A leading "*." marks a wildcard rule and is not stored in the label. The
 * label is copied lowercased into suffix->label_buf and the number of labels
 * is counted. Rules that are too long for label_buf, or that start with '*'
 * not followed by '.', are reported on stderr and flagged by nlabels == 0.
 */
static void _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
{
	const char *src;
	char *dst;

	suffix->label = suffix->label_buf;

	if (length >= sizeof(suffix->label_buf) - 1) {
		suffix->nlabels = 0;
		fprintf(stderr, "Suffix rule too long (%d, ignored): %s\n", (int) length, rule);
		return;
	}

	if (*rule == '*') {
		if (rule[1] != '.') {
			suffix->nlabels = 0;
			/* print the complete rule, including the leading '*' */
			fprintf(stderr, "Unsupported kind of rule (ignored): %s\n", rule);
			return;
		}
		rule += 2; /* skip "*." */
		suffix->wildcard = 1;
		suffix->length = (unsigned char)length - 2;
	} else {
		suffix->wildcard = 0;
		suffix->length = (unsigned char)length;
	}

	suffix->nlabels = 1;

	for (dst = suffix->label_buf, src = rule; *src;) {
		if (*src == '.')
			suffix->nlabels++;
		/* rules may contain UTF-8 bytes: tolower() must not see negative values */
		*dst++ = (char) tolower((unsigned char) *src++);
	}
	*dst = 0;
}
|
|
||||||
|
|
||||||
/*
 * Open the file named fname for reading and load it via psl_load_fp().
 * Returns the new PSL context, or NULL if fname is NULL, the file cannot be
 * opened, or loading fails.
 */
psl_ctx_t *psl_load_file(const char *fname)
{
	FILE *fp;
	psl_ctx_t *psl = NULL;

	if (!fname)
		return NULL;

	if ((fp = fopen(fname, "r"))) {
		psl = psl_load_fp(fp);
		fclose(fp);
	}

	return psl;
}
|
|
||||||
|
|
||||||
psl_ctx_t *psl_load_fp(FILE *fp)
|
|
||||||
{
|
|
||||||
psl_ctx_t *psl;
|
|
||||||
_psl_entry_t suffix, *suffixp;
|
|
||||||
int nsuffixes = 0;
|
|
||||||
char buf[256], *linep, *p;
|
|
||||||
|
|
||||||
if (!fp)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
if (!(psl = calloc(1, sizeof(psl_ctx_t))))
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
|
|
||||||
* as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
|
|
||||||
*/
|
|
||||||
psl->suffixes = _vector_alloc(8*1024, _suffix_compare);
|
|
||||||
psl->suffix_exceptions = _vector_alloc(64, _suffix_compare);
|
|
||||||
|
|
||||||
while ((linep = fgets(buf, sizeof(buf), fp))) {
|
|
||||||
while (isspace(*linep)) linep++; /* ignore leading whitespace */
|
|
||||||
if (!*linep) continue; /* skip empty lines */
|
|
||||||
|
|
||||||
if (*linep == '/' && linep[1] == '/')
|
|
||||||
continue; /* skip comments */
|
|
||||||
|
|
||||||
/* parse suffix rule */
|
|
||||||
for (p = linep; *linep && !isspace(*linep);) linep++;
|
|
||||||
*linep = 0;
|
|
||||||
|
|
||||||
if (*p == '!') {
|
|
||||||
/* add to exceptions */
|
|
||||||
_suffix_init(&suffix, p + 1, linep - p - 1);
|
|
||||||
suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix));
|
|
||||||
} else {
|
|
||||||
_suffix_init(&suffix, p, linep - p);
|
|
||||||
suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (suffixp)
|
|
||||||
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
|
||||||
|
|
||||||
nsuffixes++;;
|
|
||||||
}
|
|
||||||
|
|
||||||
_vector_sort(psl->suffix_exceptions);
|
|
||||||
_vector_sort(psl->suffixes);
|
|
||||||
|
|
||||||
return psl;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *varname)
|
static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *varname)
|
||||||
{
|
{
|
||||||
|
@ -283,8 +62,8 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *
|
||||||
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version);
|
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version);
|
||||||
} while (0);
|
} while (0);
|
||||||
#else
|
#else
|
||||||
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with idn2) */\n");
|
fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n");
|
||||||
#endif
|
#endif /* WITH_LIBICU */
|
||||||
|
|
||||||
fprintf(fpout, "static _psl_entry_t %s[] = {\n", varname);
|
fprintf(fpout, "static _psl_entry_t %s[] = {\n", varname);
|
||||||
|
|
||||||
|
@ -298,15 +77,7 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *
|
||||||
fprintf(fpout, "};\n");
|
fprintf(fpout, "};\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void psl_free(psl_ctx_t *psl)
|
#ifndef WITH_LIBICU
|
||||||
{
|
|
||||||
if (psl) {
|
|
||||||
_vector_free(&psl->suffixes);
|
|
||||||
_vector_free(&psl->suffix_exceptions);
|
|
||||||
free(psl);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int _str_needs_encoding(const char *s)
|
static int _str_needs_encoding(const char *s)
|
||||||
{
|
{
|
||||||
while (*s > 0) s++;
|
while (*s > 0) s++;
|
||||||
|
@ -326,60 +97,6 @@ static void _add_punycode_if_needed(_psl_vector_t *v)
|
||||||
_psl_entry_t suffix, *suffixp;
|
_psl_entry_t suffix, *suffixp;
|
||||||
char lookupname[64] = "";
|
char lookupname[64] = "";
|
||||||
|
|
||||||
/* the following lines will have GPL3+ license issues */
|
|
||||||
/* char *asc = NULL;
|
|
||||||
int rc;
|
|
||||||
|
|
||||||
if ((rc = idn2_lookup_u8((uint8_t *)e->label_buf, (uint8_t **)&asc, 0)) == IDN2_OK) {
|
|
||||||
// fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, asc);
|
|
||||||
_suffix_init(&suffix, asc, strlen(asc));
|
|
||||||
suffix.wildcard = e->wildcard;
|
|
||||||
suffixp = _vector_get(v, _vector_add(v, &suffix));
|
|
||||||
suffixp->label = suffixp->e_label_buf; // set label to changed address
|
|
||||||
} else
|
|
||||||
fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc));
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifdef WITH_LIBICU
|
|
||||||
UIDNA *idna;
|
|
||||||
UErrorCode status = 0;
|
|
||||||
|
|
||||||
/* IDNA2003 punycode conversion */
|
|
||||||
/* destLen = uidna_toASCII(e->label_buf, (int32_t) strlen(e->label_buf), lookupname, (int32_t) sizeof(lookupname),
|
|
||||||
UIDNA_DEFAULT, NULL, &status);
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* IDNA2008 UTS#46 punycode conversion */
|
|
||||||
if ((idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status))) {
|
|
||||||
UChar utf16_dst[64], utf16_src[64];
|
|
||||||
int32_t utf16_src_length;
|
|
||||||
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
|
|
||||||
|
|
||||||
u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, (int32_t) strlen(e->label_buf), &status);
|
|
||||||
if (U_SUCCESS(status)) {
|
|
||||||
int32_t dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status);
|
|
||||||
if (U_SUCCESS(status)) {
|
|
||||||
u_strToUTF8(lookupname, (int32_t) sizeof(lookupname), NULL, utf16_dst, dst_length, &status);
|
|
||||||
if (U_SUCCESS(status)) {
|
|
||||||
if (strcmp(e->label_buf, lookupname)) {
|
|
||||||
/* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */
|
|
||||||
_suffix_init(&suffix, lookupname, strlen(lookupname));
|
|
||||||
suffix.wildcard = e->wildcard;
|
|
||||||
suffixp = _vector_get(v, _vector_add(v, &suffix));
|
|
||||||
suffixp->label = suffixp->label_buf; /* set label to changed address */
|
|
||||||
} /* else ignore */
|
|
||||||
} else
|
|
||||||
fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status);
|
|
||||||
} else
|
|
||||||
fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status);
|
|
||||||
} else
|
|
||||||
fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status);
|
|
||||||
|
|
||||||
uidna_close(idna);
|
|
||||||
} else
|
|
||||||
fprintf(stderr, "Failed to get UTS46 IDNA handle\n");
|
|
||||||
|
|
||||||
#else
|
|
||||||
/* this is much slower than the libidn2 API but should have no license issues */
|
/* this is much slower than the libidn2 API but should have no license issues */
|
||||||
FILE *pp;
|
FILE *pp;
|
||||||
char cmd[16 + sizeof(e->label_buf)];
|
char cmd[16 + sizeof(e->label_buf)];
|
||||||
|
@ -395,12 +112,13 @@ static void _add_punycode_if_needed(_psl_vector_t *v)
|
||||||
pclose(pp);
|
pclose(pp);
|
||||||
} else
|
} else
|
||||||
fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd);
|
fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_vector_sort(v);
|
_vector_sort(v);
|
||||||
}
|
}
|
||||||
|
#endif /* ! WITH_LIBICU */
|
||||||
|
|
||||||
#endif /* WITH_BUILTIN */
|
#endif /* WITH_BUILTIN */
|
||||||
|
|
||||||
int main(int argc, const char **argv)
|
int main(int argc, const char **argv)
|
||||||
|
@ -413,7 +131,7 @@ int main(int argc, const char **argv)
|
||||||
|
|
||||||
if (argc != 3) {
|
if (argc != 3) {
|
||||||
fprintf(stderr, "Usage: psl2c <infile> <outfile>\n");
|
fprintf(stderr, "Usage: psl2c <infile> <outfile>\n");
|
||||||
fprintf(stderr, " <infile> is the 'effective_tld_names.dat' (aka Public Suffix List)\n");
|
fprintf(stderr, " <infile> is the 'effective_tld_names.dat' (aka Public Suffix List), lowercase UTF-8 encoded\n");
|
||||||
fprintf(stderr, " <outfile> is the the C filename to be generated from <infile>\n");
|
fprintf(stderr, " <outfile> is the the C filename to be generated from <infile>\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -428,8 +146,12 @@ int main(int argc, const char **argv)
|
||||||
size_t cmdsize = 16 + strlen(argv[1]);
|
size_t cmdsize = 16 + strlen(argv[1]);
|
||||||
char *cmd = alloca(cmdsize), checksum[64] = "";
|
char *cmd = alloca(cmdsize), checksum[64] = "";
|
||||||
|
|
||||||
|
#ifndef WITH_LIBICU
|
||||||
|
/* If libicu is not configured, we still need to have punycode in our built-in data. */
|
||||||
|
/* Else the test suite fails. */
|
||||||
_add_punycode_if_needed(psl->suffixes);
|
_add_punycode_if_needed(psl->suffixes);
|
||||||
_add_punycode_if_needed(psl->suffix_exceptions);
|
_add_punycode_if_needed(psl->suffix_exceptions);
|
||||||
|
#endif
|
||||||
|
|
||||||
_print_psl_entries(fpout, psl->suffixes, "suffixes");
|
_print_psl_entries(fpout, psl->suffixes, "suffixes");
|
||||||
_print_psl_entries(fpout, psl->suffix_exceptions, "suffix_exceptions");
|
_print_psl_entries(fpout, psl->suffix_exceptions, "suffix_exceptions");
|
||||||
|
@ -458,8 +180,8 @@ int main(int argc, const char **argv)
|
||||||
psl_free(psl);
|
psl_free(psl);
|
||||||
#else
|
#else
|
||||||
if ((fpout = fopen(argv[2], "w"))) {
|
if ((fpout = fopen(argv[2], "w"))) {
|
||||||
fprintf(fpout, "static _psl_entry_t suffixes[0];\n");
|
fprintf(fpout, "static _psl_entry_t suffixes[1];\n");
|
||||||
fprintf(fpout, "static _psl_entry_t suffix_exceptions[0];\n");
|
fprintf(fpout, "static _psl_entry_t suffix_exceptions[1];\n");
|
||||||
fprintf(fpout, "static time_t _psl_file_time;\n");
|
fprintf(fpout, "static time_t _psl_file_time;\n");
|
||||||
fprintf(fpout, "static time_t _psl_compile_time;\n");
|
fprintf(fpout, "static time_t _psl_compile_time;\n");
|
||||||
fprintf(fpout, "static const char _psl_sha1_checksum[] = \"\";\n");
|
fprintf(fpout, "static const char _psl_sha1_checksum[] = \"\";\n");
|
||||||
|
|
|
@ -47,8 +47,8 @@ static int
|
||||||
|
|
||||||
static void test_psl(void)
|
static void test_psl(void)
|
||||||
{
|
{
|
||||||
/* punycode generation: idn 商标 */
|
/* punycode generation: idn ?? */
|
||||||
/* octal code generation: echo -n "商标" | od -b */
|
/* octal code generation: echo -n "??" | od -b */
|
||||||
static const struct test_data {
|
static const struct test_data {
|
||||||
const char
|
const char
|
||||||
*domain;
|
*domain;
|
||||||
|
@ -65,7 +65,7 @@ static void test_psl(void)
|
||||||
{ "abc.www.ck", 0 },
|
{ "abc.www.ck", 0 },
|
||||||
{ "xxx.ck", 1 },
|
{ "xxx.ck", 1 },
|
||||||
{ "www.xxx.ck", 0 },
|
{ "www.xxx.ck", 0 },
|
||||||
{ "\345\225\206\346\240\207", 1 }, /* xn--czr694b oder 商标 */
|
{ "\345\225\206\346\240\207", 1 }, /* xn--czr694b oder ?? */
|
||||||
{ "www.\345\225\206\346\240\207", 0 },
|
{ "www.\345\225\206\346\240\207", 0 },
|
||||||
{ "xn--czr694b", 1 },
|
{ "xn--czr694b", 1 },
|
||||||
{ "www.xn--czr694b", 0 },
|
{ "www.xn--czr694b", 0 },
|
||||||
|
|
|
@ -47,8 +47,8 @@ static int
|
||||||
|
|
||||||
static void test_psl(void)
|
static void test_psl(void)
|
||||||
{
|
{
|
||||||
/* punycode generation: idn 商标 */
|
/* punycode generation: idn ?? */
|
||||||
/* octal code generation: echo -n "商标" | od -b */
|
/* octal code generation: echo -n "??" | od -b */
|
||||||
static const struct test_data {
|
static const struct test_data {
|
||||||
const char
|
const char
|
||||||
*domain;
|
*domain;
|
||||||
|
@ -65,7 +65,7 @@ static void test_psl(void)
|
||||||
{ "abc.www.ck", 0 },
|
{ "abc.www.ck", 0 },
|
||||||
{ "xxx.ck", 1 },
|
{ "xxx.ck", 1 },
|
||||||
{ "www.xxx.ck", 0 },
|
{ "www.xxx.ck", 0 },
|
||||||
{ "\345\225\206\346\240\207", 1 }, /* xn--czr694b oder 商标 */
|
{ "\345\225\206\346\240\207", 1 }, /* xn--czr694b or ?? */
|
||||||
{ "www.\345\225\206\346\240\207", 0 },
|
{ "www.\345\225\206\346\240\207", 0 },
|
||||||
/* some special test follow ('name' and 'forgot.his.name' are public, but e.g. his.name is not) */
|
/* some special test follow ('name' and 'forgot.his.name' are public, but e.g. his.name is not) */
|
||||||
{ "name", 1 },
|
{ "name", 1 },
|
||||||
|
|
|
@ -38,6 +38,11 @@
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <alloca.h>
|
#include <alloca.h>
|
||||||
|
|
||||||
|
#ifdef WITH_LIBICU
|
||||||
|
# include <unicode/uversion.h>
|
||||||
|
# include <unicode/ustring.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <libpsl.h>
|
#include <libpsl.h>
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -47,32 +52,11 @@ static int
|
||||||
static void test(const psl_ctx_t *psl, const char *domain, const char *expected_result)
|
static void test(const psl_ctx_t *psl, const char *domain, const char *expected_result)
|
||||||
{
|
{
|
||||||
const char *result;
|
const char *result;
|
||||||
char lookupname[128];
|
char *lower;
|
||||||
|
|
||||||
/* check if there might be some utf-8 characters */
|
/* our test data is fixed to UTF-8 (english), so provide it here */
|
||||||
if (domain) {
|
if (psl_str_to_utf8lower(domain, "utf-8", "en", &lower) == PSL_SUCCESS)
|
||||||
int utf8;
|
domain = lower;
|
||||||
const char *p;
|
|
||||||
|
|
||||||
for (p = domain, utf8 = 0; *p && !utf8; p++)
|
|
||||||
if (*p < 0)
|
|
||||||
utf8 = 1;
|
|
||||||
|
|
||||||
/* if we found utf-8, make sure to convert domain correctly to lowercase */
|
|
||||||
/* does it work, if we are not in a utf-8 env ? */
|
|
||||||
if (utf8) {
|
|
||||||
FILE *pp;
|
|
||||||
size_t cmdsize = 48 + strlen(domain);
|
|
||||||
char *cmd = alloca(cmdsize);
|
|
||||||
|
|
||||||
snprintf(cmd, cmdsize, "echo -n '%s' | sed -e 's/./\\L\\0/g'", domain);
|
|
||||||
if ((pp = popen(cmd, "r"))) {
|
|
||||||
if (fscanf(pp, "%127s", lookupname) >= 1)
|
|
||||||
domain = lookupname;
|
|
||||||
pclose(pp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
result = psl_registrable_domain(psl, domain);
|
result = psl_registrable_domain(psl, domain);
|
||||||
|
|
||||||
|
@ -83,13 +67,15 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_
|
||||||
printf("psl_registrable_domain(%s)=%s (expected %s)\n",
|
printf("psl_registrable_domain(%s)=%s (expected %s)\n",
|
||||||
domain, result ? result : "NULL", expected_result ? expected_result : "NULL");
|
domain, result ? result : "NULL", expected_result ? expected_result : "NULL");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
free(lower);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_psl(void)
|
static void test_psl(void)
|
||||||
{
|
{
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
const psl_ctx_t *psl;
|
const psl_ctx_t *psl;
|
||||||
char buf[256], domain[128], expected_regdom[128], *p;
|
char buf[256], domain[128], expected_regdom[128];
|
||||||
|
|
||||||
psl = psl_builtin();
|
psl = psl_builtin();
|
||||||
|
|
||||||
|
@ -105,7 +91,9 @@ static void test_psl(void)
|
||||||
test(NULL, "com", NULL);
|
test(NULL, "com", NULL);
|
||||||
|
|
||||||
/* Norwegian with uppercase oe */
|
/* Norwegian with uppercase oe */
|
||||||
|
#ifdef WITH_LIBICU
|
||||||
test(psl, "www.\303\230yer.no", "www.\303\270yer.no");
|
test(psl, "www.\303\230yer.no", "www.\303\270yer.no");
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Norwegian with lowercase oe */
|
/* Norwegian with lowercase oe */
|
||||||
test(psl, "www.\303\270yer.no", "www.\303\270yer.no");
|
test(psl, "www.\303\270yer.no", "www.\303\270yer.no");
|
||||||
|
@ -126,11 +114,6 @@ static void test_psl(void)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* we have to lowercase the domain - the PSL API just takes lowercase */
|
|
||||||
for (p = domain; *p; p++)
|
|
||||||
if (*p > 0 && isupper(*p))
|
|
||||||
*p = tolower(*p);
|
|
||||||
|
|
||||||
if (!strcmp(expected_regdom, "null"))
|
if (!strcmp(expected_regdom, "null"))
|
||||||
test(psl, domain, NULL);
|
test(psl, domain, NULL);
|
||||||
else
|
else
|
||||||
|
|
54
tools/psl.c
54
tools/psl.c
|
@ -32,8 +32,16 @@
|
||||||
# include <config.h>
|
# include <config.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
|
#ifdef WITH_LIBICU
|
||||||
|
# include <unicode/uloc.h>
|
||||||
|
# include <unicode/ucnv.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <libpsl.h>
|
#include <libpsl.h>
|
||||||
|
|
||||||
static void usage(int err, FILE* f)
|
static void usage(int err, FILE* f)
|
||||||
|
@ -71,6 +79,10 @@ int main(int argc, const char *const *argv)
|
||||||
const char *const *arg, *psl_file = NULL, *cookie_domain = NULL;
|
const char *const *arg, *psl_file = NULL, *cookie_domain = NULL;
|
||||||
psl_ctx_t *psl = (psl_ctx_t *) psl_builtin();
|
psl_ctx_t *psl = (psl_ctx_t *) psl_builtin();
|
||||||
|
|
||||||
|
/* set current locale according to the environment variables */
|
||||||
|
#include <locale.h>
|
||||||
|
setlocale(LC_ALL, "");
|
||||||
|
|
||||||
for (arg = argv + 1; arg < argv + argc; arg++) {
|
for (arg = argv + 1; arg < argv + argc; arg++) {
|
||||||
if (!strncmp(*arg, "--", 2)) {
|
if (!strncmp(*arg, "--", 2)) {
|
||||||
if (!strcmp(*arg, "--is-public-suffix"))
|
if (!strcmp(*arg, "--is-public-suffix"))
|
||||||
|
@ -135,8 +147,41 @@ int main(int argc, const char *const *argv)
|
||||||
exit(2);
|
exit(2);
|
||||||
}
|
}
|
||||||
if (arg >= argv + argc) {
|
if (arg >= argv + argc) {
|
||||||
fprintf(stderr, "No domains given - aborting\n");
|
char buf[256], *domain, *lower;
|
||||||
exit(3);
|
size_t len;
|
||||||
|
psl_error_t rc;
|
||||||
|
|
||||||
|
/* read URLs from STDIN */
|
||||||
|
while (fgets(buf, sizeof(buf), stdin)) {
|
||||||
|
for (domain = buf; isspace(*domain); domain++); /* skip leading spaces */
|
||||||
|
if (*domain == '#' || !*domain) continue; /* skip empty lines and comments */
|
||||||
|
for (len = strlen(domain); len && isspace(domain[len - 1]); len--); /* skip trailing spaces */
|
||||||
|
domain[len] = 0;
|
||||||
|
|
||||||
|
if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &lower)) != PSL_SUCCESS)
|
||||||
|
fprintf(stderr, "%s: Failed to convert to lowercase UTF-8 (%d)\n", domain, rc);
|
||||||
|
else if (mode == 1)
|
||||||
|
printf("%s: %d (%s)\n", domain, psl_is_public_suffix(psl, lower), lower);
|
||||||
|
else if (mode == 2)
|
||||||
|
printf("%s: %s\n", domain, psl_unregistrable_domain(psl, lower));
|
||||||
|
else if (mode == 3)
|
||||||
|
printf("%s: %s\n", domain, psl_registrable_domain(psl, lower));
|
||||||
|
else if (mode == 4) {
|
||||||
|
char *cookie_domain_lower;
|
||||||
|
|
||||||
|
if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &cookie_domain_lower)) != PSL_SUCCESS)
|
||||||
|
fprintf(stderr, "%s: Failed to convert cookie domain '%s' to lowercase UTF-8 (%d)\n", domain, cookie_domain, rc);
|
||||||
|
else
|
||||||
|
printf("%s: %d\n", domain, psl_is_cookie_domain_acceptable(psl, lower, cookie_domain));
|
||||||
|
|
||||||
|
free(cookie_domain_lower);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(lower);
|
||||||
|
}
|
||||||
|
|
||||||
|
psl_free(psl);
|
||||||
|
exit(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -172,6 +217,11 @@ int main(int argc, const char *const *argv)
|
||||||
printf("builtin compile time: %ld (%s)\n", psl_builtin_compile_time(), time2str(psl_builtin_compile_time()));
|
printf("builtin compile time: %ld (%s)\n", psl_builtin_compile_time(), time2str(psl_builtin_compile_time()));
|
||||||
printf("builtin file time: %ld (%s)\n", psl_builtin_file_time(), time2str(psl_builtin_file_time()));
|
printf("builtin file time: %ld (%s)\n", psl_builtin_file_time(), time2str(psl_builtin_file_time()));
|
||||||
printf("builtin SHA1 file hash: %s\n", psl_builtin_sha1sum());
|
printf("builtin SHA1 file hash: %s\n", psl_builtin_sha1sum());
|
||||||
|
|
||||||
|
#ifdef WITH_LIBICU
|
||||||
|
printf("uloc_getDefault=%s\n", uloc_getDefault());
|
||||||
|
printf("ucnv_getDefaultName=%s\n", ucnv_getDefaultName());
|
||||||
|
#endif
|
||||||
} else
|
} else
|
||||||
printf("No builtin PSL data available\n");
|
printf("No builtin PSL data available\n");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue