Merge branch 'master' into debian

This commit is contained in:
Daniel Kahn Gillmor 2014-06-23 12:39:47 -04:00
commit df2e65b9d2
13 changed files with 784 additions and 418 deletions

View File

@ -3,8 +3,15 @@ compiler:
- gcc
- clang
# Change this to your needs
script: ./autogen.sh && ./configure --enable-gtk-doc && make -j4 && make check -j4 && make distcheck
script:
- ./autogen.sh
- ./configure && make -j4 && make check -j4
- ./configure --without-libicu && make clean && make -j4 && make check -j4
- ./configure --disable-builtin && make clean && make -j4 && make check -j4
- ./configure --disable-builtin --without-libicu && make clean && make -j4 && make check -j4
- ./configure --enable-gtk-doc && make -j4 && make check -j4
- make distcheck
before_install:
- apt-cache search libicu | grep icu
- sudo apt-get -qq update
- sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu-dev
- sudo apt-get -q install autoconf automake autopoint libtool gtk-doc-tools gettext idn2 libidn2-0 libidn2-0-dev libicu48 libicu-dev

View File

@ -8,4 +8,6 @@ Please drop me a note if you feel you should have
been mentioned here.
Tim Ruehsen (Implementation of libpsl)
Daniel Kahn Gillmor (Discussion, Ideas, Organization)
Daniel Kahn Gillmor (Discussion, Ideas, Organization, Code)
Daniel Stenberg (Discussion, Ideas)
Darshit Shah (Patching Wget to work with libpsl)

9
NEWS
View File

@ -1,5 +1,14 @@
Copyright (C) 2014 Tim Ruehsen
23.06.2014 Release V0.4.0
* depend on libicu for punycode, utf-8 and lowercase conversions
* added function psl_str_to_utf8lower()
* fixed locale issues
* introducing psl_error_t for error codes + defines
* removed redundant code from psl2c.c
* updated docs
* psl utility reads from stdin if no argument specified
10.06.2014 Release V0.3.1
* link psl utility dynamically
* fix output of psl_filename()

View File

@ -1,5 +1,5 @@
AC_INIT([libpsl], [0.3.1], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl])
AC_INIT([libpsl], [0.4.0], [tim.ruehsen@gmx.de], [libpsl], [http://github.com/rockdaboot/libpsl])
AC_PREREQ([2.59])
AM_INIT_AUTOMAKE([1.10 -Wall no-define])
@ -62,10 +62,35 @@ AS_IF([ test "$enable_man" != no ], [
# 3. If the library source code has changed at all since the last update, then increment revision (c:r:a becomes c:r+1:a).
# 4. If any interfaces have been added, removed, or changed since the last update, increment current, and set revision to 0.
# 5. If any interfaces have been added since the last public release, then increment age.
# 6. If any interfaces have been removed or changed since the last public release, then set age to 0.
AC_SUBST([LIBPSL_SO_VERSION], [1:1:1])
# 6. If any existing interfaces have been removed or changed since the last public release, then set age to 0.
AC_SUBST([LIBPSL_SO_VERSION], [2:0:2])
AC_SUBST([LIBPSL_VERSION], $VERSION)
# Check for libicu
# ICU is probed by default and when --with-libicu is given explicitly;
# only --without-libicu skips the probe and leaves HAVE_LIBICU=no.
HAVE_LIBICU=no
AC_ARG_WITH(libicu,
  AS_HELP_STRING([--without-libicu], [build libpsl without IDNA/Punycode support]),
  [],
  [with_libicu=yes])
AS_IF([test "x$with_libicu" != xno],
[
  # using pkg-config won't work on older systems like Ubuntu 12.04 LTS Server Edition 64bit
  OLDLIBS=$LIBS
  LIBS="-licuuc $LIBS"
  AC_MSG_CHECKING([for ICU unicode library])
  AC_LINK_IFELSE(
    [AC_LANG_PROGRAM(
      [[#include <unicode/ustring.h>]],
      [[u_strToUTF8(NULL, 0, NULL, NULL, 0, NULL);]])],
    [HAVE_LIBICU=yes
     AC_MSG_RESULT([yes])
     AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])],
    [LIBS=$OLDLIBS
     AC_MSG_RESULT([no])
     AC_MSG_ERROR([no working ICU unicode library was found])])
#  AC_SEARCH_LIBS(uidna_close, icuuc,
#    [HAVE_LIBICU=yes; AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])],
#    [AC_MSG_ERROR(*** libicu was not found. Aborting.)],
#    -licudata )
#  PKG_CHECK_MODULES(LIBICU, [icu-uc],
#    [HAVE_LIBICU=yes; AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])])
])
# Check for enable/disable builtin PSL data
AC_ARG_ENABLE(builtin,
AS_HELP_STRING([--disable-builtin], [do not compile PSL data into library]),
@ -74,10 +99,11 @@ AC_ARG_ENABLE(builtin,
], [
enable_builtin=yes
AC_DEFINE([WITH_BUILTIN], [1], [compile PSL data into library])
PKG_CHECK_MODULES(LIBICU, [icu-uc],
[AC_DEFINE([WITH_LIBICU], [1], [generate PSL data with IDNA2008 UTS#46 punycode])],
[AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2'.))])
AS_IF([test $HAVE_LIBICU != yes],
[
# Check for idn2 fallback to generate punycode
AC_CHECK_PROG(HAVE_IDN2, idn2, yes, AC_MSG_ERROR(Cannot find required tool 'idn2' as fallback.))
])
])
AM_CONDITIONAL([WITH_BUILTIN], [test $enable_builtin = yes])
@ -102,16 +128,14 @@ fi
# Check for custom PSL file
AC_ARG_WITH(psl-file,
AC_HELP_STRING([--with-psl-file=[PATH]],
[path to PSL file]),
AC_HELP_STRING([--with-psl-file=[PATH]], [path to PSL file]),
PSL_FILE=$withval,
PSL_FILE="\$(top_srcdir)/data/effective_tld_names.dat")
AC_SUBST(PSL_FILE)
# Check for custom PSL test file
AC_ARG_WITH(psl-testfile,
AC_HELP_STRING([--with-psl-testfile=[PATH]],
[path to PSL test file]),
AC_HELP_STRING([--with-psl-testfile=[PATH]], [path to PSL test file]),
PSL_TESTFILE=$withval,
PSL_TESTFILE="\$(top_srcdir)/data/test_psl.txt")
AC_SUBST(PSL_TESTFILE)
@ -138,6 +162,7 @@ AC_MSG_NOTICE([Summary of build options:
Compiler: ${CC}
CFlags: ${CFLAGS} ${CPPFLAGS}
LDFlags: ${LDFLAGS}
ICU: ${HAVE_LIBICU}
Builtin PSL: ${enable_builtin}
PSL File: ${PSL_FILE}
PSL Test File: ${PSL_TESTFILE}

View File

@ -180,6 +180,7 @@ ar
com.ar
edu.ar
gob.ar
gov.ar
int.ar
mil.ar
net.ar
@ -222,7 +223,6 @@ edu.au
gov.au
asn.au
id.au
csiro.au
// Historic 2LDs (closed to new registration, but sites still exist)
info.au
conf.au
@ -245,7 +245,7 @@ sa.edu.au
tas.edu.au
vic.edu.au
wa.edu.au
act.gov.au
// act.gov.au Bug 984824 - Removed at request of Greg Tankard
// nsw.gov.au Bug 547985 - Removed at request of <Shae.Donelan@services.nsw.gov.au>
// nt.gov.au Bug 940478 - Removed at request of Greg Connors <Greg.Connors@nt.gov.au>
qld.gov.au
@ -292,6 +292,7 @@ rs.ba
// bb : http://en.wikipedia.org/wiki/.bb
bb
biz.bb
co.bb
com.bb
edu.bb
gov.bb
@ -299,6 +300,7 @@ info.bb
net.bb
org.bb
store.bb
tv.bb
// bd : http://en.wikipedia.org/wiki/.bd
*.bd
@ -596,9 +598,12 @@ gob.cl
co.cl
mil.cl
// cm : http://en.wikipedia.org/wiki/.cm
// cm : http://en.wikipedia.org/wiki/.cm plus bug 981927
cm
co.cm
com.cm
gov.cm
net.cm
// cn : http://en.wikipedia.org/wiki/.cn
// Submitted by registry <tanyaling@cnnic.cn> 2008-06-11
@ -5146,7 +5151,24 @@ com.nr
nu
// nz : http://en.wikipedia.org/wiki/.nz
*.nz
// Confirmed by registry <jay@nzrs.net.nz> 2014-05-19
nz
ac.nz
co.nz
cri.nz
geek.nz
gen.nz
govt.nz
health.nz
iwi.nz
kiwi.nz
maori.nz
mil.nz
māori.nz
net.nz
org.nz
parliament.nz
school.nz
// om : http://en.wikipedia.org/wiki/.om
om
@ -5613,7 +5635,6 @@ oryol.ru
palana.ru
penza.ru
perm.ru
pskov.ru
ptz.ru
rnd.ru
ryazan.ru
@ -6150,19 +6171,19 @@ com.ug
org.ug
// uk : http://en.wikipedia.org/wiki/.uk
// Submitted by registry <noc@nominet.org.uk> 2012-10-02
// and tweaked by us pending further consultation.
*.uk
// Submitted by registry <Michael.Daly@nominet.org.uk>
uk
ac.uk
co.uk
gov.uk
ltd.uk
me.uk
net.uk
nhs.uk
org.uk
plc.uk
police.uk
*.sch.uk
!bl.uk
!british-library.uk
!jet.uk
!mod.uk
!national-library-scotland.uk
!nel.uk
!nic.uk
!nls.uk
!parliament.uk
// us : http://en.wikipedia.org/wiki/.us
us
@ -6440,16 +6461,24 @@ edu.vc
// ve : https://registro.nic.ve/
// Confirmed by registry 2012-10-04
// Updated 2014-05-20 - Bug 940478
ve
arts.ve
co.ve
com.ve
e12.ve
edu.ve
firm.ve
gob.ve
gov.ve
info.ve
int.ve
mil.ve
net.ve
org.ve
rec.ve
store.ve
tec.ve
web.ve
// vg : http://en.wikipedia.org/wiki/.vg
@ -6482,8 +6511,12 @@ pro.vn
health.vn
// vu : http://en.wikipedia.org/wiki/.vu
// list of 2nd level tlds ?
// http://www.vunic.vu/
vu
com.vu
edu.vu
net.vu
org.vu
// wf : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf
wf
@ -6609,7 +6642,14 @@ yt
فلسطين
// xn--90a3ac ("srb" Cyrillic) : RS
// http://www.rnids.rs/en/the-.срб-domain
срб
пр.срб
орг.срб
обр.срб
од.срб
упр.срб
ак.срб
// xn--p1ai ("rf" Russian-Cyrillic) : RU
// http://www.cctld.ru/en/docs/rulesrf.php
@ -7654,6 +7694,299 @@ sca
// reise : 2014-03-13 dotreise GmbH
reise
// accountants : 2014-03-20 Knob Town, LLC
accountants
// clinic : 2014-03-20 Goose Park, LLC
clinic
// versicherung : 2014-03-20 dotversicherung-registry GmbH
versicherung
// top : 2014-03-20 Jiangsu Bangning Science & Technology Co.,Ltd.
top
// furniture : 2014-03-20 Lone Fields, LLC
furniture
// dental : 2014-03-20 Tin Birch, LLC
dental
// fund : 2014-03-20 John Castle, LLC
fund
// creditcard : 2014-03-20 Binky Frostbite, LLC
creditcard
// insure : 2014-03-20 Pioneer Willow, LLC
insure
// audio : 2014-03-20 Uniregistry, Corp.
audio
// claims : 2014-03-20 Black Corner, LLC
claims
// loans : 2014-03-20 June Woods, LLC
loans
// auction : 2014-03-20 Sand Galley, LLC
auction
// attorney : 2014-03-20 Victor North, LLC
attorney
// finance : 2014-03-20 Cotton Cypress, LLC
finance
// investments : 2014-03-20 Holly Glen, LLC
investments
// juegos : 2014-03-20 Uniregistry, Corp.
juegos
// dentist : 2014-03-20 Outer Lake, LLC
dentist
// lds : 2014-03-20 IRI Domain Management, LLC
lds
// lawyer : 2014-03-20 Atomic Station, LLC
lawyer
// surgery : 2014-03-20 Tin Avenue, LLC
surgery
// gratis : 2014-03-20 Pioneer Tigers, LLC
gratis
// software : 2014-03-20 Over Birch, LLC
software
// mortgage : 2014-03-20 Outer Gardens, LLC
mortgage
// republican : 2014-03-20 United TLD Holdco Ltd.
republican
// credit : 2014-03-20 Snow Shadow, LLC
credit
// tax : 2014-03-20 Storm Orchard, LLC
tax
// africa : 2014-03-24 ZA Central Registry NPC trading as Registry.Africa
africa
// joburg : 2014-03-24 ZA Central Registry NPC trading as ZA Central Registry
joburg
// durban : 2014-03-24 ZA Central Registry NPC trading as ZA Central Registry
durban
// capetown : 2014-03-24 ZA Central Registry NPC trading as ZA Central Registry
capetown
// sap : 2014-03-27 SAP AG
sap
// datsun : 2014-03-27 NISSAN MOTOR CO., LTD.
datsun
// infiniti : 2014-03-27 NISSAN MOTOR CO., LTD.
infiniti
// firmdale : 2014-03-27 Firmdale Holdings Limited
firmdale
// organic : 2014-03-27 Afilias Limited
organic
// nissan : 2014-03-27 NISSAN MOTOR CO., LTD.
nissan
// website : 2014-04-03 DotWebsite Inc.
website
// space : 2014-04-03 DotSpace Inc.
space
// schmidt : 2014-04-03 SALM S.A.S.
schmidt
// cuisinella : 2014-04-03 SALM S.A.S.
cuisinella
// samsung : 2014-04-03 SAMSUNG SDS CO., LTD
samsung
// crs : 2014-04-03 Federated Co operatives Limited
crs
// doosan : 2014-04-03 Doosan Corporation
doosan
// press : 2014-04-03 DotPress Inc.
press
// emerck : 2014-04-03 Merck KGaA
emerck
// erni : 2014-04-03 ERNI Group Holding AG
erni
// direct : 2014-04-10 Half Trail, LLC
direct
// yandex : 2014-04-10 YANDEX, LLC
yandex
// lotto : 2014-04-10 Afilias Limited
lotto
// toshiba : 2014-04-10 TOSHIBA Corporation
toshiba
// bauhaus : 2014-04-17 Werkhaus GmbH
bauhaus
// host : 2014-04-17 DotHost Inc.
host
// ltda : 2014-04-17 DOMAIN ROBOT SERVICOS DE HOSPEDAGEM NA INTERNET LTDA
ltda
// global : 2014-04-17 Dot GLOBAL AS
global
// abogado : 2014-04-24 Top Level Domain Holdings Limited
abogado
// place : 2014-04-24 Snow Galley, LLC
place
// tirol : 2014-04-24 punkt Tirol GmbH
tirol
// gmx : 2014-04-24 1&1 Mail & Media GmbH
gmx
// tatar : 2014-04-24 Limited Liability Company "Coordination Center of Regional Domain of Tatarstan Republic"
tatar
// scholarships : 2014-04-24 Scholarships.com, LLC
scholarships
// eurovision : 2014-04-24 European Broadcasting Union (EBU)
eurovision
// wedding : 2014-04-24 Top Level Domain Holdings Limited
wedding
// active : 2014-05-01 The Active Network, Inc
active
// madrid : 2014-05-01 Comunidad de Madrid
madrid
// youtube : 2014-05-01 Charleston Road Registry Inc.
youtube
// sharp : 2014-05-01 Sharp Corporation
sharp
// uol : 2014-05-01 UBN INTERNET LTDA.
uol
// physio : 2014-05-01 PhysBiz Pty Ltd
physio
// gmail : 2014-05-01 Charleston Road Registry Inc.
gmail
// channel : 2014-05-08 Charleston Road Registry Inc.
channel
// fly : 2014-05-08 Charleston Road Registry Inc.
fly
// zip : 2014-05-08 Charleston Road Registry Inc.
zip
// esq : 2014-05-08 Charleston Road Registry Inc.
esq
// rsvp : 2014-05-08 Charleston Road Registry Inc.
rsvp
// wales : 2014-05-08 Nominet UK
wales
// cymru : 2014-05-08 Nominet UK
cymru
// green : 2014-05-08 Afilias Limited
green
// lgbt : 2014-05-08 Afilias Limited
lgbt
// xn--hxt814e : 2014-05-15 Zodiac Libra Limited
网店
// cancerresearch : 2014-05-15 Australian Cancer Research Foundation
cancerresearch
// everbank : 2014-05-15 EverBank
everbank
// frl : 2014-05-15 FRLregistry B.V.
frl
// property : 2014-05-22 Uniregistry, Corp.
property
// forsale : 2014-05-22 Sea Oaks, LLC
forsale
// seat : 2014-05-22 SEAT, S.A. (Sociedad Unipersonal)
seat
// deals : 2014-05-22 Sand Sunset, LLC
deals
// nra : 2014-05-22 NRA Holdings Company, INC.
nra
// xn--fjq720a : 2014-05-22 Will Bloom, LLC
娱乐
// realtor : 2014-05-29 Real Estate Domains LLC
realtor
// bnpparibas : 2014-05-29 BNP Paribas
bnpparibas
// melbourne : 2014-05-29 The Crown in right of the State of Victoria, represented by its Department of State Development, Business and Innovation
melbourne
// hosting : 2014-05-29 Uniregistry, Corp.
hosting
// yoga : 2014-05-29 Top Level Domain Holdings Limited
yoga
// city : 2014-05-29 Snow Sky, LLC
city
// bond : 2014-06-05 Bond University Limited
bond
// click : 2014-06-05 Uniregistry, Corp.
click
// cern : 2014-06-05 European Organization for Nuclear Research ("CERN")
cern
// ===END ICANN DOMAINS===
// ===BEGIN PRIVATE DOMAINS===
@ -7663,20 +7996,22 @@ reise
cloudfront.net
// Amazon Elastic Compute Cloud: https://aws.amazon.com/ec2/
// Submitted by Osman Surkatty <osmans@amazon.com> 2013-04-02
compute.amazonaws.com
us-east-1.amazonaws.com
compute-1.amazonaws.com
z-1.compute-1.amazonaws.com
z-2.compute-1.amazonaws.com
// Submitted by Osman Surkatty <osmans@amazon.com> 2014-05-20
ap-northeast-1.compute.amazonaws.com
ap-southeast-1.compute.amazonaws.com
ap-southeast-2.compute.amazonaws.com
cn-north-1.compute.amazonaws.cn
compute.amazonaws.cn
compute.amazonaws.com
compute-1.amazonaws.com
eu-west-1.compute.amazonaws.com
sa-east-1.compute.amazonaws.com
us-east-1.amazonaws.com
us-gov-west-1.compute.amazonaws.com
us-west-1.compute.amazonaws.com
us-west-2.compute.amazonaws.com
z-1.compute-1.amazonaws.com
z-2.compute-1.amazonaws.com
// Amazon Elastic Beanstalk : https://aws.amazon.com/elasticbeanstalk/
// Submitted by Adam Stein <astein@amazon.com> 2013-04-02
@ -7719,6 +8054,7 @@ ar.com
br.com
cn.com
com.de
com.se
de.com
eu.com
gb.com
@ -8074,6 +8410,10 @@ global.ssl.fastly.net
a.prod.fastly.net
global.prod.fastly.net
// Firebase, Inc.
// Submitted by Chris Raynor <chris@firebase.com> 2014-01-21
firebaseapp.com
// GitHub, Inc.
// Submitted by Ben Toews <btoews@github.com> 2014-02-06
github.io
@ -8153,10 +8493,18 @@ azurewebsites.net
azure-mobile.net
cloudapp.net
// NFSN, Inc. : https://www.NearlyFreeSpeech.NET/
// Submitted by Jeff Wheelhouse <support@nearlyfreespeech.net> 2014-02-02
nfshost.com
// NYC.mn : http://www.information.nyc.mn
// Submitted by Matthew Brown <mattbrown@nyc.mn> 2013-03-11
nyc.mn
// One Fold Media : http://www.onefoldmedia.com/
// Submitted by Eddie Jones <eddie@onefoldmedia.com> 2014-06-10
nid.io
// Opera Software, A.S.A.
// Submitted by Yngve Pettersen <yngve@opera.com> 2009-11-26
operaunite.com

View File

@ -1,6 +1,7 @@
<SECTION>
<FILE>libpsl</FILE>
<TITLE>Public Suffix List functions</TITLE>
psl_error_t
psl_ctx_t
psl_load_file
psl_load_fp
@ -17,4 +18,5 @@ psl_builtin_sha1sum
psl_builtin_filename
psl_is_cookie_domain_acceptable
psl_get_version
psl_str_to_utf8lower
</SECTION>

View File

@ -38,6 +38,27 @@
extern "C" {
#endif
/**
* psl_error_t:
* @PSL_SUCCESS: Successful return.
* @PSL_ERR_INVALID_ARG: Invalid argument.
* @PSL_ERR_CONVERTER: Failed to open libicu utf-16 converter
* @PSL_ERR_TO_UTF16: Failed to convert to utf-16.
* @PSL_ERR_TO_LOWER: Failed to convert utf-16 to lowercase.
* @PSL_ERR_TO_UTF8: Failed to convert utf-16 to utf-8.
*
* Return codes for PSL functions.
* Negative return codes mean failure.
* Positive values are reserved for non-error return codes.
*/
typedef enum {
	/* successful return */
	PSL_SUCCESS = 0,

	/* invalid argument */
	PSL_ERR_INVALID_ARG = -1,

	/* failed to open libicu utf-16 converter */
	PSL_ERR_CONVERTER = -2,

	/* failed to convert to utf-16 */
	PSL_ERR_TO_UTF16 = -3,

	/* failed to convert utf-16 to lowercase */
	PSL_ERR_TO_LOWER = -4,

	/* failed to convert utf-16 to utf-8 */
	PSL_ERR_TO_UTF8 = -5
} psl_error_t;
typedef struct _psl_ctx_st psl_ctx_t;
@ -65,6 +86,9 @@ const char *
/* returns the shortest possible registrable domain part or NULL if domain is not registrable at all */
const char *
psl_registrable_domain(const psl_ctx_t *psl, const char *domain);
/* convert a string into lowercase UTF-8 */
int
psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower);
/* does not include exceptions */
int
psl_suffix_count(const psl_ctx_t *psl);

286
src/psl.c
View File

@ -49,9 +49,20 @@
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <alloca.h>
#ifdef WITH_LIBICU
# include <unicode/uversion.h>
# include <unicode/ustring.h>
# include <unicode/uidna.h>
# include <unicode/ucnv.h>
#endif
#include <libpsl.h>
/* number of elements within an array */
#define countof(a) (sizeof(a)/sizeof(*(a)))
/**
* SECTION:libpsl
* @short_description: Public Suffix List library functions
@ -95,7 +106,17 @@ struct _psl_ctx_st {
};
/* include the PSL data compiled by 'psl2c' */
#include "suffixes.c"
#ifndef _LIBPSL_INCLUDED_BY_PSL2C
# include "suffixes.c"
#else
/* if this source file is included by psl2c.c, provide empty builtin data */
static _psl_entry_t suffixes[1];
static _psl_entry_t suffix_exceptions[1];
static time_t _psl_file_time;
static time_t _psl_compile_time;
static const char _psl_sha1_checksum[] = "";
static const char _psl_filename[] = "";
#endif
/* references to this PSL will result in lookups to built-in data */
static const psl_ctx_t
@ -239,39 +260,19 @@ static int _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
for (dst = suffix->label_buf, src = rule; *src;) {
if (*src == '.')
suffix->nlabels++;
*dst++ = tolower(*src++);
*dst++ = *src++;
}
*dst = 0;
return 0;
}
/**
* psl_is_public_suffix:
* @psl: PSL context
* @domain: Domain string
*
* This function checks if @domain is a public suffix by the means of the
* [Mozilla Public Suffix List](http://publicsuffix.org).
*
* For cookie domain checking see psl_is_cookie_domain_acceptable().
*
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
* psl_builtin().
*
* Returns: 1 if domain is a public suffix, 0 if not.
*
* Since: 0.1
*/
int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
{
_psl_entry_t suffix, *rule;
const char *p, *label_bak;
unsigned short length_bak;
if (!psl || !domain)
return 1;
/* this function should be called without leading dots, just make sure */
suffix.label = domain + (*domain == '.');
suffix.length = strlen(suffix.label);
@ -340,6 +341,34 @@ int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
return 0;
}
/**
* psl_is_public_suffix:
* @psl: PSL context
* @domain: Domain string
*
* This function checks if @domain is a public suffix by the means of the
* [Mozilla Public Suffix List](http://publicsuffix.org).
*
* For cookie domain checking see psl_is_cookie_domain_acceptable().
*
* International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode).
* Other encodings result in unexpected behavior.
*
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
* psl_builtin().
*
* Returns: 1 if domain is a public suffix, 0 if not.
*
* Since: 0.1
*/
int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
{
	/* a missing context or domain is reported as "public suffix" (1) without any lookup */
	return (psl && domain) ? _psl_is_public_suffix(psl, domain) : 1;
}
/**
* psl_unregistrable_domain:
* @psl: PSL context
@ -348,6 +377,9 @@ int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
* This function finds the longest public suffix part of @domain by the means
* of the [Mozilla Public Suffix List](http://publicsuffix.org).
*
* International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode).
* Other encodings result in unexpected behavior.
*
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
* psl_builtin().
*
@ -366,7 +398,7 @@ const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
*/
while (!psl_is_public_suffix(psl, domain)) {
while (!_psl_is_public_suffix(psl, domain)) {
if ((domain = strchr(domain, '.')))
domain++;
else
@ -384,6 +416,9 @@ const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
* This function finds the shortest private suffix part of @domain by the means
* of the [Mozilla Public Suffix List](http://publicsuffix.org).
*
* International @domain names have to be either in lowercase UTF-8 or in ASCII form (punycode).
* Other encodings result in unexpected behavior.
*
* @psl is a context returned by either psl_load_file(), psl_load_fp() or
* psl_builtin().
*
@ -404,7 +439,7 @@ const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
* 'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
*/
while (!psl_is_public_suffix(psl, domain)) {
while (!_psl_is_public_suffix(psl, domain)) {
if ((p = strchr(domain, '.'))) {
regdom = domain;
domain = p + 1;
@ -415,6 +450,51 @@ const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
return regdom;
}
/*
 * Check whether the NUL-terminated string @s consists of 7-bit ASCII bytes only.
 *
 * The bytes are inspected as unsigned values, so the result does not depend
 * on whether plain 'char' is signed or unsigned on the target platform.
 *
 * Returns 1 if every byte is below 0x80 (this includes the empty string), 0 otherwise.
 */
static int _str_is_ascii(const char *s)
{
	const unsigned char *p = (const unsigned char *) s;

	/* stop at the terminating NUL or at the first byte with the high bit set */
	while (*p && *p < 0x80)
		p++;

	return !*p;
}
#ifdef WITH_LIBICU
/*
 * Add the ASCII (punycode) form of the non-ASCII rule @e to the vector @v.
 *
 * @idna: IDNA handle used for the conversion; nothing is done if it is NULL
 * @v:    vector that receives the additional entry
 * @e:    entry whose label_buf holds the UTF-8 rule text
 *
 * Nothing is added if the rule is pure ASCII, if any conversion step fails,
 * if the converted name equals the original, or if the converted name is
 * rejected by _suffix_init() (e.g. because it does not fit into an entry).
 * The wildcard flag of @e is carried over to the new entry.
 */
static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t *e)
{
	_psl_entry_t suffix, *suffixp;
	char lookupname[128] = "";
	UErrorCode status = 0;
	UIDNAInfo info = UIDNA_INFO_INITIALIZER;
	UChar utf16_dst[128], utf16_src[128];
	int32_t utf16_src_length, utf16_dst_length;

	if (!idna || _str_is_ascii(e->label_buf))
		return;

	/* IDNA2008 UTS#46 punycode conversion */
	u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, -1, &status);
	if (!U_SUCCESS(status))
		return; /* failed to convert UTF-8 to UTF-16 */

	utf16_dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status);
	if (!U_SUCCESS(status))
		return; /* failed to convert to ASCII */

	/* keep the last byte of the zero-initialized buffer untouched, so the result is always NUL-terminated */
	u_strToUTF8(lookupname, sizeof(lookupname) - 1, NULL, utf16_dst, utf16_dst_length, &status);
	if (!U_SUCCESS(status))
		return; /* failed to convert UTF-16 to UTF-8 */

	if (!strcmp(e->label_buf, lookupname))
		return; /* conversion did not change anything */

	/* skip names that _suffix_init() rejects instead of adding a half-initialized entry */
	if (_suffix_init(&suffix, lookupname, strlen(lookupname)) != 0)
		return;

	suffix.wildcard = e->wildcard;

	if ((suffixp = _vector_get(v, _vector_add(v, &suffix))))
		suffixp->label = suffixp->label_buf; /* set label to changed address */
}
#endif
/**
* psl_load_file:
* @fname: Name of PSL file
@ -422,13 +502,7 @@ const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
* This function loads the public suffixes file named @fname.
* To free the allocated resources, call psl_free().
*
* If you want to use punycode representations for functions like psl_is_public_suffix(),
* these have to exist as entries within @fname. This is a design decision to not pull in
* dependencies for UTF-8 case-handling and IDNA libraries.
*
* On the contrary, the builtin data already contains punycode entries.
*
* Have a look into psl2c.c for example code on how to convert UTF-8 to lowercase and to punycode.
* The suffixes are expected to be lowercase UTF-8 encoded if they are international.
*
* Returns: Pointer to a PSL context or %NULL on failure.
*
@ -457,7 +531,7 @@ psl_ctx_t *psl_load_file(const char *fname)
* This function loads the public suffixes from a FILE pointer.
* To free the allocated resources, call psl_free().
*
* Have a look at psl_load_fp() for punycode considerations.
* The suffixes are expected to be lowercase UTF-8 encoded if they are international.
*
* Returns: Pointer to a PSL context or %NULL on failure.
*
@ -467,8 +541,11 @@ psl_ctx_t *psl_load_fp(FILE *fp)
{
psl_ctx_t *psl;
_psl_entry_t suffix, *suffixp;
int nsuffixes = 0;
char buf[256], *linep, *p;
#ifdef WITH_LIBICU
UIDNA *idna;
UErrorCode status = 0;
#endif
if (!fp)
return NULL;
@ -476,6 +553,10 @@ psl_ctx_t *psl_load_fp(FILE *fp)
if (!(psl = calloc(1, sizeof(psl_ctx_t))))
return NULL;
#ifdef WITH_LIBICU
idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status);
#endif
/*
* as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
* as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
@ -496,26 +577,33 @@ psl_ctx_t *psl_load_fp(FILE *fp)
if (*p == '!') {
/* add to exceptions */
if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0)
if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0) {
suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix));
else
suffixp = NULL;
suffixp->label = suffixp->label_buf; /* set label to changed address */
#ifdef WITH_LIBICU
_add_punycode_if_needed(idna, psl->suffix_exceptions, suffixp);
#endif
}
} else {
if (_suffix_init(&suffix, p, linep - p) == 0)
/* add to suffixes */
if (_suffix_init(&suffix, p, linep - p) == 0) {
suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix));
else
suffixp = NULL;
suffixp->label = suffixp->label_buf; /* set label to changed address */
#ifdef WITH_LIBICU
_add_punycode_if_needed(idna, psl->suffixes, suffixp);
#endif
}
}
if (suffixp)
suffixp->label = suffixp->label_buf; /* set label to changed address */
nsuffixes++;;
}
_vector_sort(psl->suffix_exceptions);
_vector_sort(psl->suffixes);
#ifdef WITH_LIBICU
if (idna)
uidna_close(idna);
#endif
return psl;
}
@ -685,7 +773,13 @@ const char *psl_builtin_filename(void)
**/
const char *psl_get_version (void)
{
return PACKAGE_VERSION;
return PACKAGE_VERSION
#ifdef WITH_LIBICU
" (+libicu/" U_ICU_VERSION ")"
#else
" (limited IDNA support)"
#endif
;
}
/**
@ -697,6 +791,9 @@ const char *psl_get_version (void)
* This helper function checks whether @cookie_domain is an acceptable cookie domain value for the request
* @hostname.
*
* For international domain names, both @hostname and @cookie_domain have to be either in lowercase UTF-8
* or in ASCII form (punycode). Other encodings or mixing UTF-8 and punycode result in unexpected behavior.
*
* Examples:
* 1. Cookie domain 'example.com' would be acceptable for hostname 'www.example.com',
* but '.com' or 'com' would NOT be acceptable since 'com' is a public suffix.
@ -741,3 +838,100 @@ int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname,
return 0;
}
/**
* psl_str_to_utf8lower:
* @str: string to convert
* @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL
* @locale: locale of @str for to lowercase conversion, e.g. 'de' or %NULL
* @lower: return value containing the converted string
*
* This helper function converts a string to lowercase UTF-8 representation.
* Lowercase UTF-8 is needed as input to the domain checking functions.
*
* @lower is %NULL on error.
* The return value 'lower' must be freed after usage.
*
* Returns: psl_error_t value.
* PSL_SUCCESS: Success
* PSL_ERR_INVALID_ARG: @str is a %NULL value.
* PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding
* PSL_ERR_TO_UTF16: Failed to convert @str to unicode
* PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase
* PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8
*
* Since: 0.4
*/
psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower)
{
int ret = PSL_ERR_INVALID_ARG;
if (lower)
*lower = NULL;
if (!str)
return PSL_ERR_INVALID_ARG;
/* shortcut to avoid costly conversion */
if (_str_is_ascii(str)) {
if (lower) {
char *p;
*lower = strdup(str);
/* convert ASCII string to lowercase */
for (p = *lower; *p; p++)
if (isupper(*p))
*p = tolower(*p);
}
return PSL_SUCCESS;
}
#ifdef WITH_LIBICU
do {
size_t str_length = strlen(str);
UErrorCode status = 0;
UChar *utf16_dst, *utf16_lower;
int32_t utf16_dst_length;
char *utf8_lower;
UConverter *uconv;
/* C89 allocation */
utf16_dst = alloca(sizeof(UChar) * (str_length * 2 + 1));
utf16_lower = alloca(sizeof(UChar) * (str_length * 2 + 1));
utf8_lower = alloca(str_length * 2 + 1);
uconv = ucnv_open(encoding, &status);
if (U_SUCCESS(status)) {
utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, str_length * 2 + 1, str, str_length, &status);
ucnv_close(uconv);
if (U_SUCCESS(status)) {
int32_t utf16_lower_length = u_strToLower(utf16_lower, str_length * 2 + 1, utf16_dst, utf16_dst_length, locale, &status);
if (U_SUCCESS(status)) {
u_strToUTF8(utf8_lower, str_length * 8 + 1, NULL, utf16_lower, utf16_lower_length, &status);
if (U_SUCCESS(status)) {
if (lower)
*lower = strdup(utf8_lower);
ret = PSL_SUCCESS;
} else {
ret = PSL_ERR_TO_UTF8;
/* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */
}
} else {
ret = PSL_ERR_TO_LOWER;
/* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */
}
} else {
ret = PSL_ERR_TO_UTF16;
/* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */
}
} else {
ret = PSL_ERR_CONVERTER;
/* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */
}
} while (0);
#endif
return ret;
}

View File

@ -39,235 +39,14 @@
#include <ctype.h>
#include <sys/stat.h>
/*
#ifdef WITH_LIBIDN2
# include <idn2.h>
#endif
*/
#ifdef WITH_LIBICU
# include <unicode/uversion.h>
# include <unicode/ustring.h>
# include <unicode/uidna.h>
#endif
#ifdef WITH_BUILTIN
#include <libpsl.h>
typedef struct {
char
label_buf[48];
const char *
label;
unsigned short
length;
unsigned char
nlabels, /* number of labels */
wildcard; /* this is a wildcard rule (e.g. *.sapporo.jp) */
} _psl_entry_t;
/* stripped down version libmget vector routines */
/* growable array of pointers to individually allocated _psl_entry_t elements */
typedef struct {
int
(*cmp)(const _psl_entry_t *, const _psl_entry_t *); /* comparison function */
_psl_entry_t
**entry; /* pointer to array of pointers to elements */
int
max, /* allocated elements */
cur; /* number of elements in use */
} _psl_vector_t;
/* PSL context: the rules read by psl_load_fp(), kept in two vectors */
struct _psl_ctx_st {
_psl_vector_t
*suffixes, /* regular rules */
*suffix_exceptions; /* exception rules (lines starting with '!') */
};
/*
 * Create an empty vector with room for 'max' element pointers and the
 * comparison function 'cmp'. Returns NULL if an allocation fails.
 */
static _psl_vector_t *_vector_alloc(int max, int (*cmp)(const _psl_entry_t *, const _psl_entry_t *))
{
	_psl_vector_t *vec = calloc(1, sizeof(_psl_vector_t));

	if (vec) {
		vec->entry = malloc(max * sizeof(_psl_entry_t *));
		if (!vec->entry) {
			/* do not hand out a vector without its pointer array */
			free(vec);
			return NULL;
		}

		vec->max = max;
		vec->cmp = cmp;
	}

	return vec;
}
/*
 * Release all elements, the pointer array and the vector itself.
 * Accepts a NULL 'v' and a NULL '*v'; '*v' is not reset by this function.
 */
static void _vector_free(_psl_vector_t **v)
{
	_psl_vector_t *vec;
	int idx;

	if (!v || !(vec = *v))
		return;

	if (vec->entry) {
		for (idx = 0; idx < vec->cur; idx++)
			free(vec->entry[idx]);

		free(vec->entry);
	}

	free(vec);
}
/* Return the element at index 'pos', or NULL if 'v' is NULL or 'pos' is out of range. */
static _psl_entry_t *_vector_get(const _psl_vector_t *v, int pos)
{
	if (v && pos >= 0 && pos < v->cur)
		return v->entry[pos];

	return NULL;
}
/*
 * Append a heap-allocated copy of 'elem' to the vector 'v'.
 *
 * The pointer array is doubled when it is full. On allocation failure the
 * vector is left unchanged.
 *
 * Returns the index of the new element, or -1 if 'v' or 'elem' is NULL or
 * if memory could not be allocated.
 */
static int _vector_add(_psl_vector_t *v, const _psl_entry_t *elem)
{
	_psl_entry_t *copy;

	if (!v || !elem)
		return -1;

	if (!(copy = malloc(sizeof(*copy))))
		return -1;

	memcpy(copy, elem, sizeof(*copy));

	if (v->max == v->cur) {
		/* doubling a capacity of 0 would never grow, start with 1 in that case */
		int new_max = v->max > 0 ? v->max * 2 : 1;
		_psl_entry_t **grown = realloc(v->entry, new_max * sizeof(*grown));

		if (!grown) {
			/* v->entry is still valid, only the new element is dropped */
			free(copy);
			return -1;
		}

		v->entry = grown;
		v->max = new_max;
	}

	v->entry[v->cur++] = copy;
	return v->cur - 1;
}
static int _compare(const void *p1, const void *p2, void *v)
{
return ((_psl_vector_t *)v)->cmp(*((_psl_entry_t **)p1), *((_psl_entry_t **)p2));
}
/*
 * Sort the elements of 'v' with its comparison function.
 * Does nothing if 'v' is NULL or has no comparison function.
 *
 * The call uses the GNU argument order of qsort_r() (comparator, then
 * user argument), matching the signature of _compare().
 */
static void _vector_sort(_psl_vector_t *v)
{
	/* the array holds pointers to entries, so take the element size from the array itself */
	if (v && v->cmp)
		qsort_r(v->entry, v->cur, sizeof(*v->entry), _compare, v);
}
/* by this kind of sorting, we can easily see if a domain matches or not (match = supercookie !) */
static int _suffix_compare(const _psl_entry_t *s1, const _psl_entry_t *s2)
{
	int diff;

	/* most labels first */
	diff = s2->nlabels - s1->nlabels;
	if (diff != 0)
		return diff;

	/* shorter rules first */
	diff = s1->length - s2->length;
	if (diff != 0)
		return diff;

	/* same label count and length: order by the rule text */
	return strcmp(s1->label, s2->label);
}
/*
 * Fill '*suffix' from the NUL-terminated rule text 'rule' of 'length' bytes.
 *
 * A leading "*." marks a wildcard rule; it is stripped from the stored label
 * and not counted in suffix->length. ASCII letters are lowercased, bytes
 * >= 0x80 (UTF-8 sequences) are copied unchanged.
 *
 * A rule that is too long for label_buf, or that starts with '*' without a
 * following '.', is reported on stderr and rejected: suffix->nlabels is 0
 * and the entry is left empty (empty label, length 0, no wildcard).
 */
static void _suffix_init(_psl_entry_t *suffix, const char *rule, size_t length)
{
	const char *src;
	char *dst, *end;

	/* start from a well-defined empty entry, so a rejected rule never exposes uninitialised members */
	suffix->label = suffix->label_buf;
	suffix->label_buf[0] = 0;
	suffix->length = 0;
	suffix->nlabels = 0;
	suffix->wildcard = 0;

	if (length >= sizeof(suffix->label_buf) - 1) {
		fprintf(stderr, "Suffix rule too long (%d, ignored): %s\n", (int) length, rule);
		return;
	}

	if (*rule == '*') {
		if (rule[1] != '.') {
			/* report the complete rule, including the leading '*' */
			fprintf(stderr, "Unsupported kind of rule (ignored): %s\n", rule);
			return;
		}

		rule += 2; /* skip "*." */
		suffix->wildcard = 1;
		suffix->length = (unsigned short) (length - 2);
	} else {
		suffix->length = (unsigned short) length;
	}

	suffix->nlabels = 1;

	/* never write past label_buf, even if 'length' does not match the actual length of 'rule' */
	end = suffix->label_buf + sizeof(suffix->label_buf) - 1;

	for (dst = suffix->label_buf, src = rule; *src && dst < end; src++) {
		/* the <ctype.h> functions require a value representable as unsigned char */
		unsigned char c = (unsigned char) *src;

		if (c == '.')
			suffix->nlabels++;

		*dst++ = (char) (c < 0x80 ? tolower(c) : c);
	}
	*dst = 0;
}
/*
 * Load Public Suffix List rules from the file named 'fname'.
 * Returns what psl_load_fp() returns for the opened file, or NULL if
 * 'fname' is NULL or the file can not be opened for reading.
 */
psl_ctx_t *psl_load_file(const char *fname)
{
	FILE *fp;
	psl_ctx_t *psl;

	/* fopen(NULL, ...) is undefined; treat it like a file that can not be opened */
	if (!fname)
		return NULL;

	if (!(fp = fopen(fname, "r")))
		return NULL;

	psl = psl_load_fp(fp);
	fclose(fp);

	return psl;
}
psl_ctx_t *psl_load_fp(FILE *fp)
{
psl_ctx_t *psl;
_psl_entry_t suffix, *suffixp;
int nsuffixes = 0;
char buf[256], *linep, *p;
if (!fp)
return NULL;
if (!(psl = calloc(1, sizeof(psl_ctx_t))))
return NULL;
/*
* as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
* as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
*/
psl->suffixes = _vector_alloc(8*1024, _suffix_compare);
psl->suffix_exceptions = _vector_alloc(64, _suffix_compare);
while ((linep = fgets(buf, sizeof(buf), fp))) {
while (isspace(*linep)) linep++; /* ignore leading whitespace */
if (!*linep) continue; /* skip empty lines */
if (*linep == '/' && linep[1] == '/')
continue; /* skip comments */
/* parse suffix rule */
for (p = linep; *linep && !isspace(*linep);) linep++;
*linep = 0;
if (*p == '!') {
/* add to exceptions */
_suffix_init(&suffix, p + 1, linep - p - 1);
suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix));
} else {
_suffix_init(&suffix, p, linep - p);
suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix));
}
if (suffixp)
suffixp->label = suffixp->label_buf; /* set label to changed address */
nsuffixes++;;
}
_vector_sort(psl->suffix_exceptions);
_vector_sort(psl->suffixes);
return psl;
}
/* here we include the library source code to have access to internal functions and data structures */
#define _LIBPSL_INCLUDED_BY_PSL2C
# include "psl.c"
#undef _LIBPSL_INCLUDED_BY_PSL2C
static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *varname)
{
@ -283,8 +62,8 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version);
} while (0);
#else
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with idn2) */\n");
#endif
fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n");
#endif /* WITH_LIBICU */
fprintf(fpout, "static _psl_entry_t %s[] = {\n", varname);
@ -298,15 +77,7 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *
fprintf(fpout, "};\n");
}
/*
 * Release a context: both rule vectors and the context structure itself.
 * A NULL 'psl' is a no-op.
 */
void psl_free(psl_ctx_t *psl)
{
	if (psl == NULL)
		return;

	_vector_free(&psl->suffixes);
	_vector_free(&psl->suffix_exceptions);
	free(psl);
}
#ifndef WITH_LIBICU
static int _str_needs_encoding(const char *s)
{
while (*s > 0) s++;
@ -326,60 +97,6 @@ static void _add_punycode_if_needed(_psl_vector_t *v)
_psl_entry_t suffix, *suffixp;
char lookupname[64] = "";
/* the following lines will have GPL3+ license issues */
/* char *asc = NULL;
int rc;
if ((rc = idn2_lookup_u8((uint8_t *)e->label_buf, (uint8_t **)&asc, 0)) == IDN2_OK) {
// fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, asc);
_suffix_init(&suffix, asc, strlen(asc));
suffix.wildcard = e->wildcard;
suffixp = _vector_get(v, _vector_add(v, &suffix));
suffixp->label = suffixp->e_label_buf; // set label to changed address
} else
fprintf(stderr, "toASCII(%s) failed (%d): %s\n", e->label_buf, rc, idn2_strerror(rc));
*/
#ifdef WITH_LIBICU
UIDNA *idna;
UErrorCode status = 0;
/* IDNA2003 punycode conversion */
/* destLen = uidna_toASCII(e->label_buf, (int32_t) strlen(e->label_buf), lookupname, (int32_t) sizeof(lookupname),
UIDNA_DEFAULT, NULL, &status);
*/
/* IDNA2008 UTS#46 punycode conversion */
if ((idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status))) {
UChar utf16_dst[64], utf16_src[64];
int32_t utf16_src_length;
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, (int32_t) strlen(e->label_buf), &status);
if (U_SUCCESS(status)) {
int32_t dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status);
if (U_SUCCESS(status)) {
u_strToUTF8(lookupname, (int32_t) sizeof(lookupname), NULL, utf16_dst, dst_length, &status);
if (U_SUCCESS(status)) {
if (strcmp(e->label_buf, lookupname)) {
/* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */
_suffix_init(&suffix, lookupname, strlen(lookupname));
suffix.wildcard = e->wildcard;
suffixp = _vector_get(v, _vector_add(v, &suffix));
suffixp->label = suffixp->label_buf; /* set label to changed address */
} /* else ignore */
} else
fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status);
} else
fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status);
} else
fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status);
uidna_close(idna);
} else
fprintf(stderr, "Failed to get UTS46 IDNA handle\n");
#else
/* this is much slower than the libidn2 API but should have no license issues */
FILE *pp;
char cmd[16 + sizeof(e->label_buf)];
@ -395,12 +112,13 @@ static void _add_punycode_if_needed(_psl_vector_t *v)
pclose(pp);
} else
fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd);
#endif
}
}
_vector_sort(v);
}
#endif /* ! WITH_LIBICU */
#endif /* WITH_BUILTIN */
int main(int argc, const char **argv)
@ -413,7 +131,7 @@ int main(int argc, const char **argv)
if (argc != 3) {
fprintf(stderr, "Usage: psl2c <infile> <outfile>\n");
fprintf(stderr, " <infile> is the 'effective_tld_names.dat' (aka Public Suffix List)\n");
fprintf(stderr, " <infile> is the 'effective_tld_names.dat' (aka Public Suffix List), lowercase UTF-8 encoded\n");
fprintf(stderr, " <outfile> is the the C filename to be generated from <infile>\n");
return 1;
}
@ -428,8 +146,12 @@ int main(int argc, const char **argv)
size_t cmdsize = 16 + strlen(argv[1]);
char *cmd = alloca(cmdsize), checksum[64] = "";
#ifndef WITH_LIBICU
/* If libicu is not configured, we still need to have punycode in our built-in data. */
/* Else the test suite fails. */
_add_punycode_if_needed(psl->suffixes);
_add_punycode_if_needed(psl->suffix_exceptions);
#endif
_print_psl_entries(fpout, psl->suffixes, "suffixes");
_print_psl_entries(fpout, psl->suffix_exceptions, "suffix_exceptions");
@ -458,8 +180,8 @@ int main(int argc, const char **argv)
psl_free(psl);
#else
if ((fpout = fopen(argv[2], "w"))) {
fprintf(fpout, "static _psl_entry_t suffixes[0];\n");
fprintf(fpout, "static _psl_entry_t suffix_exceptions[0];\n");
fprintf(fpout, "static _psl_entry_t suffixes[1];\n");
fprintf(fpout, "static _psl_entry_t suffix_exceptions[1];\n");
fprintf(fpout, "static time_t _psl_file_time;\n");
fprintf(fpout, "static time_t _psl_compile_time;\n");
fprintf(fpout, "static const char _psl_sha1_checksum[] = \"\";\n");

View File

@ -47,8 +47,8 @@ static int
static void test_psl(void)
{
/* punycode generation: idn 商标 */
/* octal code generation: echo -n "商标" | od -b */
/* punycode generation: idn ?? */
/* octal code generation: echo -n "??" | od -b */
static const struct test_data {
const char
*domain;
@ -65,7 +65,7 @@ static void test_psl(void)
{ "abc.www.ck", 0 },
{ "xxx.ck", 1 },
{ "www.xxx.ck", 0 },
{ "\345\225\206\346\240\207", 1 }, /* xn--czr694b oder 商标 */
{ "\345\225\206\346\240\207", 1 }, /* xn--czr694b oder ?? */
{ "www.\345\225\206\346\240\207", 0 },
{ "xn--czr694b", 1 },
{ "www.xn--czr694b", 0 },

View File

@ -47,8 +47,8 @@ static int
static void test_psl(void)
{
/* punycode generation: idn 商标 */
/* octal code generation: echo -n "商标" | od -b */
/* punycode generation: idn ?? */
/* octal code generation: echo -n "??" | od -b */
static const struct test_data {
const char
*domain;
@ -65,7 +65,7 @@ static void test_psl(void)
{ "abc.www.ck", 0 },
{ "xxx.ck", 1 },
{ "www.xxx.ck", 0 },
{ "\345\225\206\346\240\207", 1 }, /* xn--czr694b oder 商标 */
{ "\345\225\206\346\240\207", 1 }, /* xn--czr694b or ?? */
{ "www.\345\225\206\346\240\207", 0 },
/* some special test follow ('name' and 'forgot.his.name' are public, but e.g. his.name is not) */
{ "name", 1 },

View File

@ -38,6 +38,11 @@
#include <ctype.h>
#include <alloca.h>
#ifdef WITH_LIBICU
# include <unicode/uversion.h>
# include <unicode/ustring.h>
#endif
#include <libpsl.h>
static int
@ -47,32 +52,11 @@ static int
static void test(const psl_ctx_t *psl, const char *domain, const char *expected_result)
{
const char *result;
char lookupname[128];
char *lower;
/* check if there might be some utf-8 characters */
if (domain) {
int utf8;
const char *p;
for (p = domain, utf8 = 0; *p && !utf8; p++)
if (*p < 0)
utf8 = 1;
/* if we found utf-8, make sure to convert domain correctly to lowercase */
/* does it work, if we are not in a utf-8 env ? */
if (utf8) {
FILE *pp;
size_t cmdsize = 48 + strlen(domain);
char *cmd = alloca(cmdsize);
snprintf(cmd, cmdsize, "echo -n '%s' | sed -e 's/./\\L\\0/g'", domain);
if ((pp = popen(cmd, "r"))) {
if (fscanf(pp, "%127s", lookupname) >= 1)
domain = lookupname;
pclose(pp);
}
}
}
/* our test data is fixed to UTF-8 (english), so provide it here */
if (psl_str_to_utf8lower(domain, "utf-8", "en", &lower) == PSL_SUCCESS)
domain = lower;
result = psl_registrable_domain(psl, domain);
@ -83,13 +67,15 @@ static void test(const psl_ctx_t *psl, const char *domain, const char *expected_
printf("psl_registrable_domain(%s)=%s (expected %s)\n",
domain, result ? result : "NULL", expected_result ? expected_result : "NULL");
}
free(lower);
}
static void test_psl(void)
{
FILE *fp;
const psl_ctx_t *psl;
char buf[256], domain[128], expected_regdom[128], *p;
char buf[256], domain[128], expected_regdom[128];
psl = psl_builtin();
@ -105,7 +91,9 @@ static void test_psl(void)
test(NULL, "com", NULL);
/* Norwegian with uppercase oe */
#ifdef WITH_LIBICU
test(psl, "www.\303\230yer.no", "www.\303\270yer.no");
#endif
/* Norwegian with lowercase oe */
test(psl, "www.\303\270yer.no", "www.\303\270yer.no");
@ -126,11 +114,6 @@ static void test_psl(void)
continue;
}
/* we have to lowercase the domain - the PSL API just takes lowercase */
for (p = domain; *p; p++)
if (*p > 0 && isupper(*p))
*p = tolower(*p);
if (!strcmp(expected_regdom, "null"))
test(psl, domain, NULL);
else

View File

@ -32,8 +32,16 @@
# include <config.h>
#endif
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#ifdef WITH_LIBICU
# include <unicode/uloc.h>
# include <unicode/ucnv.h>
#endif
#include <libpsl.h>
static void usage(int err, FILE* f)
@ -71,6 +79,10 @@ int main(int argc, const char *const *argv)
const char *const *arg, *psl_file = NULL, *cookie_domain = NULL;
psl_ctx_t *psl = (psl_ctx_t *) psl_builtin();
/* set current locale according to the environment variables */
#include <locale.h>
setlocale(LC_ALL, "");
for (arg = argv + 1; arg < argv + argc; arg++) {
if (!strncmp(*arg, "--", 2)) {
if (!strcmp(*arg, "--is-public-suffix"))
@ -135,8 +147,41 @@ int main(int argc, const char *const *argv)
exit(2);
}
if (arg >= argv + argc) {
fprintf(stderr, "No domains given - aborting\n");
exit(3);
char buf[256], *domain, *lower;
size_t len;
psl_error_t rc;
/* read URLs from STDIN */
while (fgets(buf, sizeof(buf), stdin)) {
for (domain = buf; isspace(*domain); domain++); /* skip leading spaces */
if (*domain == '#' || !*domain) continue; /* skip empty lines and comments */
for (len = strlen(domain); len && isspace(domain[len - 1]); len--); /* skip trailing spaces */
domain[len] = 0;
if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &lower)) != PSL_SUCCESS)
fprintf(stderr, "%s: Failed to convert to lowercase UTF-8 (%d)\n", domain, rc);
else if (mode == 1)
printf("%s: %d (%s)\n", domain, psl_is_public_suffix(psl, lower), lower);
else if (mode == 2)
printf("%s: %s\n", domain, psl_unregistrable_domain(psl, lower));
else if (mode == 3)
printf("%s: %s\n", domain, psl_registrable_domain(psl, lower));
else if (mode == 4) {
char *cookie_domain_lower;
if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &cookie_domain_lower)) != PSL_SUCCESS)
fprintf(stderr, "%s: Failed to convert cookie domain '%s' to lowercase UTF-8 (%d)\n", domain, cookie_domain, rc);
else
printf("%s: %d\n", domain, psl_is_cookie_domain_acceptable(psl, lower, cookie_domain));
free(cookie_domain_lower);
}
free(lower);
}
psl_free(psl);
exit(0);
}
}
@ -172,6 +217,11 @@ int main(int argc, const char *const *argv)
printf("builtin compile time: %ld (%s)\n", psl_builtin_compile_time(), time2str(psl_builtin_compile_time()));
printf("builtin file time: %ld (%s)\n", psl_builtin_file_time(), time2str(psl_builtin_file_time()));
printf("builtin SHA1 file hash: %s\n", psl_builtin_sha1sum());
#ifdef WITH_LIBICU
printf("uloc_getDefault=%s\n", uloc_getDefault());
printf("ucnv_getDefaultName=%s\n", ucnv_getDefaultName());
#endif
} else
printf("No builtin PSL data available\n");
}