From e3c28f8a86de471cf3e9b0822e28735d42a525d7 Mon Sep 17 00:00:00 2001 From: Daniel Kahn Gillmor Date: Fri, 21 Mar 2014 14:18:36 -0400 Subject: [PATCH 1/5] fgets into buf directly, rather than the stack --- src/psl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/psl.c b/src/psl.c index c011137..5a2f819 100644 --- a/src/psl.c +++ b/src/psl.c @@ -281,7 +281,7 @@ psl_ctx_t *psl_load_file(const char *fname) psl->suffix_exceptions = _vector_alloc(64, _suffix_compare); if ((fp = fopen(fname, "r"))) { - while ((linep = fgets(&buf, sizeof(buf), fp))) { + while ((linep = fgets(buf, sizeof(buf), fp))) { while (isspace(*linep)) linep++; // ignore leading whitespace if (!*linep) continue; // skip empty lines From cdeea860f722fff43d1e62232d21684ab58379c6 Mon Sep 17 00:00:00 2001 From: Daniel Kahn Gillmor Date: Fri, 21 Mar 2014 14:19:25 -0400 Subject: [PATCH 2/5] git should ignore ephemeral files. --- .gitignore | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..de162c9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,57 @@ +*~ +Makefile +Makefile.in +aclocal.m4 +autom4te.cache/ +compile +config.guess +config.h +config.h.in +config.log +config.rpath +config.status +config.sub +configure +data/Makefile +data/Makefile.in +depcomp +include/Makefile +include/Makefile.in +install-sh +libpsl-*.pc +libtool +ltmain.sh +m4/ +missing +po/Makefile +po/Makefile.in +po/Makefile.in.in +po/Makevars.template +po/POTFILES +po/Rules-quot +po/boldquot.sed +po/en@boldquot.header +po/en@quot.header +po/insert-header.sin +po/psl.pot +po/quot.sed +po/remove-potcdate.sin +po/remove-potcdate.sed +po/stamp-po +src/.deps/ +src/.libs/ +src/Makefile +src/Makefile.in +src/libpsl-*.la +src/libpsl_*_la-psl.lo +stamp-h1 +test-driver +tests/.deps/ +tests/Makefile +tests/Makefile.in +tests/test-is-tld +tests/test-is-tld.log +tests/test-is-tld.o 
+tests/test-is-tld.trs +tests/test-suite.log +psl-*.tar.gz From 2d99b964ff6eb373ddcbcaa7047b5ae6fe7bc6fe Mon Sep 17 00:00:00 2001 From: Daniel Kahn Gillmor Date: Fri, 21 Mar 2014 14:26:55 -0400 Subject: [PATCH 3/5] avoid a printf in the library In general, we don't want libraries to send data to the standard file descriptors. There are more that need fixing. Note: this introduces a new API (psl_suffix_count() and psl_suffix_exception_count()) to enable the same sort of output from the test. But this new API seems to imply the internal structure of the public suffix list. Do we want to expose this API? There could be some other PSL mechanism (e.g. DBOUND) that doesn't have these counts, and a drop-in replacement would not know what to return here. --- include/libpsl.h | 8 ++++++++ src/psl.c | 15 +++++++++++++-- tests/test-is-tld.c | 3 +++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/include/libpsl.h b/include/libpsl.h index fd2a404..9a7c934 100644 --- a/include/libpsl.h +++ b/include/libpsl.h @@ -55,6 +55,14 @@ psl_ctx_t * int psl_is_tld(const psl_ctx_t *psl, const char *domain); +/* does not include exceptions */ +int + psl_suffix_count(const psl_ctx_t *psl); +/* just counts exceptions */ +int + psl_suffix_exception_count(const psl_ctx_t *psl); + + PSL_END_DECLS #endif /* _LIBPSL_LIBPSL_H */ diff --git a/src/psl.c b/src/psl.c index 5a2f819..cd3e5fa 100644 --- a/src/psl.c +++ b/src/psl.c @@ -312,14 +312,25 @@ psl_ctx_t *psl_load_file(const char *fname) _vector_sort(psl->suffix_exceptions); _vector_sort(psl->suffixes); - printf("loaded %d (%d/%d) suffixes\n", nsuffixes, psl->suffixes->cur, psl->suffix_exceptions->cur); - } else fprintf(stderr, _("Failed to open PSL file '%s'\n"), fname); return psl; } + +/* does not include exceptions */ +int psl_suffix_count(const psl_ctx_t *psl) +{ + return psl->suffixes->cur; +} +/* just counts exceptions */ +int psl_suffix_exception_count(const psl_ctx_t *psl) +{ + return psl->suffix_exceptions->cur; +} + + void 
psl_free(psl_ctx_t **psl) { if (psl && *psl) { diff --git a/tests/test-is-tld.c b/tests/test-is-tld.c index 0f63b9b..1868af0 100644 --- a/tests/test-is-tld.c +++ b/tests/test-is-tld.c @@ -64,6 +64,9 @@ static void test_psl(void) psl = psl_load_file(DATADIR "/effective_tld_names.dat"); + printf("loaded %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl)); + + for (it = 0; it < countof(test_data); it++) { const struct test_data *t = &test_data[it]; int result = psl_is_tld(psl, t->domain); From 4e674ccbae09672bda9adec75debc9d434226340 Mon Sep 17 00:00:00 2001 From: Daniel Kahn Gillmor Date: Fri, 21 Mar 2014 14:39:17 -0400 Subject: [PATCH 4/5] document indentation conventions for emacs users --- .dir-locals.el | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 .dir-locals.el diff --git a/.dir-locals.el b/.dir-locals.el new file mode 100644 index 0000000..9b28057 --- /dev/null +++ b/.dir-locals.el @@ -0,0 +1,9 @@ +;; emacs local configuration settings for libpsl source +;; surmised by dkg on 2014-03-21 14:35:49-0400 + +((c-mode + (indent-tabs-mode . t) + (tab-width . 4) + (c-basic-offset . 4) + (c-file-style . "linux")) + ) From c07ea9d0a05ec09568ffa2b804b90b1551669517 Mon Sep 17 00:00:00 2001 From: Daniel Kahn Gillmor Date: Fri, 21 Mar 2014 14:43:27 -0400 Subject: [PATCH 5/5] return NULL from psl_load_file() if the file could not be read. --- src/psl.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/psl.c b/src/psl.c index cd3e5fa..88ccc9e 100644 --- a/src/psl.c +++ b/src/psl.c @@ -275,12 +275,12 @@ psl_ctx_t *psl_load_file(const char *fname) if (!(psl = calloc(1, sizeof(psl_ctx_t)))) return NULL; - // as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions. - // as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions. 
- psl->suffixes = _vector_alloc(8*1024, _suffix_compare); - psl->suffix_exceptions = _vector_alloc(64, _suffix_compare); - if ((fp = fopen(fname, "r"))) { + // as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions. + // as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions. + psl->suffixes = _vector_alloc(8*1024, _suffix_compare); + psl->suffix_exceptions = _vector_alloc(64, _suffix_compare); + while ((linep = fgets(buf, sizeof(buf), fp))) { while (isspace(*linep)) linep++; // ignore leading whitespace if (!*linep) continue; // skip empty lines @@ -312,8 +312,10 @@ psl_ctx_t *psl_load_file(const char *fname) _vector_sort(psl->suffix_exceptions); _vector_sort(psl->suffixes); - } else - fprintf(stderr, _("Failed to open PSL file '%s'\n"), fname); + } else { + free(psl); + return NULL; + } return psl; }