2014-03-22 20:35:56 +01:00
|
|
|
/*
|
2015-09-23 14:50:01 +02:00
|
|
|
* Copyright(c) 2014-2015 Tim Ruehsen
|
2014-03-22 20:35:56 +01:00
|
|
|
*
|
2014-03-24 20:41:46 +01:00
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
2014-03-22 20:35:56 +01:00
|
|
|
*
|
2014-03-24 20:41:46 +01:00
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
2014-03-22 20:35:56 +01:00
|
|
|
*
|
2014-03-24 20:41:46 +01:00
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
|
|
* DEALINGS IN THE SOFTWARE.
|
2014-03-22 20:35:56 +01:00
|
|
|
*
|
2014-03-24 20:41:46 +01:00
|
|
|
* This file is part of libpsl.
|
2014-03-22 20:35:56 +01:00
|
|
|
*
|
2014-03-29 17:42:54 +01:00
|
|
|
* Precompile Public Suffix List into a C source file
|
2014-03-22 20:35:56 +01:00
|
|
|
*
|
|
|
|
* Changelog
|
|
|
|
* 22.03.2014 Tim Ruehsen created
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#if HAVE_CONFIG_H
|
|
|
|
# include <config.h>
|
|
|
|
#endif
|
|
|
|
|
2014-03-23 21:49:19 +01:00
|
|
|
#include <stdio.h>
|
2014-03-24 17:29:56 +01:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2014-03-23 21:49:19 +01:00
|
|
|
#include <time.h>
|
|
|
|
#include <sys/stat.h>
|
2014-10-28 15:41:35 +01:00
|
|
|
#ifdef HAVE_ALLOCA_H
|
|
|
|
# include <alloca.h>
|
|
|
|
#endif
|
2014-03-23 21:49:19 +01:00
|
|
|
|
2014-06-29 22:56:33 +02:00
|
|
|
#if defined(BUILTIN_GENERATOR_LIBICU) || defined(BUILTIN_GENERATOR_LIBIDN2) || defined(BUILTIN_GENERATOR_LIBIDN)
|
|
|
|
# define _GENERATE_BUILTIN_DATA
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef _GENERATE_BUILTIN_DATA
|
2014-03-30 17:02:56 +02:00
|
|
|
|
2014-03-24 17:29:56 +01:00
|
|
|
#include <libpsl.h>
|
|
|
|
|
2014-06-19 12:06:54 +02:00
|
|
|
/* here we include the library source code to have access to internal functions and data structures */
|
|
|
|
#define _LIBPSL_INCLUDED_BY_PSL2C
|
|
|
|
# include "psl.c"
|
|
|
|
#undef _LIBPSL_INCLUDED_BY_PSL2C
|
2014-03-22 20:35:56 +01:00
|
|
|
|
2015-09-19 10:50:00 +02:00
|
|
|
#if 0
|
2015-09-15 11:46:21 +02:00
|
|
|
static int _check_psl(const psl_ctx_t *psl)
|
|
|
|
{
|
|
|
|
int it, pos, err = 0;
|
|
|
|
|
|
|
|
/* check if plain suffix also appears in exceptions */
|
|
|
|
for (it = 0; it < psl->suffixes->cur; it++) {
|
|
|
|
_psl_entry_t *e = _vector_get(psl->suffixes, it);
|
|
|
|
|
|
|
|
if (!e->wildcard && _vector_find(psl->suffix_exceptions, e) >= 0) {
|
|
|
|
fprintf(stderr, "Found entry '%s' also in exceptions\n", e->label);
|
|
|
|
err = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check if exception also appears in suffix list as plain entry */
|
|
|
|
for (it = 0; it < psl->suffix_exceptions->cur; it++) {
|
|
|
|
_psl_entry_t *e2, *e = _vector_get(psl->suffix_exceptions, it);
|
|
|
|
|
|
|
|
if ((e2 = _vector_get(psl->suffixes, pos = _vector_find(psl->suffixes, e)))) {
|
|
|
|
if (!e2->wildcard) {
|
|
|
|
fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label);
|
|
|
|
err = 1;
|
|
|
|
}
|
|
|
|
/* Two same domains in a row are allowed: wildcard and non-wildcard.
|
|
|
|
* Binary search find either of them, so also check previous and next entry. */
|
|
|
|
else if (pos > 0 && _suffix_compare(e, e2 = _vector_get(psl->suffixes, pos - 1)) == 0 && !e2->wildcard) {
|
|
|
|
fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label);
|
|
|
|
err = 1;
|
|
|
|
}
|
|
|
|
else if (pos < psl->suffixes->cur - 1 && _suffix_compare(e, e2 = _vector_get(psl->suffixes, pos + 1)) == 0 && !e2->wildcard) {
|
|
|
|
fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label);
|
|
|
|
err = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check if non-wildcard entry is already covered by wildcard entry */
|
|
|
|
for (it = 0; it < psl->suffixes->cur; it++) {
|
|
|
|
const char *p;
|
|
|
|
_psl_entry_t *e = _vector_get(psl->suffixes, it);
|
|
|
|
|
|
|
|
if (e->nlabels > 1 && !e->wildcard && (p = strchr(e->label, '.'))) {
|
|
|
|
_psl_entry_t *e2, *e3, suffix;
|
|
|
|
|
|
|
|
suffix.label = p + 1;
|
|
|
|
suffix.length = strlen(p + 1);
|
|
|
|
suffix.nlabels = e->nlabels - 1;
|
|
|
|
|
|
|
|
e2 = _vector_get(psl->suffixes, pos = _vector_find(psl->suffixes, &suffix));
|
|
|
|
|
|
|
|
if (e2) {
|
|
|
|
if (e2->wildcard) {
|
|
|
|
fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label);
|
|
|
|
err = 1;
|
|
|
|
}
|
|
|
|
/* Two same domains in a row are allowed: wildcard and non-wildcard.
|
|
|
|
* Binary search find either of them, so also check previous and next entry. */
|
|
|
|
else if (pos > 0 && _suffix_compare(e2, e3 = _vector_get(psl->suffixes, pos - 1)) == 0 && e3->wildcard) {
|
|
|
|
fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label);
|
|
|
|
err = 1;
|
|
|
|
}
|
|
|
|
else if (pos < psl->suffixes->cur - 1 && _suffix_compare(e2, e3 = _vector_get(psl->suffixes, pos + 1)) == 0 && e3->wildcard) {
|
|
|
|
fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label);
|
|
|
|
err = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
2015-09-19 10:50:00 +02:00
|
|
|
#endif
|
2015-09-15 11:46:21 +02:00
|
|
|
|
2015-12-04 21:26:30 +01:00
|
|
|
static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v)
|
2015-12-04 17:15:03 +01:00
|
|
|
{
|
2015-12-04 21:26:30 +01:00
|
|
|
FILE *fp;
|
2015-12-04 17:15:03 +01:00
|
|
|
int it;
|
|
|
|
|
|
|
|
#ifdef BUILTIN_GENERATOR_LIBICU
|
|
|
|
do {
|
|
|
|
UVersionInfo version_info;
|
|
|
|
char version[U_MAX_VERSION_STRING_LENGTH];
|
|
|
|
|
|
|
|
u_getVersion(version_info);
|
|
|
|
u_versionToString(version_info, version);
|
|
|
|
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version);
|
|
|
|
} while (0);
|
|
|
|
#elif defined(BUILTIN_GENERATOR_LIBIDN2)
|
|
|
|
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn2/%s) */\n", idn2_check_version(NULL));
|
|
|
|
#elif defined(BUILTIN_GENERATOR_LIBIDN)
|
|
|
|
fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn/%s) */\n", stringprep_check_version(NULL));
|
|
|
|
#else
|
|
|
|
fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n");
|
|
|
|
#endif
|
|
|
|
|
2015-12-04 21:26:30 +01:00
|
|
|
if ((fp = fopen("in.tmp", "w"))) {
|
|
|
|
for (it = 0; it < v->cur; it++) {
|
|
|
|
_psl_entry_t *e = _vector_get(v, it);
|
|
|
|
unsigned char *s = (unsigned char *)e->label_buf;
|
2015-12-04 17:15:03 +01:00
|
|
|
|
2015-12-04 21:26:30 +01:00
|
|
|
/* search for non-ASCII label and skip it */
|
|
|
|
while (*s && *s < 128) s++;
|
|
|
|
if (*s) continue;
|
2015-12-04 17:15:03 +01:00
|
|
|
|
2015-12-09 09:35:04 +01:00
|
|
|
fprintf(fp, "%s, %X\n", e->label_buf, (int) (e->flags & 0x0F));
|
2015-12-04 21:26:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
fclose(fp);
|
|
|
|
}
|
|
|
|
|
2015-12-09 09:35:04 +01:00
|
|
|
if ((it = system(MAKE_DAFSA " in.tmp out.tmp")))
|
|
|
|
fprintf(stderr, "Failed to execute " MAKE_DAFSA "\n");
|
2015-12-04 21:26:30 +01:00
|
|
|
|
|
|
|
if ((fp = fopen("out.tmp", "r"))) {
|
|
|
|
char buf[256];
|
|
|
|
|
|
|
|
while (fgets(buf, sizeof(buf), fp))
|
|
|
|
fputs(buf, fpout);
|
|
|
|
|
|
|
|
fclose(fp);
|
2015-12-04 17:15:03 +01:00
|
|
|
}
|
2015-12-09 09:35:04 +01:00
|
|
|
|
|
|
|
unlink("in.tmp");
|
|
|
|
unlink("out.tmp");
|
2015-12-04 17:15:03 +01:00
|
|
|
}
|
|
|
|
|
2014-06-29 22:56:33 +02:00
|
|
|
#if 0
|
2014-06-27 17:13:30 +02:00
|
|
|
#if !defined(WITH_LIBICU) && !defined(WITH_IDN2)
|
2014-06-19 13:15:31 +02:00
|
|
|
/*
 * Check whether the NUL-terminated string 's' contains a byte outside the
 * 7-bit ASCII range (value >= 128).
 *
 * Returns 1 if such a byte exists, 0 otherwise (including the empty string).
 */
static int _str_needs_encoding(const char *s)
{
	/* inspect bytes as unsigned so that values >= 128 do not turn negative;
	 * keep the const qualifier, nothing is written through the pointer */
	const unsigned char *p = (const unsigned char *) s;

	while (*p && *p < 128)
		p++;

	return *p != 0;
}
|
|
|
|
|
|
|
|
/*
 * For every entry of 'v' whose label contains non-ASCII bytes, ask the
 * external 'idn2' command for the converted name and, if it differs from
 * the label, append it to 'v' as an additional entry with the same
 * wildcard setting. The vector is sorted again at the end.
 */
static void _add_punycode_if_needed(_psl_vector_t *v)
{
	/* _vector_add() changes v->cur, so the loop bound is fixed up front
	 * and only the entries present at entry are visited */
	const int initial = v->cur;
	int idx;

	for (idx = 0; idx < initial; idx++) {
		_psl_entry_t *entry = _vector_get(v, idx);
		_psl_entry_t converted, *added;
		char lookupname[64] = "";
		char cmd[16 + sizeof(entry->label_buf)];
		FILE *pipe;

		if (!_str_needs_encoding(entry->label_buf))
			continue;

		/* calling the tool is much slower than the libidn2 API but should have no license issues */
		snprintf(cmd, sizeof(cmd), "idn2 '%s'", entry->label_buf);

		pipe = popen(cmd, "r");
		if (!pipe) {
			fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd);
			continue;
		}

		if (fscanf(pipe, "%63s", lookupname) >= 1 && strcmp(entry->label_buf, lookupname) != 0) {
			_suffix_init(&converted, lookupname, strlen(lookupname));
			converted.wildcard = entry->wildcard;

			added = _vector_get(v, _vector_add(v, &converted));
			added->label = added->label_buf; /* set label to changed address */
		}

		pclose(pipe);
	}

	_vector_sort(v);
}
|
2014-06-27 17:13:30 +02:00
|
|
|
#endif /* !defined(WITH_LIBICU) && !defined(WITH_IDN2) */
|
2014-06-29 22:56:33 +02:00
|
|
|
#endif
|
2014-06-19 13:15:31 +02:00
|
|
|
|
2014-06-29 22:56:33 +02:00
|
|
|
#endif /* _GENERATE_BUILTIN_DATA */
|
2014-03-22 20:35:56 +01:00
|
|
|
|
2014-03-23 21:49:19 +01:00
|
|
|
/*
 * psl2c <infile> <outfile>
 *
 * With _GENERATE_BUILTIN_DATA defined, <infile> is loaded via
 * psl_load_file() and <outfile> receives the DAFSA data followed by the
 * static variables describing it (file time, compile time, entry counts,
 * SHA-1 checksum and source file name). Without it, <outfile> receives
 * empty placeholder definitions of the same variables.
 *
 * The compile time is taken from the SOURCE_DATE_EPOCH environment
 * variable if set, else from time(NULL).
 *
 * Exit status:
 *   0  success
 *   1  wrong number of arguments
 *   2  <infile> could not be loaded
 *   3  <outfile> could not be opened for writing
 *   4  closing <outfile> failed
 */
int main(int argc, const char **argv)
{
	FILE *fpout;
#ifdef _GENERATE_BUILTIN_DATA
	psl_ctx_t *psl;
#endif
	int ret = 0, argpos = 1;

	if (argc - argpos != 2) {
		fprintf(stderr, "Usage: psl2c <infile> <outfile>\n");
		fprintf(stderr, " <infile> is the 'public_suffix_list.dat', lowercase UTF-8 encoded\n");
		fprintf(stderr, " <outfile> is the C filename to be generated from <infile>\n");
		return 1;
	}

#ifdef _GENERATE_BUILTIN_DATA
	if (!(psl = psl_load_file(argv[argpos])))
		return 2;

	/* look for ambiguous or double entries */
/*	if (_check_psl(psl)) {
		psl_free(psl);
		return 5;
	}
*/
	if ((fpout = fopen(argv[argpos + 1], "w"))) {
		FILE *pp;
		struct stat st;
		size_t cmdsize = 16 + strlen(argv[argpos]);
		char *cmd = alloca(cmdsize), checksum[64] = "";
		char *abs_srcfile;
		const char *source_date_epoch = NULL;

#if 0
		/* include library code did not generate punycode, so let's do it for the builtin data */
		_add_punycode_if_needed(psl->suffixes);
#endif

		_print_psl_entries_dafsa(fpout, psl->suffixes);

		/* the checksum stays an empty string if sha1sum can't be run or prints nothing usable */
		snprintf(cmd, cmdsize, "sha1sum %s", argv[argpos]);
		if ((pp = popen(cmd, "r"))) {
			if (fscanf(pp, "%63[0-9a-zA-Z]", checksum) < 1)
				*checksum = 0;
			pclose(pp);
		}

		if (stat(argv[argpos], &st) != 0)
			st.st_mtime = 0;

		/* time_t is not guaranteed to be 'unsigned long', so cast explicitly to match %lu */
		fprintf(fpout, "static time_t _psl_file_time = %lu;\n", (unsigned long) st.st_mtime);

		if ((source_date_epoch = getenv("SOURCE_DATE_EPOCH")))
			fprintf(fpout, "static time_t _psl_compile_time = %lu;\n", (unsigned long) atol(source_date_epoch));
		else
			fprintf(fpout, "static time_t _psl_compile_time = %lu;\n", (unsigned long) time(NULL));

		fprintf(fpout, "static int _psl_nsuffixes = %d;\n", psl->nsuffixes);
		fprintf(fpout, "static int _psl_nexceptions = %d;\n", psl->nexceptions);
		fprintf(fpout, "static int _psl_nwildcards = %d;\n", psl->nwildcards);
		fprintf(fpout, "static const char _psl_sha1_checksum[] = \"%s\";\n", checksum);

		/* We need an absolute path here, else psl_builtin_outdated() won't work reliably */
		/* Caveat: symbolic links are resolved by realpath() */
		if ((abs_srcfile = realpath(argv[argpos], NULL))) {
			fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", abs_srcfile);
			free(abs_srcfile);
		} else
			fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[argpos]);

		if (fclose(fpout) != 0)
			ret = 4;
	} else {
		fprintf(stderr, "Failed to write open '%s'\n", argv[argpos + 1]);
		ret = 3;
	}

	psl_free(psl);
#else
	if ((fpout = fopen(argv[argpos + 1], "w"))) {
		fprintf(fpout, "static const unsigned char kDafsa[1];\n");
		fprintf(fpout, "static time_t _psl_file_time;\n");
		fprintf(fpout, "static time_t _psl_compile_time;\n");
		fprintf(fpout, "static int _psl_nsuffixes = 0;\n");
		fprintf(fpout, "static int _psl_nexceptions = 0;\n");
		fprintf(fpout, "static int _psl_nwildcards = 0;\n");
		fprintf(fpout, "static const char _psl_sha1_checksum[] = \"\";\n");
		fprintf(fpout, "static const char _psl_filename[] = \"\";\n");

		if (fclose(fpout) != 0)
			ret = 4;
	} else {
		fprintf(stderr, "Failed to write open '%s'\n", argv[argpos + 1]);
		ret = 3;
	}
#endif /* GENERATE_BUILTIN_DATA */

	return ret;
}
|