From 093d5eac3d6a2bc04e4c8b67b1f6076ce4aa15d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Sat, 26 Dec 2015 14:15:08 +0100 Subject: [PATCH] Fix ./configure --disable-runtime Added runtime punycode generation code from http://www.nicemice.net/idn/punycode-spec.gz --- contrib/check-hard | 5 +- src/psl.c | 292 ++++++++++++++++++++++++++++++++++++++++++++- src/psl2c.c | 6 +- 3 files changed, 297 insertions(+), 6 deletions(-) diff --git a/contrib/check-hard b/contrib/check-hard index a307ff3..a3a3c3a 100755 --- a/contrib/check-hard +++ b/contrib/check-hard @@ -14,7 +14,7 @@ make distclean > /dev/null || true # We define _GNU_SOURCE to avoid warnings with missing prototypes. # C89 does not know snprintf, strdup, strndup, popen, pclose -CFLAGS="-std=c89 -pedantic -O2 -g -Wall -Wextra -Wstrict-prototypes -Wold-style-definition -Wwrite-strings -Wshadow -Wformat -Wformat-security -Wunreachable-code -Wstrict-prototypes -Wmissing-prototypes -Wold-style-definition -D_GNU_SOURCE" +CFLAGS="-std=gnu89 -pedantic -O2 -g -Wall -Wextra -Wstrict-prototypes -Wold-style-definition -Wwrite-strings -Wshadow -Wformat -Wformat-security -Wunreachable-code -Wstrict-prototypes -Wmissing-prototypes -Wold-style-definition" CACHEFILE=$PWD/config_check.cache @@ -40,7 +40,8 @@ for CC in gcc clang; do for options in \ "--enable-runtime=libicu --enable-builtin=libicu" \ "--enable-runtime=libidn2 --enable-builtin=libidn2" \ - "--enable-runtime=libidn --enable-builtin=libidn"; do + "--enable-runtime=libidn --enable-builtin=libidn" \ + "--disable-runtime --enable-builtin=libicu"; do export DISTCHECK_CONFIGURE_FLAGS="-C --cache-file=$CACHEFILE $options" echo echo " *** ./configure $DISTCHECK_CONFIGURE_FLAGS" diff --git a/src/psl.c b/src/psl.c index 7bb1e4d..7d434e7 100644 --- a/src/psl.c +++ b/src/psl.c @@ -74,6 +74,7 @@ #include #include #include +#include <limits.h> /* for UINT_MAX */ #include #include #ifdef HAVE_ALLOCA_H @@ -325,6 +326,285 @@ static int _suffix_init(_psl_entry_t *suffix, 
const char *rule, size_t length)
 
 	return 0;
 }
+#if !defined(WITH_LIBIDN) && !defined(WITH_LIBIDN2) && !defined(WITH_LIBICU)
+/*
+ * When configured without runtime IDNA support (./configure --disable-runtime), we need a pure ASCII
+ * representation of non-ASCII characters in labels as found in UTF-8 domain names.
+ * This is because the current DAFSA format used may only hold character values [21..127].
+ *
+  Code copied from http://www.nicemice.net/idn/punycode-spec.gz on
+  2011-01-04 with SHA-1 a966a8017f6be579d74a50a226accc7607c40133
+  labeled punycode-spec 1.0.3 (2006-Mar-24-Thu). It is modified for
+  libpsl by Tim Rühsen. License on the original code:
+
+  punycode-spec 1.0.3 (2006-Mar-23-Thu)
+  http://www.nicemice.net/idn/
+  Adam M. Costello
+  http://www.nicemice.net/amc/
+
+  B. Disclaimer and license
+
+  Regarding this entire document or any portion of it (including
+  the pseudocode and C code), the author makes no guarantees and
+  is not responsible for any damage resulting from its use. The
+  author grants irrevocable permission to anyone to use, modify,
+  and distribute it in any way that does not diminish the rights
+  of anyone else to use, modify, and distribute it, provided that
+  redistributed derivative works do not contain misleading author or
+  version information. Derivative works need not be licensed under
+  similar terms.
+
+  C. Punycode sample implementation
+
+  punycode-sample.c 2.0.0 (2004-Mar-21-Sun)
+  http://www.nicemice.net/idn/
+  Adam M. Costello
+  http://www.nicemice.net/amc/
+
+  This is ANSI C code (C89) implementing Punycode 1.0.x.
+ */
+enum punycode_status {
+	punycode_success = 0,
+	punycode_bad_input = 1, /* Input is invalid. */
+	punycode_big_output = 2, /* Output would exceed the space provided. */
+	punycode_overflow = 3 /* Wider integers needed to process input. */
+};
+
+#ifdef PUNYCODE_UINT
+	typedef PUNYCODE_UINT punycode_uint;
+#elif UINT_MAX >= (1 << 26) - 1
+	typedef unsigned int punycode_uint;
+#else
+	typedef unsigned long punycode_uint;
+#endif
+
+/*** Bootstring parameters for Punycode ***/
+enum {
+	base = 36, tmin = 1, tmax = 26, skew = 38, damp = 700,
+	initial_bias = 72, initial_n = 0x80, delimiter = 0x2D
+};
+
+static char encode_digit(punycode_uint d)
+{
+	return d + 22 + 75 * (d < 26);
+	/* 0..25 map to ASCII a..z (always lowercase, no case flag is applied here) */
+	/* 26..35 map to ASCII 0..9 */
+}
+#define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26)
+static const punycode_uint maxint = -1;
+
+/* Bias adaptation step of the Punycode sample code; the result is used as bias for the next delta. */
+static punycode_uint adapt(punycode_uint delta, punycode_uint numpoints, int firsttime)
+{
+	punycode_uint k;
+
+	delta = firsttime ? delta / damp : delta >> 1;
+	/* delta >> 1 is a faster way of doing delta / 2 */
+	delta += delta / numpoints;
+	for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base) {
+		delta /= base - tmin;
+	}
+
+	return k + (base - tmin + 1) * delta / (delta + skew);
+}
+
+/* Encode the code points input[0..input_length_orig) into output[]. On entry *output_length is the */
+/* capacity of output[]; on success it is set to the number of chars written (no NUL is appended). */
+static enum punycode_status punycode_encode(
+	size_t input_length_orig,
+	const punycode_uint input[],
+	size_t *output_length,
+	char output[])
+{
+	punycode_uint input_length, n, delta, h, b, bias, j, m, q, k, t;
+	size_t out, max_out;
+
+	/* The Punycode spec assumes that the input length is the same type */
+	/* of integer as a code point, so we need to convert the size_t to */
+	/* a punycode_uint, which could overflow. */
+
+	if (input_length_orig > maxint)
+		return punycode_overflow;
+
+	input_length = (punycode_uint) input_length_orig;
+
+	/* Initialize the state: */
+	n = initial_n;
+	delta = 0;
+	out = 0;
+	max_out = *output_length;
+	bias = initial_bias;
+
+	/* Handle the basic code points: */
+	for (j = 0; j < input_length; ++j) {
+		if (input[j] < 0x80) {
+			if (max_out - out < 2)
+				return punycode_big_output;
+			output[out++] = (char) input[j];
+		}
+		/* else if (input[j] < n) return punycode_bad_input; */
+		/* (not needed for Punycode with unsigned code points) */
+	}
+
+	h = b = (punycode_uint) out;
+	/* cannot overflow because out <= input_length <= maxint */
+
+	/* h is the number of code points that have been handled, b is the */
+	/* number of basic code points, and out is the number of ASCII code */
+	/* points that have been output. */
+
+	if (b > 0)
+		output[out++] = delimiter;
+
+	/* Main encoding loop: */
+	while (h < input_length) {
+		/* All non-basic code points < n have been */
+		/* handled already. Find the next larger one: */
+
+		for (m = maxint, j = 0; j < input_length; ++j) {
+			/* if (basic(input[j])) continue; */
+			/* (not needed for Punycode) */
+			if (input[j] >= n && input[j] < m)
+				m = input[j];
+		}
+
+		/* Increase delta enough to advance the decoder's */
+		/* <n,i> state to <m,0>, but guard against overflow: */
+
+		if (m - n > (maxint - delta) / (h + 1))
+			return punycode_overflow;
+		delta += (m - n) * (h + 1);
+		n = m;
+
+		for (j = 0; j < input_length; ++j) {
+			/* Punycode does not need to check whether input[j] is basic: */
+			if (input[j] < n /* || basic(input[j]) */) {
+				if (++delta == 0)
+					return punycode_overflow;
+			}
+
+			if (input[j] == n) {
+				/* Represent delta as a generalized variable-length integer: */
+
+				for (q = delta, k = base;; k += base) {
+					if (out >= max_out)
+						return punycode_big_output;
+					t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */
+					    k >= bias + tmax ? tmax : k - bias;
+					if (q < t)
+						break;
+					output[out++] = encode_digit(t + (q - t) % (base - t));
+					q = (q - t) / (base - t);
+				}
+
+				output[out++] = encode_digit(q);
+				bias = adapt(delta, h + 1, h == b);
+				delta = 0;
+				++h;
+			}
+		}
+
+		++delta, ++n;
+	}
+
+	*output_length = out;
+	return punycode_success;
+}
+
+/* Decode the UTF-8 bytes in[0..inlen) into code points out[]; stops at a NUL byte or after outlen-1 */
+/* code points. Returns the number of code points stored, -1 on a malformed sequence or outlen == 0. */
+static ssize_t _utf8_to_utf32(const char *in, size_t inlen, punycode_uint *out, size_t outlen)
+{
+	size_t n = 0;
+	unsigned char *s;
+
+	if (!outlen)
+		return -1;
+	outlen--;
+
+	s = alloca(inlen + 1);
+	memcpy(s, in, inlen);
+	s[inlen] = 0;
+
+	while (*s && n < outlen) {
+		if ((*s & 0x80) == 0) { /* 0xxxxxxx ASCII char */
+			out[n++] = *s;
+			s++;
+		} else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */ {
+			if ((s[1] & 0xC0) != 0x80)
+				return -1;
+			out[n++] = ((*s & 0x1F) << 6) | (s[1] & 0x3F);
+			s += 2;
+		} else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */ {
+			if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
+				return -1;
+			out[n++] = ((*s & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
+			s += 3;
+		} else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ {
+			if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80)
+				return -1;
+			out[n++] = ((*s & 0x07) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
+			s += 4;
+		} else
+			return -1;
+	}
+	return n;
+}
+
+/* Return 1 if none of the n bytes starting at s has its high bit set, else 0. */
+static int _mem_is_ascii(const char *s, size_t n)
+{
+	while (n--)
+		if (*((unsigned char *)s++) >= 128)
+			return 0;
+	return 1;
+}
+
+/* Convert the UTF-8 domain name 'domain' to ASCII, label by label: pure ASCII labels are copied as */
+/* they are, any other label is written as "xn--" followed by its Punycode encoding (no case folding). */
+/* 'out' (outsize bytes) receives the NUL-terminated result. Returns 0 on success, 1 on any failure. */
+static int _domain_to_punycode(const char *domain, char *out, size_t outsize)
+{
+	size_t outlen = 0, labellen;
+	punycode_uint input[256];
+	const char *label, *e;
+
+	for (e = label = domain; e; label = e + 1) {
+		e = strchr(label, '.');
+		labellen = e ? (size_t) (e - label) : strlen(label);
+
+		if (_mem_is_ascii(label, labellen)) {
+			if (outlen + labellen + (e != NULL) >= outsize)
+				return 1;
+
+			memcpy(out + outlen, label, labellen);
+			outlen += labellen;
+		} else {
+			ssize_t inputlen = 0;
+
+			if (outlen + labellen + (e != NULL) + 4 >= outsize)
+				return 1;
+
+			if ((inputlen = _utf8_to_utf32(label, labellen, input, sizeof (input) / sizeof (input[0]))) < 0)
+				return 1;
+
+			memcpy(out + outlen, "xn--", 4);
+			outlen += 4;
+
+			labellen = outsize - outlen - 1 - (e != NULL); /* keep room for '.' and the final NUL */
+			if (punycode_encode(inputlen, input, &labellen, out + outlen))
+				return 1;
+			outlen += labellen;
+		}
+
+		if (e)
+			out[outlen++] = '.';
+		out[outlen] = 0;
+	}
+
+	return 0;
+}
+#endif
 
 static inline int _isspace_ascii(const char c)
 {
@@ -466,6 +746,14 @@ static int _psl_idna_toASCII(_psl_idna_t *idna _UNUSED, const char *utf8, char *
 		ret = 0;
 	} /* else
 		fprintf(_(stderr, "toASCII failed (%d): %s\n"), rc, idna_strerror(rc)); */
+#else
+	char lookupname[128];
+
+	if (_domain_to_punycode(utf8, lookupname, sizeof(lookupname)) == 0) {
+		if (ascii)
+			*ascii = strdup(lookupname);
+		ret = 0;
+	}
 #endif
 
 	return ret;
@@ -541,6 +829,8 @@ static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain, int t
 		suffix.length = p - suffix.label;
 	}
 
+	/* suffix.label / suffix.length are what the builtin DAFSA lookup below is given */
+
 	if (psl == &_builtin_psl) {
 		int rc = LookupStringInFixedSet(kDafsa, sizeof(kDafsa), suffix.label, suffix.length);
 		if (rc != -1) {
@@ -1250,7 +1540,7 @@ int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname,
  *
  * Since: 0.4
  */
-psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale _UNUSED, char **lower)
+psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding _UNUSED, const char *locale _UNUSED, char **lower)
 {
 	int ret = 
PSL_ERR_INVALID_ARG; diff --git a/src/psl2c.c b/src/psl2c.c index 09712e1..f8255e1 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -143,11 +143,11 @@ static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v) fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version); } while (0); #elif defined(BUILTIN_GENERATOR_LIBIDN2) - fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn2/%s) */\n", idn2_check_version(NULL)); + fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn2/%s) */\n", idn2_check_version(NULL)); #elif defined(BUILTIN_GENERATOR_LIBIDN) - fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn/%s) */\n", stringprep_check_version(NULL)); + fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn/%s) */\n", stringprep_check_version(NULL)); #else - fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n"); + fprintf(fpout, "/* automatically generated by psl2c (punycode generated internally) */\n"); #endif if ((fp = fopen("in.tmp", "w"))) {