2014-08-10 18:09:24 +02:00
|
|
|
/*************************************************
|
|
|
|
* Perl-Compatible Regular Expressions *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
|
|
|
and semantics are as close as possible to those of the Perl 5 language.
|
|
|
|
|
|
|
|
Written by Philip Hazel
|
|
|
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
2020-03-20 19:09:59 +01:00
|
|
|
New API code Copyright (c) 2016-2020 University of Cambridge
|
2014-08-10 18:09:24 +02:00
|
|
|
|
|
|
|
-----------------------------------------------------------------------------
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
|
|
|
|
* Redistributions of source code must retain the above copyright notice,
|
|
|
|
this list of conditions and the following disclaimer.
|
|
|
|
|
|
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer in the
|
|
|
|
documentation and/or other materials provided with the distribution.
|
|
|
|
|
|
|
|
* Neither the name of the University of Cambridge nor the names of its
|
|
|
|
contributors may be used to endorse or promote products derived from
|
|
|
|
this software without specific prior written permission.
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
-----------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/* This is a freestanding support program to generate a file containing
|
2020-03-20 19:09:59 +01:00
|
|
|
character tables for PCRE2. The tables are built using the pcre2_maketables()
|
|
|
|
function, which is part of the PCRE2 API. By default, the system's "C" locale
|
|
|
|
is used rather than what the building user happens to have set, but the -L
|
|
|
|
option can be used to select the current locale from the LC_ALL environment
|
|
|
|
variable. By default, the tables are written in source form, but if -b is
|
|
|
|
given, they are written in binary. */
|
2014-08-10 18:09:24 +02:00
|
|
|
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
#include "config.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <locale.h>
|
|
|
|
|
2014-10-15 17:57:49 +02:00
|
|
|
#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */
|
2014-08-10 18:09:24 +02:00
|
|
|
#include "pcre2_internal.h"
|
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
#define PCRE2_DFTABLES /* pcre2_maketables.c notices this */
|
2014-08-10 18:09:24 +02:00
|
|
|
#include "pcre2_maketables.c"
|
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
|
2020-11-06 18:27:35 +01:00
|
|
|
/* Names of the character classes that have their own bit map in the class
bit map table. They are written as comments in the generated source, one name
per 32-byte map, so the order here must match the order of the maps. */

static const char *classlist[] =
  {
  "space",
  "xdigit",
  "digit",
  "upper",
  "lower",
  "word",
  "graph",
  "print",
  "punct",
  "cntrl"
  };
|
2020-03-20 19:09:59 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
2020-04-15 18:34:36 +02:00
|
|
|
/*************************************************
|
2020-03-20 19:09:59 +01:00
|
|
|
* Usage *
|
|
|
|
*************************************************/
|
2020-04-15 18:34:36 +02:00
|
|
|
|
|
|
|
/* Print a short summary of the command line syntax and the available options
on the standard error stream. Takes no arguments and returns nothing. */

static void
usage(void)
{
static const char usage_text[] =
  "Usage: pcre2_dftables [options] <output file>\n"
  " -b Write output in binary (default is source code)\n"
  " -L Use locale from LC_ALL (default is \"C\" locale)\n";

(void)fputs(usage_text, stderr);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Entry point *
|
|
|
|
*************************************************/
|
|
|
|
|
2014-08-10 18:09:24 +02:00
|
|
|
int main(int argc, char **argv)
|
|
|
|
{
|
|
|
|
FILE *f;
|
2020-03-20 19:09:59 +01:00
|
|
|
int i;
|
|
|
|
int nclass = 0;
|
|
|
|
BOOL binary = FALSE;
|
2020-11-06 18:27:35 +01:00
|
|
|
char *env = (char *)"C";
|
2014-08-10 18:09:24 +02:00
|
|
|
const unsigned char *tables;
|
|
|
|
const unsigned char *base_of_tables;
|
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
/* Process options */
|
2014-08-10 18:09:24 +02:00
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
for (i = 1; i < argc; i++)
|
2014-08-10 18:09:24 +02:00
|
|
|
{
|
2020-11-06 18:27:35 +01:00
|
|
|
char *arg = argv[i];
|
2020-03-20 19:09:59 +01:00
|
|
|
if (*arg != '-') break;
|
2020-04-15 18:34:36 +02:00
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
|
|
|
|
{
|
|
|
|
usage();
|
2020-04-15 18:34:36 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
else if (strcmp(arg, "-L") == 0)
|
2020-04-15 18:34:36 +02:00
|
|
|
{
|
2020-03-20 19:09:59 +01:00
|
|
|
if (setlocale(LC_ALL, "") == NULL)
|
|
|
|
{
|
|
|
|
(void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n");
|
2020-04-15 18:34:36 +02:00
|
|
|
return 1;
|
2020-03-20 19:09:59 +01:00
|
|
|
}
|
2020-04-15 18:34:36 +02:00
|
|
|
env = getenv("LC_ALL");
|
|
|
|
}
|
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
else if (strcmp(arg, "-b") == 0)
|
|
|
|
binary = TRUE;
|
2020-04-15 18:34:36 +02:00
|
|
|
|
|
|
|
else
|
2020-03-20 19:09:59 +01:00
|
|
|
{
|
|
|
|
(void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg);
|
|
|
|
return 1;
|
2020-04-15 18:34:36 +02:00
|
|
|
}
|
|
|
|
}
|
2014-08-10 18:09:24 +02:00
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
if (i != argc - 1)
|
2014-08-10 18:09:24 +02:00
|
|
|
{
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n");
|
2014-08-10 18:09:24 +02:00
|
|
|
return 1;
|
|
|
|
}
|
2020-04-15 18:34:36 +02:00
|
|
|
|
|
|
|
/* Make the tables */
|
2014-08-10 18:09:24 +02:00
|
|
|
|
|
|
|
tables = maketables();
|
|
|
|
base_of_tables = tables;
|
|
|
|
|
|
|
|
f = fopen(argv[i], "wb");
|
|
|
|
if (f == NULL)
|
|
|
|
{
|
2020-03-20 19:09:59 +01:00
|
|
|
fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]);
|
2014-08-10 18:09:24 +02:00
|
|
|
return 1;
|
|
|
|
}
|
2020-04-15 18:34:36 +02:00
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
/* If -b was specified, we write the tables in binary. */
|
2014-08-10 18:09:24 +02:00
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
if (binary)
|
|
|
|
{
|
2020-04-15 18:34:36 +02:00
|
|
|
int yield = 0;
|
2020-03-20 19:09:59 +01:00
|
|
|
size_t len = fwrite(tables, 1, TABLES_LENGTH, f);
|
|
|
|
if (len != TABLES_LENGTH)
|
|
|
|
{
|
|
|
|
(void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d "
|
|
|
|
"instead of %d\n", (int)len, TABLES_LENGTH);
|
|
|
|
yield = 1;
|
2020-04-15 18:34:36 +02:00
|
|
|
}
|
2020-03-20 19:09:59 +01:00
|
|
|
fclose(f);
|
|
|
|
free((void *)base_of_tables);
|
|
|
|
return yield;
|
|
|
|
}
|
2014-08-10 18:09:24 +02:00
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
/* Write the tables as source code for inclusion in the PCRE2 library. There
|
|
|
|
are several fprintf() calls here, because gcc in pedantic mode complains about
|
|
|
|
the very long string otherwise. */
|
|
|
|
|
|
|
|
(void)fprintf(f,
|
2014-08-10 18:09:24 +02:00
|
|
|
"/*************************************************\n"
|
|
|
|
"* Perl-Compatible Regular Expressions *\n"
|
|
|
|
"*************************************************/\n\n"
|
2020-03-20 19:09:59 +01:00
|
|
|
"/* This file was automatically written by the pcre2_dftables auxiliary\n"
|
2014-08-10 18:09:24 +02:00
|
|
|
"program. It contains character tables that are used when no external\n"
|
|
|
|
"tables are passed to PCRE2 by the application that calls it. The tables\n"
|
2015-07-08 10:30:23 +02:00
|
|
|
"are used only for characters whose code values are less than 256. */\n\n");
|
2020-04-15 18:34:36 +02:00
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f,
|
2020-04-15 18:34:36 +02:00
|
|
|
"/* This set of tables was written in the %s locale. */\n\n", env);
|
2014-08-10 18:09:24 +02:00
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f,
|
|
|
|
"/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n"
|
|
|
|
"to build alternative versions of this file. This is necessary if you are\n"
|
2018-08-19 17:44:06 +02:00
|
|
|
"running in an EBCDIC environment, or if you want to default to a different\n"
|
2020-03-20 19:09:59 +01:00
|
|
|
"encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n"
|
|
|
|
"these tables in the \"C\" locale by default. This happens automatically if\n"
|
|
|
|
"PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n"
|
|
|
|
"pcre2_dftables manually with the -L option to build tables using the LC_ALL\n"
|
|
|
|
"locale. */\n\n");
|
2018-08-19 17:44:06 +02:00
|
|
|
|
2014-08-10 18:09:24 +02:00
|
|
|
/* Force config.h in z/OS */
|
|
|
|
|
|
|
|
#if defined NATIVE_ZOS
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f,
|
2014-08-10 18:09:24 +02:00
|
|
|
"/* For z/OS, config.h is forced */\n"
|
|
|
|
"#ifndef HAVE_CONFIG_H\n"
|
|
|
|
"#define HAVE_CONFIG_H 1\n"
|
|
|
|
"#endif\n\n");
|
|
|
|
#endif
|
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f,
|
2018-08-19 17:44:06 +02:00
|
|
|
"/* The following #include is present because without it gcc 4.x may remove\n"
|
2014-08-10 18:09:24 +02:00
|
|
|
"the array definition from the final binary if PCRE2 is built into a static\n"
|
|
|
|
"library and dead code stripping is activated. This leads to link errors.\n"
|
|
|
|
"Pulling in the header ensures that the array gets flagged as \"someone\n"
|
|
|
|
"outside this compilation unit might reference this\" and so it will always\n"
|
|
|
|
"be supplied to the linker. */\n\n");
|
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f,
|
2014-08-10 18:09:24 +02:00
|
|
|
"#ifdef HAVE_CONFIG_H\n"
|
|
|
|
"#include \"config.h\"\n"
|
|
|
|
"#endif\n\n"
|
|
|
|
"#include \"pcre2_internal.h\"\n\n");
|
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f,
|
2014-08-10 18:09:24 +02:00
|
|
|
"const uint8_t PRIV(default_tables)[] = {\n\n"
|
|
|
|
"/* This table is a lower casing table. */\n\n");
|
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f, " ");
|
2014-08-10 18:09:24 +02:00
|
|
|
for (i = 0; i < 256; i++)
|
|
|
|
{
|
|
|
|
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
|
|
|
fprintf(f, "%3d", *tables++);
|
|
|
|
if (i != 255) fprintf(f, ",");
|
|
|
|
}
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f, ",\n\n");
|
2014-08-10 18:09:24 +02:00
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f, "/* This table is a case flipping table. */\n\n");
|
2014-08-10 18:09:24 +02:00
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f, " ");
|
2014-08-10 18:09:24 +02:00
|
|
|
for (i = 0; i < 256; i++)
|
|
|
|
{
|
|
|
|
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
|
|
|
fprintf(f, "%3d", *tables++);
|
|
|
|
if (i != 255) fprintf(f, ",");
|
|
|
|
}
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f, ",\n\n");
|
2014-08-10 18:09:24 +02:00
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f,
|
2018-08-19 17:44:06 +02:00
|
|
|
"/* This table contains bit maps for various character classes. Each map is 32\n"
|
|
|
|
"bytes long and the bits run from the least significant end of each byte. The\n"
|
|
|
|
"classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
|
2020-03-20 19:09:59 +01:00
|
|
|
"graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
2014-08-10 18:09:24 +02:00
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f, " ");
|
2014-08-10 18:09:24 +02:00
|
|
|
for (i = 0; i < cbit_length; i++)
|
|
|
|
{
|
|
|
|
if ((i & 7) == 0 && i != 0)
|
|
|
|
{
|
2020-03-20 19:09:59 +01:00
|
|
|
if ((i & 31) == 0) (void)fprintf(f, "\n");
|
2020-04-15 18:34:36 +02:00
|
|
|
if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]);
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f, "\n ");
|
2014-08-10 18:09:24 +02:00
|
|
|
}
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f, "0x%02x", *tables++);
|
|
|
|
if (i != cbit_length - 1) (void)fprintf(f, ",");
|
2014-08-10 18:09:24 +02:00
|
|
|
}
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f, ",\n\n");
|
2014-08-10 18:09:24 +02:00
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f,
|
2014-08-10 18:09:24 +02:00
|
|
|
"/* This table identifies various classes of character by individual bits:\n"
|
|
|
|
" 0x%02x white space character\n"
|
|
|
|
" 0x%02x letter\n"
|
2018-09-24 18:23:53 +02:00
|
|
|
" 0x%02x lower case letter\n"
|
2014-08-10 18:09:24 +02:00
|
|
|
" 0x%02x decimal digit\n"
|
2018-08-19 17:44:06 +02:00
|
|
|
" 0x%02x alphanumeric or '_'\n*/\n\n",
|
2018-09-24 18:23:53 +02:00
|
|
|
ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
|
2014-08-10 18:09:24 +02:00
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f, " ");
|
2014-08-10 18:09:24 +02:00
|
|
|
for (i = 0; i < 256; i++)
|
|
|
|
{
|
|
|
|
if ((i & 7) == 0 && i != 0)
|
|
|
|
{
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f, " /* ");
|
|
|
|
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
|
|
|
|
else (void)fprintf(f, "%3d-", i-8);
|
|
|
|
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
|
|
|
|
else (void)fprintf(f, "%3d", i-1);
|
|
|
|
(void)fprintf(f, " */\n ");
|
2014-08-10 18:09:24 +02:00
|
|
|
}
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f, "0x%02x", *tables++);
|
|
|
|
if (i != 255) (void)fprintf(f, ",");
|
2014-08-10 18:09:24 +02:00
|
|
|
}
|
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
(void)fprintf(f, "};/* ");
|
|
|
|
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
|
|
|
|
else (void)fprintf(f, "%3d-", i-8);
|
|
|
|
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
|
|
|
|
else (void)fprintf(f, "%3d", i-1);
|
|
|
|
(void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
|
2014-08-10 18:09:24 +02:00
|
|
|
|
|
|
|
fclose(f);
|
|
|
|
free((void *)base_of_tables);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-03-20 19:09:59 +01:00
|
|
|
/* End of pcre2_dftables.c */
|