Add PCRE2_CONFIG_UNICODE_VERSION to give the Unicode version string.
This commit is contained in:
parent
059a8ebfe4
commit
803c38f004
|
@ -120,6 +120,7 @@
|
||||||
# 13-May-2014: Updated for PCRE2
|
# 13-May-2014: Updated for PCRE2
|
||||||
# 03-June-2014: Updated for Python 3
|
# 03-June-2014: Updated for Python 3
|
||||||
# 20-June-2014: Updated for Unicode 7.0.0
|
# 20-June-2014: Updated for Unicode 7.0.0
|
||||||
|
# 12-August-2014: Updated to put Unicode version into the file
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
@ -130,6 +131,7 @@ import sys
|
||||||
MAX_UNICODE = 0x110000
|
MAX_UNICODE = 0x110000
|
||||||
NOTACHAR = 0xffffffff
|
NOTACHAR = 0xffffffff
|
||||||
|
|
||||||
|
|
||||||
# Parse a line of Scripts.txt, GraphemeBreakProperty.txt or DerivedGeneralCategory.txt
|
# Parse a line of Scripts.txt, GraphemeBreakProperty.txt or DerivedGeneralCategory.txt
|
||||||
def make_get_names(enum):
|
def make_get_names(enum):
|
||||||
return lambda chardata: enum.index(chardata[1])
|
return lambda chardata: enum.index(chardata[1])
|
||||||
|
@ -141,9 +143,21 @@ def get_other_case(chardata):
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
# Read the whole table in memory
|
# Read the whole table in memory, setting/checking the Unicode version
|
||||||
def read_table(file_name, get_value, default_value):
|
def read_table(file_name, get_value, default_value):
|
||||||
|
global unicode_version
|
||||||
|
|
||||||
|
f = re.match(r'^[^/]+/([^.]+)\.txt$', file_name)
|
||||||
|
file_base = f.group(1)
|
||||||
|
version_pat = r"^# " + re.escape(file_base) + r"-(\d+\.\d+\.\d+)\.txt$"
|
||||||
file = open(file_name, 'r', encoding='utf-8')
|
file = open(file_name, 'r', encoding='utf-8')
|
||||||
|
f = re.match(version_pat, file.readline())
|
||||||
|
version = f.group(1)
|
||||||
|
if unicode_version == "":
|
||||||
|
unicode_version = version
|
||||||
|
elif unicode_version != version:
|
||||||
|
# print() does not apply printf-style formatting to its arguments, so the
# file name must be interpolated into the message explicitly.
print("WARNING: Unicode version differs in %s" % file_name, file=sys.stderr)
|
||||||
|
|
||||||
table = [default_value] * MAX_UNICODE
|
table = [default_value] * MAX_UNICODE
|
||||||
for line in file:
|
for line in file:
|
||||||
line = re.sub(r'#.*', '', line)
|
line = re.sub(r'#.*', '', line)
|
||||||
|
@ -327,6 +341,7 @@ break_property_names = ['CR', 'LF', 'Control', 'Extend', 'Prepend',
|
||||||
'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'Regional_Indicator', 'Other' ]
|
'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'Regional_Indicator', 'Other' ]
|
||||||
|
|
||||||
test_record_size()
|
test_record_size()
|
||||||
|
unicode_version = ""
|
||||||
|
|
||||||
script = read_table('Unicode.tables/Scripts.txt', make_get_names(script_names), script_names.index('Common'))
|
script = read_table('Unicode.tables/Scripts.txt', make_get_names(script_names), script_names.index('Common'))
|
||||||
category = read_table('Unicode.tables/DerivedGeneralCategory.txt', make_get_names(category_names), category_names.index('Cn'))
|
category = read_table('Unicode.tables/DerivedGeneralCategory.txt', make_get_names(category_names), category_names.index('Cn'))
|
||||||
|
@ -464,6 +479,8 @@ print("const uint16_t PRIV(ucd_stage2)[] = {0};")
|
||||||
print("const uint32_t PRIV(ucd_caseless_sets)[] = {0};")
|
print("const uint32_t PRIV(ucd_caseless_sets)[] = {0};")
|
||||||
print("#else")
|
print("#else")
|
||||||
print()
|
print()
|
||||||
|
print("const char *PRIV(unicode_version) = \"{}\";".format(unicode_version))
|
||||||
|
print()
|
||||||
print(record_struct)
|
print(record_struct)
|
||||||
|
|
||||||
# --- Added by PH: output the table of caseless character sets ---
|
# --- Added by PH: output the table of caseless character sets ---
|
||||||
|
|
|
@ -257,8 +257,9 @@ must all be greater than zero. */
|
||||||
#define PCRE2_CONFIG_PARENSLIMIT 7
|
#define PCRE2_CONFIG_PARENSLIMIT 7
|
||||||
#define PCRE2_CONFIG_RECURSIONLIMIT 5
|
#define PCRE2_CONFIG_RECURSIONLIMIT 5
|
||||||
#define PCRE2_CONFIG_STACKRECURSE 8
|
#define PCRE2_CONFIG_STACKRECURSE 8
|
||||||
#define PCRE2_CONFIG_UTF 9
|
#define PCRE2_CONFIG_UNICODE_VERSION 9
|
||||||
#define PCRE2_CONFIG_VERSION 10
|
#define PCRE2_CONFIG_UTF 10
|
||||||
|
#define PCRE2_CONFIG_VERSION 11
|
||||||
|
|
||||||
/* Types for code units in patterns and subject strings. */
|
/* Types for code units in patterns and subject strings. */
|
||||||
|
|
||||||
|
|
|
@ -142,6 +142,21 @@ switch (what)
|
||||||
*((int *)where) = 1;
|
*((int *)where) = 1;
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||||
|
{
|
||||||
|
#if defined SUPPORT_UTF
|
||||||
|
const char *v = PRIV(unicode_version);
|
||||||
|
#else
|
||||||
|
const char *v = "Unicode not supported";
|
||||||
|
#endif
|
||||||
|
PCRE2_UCHAR *t = (PCRE2_UCHAR *)where;
|
||||||
|
/* Reject buffers that are too small for the string. The 1 is added on the
strlen() side rather than subtracted from the buffer size, so that a buffer of
zero code units cannot wrap round to a huge value in the unsigned comparison
and let the copy below run past the end of the buffer. */
if (strlen(v) + 1 >= BYTES2CU(length)) return PCRE2_ERROR_BADLENGTH;
|
||||||
|
while (*v != 0) *t++ = *v++;
|
||||||
|
*t = 0;
|
||||||
|
return t - (PCRE2_UCHAR *)where;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case PCRE2_CONFIG_UTF:
|
case PCRE2_CONFIG_UTF:
|
||||||
#if defined SUPPORT_UTF
|
#if defined SUPPORT_UTF
|
||||||
|
|
|
@ -1795,6 +1795,7 @@ extern const uint8_t PRIV(utf8_table4)[];
|
||||||
#define _pcre2_ucp_gbtable PCRE2_SUFFIX(_pcre2_ucp_gbtable_)
|
#define _pcre2_ucp_gbtable PCRE2_SUFFIX(_pcre2_ucp_gbtable_)
|
||||||
#define _pcre2_ucp_gentype PCRE2_SUFFIX(_pcre2_ucp_gentype_)
|
#define _pcre2_ucp_gentype PCRE2_SUFFIX(_pcre2_ucp_gentype_)
|
||||||
#define _pcre2_ucp_typerange PCRE2_SUFFIX(_pcre2_ucp_typerange_)
|
#define _pcre2_ucp_typerange PCRE2_SUFFIX(_pcre2_ucp_typerange_)
|
||||||
|
#define _pcre2_unicode_version PCRE2_SUFFIX(_pcre2_unicode_version_)
|
||||||
#define _pcre2_utt PCRE2_SUFFIX(_pcre2_utt_)
|
#define _pcre2_utt PCRE2_SUFFIX(_pcre2_utt_)
|
||||||
#define _pcre2_utt_names PCRE2_SUFFIX(_pcre2_utt_names_)
|
#define _pcre2_utt_names PCRE2_SUFFIX(_pcre2_utt_names_)
|
||||||
#define _pcre2_utt_size PCRE2_SUFFIX(_pcre2_utt_size_)
|
#define _pcre2_utt_size PCRE2_SUFFIX(_pcre2_utt_size_)
|
||||||
|
@ -1812,6 +1813,7 @@ extern const uint32_t PRIV(ucp_gentype)[];
|
||||||
#ifdef SUPPORT_JIT
|
#ifdef SUPPORT_JIT
|
||||||
extern const int PRIV(ucp_typerange)[];
|
extern const int PRIV(ucp_typerange)[];
|
||||||
#endif
|
#endif
|
||||||
|
extern const char *PRIV(unicode_version);
|
||||||
extern const ucp_type_table PRIV(utt)[];
|
extern const ucp_type_table PRIV(utt)[];
|
||||||
extern const char PRIV(utt_names)[];
|
extern const char PRIV(utt_names)[];
|
||||||
extern const size_t PRIV(utt_size);
|
extern const size_t PRIV(utt_size);
|
||||||
|
|
|
@ -39,6 +39,8 @@ const uint16_t PRIV(ucd_stage2)[] = {0};
|
||||||
const uint32_t PRIV(ucd_caseless_sets)[] = {0};
|
const uint32_t PRIV(ucd_caseless_sets)[] = {0};
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
const char *PRIV(unicode_version) = "7.0.0";
|
||||||
|
|
||||||
/* When recompiling tables with a new Unicode version, please check the
|
/* When recompiling tables with a new Unicode version, please check the
|
||||||
types in this structure definition from pcre2_internal.h (the actual
|
types in this structure definition from pcre2_internal.h (the actual
|
||||||
field names will be different):
|
field names will be different):
|
||||||
|
|
|
@ -164,8 +164,8 @@ void vms_setsymbol( char *, char *, int );
|
||||||
#define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */
|
#define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */
|
||||||
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
|
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
|
||||||
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
|
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
|
||||||
#define LOOPREPEAT 500000 /* Default loop count for timing. */
|
#define LOOPREPEAT 500000 /* Default loop count for timing */
|
||||||
#define VERSION_SIZE 64 /* Size of buffer for the version string. */
|
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
|
||||||
|
|
||||||
/* Execution modes */
|
/* Execution modes */
|
||||||
|
|
||||||
|
@ -615,6 +615,7 @@ static uint32_t max_oveccount;
|
||||||
static uint32_t callout_count;
|
static uint32_t callout_count;
|
||||||
|
|
||||||
static VERSION_TYPE version[VERSION_SIZE];
|
static VERSION_TYPE version[VERSION_SIZE];
|
||||||
|
static VERSION_TYPE uversion[VERSION_SIZE];
|
||||||
|
|
||||||
static patctl def_patctl;
|
static patctl def_patctl;
|
||||||
static patctl pat_patctl;
|
static patctl pat_patctl;
|
||||||
|
@ -5220,7 +5221,10 @@ printf(" 32-bit support\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_UTF, &rc, sizeof(rc));
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_UTF, &rc, sizeof(rc));
|
||||||
printf (" %sUTF support\n", rc ? "" : "No ");
|
if (rc != 0)
|
||||||
|
printf(" UTF support (Unicode version %s)\n", uversion);
|
||||||
|
else
|
||||||
|
printf(" No UTF support\n");
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &rc, sizeof(rc));
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &rc, sizeof(rc));
|
||||||
if (rc != 0)
|
if (rc != 0)
|
||||||
{
|
{
|
||||||
|
@ -5289,9 +5293,11 @@ if (PO(options) != DO(options) || PO(control) != DO(control))
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get the PCRE version number information. */
|
/* Get the PCRE2 and Unicode version number information. */
|
||||||
|
|
||||||
PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version, sizeof(VERSION_TYPE)*VERSION_SIZE);
|
PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version, sizeof(VERSION_TYPE)*VERSION_SIZE);
|
||||||
|
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion,
|
||||||
|
sizeof(VERSION_TYPE)*VERSION_SIZE);
|
||||||
|
|
||||||
/* Get buffers from malloc() so that valgrind will check their misuse when
|
/* Get buffers from malloc() so that valgrind will check their misuse when
|
||||||
debugging. They grow automatically when very long lines are read. The 16-
|
debugging. They grow automatically when very long lines are read. The 16-
|
||||||
|
|
Loading…
Reference in New Issue