Add PCRE2_CONFIG_UNICODE_VERSION to give the Unicode version string.
This commit is contained in:
parent
059a8ebfe4
commit
803c38f004
|
@ -120,6 +120,7 @@
|
|||
# 13-May-2014: Updated for PCRE2
|
||||
# 03-June-2014: Updated for Python 3
|
||||
# 20-June-2014: Updated for Unicode 7.0.0
|
||||
# 12-August-2014: Updated to put Unicode version into the file
|
||||
##############################################################################
|
||||
|
||||
|
||||
|
@ -130,6 +131,7 @@ import sys
|
|||
MAX_UNICODE = 0x110000
|
||||
NOTACHAR = 0xffffffff
|
||||
|
||||
|
||||
# Parse a line of Scripts.txt, GraphemeBreakProperty.txt or DerivedGeneralCategory.txt
|
||||
def make_get_names(enum):
|
||||
return lambda chardata: enum.index(chardata[1])
|
||||
|
@ -141,9 +143,21 @@ def get_other_case(chardata):
|
|||
return 0
|
||||
|
||||
|
||||
# Read the whole table in memory
|
||||
# Read the whole table in memory, setting/checking the Unicode version
|
||||
def read_table(file_name, get_value, default_value):
|
||||
global unicode_version
|
||||
|
||||
f = re.match(r'^[^/]+/([^.]+)\.txt$', file_name)
|
||||
file_base = f.group(1)
|
||||
version_pat = r"^# " + re.escape(file_base) + r"-(\d+\.\d+\.\d+)\.txt$"
|
||||
file = open(file_name, 'r', encoding='utf-8')
|
||||
f = re.match(version_pat, file.readline())
|
||||
version = f.group(1)
|
||||
if unicode_version == "":
|
||||
unicode_version = version
|
||||
elif unicode_version != version:
|
||||
# Warn on stderr, without aborting, when this file's version header differs from the first one recorded
print("WARNING: Unicode version differs in %s" % file_name, file=sys.stderr)
|
||||
|
||||
table = [default_value] * MAX_UNICODE
|
||||
for line in file:
|
||||
line = re.sub(r'#.*', '', line)
|
||||
|
@ -327,6 +341,7 @@ break_property_names = ['CR', 'LF', 'Control', 'Extend', 'Prepend',
|
|||
'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'Regional_Indicator', 'Other' ]
|
||||
|
||||
test_record_size()
|
||||
unicode_version = ""
|
||||
|
||||
script = read_table('Unicode.tables/Scripts.txt', make_get_names(script_names), script_names.index('Common'))
|
||||
category = read_table('Unicode.tables/DerivedGeneralCategory.txt', make_get_names(category_names), category_names.index('Cn'))
|
||||
|
@ -464,6 +479,8 @@ print("const uint16_t PRIV(ucd_stage2)[] = {0};")
|
|||
print("const uint32_t PRIV(ucd_caseless_sets)[] = {0};")
|
||||
print("#else")
|
||||
print()
|
||||
print("const char *PRIV(unicode_version) = \"{}\";".format(unicode_version))
|
||||
print()
|
||||
print(record_struct)
|
||||
|
||||
# --- Added by PH: output the table of caseless character sets ---
|
||||
|
|
|
@ -257,8 +257,9 @@ must all be greater than zero. */
|
|||
#define PCRE2_CONFIG_PARENSLIMIT 7
|
||||
#define PCRE2_CONFIG_RECURSIONLIMIT 5
|
||||
#define PCRE2_CONFIG_STACKRECURSE 8
|
||||
#define PCRE2_CONFIG_UTF 9
|
||||
#define PCRE2_CONFIG_VERSION 10
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 9
|
||||
#define PCRE2_CONFIG_UTF 10
|
||||
#define PCRE2_CONFIG_VERSION 11
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
||||
|
|
|
@ -143,6 +143,21 @@ switch (what)
|
|||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
{
|
||||
#if defined SUPPORT_UTF
|
||||
const char *v = PRIV(unicode_version);
|
||||
#else
|
||||
const char *v = "Unicode not supported";
|
||||
#endif
|
||||
PCRE2_UCHAR *t = (PCRE2_UCHAR *)where;
|
||||
if (strlen(v) >= BYTES2CU(length) - 1) return PCRE2_ERROR_BADLENGTH;
|
||||
while (*v != 0) *t++ = *v++;
|
||||
*t = 0;
|
||||
return t - (PCRE2_UCHAR *)where;
|
||||
}
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_UTF:
|
||||
#if defined SUPPORT_UTF
|
||||
*((int *)where) = 1;
|
||||
|
|
|
@ -1795,6 +1795,7 @@ extern const uint8_t PRIV(utf8_table4)[];
|
|||
#define _pcre2_ucp_gbtable PCRE2_SUFFIX(_pcre2_ucp_gbtable_)
|
||||
#define _pcre2_ucp_gentype PCRE2_SUFFIX(_pcre2_ucp_gentype_)
|
||||
#define _pcre2_ucp_typerange PCRE2_SUFFIX(_pcre2_ucp_typerange_)
|
||||
#define _pcre2_unicode_version PCRE2_SUFFIX(_pcre2_unicode_version_)
|
||||
#define _pcre2_utt PCRE2_SUFFIX(_pcre2_utt_)
|
||||
#define _pcre2_utt_names PCRE2_SUFFIX(_pcre2_utt_names_)
|
||||
#define _pcre2_utt_size PCRE2_SUFFIX(_pcre2_utt_size_)
|
||||
|
@ -1812,6 +1813,7 @@ extern const uint32_t PRIV(ucp_gentype)[];
|
|||
#ifdef SUPPORT_JIT
|
||||
extern const int PRIV(ucp_typerange)[];
|
||||
#endif
|
||||
extern const char *PRIV(unicode_version);
|
||||
extern const ucp_type_table PRIV(utt)[];
|
||||
extern const char PRIV(utt_names)[];
|
||||
extern const size_t PRIV(utt_size);
|
||||
|
|
|
@ -39,6 +39,8 @@ const uint16_t PRIV(ucd_stage2)[] = {0};
|
|||
const uint32_t PRIV(ucd_caseless_sets)[] = {0};
|
||||
#else
|
||||
|
||||
const char *PRIV(unicode_version) = "7.0.0";
|
||||
|
||||
/* When recompiling tables with a new Unicode version, please check the
|
||||
types in this structure definition from pcre2_internal.h (the actual
|
||||
field names will be different):
|
||||
|
|
|
@ -164,8 +164,8 @@ void vms_setsymbol( char *, char *, int );
|
|||
#define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */
|
||||
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
|
||||
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
|
||||
#define LOOPREPEAT 500000 /* Default loop count for timing. */
|
||||
#define VERSION_SIZE 64 /* Size of buffer for the version string. */
|
||||
#define LOOPREPEAT 500000 /* Default loop count for timing */
|
||||
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
|
||||
|
||||
/* Execution modes */
|
||||
|
||||
|
@ -615,6 +615,7 @@ static uint32_t max_oveccount;
|
|||
static uint32_t callout_count;
|
||||
|
||||
static VERSION_TYPE version[VERSION_SIZE];
|
||||
static VERSION_TYPE uversion[VERSION_SIZE];
|
||||
|
||||
static patctl def_patctl;
|
||||
static patctl pat_patctl;
|
||||
|
@ -5220,7 +5221,10 @@ printf(" 32-bit support\n");
|
|||
#endif
|
||||
|
||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_UTF, &rc, sizeof(rc));
|
||||
printf (" %sUTF support\n", rc ? "" : "No ");
|
||||
if (rc != 0)
|
||||
printf(" UTF support (Unicode version %s)\n", uversion);
|
||||
else
|
||||
printf(" No UTF support\n");
|
||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &rc, sizeof(rc));
|
||||
if (rc != 0)
|
||||
{
|
||||
|
@ -5289,9 +5293,11 @@ if (PO(options) != DO(options) || PO(control) != DO(control))
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Get the PCRE version number information. */
|
||||
/* Get the PCRE2 and Unicode version number information. */
|
||||
|
||||
PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version, sizeof(VERSION_TYPE)*VERSION_SIZE);
|
||||
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion,
|
||||
sizeof(VERSION_TYPE)*VERSION_SIZE);
|
||||
|
||||
/* Get buffers from malloc() so that valgrind will check their misuse when
|
||||
debugging. They grow automatically when very long lines are read. The 16-
|
||||
|
|
Loading…
Reference in New Issue