Fix 32-bit non-UTF property test crash.

This commit is contained in:
Philip.Hazel 2017-02-24 18:25:32 +00:00
parent f6f7c9148f
commit 53bf29d689
7 changed files with 61 additions and 2 deletions

View File

@ -10,6 +10,10 @@ Version 10.24 14-February-2017
(a) Check for malloc failures when getting memory for the ovector (POSIX) or
the match data block (non-POSIX).
2. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property
for a character with a code point greater than 0x10ffff (the Unicode maximum)
caused a crash.
Version 10.23 14-February-2017
------------------------------

View File

@ -236,7 +236,8 @@ def print_table(table, table_name, block_size = None):
fmt = "%3d," * ELEMS_PER_LINE + " /* U+%04X */"
mult = MAX_UNICODE / len(table)
for i in range(0, len(table), ELEMS_PER_LINE):
print(fmt % (table[i:i+ELEMS_PER_LINE] + (i * mult,)))
print(fmt % (table[i:i+ELEMS_PER_LINE] +
(int(i * mult),)))
else:
if block_size > ELEMS_PER_LINE:
el = ELEMS_PER_LINE
@ -485,6 +486,20 @@ print("#else")
print()
print("const char *PRIV(unicode_version) = \"{}\";".format(unicode_version))
print()
# Emit the substitute record that the 32-bit library's GET_UCD() returns for
# character values above MAX_UTF_CODE_POINT. The doubled braces are printed
# literally (these print() calls do not go through str.format()), so the
# generated C is a one-element array initializer.
print("/* If the 32-bit library is run in non-UTF mode, character values")
print("greater than 0x10ffff may be encountered. For these we set up a")
print("special record. */")
print()
print("#if PCRE2_CODE_UNIT_WIDTH == 32")
print("const ucd_record PRIV(dummy_ucd_record)[] = {{")
print(" ucp_Common, /* script */")
print(" ucp_Cn, /* type unassigned */")
print(" ucp_gbOther, /* grapheme break property */")
print(" 0, /* case set */")
print(" 0, /* other case */")
print(" }};")
print("#endif")
print()
print(record_struct)
# --- Added by PH: output the table of caseless character sets ---

View File

@ -1774,10 +1774,17 @@ typedef struct {
/* UCD access macros */
#define UCD_BLOCK_SIZE 128
#define GET_UCD(ch) (PRIV(ucd_records) + \
#define REAL_GET_UCD(ch) (PRIV(ucd_records) + \
PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])
/* GET_UCD(ch) yields a pointer to the ucd_record for character ch. In the
32-bit library a character value may exceed MAX_UTF_CODE_POINT; such values
are diverted to PRIV(dummy_ucd_record) instead of going through the table
lookup in REAL_GET_UCD(), which used to crash on them. For other code unit
widths the table lookup is used directly. The argument is parenthesized in
the comparison so that an expression argument is compared as a whole. Note
that ch may be evaluated more than once. */

#if PCRE2_CODE_UNIT_WIDTH == 32
#define GET_UCD(ch) (((ch) > MAX_UTF_CODE_POINT)? \
PRIV(dummy_ucd_record) : REAL_GET_UCD(ch))
#else
#define GET_UCD(ch) REAL_GET_UCD(ch)
#endif
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
@ -1834,6 +1841,9 @@ extern const uint8_t PRIV(utf8_table4)[];
#define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_)
#define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_)
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
/* The dummy UCD record is defined only in the 32-bit library, so its name is
mapped only there. */
#if PCRE2_CODE_UNIT_WIDTH == 32
#define _pcre2_dummy_ucd_record PCRE2_SUFFIX(_pcre2_dummy_ucd_record_)
#endif
#define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_)
#define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_)
#define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
@ -1858,6 +1868,9 @@ extern const uint32_t PRIV(hspace_list)[];
extern const uint32_t PRIV(vspace_list)[];
extern const uint32_t PRIV(ucd_caseless_sets)[];
extern const ucd_record PRIV(ucd_records)[];
/* Substitute record used by GET_UCD() for character values greater than
MAX_UTF_CODE_POINT; it exists only in the 32-bit library. */
#if PCRE2_CODE_UNIT_WIDTH == 32
extern const ucd_record PRIV(dummy_ucd_record)[];
#endif
extern const uint8_t PRIV(ucd_stage1)[];
extern const uint16_t PRIV(ucd_stage2)[];
extern const uint32_t PRIV(ucp_gbtable)[];

View File

@ -41,6 +41,20 @@ const uint32_t PRIV(ucd_caseless_sets)[] = {0};
const char *PRIV(unicode_version) = "8.0.0";
/* If the 32-bit library is run in non-UTF mode, character values
greater than 0x10ffff may be encountered. For these we set up a
special record, which GET_UCD() returns in place of a table lookup. It
describes such a character as unassigned (Cn) in the Common script, with
zero in the case set and other case fields. */
#if PCRE2_CODE_UNIT_WIDTH == 32
const ucd_record PRIV(dummy_ucd_record)[] = {{
ucp_Common, /* script */
ucp_Cn, /* type unassigned */
ucp_gbOther, /* grapheme break property */
0, /* case set */
0, /* other case */
}};
#endif
/* When recompiling tables with a new Unicode version, please check the
types in this structure definition from pcre2_internal.h (the actual
field names will be different):

View File

@ -360,4 +360,7 @@
/[\s[:^ascii:]]/B,ucp
/\pP/ucp
\x{7fffffff}\=no_jit
# End of testinput12

View File

@ -1415,4 +1415,10 @@ No match
End
------------------------------------------------------------------
/\pP/ucp
\x{7fffffff}\=no_jit
** Character \x{7fffffff} is greater than 0xffff and UTF-16 mode is not enabled.
** Truncation will probably give the wrong result.
No match
# End of testinput12

View File

@ -1409,4 +1409,8 @@ No match
End
------------------------------------------------------------------
/\pP/ucp
\x{7fffffff}\=no_jit
No match
# End of testinput12