Fix 32-bit non-UTF property test crash.
This commit is contained in:
parent
f6f7c9148f
commit
53bf29d689
|
@ -10,6 +10,10 @@ Version 10.24 14-February-2017
|
|||
(a) Check for malloc failures when getting memory for the ovector (POSIX) or
|
||||
the match data block (non-POSIX).
|
||||
|
||||
2. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property
|
||||
for a character with a code point greater than 0x10ffff (the Unicode maximum)
|
||||
caused a crash.
|
||||
|
||||
|
||||
Version 10.23 14-February-2017
|
||||
------------------------------
|
||||
|
|
|
@ -236,7 +236,8 @@ def print_table(table, table_name, block_size = None):
|
|||
fmt = "%3d," * ELEMS_PER_LINE + " /* U+%04X */"
|
||||
mult = MAX_UNICODE / len(table)
|
||||
for i in range(0, len(table), ELEMS_PER_LINE):
|
||||
print(fmt % (table[i:i+ELEMS_PER_LINE] + (i * mult,)))
|
||||
print(fmt % (table[i:i+ELEMS_PER_LINE] +
|
||||
(int(i * mult),)))
|
||||
else:
|
||||
if block_size > ELEMS_PER_LINE:
|
||||
el = ELEMS_PER_LINE
|
||||
|
@ -485,6 +486,20 @@ print("#else")
|
|||
print()
|
||||
print("const char *PRIV(unicode_version) = \"{}\";".format(unicode_version))
|
||||
print()
|
||||
print("/* If the 32-bit library is run in non-32-bit mode, character values")
|
||||
print("greater than 0x10ffff may be encountered. For these we set up a")
|
||||
print("special record. */")
|
||||
print()
|
||||
print("#if PCRE2_CODE_UNIT_WIDTH == 32")
|
||||
print("const ucd_record PRIV(dummy_ucd_record)[] = {{")
|
||||
print(" ucp_Common, /* script */")
|
||||
print(" ucp_Cn, /* type unassigned */")
|
||||
print(" ucp_gbOther, /* grapheme break property */")
|
||||
print(" 0, /* case set */")
|
||||
print(" 0, /* other case */")
|
||||
print(" }};")
|
||||
print("#endif")
|
||||
print()
|
||||
print(record_struct)
|
||||
|
||||
# --- Added by PH: output the table of caseless character sets ---
|
||||
|
|
|
@ -1774,10 +1774,17 @@ typedef struct {
|
|||
/* UCD access macros */
|
||||
|
||||
#define UCD_BLOCK_SIZE 128
|
||||
#define GET_UCD(ch) (PRIV(ucd_records) + \
|
||||
#define REAL_GET_UCD(ch) (PRIV(ucd_records) + \
|
||||
PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
|
||||
UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#define GET_UCD(ch) ((ch > MAX_UTF_CODE_POINT)? \
|
||||
PRIV(dummy_ucd_record) : REAL_GET_UCD(ch))
|
||||
#else
|
||||
#define GET_UCD(ch) REAL_GET_UCD(ch)
|
||||
#endif
|
||||
|
||||
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
|
||||
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
|
||||
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
|
||||
|
@ -1834,6 +1841,9 @@ extern const uint8_t PRIV(utf8_table4)[];
|
|||
#define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_)
|
||||
#define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_)
|
||||
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#define _pcre2_dummy_ucd_record PCRE2_SUFFIX(_pcre2_dummy_ucd_record_)
|
||||
#endif
|
||||
#define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_)
|
||||
#define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_)
|
||||
#define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
|
||||
|
@ -1858,6 +1868,9 @@ extern const uint32_t PRIV(hspace_list)[];
|
|||
extern const uint32_t PRIV(vspace_list)[];
|
||||
extern const uint32_t PRIV(ucd_caseless_sets)[];
|
||||
extern const ucd_record PRIV(ucd_records)[];
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
extern const ucd_record PRIV(dummy_ucd_record)[];
|
||||
#endif
|
||||
extern const uint8_t PRIV(ucd_stage1)[];
|
||||
extern const uint16_t PRIV(ucd_stage2)[];
|
||||
extern const uint32_t PRIV(ucp_gbtable)[];
|
||||
|
|
|
@ -41,6 +41,20 @@ const uint32_t PRIV(ucd_caseless_sets)[] = {0};
|
|||
|
||||
const char *PRIV(unicode_version) = "8.0.0";
|
||||
|
||||
/* If the 32-bit library is run in non-32-bit mode, character values
|
||||
greater than 0x10ffff may be encountered. For these we set up a
|
||||
special record. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
const ucd_record PRIV(dummy_ucd_record)[] = {{
|
||||
ucp_Common, /* script */
|
||||
ucp_Cn, /* type unassigned */
|
||||
ucp_gbOther, /* grapheme break property */
|
||||
0, /* case set */
|
||||
0, /* other case */
|
||||
}};
|
||||
#endif
|
||||
|
||||
/* When recompiling tables with a new Unicode version, please check the
|
||||
types in this structure definition from pcre2_internal.h (the actual
|
||||
field names will be different):
|
||||
|
|
|
@ -360,4 +360,7 @@
|
|||
|
||||
/[\s[:^ascii:]]/B,ucp
|
||||
|
||||
/\pP/ucp
|
||||
\x{7fffffff}\=no_jit
|
||||
|
||||
# End of testinput12
|
||||
|
|
|
@ -1415,4 +1415,10 @@ No match
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\pP/ucp
|
||||
\x{7fffffff}\=no_jit
|
||||
** Character \x{7fffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||
** Truncation will probably give the wrong result.
|
||||
No match
|
||||
|
||||
# End of testinput12
|
||||
|
|
|
@ -1409,4 +1409,8 @@ No match
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\pP/ucp
|
||||
\x{7fffffff}\=no_jit
|
||||
No match
|
||||
|
||||
# End of testinput12
|
||||
|
|
Loading…
Reference in New Issue