Fix 32-bit non-UTF property test crash.

This commit is contained in:
Philip.Hazel 2017-02-24 18:25:32 +00:00
parent f6f7c9148f
commit 53bf29d689
7 changed files with 61 additions and 2 deletions

View File

@ -9,6 +9,10 @@ Version 10.24 14-February-2017
(a) Check for malloc failures when getting memory for the ovector (POSIX) or (a) Check for malloc failures when getting memory for the ovector (POSIX) or
the match data block (non-POSIX). the match data block (non-POSIX).
2. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property
for a character with a code point greater than 0x10ffff (the Unicode maximum)
caused a crash.
Version 10.23 14-February-2017 Version 10.23 14-February-2017

View File

@ -236,7 +236,8 @@ def print_table(table, table_name, block_size = None):
fmt = "%3d," * ELEMS_PER_LINE + " /* U+%04X */" fmt = "%3d," * ELEMS_PER_LINE + " /* U+%04X */"
mult = MAX_UNICODE / len(table) mult = MAX_UNICODE / len(table)
for i in range(0, len(table), ELEMS_PER_LINE): for i in range(0, len(table), ELEMS_PER_LINE):
print(fmt % (table[i:i+ELEMS_PER_LINE] + (i * mult,))) print(fmt % (table[i:i+ELEMS_PER_LINE] +
(int(i * mult),)))
else: else:
if block_size > ELEMS_PER_LINE: if block_size > ELEMS_PER_LINE:
el = ELEMS_PER_LINE el = ELEMS_PER_LINE
@ -485,6 +486,20 @@ print("#else")
print() print()
print("const char *PRIV(unicode_version) = \"{}\";".format(unicode_version)) print("const char *PRIV(unicode_version) = \"{}\";".format(unicode_version))
print() print()
# Emit the dummy UCD record used by the 32-bit library for character values
# above the Unicode maximum. These are plain strings (no .format() call), so
# the doubled braces are written out literally: the record is an array
# holding a single struct initializer.
print("/* If the 32-bit library is run in non-UTF mode, character values")
print("greater than 0x10ffff may be encountered. For these we set up a")
print("special record. */")
print()
print("#if PCRE2_CODE_UNIT_WIDTH == 32")
print("const ucd_record PRIV(dummy_ucd_record)[] = {{")
print(" ucp_Common, /* script */")
print(" ucp_Cn, /* type unassigned */")
print(" ucp_gbOther, /* grapheme break property */")
print(" 0, /* case set */")
print(" 0, /* other case */")
print(" }};")
print("#endif")
print()
print(record_struct) print(record_struct)
# --- Added by PH: output the table of caseless character sets --- # --- Added by PH: output the table of caseless character sets ---

View File

@ -1774,10 +1774,17 @@ typedef struct {
/* UCD access macros */ /* UCD access macros */
#define UCD_BLOCK_SIZE 128 #define UCD_BLOCK_SIZE 128
#define GET_UCD(ch) (PRIV(ucd_records) + \ #define REAL_GET_UCD(ch) (PRIV(ucd_records) + \
PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \ PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE]) UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])
/* In the 32-bit library a code unit can hold a value greater than
MAX_UTF_CODE_POINT when UTF mode is not in use. Looking up a property for such
a value used to crash, so GET_UCD() yields the dummy record for it instead of
going through the real lookup. The argument is parenthesized in the comparison
so that an expression such as (c & 0xff) or (a? b : c) is compared as a whole.
Note that ch is evaluated more than once, so it must not have side effects. */

#if PCRE2_CODE_UNIT_WIDTH == 32
#define GET_UCD(ch) (((ch) > MAX_UTF_CODE_POINT)? \
  PRIV(dummy_ucd_record) : REAL_GET_UCD(ch))
#else
#define GET_UCD(ch) REAL_GET_UCD(ch)
#endif
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype #define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
#define UCD_SCRIPT(ch) GET_UCD(ch)->script #define UCD_SCRIPT(ch) GET_UCD(ch)->script
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)] #define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
@ -1834,6 +1841,9 @@ extern const uint8_t PRIV(utf8_table4)[];
#define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_) #define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_)
#define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_) #define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_)
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_) #define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
/* The dummy UCD record is declared only when the code unit width is 32, so
its name mapping is confined to that build as well. */
#if PCRE2_CODE_UNIT_WIDTH == 32
#define _pcre2_dummy_ucd_record PCRE2_SUFFIX(_pcre2_dummy_ucd_record_)
#endif
#define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_) #define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_)
#define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_) #define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_)
#define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_) #define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
@ -1858,6 +1868,9 @@ extern const uint32_t PRIV(hspace_list)[];
extern const uint32_t PRIV(vspace_list)[]; extern const uint32_t PRIV(vspace_list)[];
extern const uint32_t PRIV(ucd_caseless_sets)[]; extern const uint32_t PRIV(ucd_caseless_sets)[];
extern const ucd_record PRIV(ucd_records)[]; extern const ucd_record PRIV(ucd_records)[];
/* Record that GET_UCD() yields for values above MAX_UTF_CODE_POINT; it is
declared only for the 32-bit library. */
#if PCRE2_CODE_UNIT_WIDTH == 32
extern const ucd_record PRIV(dummy_ucd_record)[];
#endif
extern const uint8_t PRIV(ucd_stage1)[]; extern const uint8_t PRIV(ucd_stage1)[];
extern const uint16_t PRIV(ucd_stage2)[]; extern const uint16_t PRIV(ucd_stage2)[];
extern const uint32_t PRIV(ucp_gbtable)[]; extern const uint32_t PRIV(ucp_gbtable)[];

View File

@ -41,6 +41,20 @@ const uint32_t PRIV(ucd_caseless_sets)[] = {0};
const char *PRIV(unicode_version) = "8.0.0"; const char *PRIV(unicode_version) = "8.0.0";
/* If the 32-bit library is run in non-UTF mode, character values
greater than 0x10ffff (the Unicode maximum) may be encountered. For these we
set up a special record that describes an unassigned (Cn) character in the
Common script. */
#if PCRE2_CODE_UNIT_WIDTH == 32
const ucd_record PRIV(dummy_ucd_record)[] = {{
ucp_Common, /* script */
ucp_Cn, /* type unassigned */
ucp_gbOther, /* grapheme break property */
0, /* case set */
0, /* other case */
}};
#endif
/* When recompiling tables with a new Unicode version, please check the /* When recompiling tables with a new Unicode version, please check the
types in this structure definition from pcre2_internal.h (the actual types in this structure definition from pcre2_internal.h (the actual
field names will be different): field names will be different):

View File

@ -360,4 +360,7 @@
/[\s[:^ascii:]]/B,ucp /[\s[:^ascii:]]/B,ucp
/\pP/ucp
\x{7fffffff}\=no_jit
# End of testinput12 # End of testinput12

View File

@ -1415,4 +1415,10 @@ No match
End End
------------------------------------------------------------------ ------------------------------------------------------------------
/\pP/ucp
\x{7fffffff}\=no_jit
** Character \x{7fffffff} is greater than 0xffff and UTF-16 mode is not enabled.
** Truncation will probably give the wrong result.
No match
# End of testinput12 # End of testinput12

View File

@ -1409,4 +1409,8 @@ No match
End End
------------------------------------------------------------------ ------------------------------------------------------------------
/\pP/ucp
\x{7fffffff}\=no_jit
No match
# End of testinput12 # End of testinput12