Fix \a and \e in pcre2test, and \a in pcre2_compile, on EBCDIC platforms.

This commit is contained in:
Philip.Hazel 2015-06-12 16:25:23 +00:00
parent 1d44822484
commit 149aa29209
4 changed files with 48 additions and 34 deletions

View File

@ -155,6 +155,12 @@ being treated as a literal 'l' instead of causing an error.
an empty string was repeated, it was not identified as matching an empty string
itself. For example: /^(?:(?(1)x|)+)+$()/.
40. In an EBCDIC environment, pcre2test was mishandling the escape sequences
\a and \e in test subject lines.
41. In an EBCDIC environment, \a in a pattern was converted to the ASCII
value instead of the EBCDIC value.
Version 10.10 06-March-2015
---------------------------

View File

@ -296,7 +296,7 @@ static const short int escapes[] = {
-ESC_Z, CHAR_LEFT_SQUARE_BRACKET,
CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET,
CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE,
CHAR_GRAVE_ACCENT, 7,
CHAR_GRAVE_ACCENT, ESC_a,
-ESC_b, 0,
-ESC_d, ESC_e,
ESC_f, 0,
@ -328,7 +328,7 @@ because it is defined as 'a', which of course picks up the ASCII value. */
#endif
static const short int escapes[] = {
/* 80 */ 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
/* 80 */ ESC_a, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0,
/* 90 */ 0, 0, -ESC_k, 0, 0, ESC_n, 0, -ESC_p,
/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,

View File

@ -1192,31 +1192,6 @@ only. */
/* -------------------- Definitions for compiled patterns -------------------*/
/* Escape items that are just an encoding of a particular data value. */
#ifndef ESC_e
#define ESC_e CHAR_ESC
#endif
#ifndef ESC_f
#define ESC_f CHAR_FF
#endif
#ifndef ESC_n
#define ESC_n CHAR_LF
#endif
#ifndef ESC_r
#define ESC_r CHAR_CR
#endif
/* We can't officially use ESC_t because it is a POSIX reserved identifier
(presumably because of all the others like size_t). */
#ifndef ESC_tee
#define ESC_tee CHAR_HT
#endif
/* Codes for different types of Unicode property */
#define PT_ANY 0 /* Any property - matches all chars */
@ -1255,13 +1230,46 @@ contain characters with values greater than 255. */
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
/* Escape items that are just an encoding of a particular data value. These
appear in the escapes[] table in pcre2_compile.c as positive numbers.

Each definition below is wrapped in #ifndef, so it only supplies a default: if
the name has already been defined before this point, that earlier definition is
kept. The defaults are expressed through the CHAR_xxx macros rather than as
numeric literals, so the value follows whatever character coding those macros
describe (NOTE(review): the CHAR_xxx macros are defined outside this section;
confirm that they carry the EBCDIC values in an EBCDIC build). */
/* Data value for the \a escape. */
#ifndef ESC_a
#define ESC_a CHAR_BEL
#endif
/* Data value for the \e escape. */
#ifndef ESC_e
#define ESC_e CHAR_ESC
#endif
/* Data value for the \f escape. */
#ifndef ESC_f
#define ESC_f CHAR_FF
#endif
/* Data value for the \n escape. */
#ifndef ESC_n
#define ESC_n CHAR_LF
#endif
/* Data value for the \r escape. */
#ifndef ESC_r
#define ESC_r CHAR_CR
#endif
/* We can't officially use ESC_t because it is a POSIX reserved identifier
(presumably because of all the others like size_t). ESC_tee is therefore the
name used for the data value of the \t escape. */
#ifndef ESC_tee
#define ESC_tee CHAR_HT
#endif
/* These are escaped items that aren't just an encoding of a particular data
value such as \n. They must have non-zero values, as check_escape() returns 0
for a data character. Also, they must appear in the same order as in the
opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
corresponds to "." in DOTALL mode rather than an escape sequence. It is also
used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
non-DOTALL mode, "." behaves like \N.
for a data character. In the escapes[] table in pcre2_compile.c their values
are negated in order to distinguish them from data values.
They must appear here in the same order as in the opcode definitions below, up
to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL
mode rather than an escape sequence. It is also used for [^] in JavaScript
compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
like \N.
The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
when PCRE2_UCP is set and replacement of \d etc by \p sequences is required.

View File

@ -5181,9 +5181,9 @@ while ((c = *p++) != 0)
else switch ((c = *p++))
{
case '\\': break;
case 'a': c = 7; break;
case 'a': c = CHAR_BEL; break;
case 'b': c = '\b'; break;
case 'e': c = 27; break;
case 'e': c = CHAR_ESC; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;