Fix \a and \e in pcre2test, and \a in pcre2_compile, on EBCDIC platforms.

This commit is contained in:
Philip.Hazel 2015-06-12 16:25:23 +00:00
parent 1d44822484
commit 149aa29209
4 changed files with 48 additions and 34 deletions

View File

@ -155,6 +155,12 @@ being treated as a literal 'l' instead of causing an error.
an empty string was repeated, it was not identified as matching an empty string an empty string was repeated, it was not identified as matching an empty string
itself. For example: /^(?:(?(1)x|)+)+$()/. itself. For example: /^(?:(?(1)x|)+)+$()/.
40. In an EBCDIC environment, pcre2test was mishandling the escape sequences
\a and \e in test subject lines.
41. In an EBCDIC environment, \a in a pattern was converted to the ASCII
value instead of the EBCDIC value.
Version 10.10 06-March-2015 Version 10.10 06-March-2015
--------------------------- ---------------------------

View File

@ -296,7 +296,7 @@ static const short int escapes[] = {
-ESC_Z, CHAR_LEFT_SQUARE_BRACKET, -ESC_Z, CHAR_LEFT_SQUARE_BRACKET,
CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET, CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET,
CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE, CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE,
CHAR_GRAVE_ACCENT, 7, CHAR_GRAVE_ACCENT, ESC_a,
-ESC_b, 0, -ESC_b, 0,
-ESC_d, ESC_e, -ESC_d, ESC_e,
ESC_f, 0, ESC_f, 0,
@ -328,7 +328,7 @@ because it is defined as 'a', which of course picks up the ASCII value. */
#endif #endif
static const short int escapes[] = { static const short int escapes[] = {
/* 80 */ 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, /* 80 */ ESC_a, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0, /* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0,
/* 90 */ 0, 0, -ESC_k, 0, 0, ESC_n, 0, -ESC_p, /* 90 */ 0, 0, -ESC_k, 0, 0, ESC_n, 0, -ESC_p,
/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0, /* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,

View File

@ -1192,31 +1192,6 @@ only. */
/* -------------------- Definitions for compiled patterns -------------------*/ /* -------------------- Definitions for compiled patterns -------------------*/
/* Escape items that are just an encoding of a particular data value. */
#ifndef ESC_e
#define ESC_e CHAR_ESC
#endif
#ifndef ESC_f
#define ESC_f CHAR_FF
#endif
#ifndef ESC_n
#define ESC_n CHAR_LF
#endif
#ifndef ESC_r
#define ESC_r CHAR_CR
#endif
/* We can't officially use ESC_t because it is a POSIX reserved identifier
(presumably because of all the others like size_t). */
#ifndef ESC_tee
#define ESC_tee CHAR_HT
#endif
/* Codes for different types of Unicode property */ /* Codes for different types of Unicode property */
#define PT_ANY 0 /* Any property - matches all chars */ #define PT_ANY 0 /* Any property - matches all chars */
@ -1255,13 +1230,46 @@ contain characters with values greater than 255. */
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ #define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ #define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
/* Escape items that are just an encoding of a particular data value. These
appear in the escapes[] table in pcre2_compile.c as positive numbers. */
#ifndef ESC_a
#define ESC_a CHAR_BEL
#endif
#ifndef ESC_e
#define ESC_e CHAR_ESC
#endif
#ifndef ESC_f
#define ESC_f CHAR_FF
#endif
#ifndef ESC_n
#define ESC_n CHAR_LF
#endif
#ifndef ESC_r
#define ESC_r CHAR_CR
#endif
/* We can't officially use ESC_t because it is a POSIX reserved identifier
(presumably because of all the others like size_t). */
#ifndef ESC_tee
#define ESC_tee CHAR_HT
#endif
/* These are escaped items that aren't just an encoding of a particular data /* These are escaped items that aren't just an encoding of a particular data
value such as \n. They must have non-zero values, as check_escape() returns 0 value such as \n. They must have non-zero values, as check_escape() returns 0
for a data character. Also, they must appear in the same order as in the for a data character. In the escapes[] table in pcre2_compile.c their values
opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it are negated in order to distinguish them from data values.
corresponds to "." in DOTALL mode rather than an escape sequence. It is also
used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In They must appear here in the same order as in the opcode definitions below, up
non-DOTALL mode, "." behaves like \N. to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL
mode rather than an escape sequence. It is also used for [^] in JavaScript
compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
like \N.
The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc. The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
when PCRE2_UCP is set and replacement of \d etc by \p sequences is required. when PCRE2_UCP is set and replacement of \d etc by \p sequences is required.

View File

@ -5181,9 +5181,9 @@ while ((c = *p++) != 0)
else switch ((c = *p++)) else switch ((c = *p++))
{ {
case '\\': break; case '\\': break;
case 'a': c = 7; break; case 'a': c = CHAR_BEL; break;
case 'b': c = '\b'; break; case 'b': c = '\b'; break;
case 'e': c = 27; break; case 'e': c = CHAR_ESC; break;
case 'f': c = '\f'; break; case 'f': c = '\f'; break;
case 'n': c = '\n'; break; case 'n': c = '\n'; break;
case 'r': c = '\r'; break; case 'r': c = '\r'; break;