Fix \a and \e in pcre2test, and \a in pcre2_compile, on EBCDIC platforms.

2015-06-12 16:25:23 +00:00 · 2015-06-12 16:25:23 +00:00 · 149aa29209
parent 1d44822484
commit 149aa29209
4 changed files with 48 additions and 34 deletions
--- a/6
+++ b/6
@@ -155,6 +155,12 @@ being treated as a literal 'l' instead of causing an error.
 an empty string was repeated, it was not identified as matching an empty string 
 itself. For example: /^(?:(?(1)x|)+)+$()/.
 40. In an EBCDIC environment, pcre2test was mishandling the escape sequences
 \a and \e in test subject lines. 
 41. In an EBCDIC environment, \a in a pattern was converted to the ASCII value
 instead of the EBCDIC value.
 Version 10.10 06-March-2015
 ---------------------------
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -296,7 +296,7 @@ static const short int escapes[] = {
     -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
     CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
     CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
-     CHAR_GRAVE_ACCENT,       7,
+     CHAR_GRAVE_ACCENT,       ESC_a,
     -ESC_b,                  0,
     -ESC_d,                  ESC_e,
     ESC_f,                   0,
@@ -328,7 +328,7 @@ because it is defined as 'a', which of course picks up the ASCII value. */
 #endif
 static const short int escapes[] = {
-/*  80 */            7, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
+/*  80 */        ESC_a, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
 /*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0,
 /*  90 */     0,     0, -ESC_k,       0,      0, ESC_n,      0, -ESC_p,
 /*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@@ -1192,31 +1192,6 @@ only. */
 /* -------------------- Definitions for compiled patterns -------------------*/
 /* Escape items that are just an encoding of a particular data value. */
 #ifndef ESC_e
 #define ESC_e CHAR_ESC
 #endif
 #ifndef ESC_f
 #define ESC_f CHAR_FF
 #endif
 #ifndef ESC_n
 #define ESC_n CHAR_LF
 #endif
 #ifndef ESC_r
 #define ESC_r CHAR_CR
 #endif
 /* We can't officially use ESC_t because it is a POSIX reserved identifier
 (presumably because of all the others like size_t). */
 #ifndef ESC_tee
 #define ESC_tee CHAR_HT
 #endif
 /* Codes for different types of Unicode property */
 #define PT_ANY        0    /* Any property - matches all chars */
@@ -1255,13 +1230,46 @@ contain characters with values greater than 255. */
 #define XCL_PROP      3    /* Unicode property (2-byte property code follows) */
 #define XCL_NOTPROP   4    /* Unicode inverted property (ditto) */
 /* Escape items that are just an encoding of a particular data value. These 
 appear in the escapes[] table in pcre2_compile.c as positive numbers. */
 #ifndef ESC_a
 #define ESC_a CHAR_BEL
 #endif
 #ifndef ESC_e
 #define ESC_e CHAR_ESC
 #endif
 #ifndef ESC_f
 #define ESC_f CHAR_FF
 #endif
 #ifndef ESC_n
 #define ESC_n CHAR_LF
 #endif
 #ifndef ESC_r
 #define ESC_r CHAR_CR
 #endif
 /* We can't officially use ESC_t because it is a POSIX reserved identifier
 (presumably because of all the others like size_t). */
 #ifndef ESC_tee
 #define ESC_tee CHAR_HT
 #endif
 /* These are escaped items that aren't just an encoding of a particular data
 value such as \n. They must have non-zero values, as check_escape() returns 0
-for a data character.  Also, they must appear in the same order as in the
+for a data character. In the escapes[] table in pcre2_compile.c their values 
-opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
+are negated in order to distinguish them from data values.
-corresponds to "." in DOTALL mode rather than an escape sequence. It is also
+
-used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
+They must appear here in the same order as in the opcode definitions below, up
-non-DOTALL mode, "." behaves like \N.
+to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL
 mode rather than an escape sequence. It is also used for [^] in JavaScript
 compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
 like \N.
 The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
 when PCRE_UCP is set and replacement of \d etc by \p sequences is required.
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -5181,9 +5181,9 @@ while ((c = *p++) != 0)
  else switch ((c = *p++))
    {
    case '\\': break;
-    case 'a': c =    7; break;
+    case 'a': c = CHAR_BEL; break;
    case 'b': c = '\b'; break;
-    case 'e': c =   27; break;
+    case 'e': c = CHAR_ESC; break;
    case 'f': c = '\f'; break;
    case 'n': c = '\n'; break;
    case 'r': c = '\r'; break;