From 149aa29209ad9cec64314484e46a37d92f210305 Mon Sep 17 00:00:00 2001
From: "Philip.Hazel" <Philip.Hazel@gmail.com>
Date: Fri, 12 Jun 2015 16:25:23 +0000
Subject: [PATCH] Fix \a and \e in pcre2test, and \a in pcre2_compile, on
 EBCDIC platforms.

---
 ChangeLog            |  6 ++++
 src/pcre2_compile.c  |  4 +--
 src/pcre2_internal.h | 68 +++++++++++++++++++++++++-------------------
 src/pcre2test.c      |  4 +--
 4 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 37b565d..766f6b2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -155,6 +155,12 @@ being treated as a literal 'l' instead of causing an error.
 an empty string was repeated, it was not identified as matching an empty string 
 itself. For example: /^(?:(?(1)x|)+)+$()/.
 
+40. In an EBCDIC environment, pcre2test was mishandling the escape sequences
+\a and \e in test subject lines.
+
+41. In an EBCDIC environment, \a in a pattern was converted to the ASCII value
+instead of the EBCDIC value.
+
 
 Version 10.10 06-March-2015
 ---------------------------
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 80c2d08..9ad36d0 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -296,7 +296,7 @@ static const short int escapes[] = {
      -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
      CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
      CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
-     CHAR_GRAVE_ACCENT,       7,
+     CHAR_GRAVE_ACCENT,       ESC_a,
      -ESC_b,                  0,
      -ESC_d,                  ESC_e,
      ESC_f,                   0,
@@ -328,7 +328,7 @@ because it is defined as 'a', which of course picks up the ASCII value. */
 #endif
 
 static const short int escapes[] = {
-/*  80 */            7, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
+/*  80 */        ESC_a, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
 /*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0,
 /*  90 */     0,     0, -ESC_k,       0,      0, ESC_n,      0, -ESC_p,
 /*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,
diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
index e2a9252..c6d1427 100644
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@@ -1192,31 +1192,6 @@ only. */
 
 /* -------------------- Definitions for compiled patterns -------------------*/
 
-/* Escape items that are just an encoding of a particular data value. */
-
-#ifndef ESC_e
-#define ESC_e CHAR_ESC
-#endif
-
-#ifndef ESC_f
-#define ESC_f CHAR_FF
-#endif
-
-#ifndef ESC_n
-#define ESC_n CHAR_LF
-#endif
-
-#ifndef ESC_r
-#define ESC_r CHAR_CR
-#endif
-
-/* We can't officially use ESC_t because it is a POSIX reserved identifier
-(presumably because of all the others like size_t). */
-
-#ifndef ESC_tee
-#define ESC_tee CHAR_HT
-#endif
-
 /* Codes for different types of Unicode property */
 
 #define PT_ANY        0    /* Any property - matches all chars */
@@ -1255,13 +1230,46 @@ contain characters with values greater than 255. */
 #define XCL_PROP      3    /* Unicode property (2-byte property code follows) */
 #define XCL_NOTPROP   4    /* Unicode inverted property (ditto) */
 
+/* Escape items that are just an encoding of a particular data value. These
+appear in the escapes[] table in pcre2_compile.c as positive numbers. */
+
+#ifndef ESC_a
+#define ESC_a CHAR_BEL
+#endif
+
+#ifndef ESC_e
+#define ESC_e CHAR_ESC
+#endif
+
+#ifndef ESC_f
+#define ESC_f CHAR_FF
+#endif
+
+#ifndef ESC_n
+#define ESC_n CHAR_LF
+#endif
+
+#ifndef ESC_r
+#define ESC_r CHAR_CR
+#endif
+
+/* We can't officially use ESC_t because it is a POSIX reserved identifier
+(presumably because of all the others like size_t). */
+
+#ifndef ESC_tee
+#define ESC_tee CHAR_HT
+#endif
+
 /* These are escaped items that aren't just an encoding of a particular data
 value such as \n. They must have non-zero values, as check_escape() returns 0
-for a data character.  Also, they must appear in the same order as in the
-opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
-corresponds to "." in DOTALL mode rather than an escape sequence. It is also
-used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
-non-DOTALL mode, "." behaves like \N.
+for a data character. In the escapes[] table in pcre2_compile.c their values
+are negated in order to distinguish them from data values.
+
+They must appear here in the same order as in the opcode definitions below, up
+to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL
+mode rather than an escape sequence. It is also used for [^] in JavaScript
+compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
+like \N.
 
 The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
 when PCRE_UCP is set and replacement of \d etc by \p sequences is required.
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 97ba5bb..1759a22 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -5181,9 +5181,9 @@ while ((c = *p++) != 0)
   else switch ((c = *p++))
     {
     case '\\': break;
-    case 'a': c =    7; break;
+    case 'a': c = CHAR_BEL; break;
     case 'b': c = '\b'; break;
-    case 'e': c =   27; break;
+    case 'e': c = CHAR_ESC; break;
     case 'f': c = '\f'; break;
     case 'n': c = '\n'; break;
     case 'r': c = '\r'; break;