diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 062a7de..78182b2 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -304,10 +304,17 @@ static const short int escapes[] = { #else /* This is the "abnormal" table for EBCDIC systems without UTF-8 support. -It runs from 'a' to '9'. */ +It runs from 'a' to '9'. For some minimal testing of EBCDIC features, the code +is sometimes compiled on an ASCII system. In this case, we must not use CHAR_a +because it is defined as 'a', which of course picks up the ASCII value. */ +#if 'a' == 0x81 /* Check for a real EBCDIC environment */ #define ESCAPES_FIRST CHAR_a #define ESCAPES_LAST CHAR_9 +#else /* Testing in an ASCII environment */ +#define ESCAPES_FIRST ((unsigned char)'\x81') /* EBCDIC 'a' */ +#define ESCAPES_LAST ((unsigned char)'\xf9') /* EBCDIC '9' */ +#endif static const short int escapes[] = { /* 80 */ 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, @@ -328,7 +335,7 @@ static const short int escapes[] = { /* F8 */ 0, 0 }; -#endif +#endif /* EBCDIC */ /* Table of special "verbs" like (*PRUNE). This is a short table, so it is diff --git a/testdata/testinputEBC b/testdata/testinputEBC new file mode 100644 index 0000000..e3f1154 --- /dev/null +++ b/testdata/testinputEBC @@ -0,0 +1,121 @@ +# This is a specialized test for checking, when PCRE2 is compiled with the +# EBCDIC option but in an ASCII environment, that newline and white space +# functionality is working. It catches cases where explicit values such as 0x0a +# have been used instead of names like CHAR_LF. Needless to say, it is not a +# genuine EBCDIC test! In patterns, alphabetic characters that follow a +# backslash must be in EBCDIC code. In data, NL, NEL, LF, ESC, and DEL must be +# in EBCDIC, but can of course be specified as escapes. + +# Test default newline and variations + +/^A/m + ABC + 12\x15ABC + +/^A/m,newline=any + 12\x15ABC + 12\x0dABC + 12\x0d\x15ABC + 12\x25ABC + +/^A/m,newline=anycrlf + 12\x15ABC + 12\x0dABC + 12\x0d\x15ABC + ** Fail + 12\x25ABC + +# Test \h + +/^A\ˆ/ + A B + +# Test \H + +/^A\È/ + AB + ** Fail + A B + +# Test \R + +/^A\Ù/ + A\x15B + A\x0dB + A\x25B + A\x0bB + A\x0cB + ** Fail + A B + +# Test \v + +/^A\¥/ + A\x15B + A\x0dB + A\x25B + A\x0bB + A\x0cB + ** Fail + A B + +# Test \V + +/^A\å/ + A B + ** Fail + A\x15B + A\x0dB + A\x25B + A\x0bB + A\x0cB + +# For repeated items, use an atomic group so that the output is the same +# for DFA matching (otherwise it may show multiple matches). + +# Test \h+ + +/^A(?>\ˆ+)/ + A B + +# Test \H+ + +/^A(?>\È+)/ + AB + ** Fail + A B + +# Test \R+ + +/^A(?>\Ù+)/ + A\x15B + A\x0dB + A\x25B + A\x0bB + A\x0cB + ** Fail + A B + +# Test \v+ + +/^A(?>\¥+)/ + A\x15B + A\x0dB + A\x25B + A\x0bB + A\x0cB + ** Fail + A B + +# Test \V+ + +/^A(?>\å+)/ + A B + ** Fail + A\x15B + A\x0dB + A\x25B + A\x0bB + A\x0cB + +# End diff --git a/testdata/testoutputEBC b/testdata/testoutputEBC new file mode 100644 index 0000000..7904d22 --- /dev/null +++ b/testdata/testoutputEBC @@ -0,0 +1,182 @@ +# This is a specialized test for checking, when PCRE2 is compiled with the +# EBCDIC option but in an ASCII environment, that newline and white space +# functionality is working. It catches cases where explicit values such as 0x0a +# have been used instead of names like CHAR_LF. Needless to say, it is not a +# genuine EBCDIC test! In patterns, alphabetic characters that follow a +# backslash must be in EBCDIC code. In data, NL, NEL, LF, ESC, and DEL must be +# in EBCDIC, but can of course be specified as escapes. + +# Test default newline and variations + +/^A/m + ABC + 0: A + 12\x15ABC + 0: A + +/^A/m,newline=any + 12\x15ABC + 0: A + 12\x0dABC + 0: A + 12\x0d\x15ABC + 0: A + 12\x25ABC + 0: A + +/^A/m,newline=anycrlf + 12\x15ABC + 0: A + 12\x0dABC + 0: A + 12\x0d\x15ABC + 0: A + ** Fail +No match + 12\x25ABC +No match + +# Test \h + +/^A\ˆ/ + A B + 0: A\x20 + +# Test \H + +/^A\È/ + AB + 0: AB + ** Fail +No match + A B +No match + +# Test \R + +/^A\Ù/ + A\x15B + 0: A\x15 + A\x0dB + 0: A\x0d + A\x25B + 0: A\x25 + A\x0bB + 0: A\x0b + A\x0cB + 0: A\x0c + ** Fail +No match + A B +No match + +# Test \v + +/^A\¥/ + A\x15B + 0: A\x15 + A\x0dB + 0: A\x0d + A\x25B + 0: A\x25 + A\x0bB + 0: A\x0b + A\x0cB + 0: A\x0c + ** Fail +No match + A B +No match + +# Test \V + +/^A\å/ + A B + 0: A\x20 + ** Fail +No match + A\x15B +No match + A\x0dB +No match + A\x25B +No match + A\x0bB +No match + A\x0cB +No match + +# For repeated items, use an atomic group so that the output is the same +# for DFA matching (otherwise it may show multiple matches). + +# Test \h+ + +/^A(?>\ˆ+)/ + A B + 0: A\x20 + +# Test \H+ + +/^A(?>\È+)/ + AB + 0: AB + ** Fail +No match + A B +No match + +# Test \R+ + +/^A(?>\Ù+)/ + A\x15B + 0: A\x15 + A\x0dB + 0: A\x0d + A\x25B + 0: A\x25 + A\x0bB + 0: A\x0b + A\x0cB + 0: A\x0c + ** Fail +No match + A B +No match + +# Test \v+ + +/^A(?>\¥+)/ + A\x15B + 0: A\x15 + A\x0dB + 0: A\x0d + A\x25B + 0: A\x25 + A\x0bB + 0: A\x0b + A\x0cB + 0: A\x0c + ** Fail +No match + A B +No match + +# Test \V+ + +/^A(?>\å+)/ + A B + 0: A\x20B + ** Fail +No match + A\x15B +No match + A\x0dB +No match + A\x25B +No match + A\x0bB +No match + A\x0cB +No match + +# End