Convert the special "EBCDIC on an ASCII system" test.

This commit is contained in:
Philip.Hazel 2014-10-26 18:00:19 +00:00
parent 69530d5b36
commit 3989f00461
3 changed files with 312 additions and 2 deletions

View File

@ -304,10 +304,17 @@ static const short int escapes[] = {
#else
/* This is the "abnormal" table for EBCDIC systems without UTF-8 support.
It runs from 'a' to '9'. */
It runs from 'a' to '9'. For some minimal testing of EBCDIC features, the code
is sometimes compiled on an ASCII system. In this case, we must not use CHAR_a
because it is defined as 'a', which of course picks up the ASCII value. */
/* Select the bounds of the escapes[] table. The character constant 'a' has
the value 0x81 only when the compiler's execution character set is EBCDIC;
any other value means the EBCDIC code is being built on an ASCII host purely
for testing, where CHAR_a would yield the ASCII code and so cannot be used. */
#if 'a' != 0x81
/* ASCII host: give the EBCDIC code points explicitly. */
#define ESCAPES_FIRST ((unsigned char)'\x81') /* 'a' in EBCDIC */
#define ESCAPES_LAST ((unsigned char)'\xf9') /* '9' in EBCDIC */
#else
/* Genuine EBCDIC host: the ordinary character names have the right values. */
#define ESCAPES_FIRST CHAR_a
#define ESCAPES_LAST CHAR_9
#endif
static const short int escapes[] = {
/* 80 */ 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
@ -328,7 +335,7 @@ static const short int escapes[] = {
/* F8 */ 0, 0
};
#endif
#endif /* EBCDIC */
/* Table of special "verbs" like (*PRUNE). This is a short table, so it is

121
testdata/testinputEBC vendored Normal file
View File

@ -0,0 +1,121 @@
# This is a specialized test for checking, when PCRE2 is compiled with the
# EBCDIC option but in an ASCII environment, that newline and white space
# functionality is working. It catches cases where explicit values such as 0x0a
# have been used instead of names like CHAR_LF. Needless to say, it is not a
# genuine EBCDIC test! In patterns, alphabetic characters that follow a
# backslash must be in EBCDIC code. In data, NL, NEL, LF, ESC, and DEL must be
# in EBCDIC, but can of course be specified as escapes.
# Test default newline and variations
/^A/m
ABC
12\x15ABC
/^A/m,newline=any
12\x15ABC
12\x0dABC
12\x0d\x15ABC
12\x25ABC
/^A/m,newline=anycrlf
12\x15ABC
12\x0dABC
12\x0d\x15ABC
** Fail
12\x25ABC
# Test \h
/^A\ˆ/
A B
# Test \H
/^A\È/
AB
** Fail
A B
# Test \R
/^A\Ù/
A\x15B
A\x0dB
A\x25B
A\x0bB
A\x0cB
** Fail
A B
# Test \v
/^A\¥/
A\x15B
A\x0dB
A\x25B
A\x0bB
A\x0cB
** Fail
A B
# Test \V
/^A\å/
A B
** Fail
A\x15B
A\x0dB
A\x25B
A\x0bB
A\x0cB
# For repeated items, use an atomic group so that the output is the same
# for DFA matching (otherwise it may show multiple matches).
# Test \h+
/^A(?>\ˆ+)/
A B
# Test \H+
/^A(?>\È+)/
AB
** Fail
A B
# Test \R+
/^A(?>\Ù+)/
A\x15B
A\x0dB
A\x25B
A\x0bB
A\x0cB
** Fail
A B
# Test \v+
/^A(?>\¥+)/
A\x15B
A\x0dB
A\x25B
A\x0bB
A\x0cB
** Fail
A B
# Test \V+
/^A(?>\å+)/
A B
** Fail
A\x15B
A\x0dB
A\x25B
A\x0bB
A\x0cB
# End

182
testdata/testoutputEBC vendored Normal file
View File

@ -0,0 +1,182 @@
# This is a specialized test for checking, when PCRE2 is compiled with the
# EBCDIC option but in an ASCII environment, that newline and white space
# functionality is working. It catches cases where explicit values such as 0x0a
# have been used instead of names like CHAR_LF. Needless to say, it is not a
# genuine EBCDIC test! In patterns, alphabetic characters that follow a
# backslash must be in EBCDIC code. In data, NL, NEL, LF, ESC, and DEL must be
# in EBCDIC, but can of course be specified as escapes.
# Test default newline and variations
/^A/m
ABC
0: A
12\x15ABC
0: A
/^A/m,newline=any
12\x15ABC
0: A
12\x0dABC
0: A
12\x0d\x15ABC
0: A
12\x25ABC
0: A
/^A/m,newline=anycrlf
12\x15ABC
0: A
12\x0dABC
0: A
12\x0d\x15ABC
0: A
** Fail
No match
12\x25ABC
No match
# Test \h
/^A\ˆ/
A B
0: A\x20
# Test \H
/^A\È/
AB
0: AB
** Fail
No match
A B
No match
# Test \R
/^A\Ù/
A\x15B
0: A\x15
A\x0dB
0: A\x0d
A\x25B
0: A\x25
A\x0bB
0: A\x0b
A\x0cB
0: A\x0c
** Fail
No match
A B
No match
# Test \v
/^A\¥/
A\x15B
0: A\x15
A\x0dB
0: A\x0d
A\x25B
0: A\x25
A\x0bB
0: A\x0b
A\x0cB
0: A\x0c
** Fail
No match
A B
No match
# Test \V
/^A\å/
A B
0: A\x20
** Fail
No match
A\x15B
No match
A\x0dB
No match
A\x25B
No match
A\x0bB
No match
A\x0cB
No match
# For repeated items, use an atomic group so that the output is the same
# for DFA matching (otherwise it may show multiple matches).
# Test \h+
/^A(?>\ˆ+)/
A B
0: A\x20
# Test \H+
/^A(?>\È+)/
AB
0: AB
** Fail
No match
A B
No match
# Test \R+
/^A(?>\Ù+)/
A\x15B
0: A\x15
A\x0dB
0: A\x0d
A\x25B
0: A\x25
A\x0bB
0: A\x0b
A\x0cB
0: A\x0c
** Fail
No match
A B
No match
# Test \v+
/^A(?>\¥+)/
A\x15B
0: A\x15
A\x0dB
0: A\x0d
A\x25B
0: A\x25
A\x0bB
0: A\x0b
A\x0cB
0: A\x0c
** Fail
No match
A B
No match
# Test \V+
/^A(?>\å+)/
A B
0: A\x20B
** Fail
No match
A\x15B
No match
A\x0dB
No match
A\x25B
No match
A\x0bB
No match
A\x0cB
No match
# End